" }
+
+ override string getAPrimaryQlClass() { result = "ErrorExpr" }
+}
+
+/**
+ * A Microsoft C/C++ `__assume` expression.
+ *
+ * In contrast to `assert`, `__assume` is evaluated at compile time and
+ * serves as a hint to the optimizer.
+ * ```
+ * __assume(ptr < end_buf);
+ * ```
+ */
+class AssumeExpr extends Expr, @assume {
+  override string getAPrimaryQlClass() { result = "AssumeExpr" }
+
+  override string toString() { result = "__assume(...)" }
+
+  /** Gets the operand of this `__assume` expression (its child at index 0). */
+  Expr getOperand() { this.hasChild(result, 0) }
+}
+
+/**
+ * A C/C++ comma expression.
+ * ```
+ * c = (compute1(), compute2(), resulting_value);
+ * ```
+ */
+class CommaExpr extends Expr, @commaexpr {
+  override string getAPrimaryQlClass() { result = "CommaExpr" }
+
+  override string toString() { result = "... , ..." }
+
+  override int getPrecedence() { result = 0 }
+
+  /**
+   * Gets the left operand (child 0); its value is discarded.
+   */
+  Expr getLeftOperand() { this.hasChild(result, 0) }
+
+  /**
+   * Gets the right operand (child 1); its value is the value of the comma
+   * expression as a whole.
+   */
+  Expr getRightOperand() { this.hasChild(result, 1) }
+
+  // A comma expression may be impure as soon as either operand may be impure.
+  override predicate mayBeImpure() {
+    exists(Expr operand |
+      operand = this.getLeftOperand() or operand = this.getRightOperand()
+    |
+      operand.mayBeImpure()
+    )
+  }
+
+  // Same rule as `mayBeImpure`, applied to global impurity.
+  override predicate mayBeGloballyImpure() {
+    exists(Expr operand |
+      operand = this.getLeftOperand() or operand = this.getRightOperand()
+    |
+      operand.mayBeGloballyImpure()
+    )
+  }
+}
+
+/**
+ * A C/C++ address-of expression.
+ * ```
+ * int *ptr = &var;
+ * ```
+ */
+class AddressOfExpr extends UnaryOperation, @address_of {
+  override string getAPrimaryQlClass() { result = "AddressOfExpr" }
+
+  override string getOperator() { result = "&" }
+
+  override int getPrecedence() { result = 16 }
+
+  /** Gets the function or variable whose address is taken. */
+  Declaration getAddressable() {
+    exists(Access access |
+      // The operand is itself an access to the declaration ...
+      access = this.getOperand()
+      or
+      // ... or we are taking the address of a reference variable, in which
+      // case the access sits underneath a reference dereference.
+      access = this.getOperand().(ReferenceDereferenceExpr).getChild(0)
+    |
+      result = access.getTarget()
+    )
+  }
+
+  // Taking an address is only as impure as evaluating the operand.
+  override predicate mayBeImpure() { this.getOperand().mayBeImpure() }
+
+  override predicate mayBeGloballyImpure() { this.getOperand().mayBeGloballyImpure() }
+}
+
+/**
+ * An implicit conversion from type `T` to type `T &`.
+ *
+ * This typically arises when an expression of type `T` initializes a variable or
+ * parameter of type `T &`. It plays the role for reference types that `AddressOfExpr`
+ * plays for pointer types, except that this class is a conversion rather than an
+ * operation, and therefore does not occur in the main AST.
+ * ```
+ * int &var_ref = var;
+ * ```
+ */
+class ReferenceToExpr extends Conversion, @reference_to {
+  override string getAPrimaryQlClass() { result = "ReferenceToExpr" }
+
+  override string toString() { result = "(reference to)" }
+
+  override int getPrecedence() { result = 16 }
+}
+
+/**
+ * An instance of the built-in unary `operator *`, that is, a pointer dereference.
+ *
+ * For user-defined overloads of `operator *`, see `OverloadedPointerDereferenceExpr`.
+ * ```
+ * int var = *varptr;
+ * ```
+ */
+class PointerDereferenceExpr extends UnaryOperation, @indirect {
+  override string getAPrimaryQlClass() { result = "PointerDereferenceExpr" }
+
+  /**
+   * DEPRECATED: Use getOperand() instead.
+   *
+   * Gets the expression that is being dereferenced.
+   */
+  deprecated Expr getExpr() { result = this.getOperand() }
+
+  override string getOperator() { result = "*" }
+
+  override int getPrecedence() { result = 16 }
+
+  /**
+   * Holds if the fully converted child 0 has a derived type whose base type
+   * is volatile. Such a dereference is treated as impure below.
+   */
+  private predicate dereferencesVolatile() {
+    this.getChild(0).getFullyConverted().getType().(DerivedType).getBaseType().isVolatile()
+  }
+
+  override predicate mayBeImpure() {
+    this.getChild(0).mayBeImpure() or
+    this.dereferencesVolatile()
+  }
+
+  override predicate mayBeGloballyImpure() {
+    this.getChild(0).mayBeGloballyImpure() or
+    this.dereferencesVolatile()
+  }
+}
+
+/**
+ * An implicit conversion from type `T &` to type `T`.
+ *
+ * This typically arises when a variable of type `T &` is used in a context that
+ * expects type `T`. It plays the role for reference types that `PointerDereferenceExpr`
+ * plays for pointer types, except that this class is a conversion rather than an
+ * operation, and therefore does not occur in the main AST.
+ * ```
+ * float &f_ref = get_ref();
+ * float f = f_ref;
+ * ```
+ */
+class ReferenceDereferenceExpr extends Conversion, @ref_indirect {
+  override string getAPrimaryQlClass() { result = "ReferenceDereferenceExpr" }
+
+  override string toString() { result = "(reference dereference)" }
+}
+
+/**
+ * A C++ `new` or `new[]` expression.
+ */
+class NewOrNewArrayExpr extends Expr, @any_new_expr {
+  override int getPrecedence() { result = 16 }
+
+  /**
+   * Gets the `operator new` or `operator new[]` that allocates storage.
+   */
+  Function getAllocator() { expr_allocator(underlyingElement(this), unresolveElement(result), _) }
+
+  /**
+   * Holds if the allocation function is the version that expects an alignment
+   * argument of type `std::align_val_t`.
+   */
+  predicate hasAlignedAllocation() { expr_allocator(underlyingElement(this), _, 1) }
+
+  /**
+   * Gets the alignment argument passed to the allocation function, if any.
+   */
+  Expr getAlignmentArgument() {
+    this.hasAlignedAllocation() and
+    (
+      // If we have an allocator call, the alignment is the second argument to
+      // that call.
+      result = this.getAllocatorCall().getArgument(1)
+      or
+      // Otherwise, the alignment winds up as child number 3 of the `new`
+      // itself.
+      result = this.getChild(3)
+    )
+  }
+
+  /**
+   * Gets the call to a non-default `operator new` that allocates storage, if any.
+   *
+   * As a rule of thumb, there will be an allocator call precisely when the type
+   * being allocated has a custom `operator new`, or when an argument list appears
+   * after the `new` keyword and before the name of the type being allocated.
+   *
+   * In particular note that uses of placement-new and nothrow-new will have an
+   * allocator call.
+   */
+  FunctionCall getAllocatorCall() { result = this.getChild(0) }
+
+  /**
+   * Gets the `operator delete` that deallocates storage if the initialization
+   * throws an exception, if any.
+   */
+  Function getDeallocator() {
+    expr_deallocator(underlyingElement(this), unresolveElement(result), _)
+  }
+
+  /**
+   * Holds if the deallocation function expects a size argument.
+   */
+  predicate hasSizedDeallocation() {
+    exists(int form |
+      expr_deallocator(underlyingElement(this), _, form) and
+      form.bitAnd(1) != 0 // Bit zero is the "size" bit
+    )
+  }
+
+  /**
+   * Holds if the deallocation function expects an alignment argument.
+   */
+  predicate hasAlignedDeallocation() {
+    exists(int form |
+      expr_deallocator(underlyingElement(this), _, form) and
+      form.bitAnd(2) != 0 // Bit one is the "alignment" bit
+    )
+  }
+
+  /**
+   * Gets the type that is being allocated.
+   *
+   * For example, for `new int` the result is `int`.
+   * For `new int[5]` the result is `int[5]`.
+   */
+  Type getAllocatedType() { none() } // overridden in subclasses
+
+  /**
+   * Gets the pointer `p` if this expression is of the form `new(p) T...`.
+   * Invocations of this form are non-allocating `new` expressions that may
+   * call the constructor of `T` but will not allocate memory.
+   */
+  Expr getPlacementPointer() {
+    result =
+      this.getAllocatorCall()
+          .getArgument(this.getAllocator().(OperatorNewAllocationFunction).getPlacementArgument())
+  }
+
+  /**
+   * For `operator new`, this gets the call or expression that initializes the allocated object, if any.
+   *
+   * As examples, for `new int(4)`, this will be `4`, and for `new std::vector(4)`, this will
+   * be a call to the constructor `std::vector::vector(size_t)` with `4` as an argument.
+   *
+   * For `operator new[]`, this gets the call or expression that initializes the first element of the
+   * array, if any.
+   *
+   * This will either be a call to the default constructor for the array's element type (as
+   * in `new std::string[10]`), or a literal zero for arrays of scalars which are zero-initialized
+   * due to extra parentheses (as in `new int[10]()`).
+   *
+   * At runtime, the constructor will be called once for each element in the array, but the
+   * constructor call only exists once in the AST.
+   */
+  final Expr getInitializer() { result = this.getChild(1) }
+}
+
+/**
+ * A C++ `new` (non-array) expression.
+ * ```
+ * Foo *ptr = new Foo(3);
+ * ```
+ */
+class NewExpr extends NewOrNewArrayExpr, @new_expr {
+  override string getAPrimaryQlClass() { result = "NewExpr" }
+
+  override string toString() { result = "new" }
+
+  /**
+   * Gets the allocated type, as recorded in the `new_allocated_type` relation.
+   *
+   * For example, for `new int` the result is `int`.
+   */
+  override Type getAllocatedType() {
+    new_allocated_type(underlyingElement(this), unresolveElement(result))
+  }
+}
+
+/**
+ * A C++ `new[]` (array) expression.
+ * ```
+ * Foo *foo = new Foo[]{1, 3, 5};
+ * Bar *bar = new Bar[5];
+ * ```
+ */
+class NewArrayExpr extends NewOrNewArrayExpr, @new_array_expr {
+  override string toString() { result = "new[]" }
+
+  override string getAPrimaryQlClass() { result = "NewArrayExpr" }
+
+  /**
+   * Gets the type that is being allocated.
+   *
+   * For example, for `new int[5]` the result is `int[5]`.
+   */
+  override Type getAllocatedType() {
+    new_array_allocated_type(underlyingElement(this), unresolveElement(result))
+  }
+
+  /**
+   * Gets the element type of the array being allocated.
+   *
+   * This is the base type of this expression's underlying type, which must be
+   * a pointer type for there to be a result.
+   */
+  Type getAllocatedElementType() {
+    result = this.getType().getUnderlyingType().(PointerType).getBaseType()
+  }
+
+  /**
+   * Gets the extent of the non-constant array dimension, if any.
+   *
+   * As examples, for `new char[n]` and `new char[n][10]`, this gives `n`, but for `new char[10]` this
+   * gives nothing, as the 10 is considered part of the type.
+   */
+  Expr getExtent() { result = this.getChild(2) }
+}
+
+/**
+ * A C++ `delete` (non-array) expression.
+ * ```
+ * delete ptr;
+ * ```
+ */
+class DeleteExpr extends Expr, @delete_expr {
+  override string toString() { result = "delete" }
+
+  override string getAPrimaryQlClass() { result = "DeleteExpr" }
+
+  override int getPrecedence() { result = 16 }
+
+  /**
+   * Gets the compile-time type of the object being deleted.
+   *
+   * This is the base type of the (fully converted, specifier-stripped) pointer
+   * type of the deleted expression.
+   */
+  Type getDeletedObjectType() {
+    result =
+      this.getExpr()
+          .getFullyConverted()
+          .getType()
+          .stripTopLevelSpecifiers()
+          .(PointerType)
+          .getBaseType()
+  }
+
+  /**
+   * Gets the call to a destructor that occurs prior to the object's memory being deallocated, if any.
+   */
+  DestructorCall getDestructorCall() { result = this.getChild(1) }
+
+  /**
+   * Gets the destructor to be called to destroy the object, if any.
+   */
+  Destructor getDestructor() { result = this.getDestructorCall().getTarget() }
+
+  /**
+   * Gets the `operator delete` that deallocates storage. Does not hold
+   * if the type being destroyed has a virtual destructor. In that case, the
+   * `operator delete` that will be called is determined at runtime based on the
+   * dynamic type of the object.
+   */
+  Function getDeallocator() {
+    expr_deallocator(underlyingElement(this), unresolveElement(result), _)
+  }
+
+  /**
+   * Holds if the deallocation function expects a size argument.
+   */
+  predicate hasSizedDeallocation() {
+    exists(int form |
+      expr_deallocator(underlyingElement(this), _, form) and
+      form.bitAnd(1) != 0 // Bit zero is the "size" bit
+    )
+  }
+
+  /**
+   * Holds if the deallocation function expects an alignment argument.
+   */
+  predicate hasAlignedDeallocation() {
+    exists(int form |
+      expr_deallocator(underlyingElement(this), _, form) and
+      form.bitAnd(2) != 0 // Bit one is the "alignment" bit
+    )
+  }
+
+  /**
+   * Gets the call to a non-default `operator delete` that deallocates storage, if any.
+   *
+   * Despite the predicate's name, the result is a call to a deallocation function.
+   * This will only be present when the type being deleted has a custom `operator delete`.
+   */
+  FunctionCall getAllocatorCall() { result = this.getChild(0) }
+
+  /**
+   * Gets the object being deleted.
+   *
+   * This is either child 3 of this expression or, when child 1 is present,
+   * the child of child 1 at index -1.
+   */
+  Expr getExpr() { result = this.getChild(3) or result = this.getChild(1).getChild(-1) }
+}
+
+/**
+ * A C++ `delete[]` (array) expression.
+ * ```
+ * delete[] arr;
+ * ```
+ */
+class DeleteArrayExpr extends Expr, @delete_array_expr {
+  override string toString() { result = "delete[]" }
+
+  override string getAPrimaryQlClass() { result = "DeleteArrayExpr" }
+
+  override int getPrecedence() { result = 16 }
+
+  /**
+   * Gets the element type of the array being deleted.
+   *
+   * This is the base type of the (fully converted, specifier-stripped) pointer
+   * type of the deleted expression.
+   */
+  Type getDeletedElementType() {
+    result =
+      this.getExpr()
+          .getFullyConverted()
+          .getType()
+          .stripTopLevelSpecifiers()
+          .(PointerType)
+          .getBaseType()
+  }
+
+  /**
+   * Gets the call to a destructor that occurs prior to the array's memory being deallocated, if any.
+   *
+   * At runtime, the destructor will be called once for each element in the array, but the
+   * destructor call only exists once in the AST.
+   */
+  DestructorCall getDestructorCall() { result = this.getChild(1) }
+
+  /**
+   * Gets the destructor to be called to destroy each element in the array, if any.
+   */
+  Destructor getDestructor() { result = this.getDestructorCall().getTarget() }
+
+  /**
+   * Gets the `operator delete[]` that deallocates storage.
+   */
+  Function getDeallocator() {
+    expr_deallocator(underlyingElement(this), unresolveElement(result), _)
+  }
+
+  /**
+   * Holds if the deallocation function expects a size argument.
+   */
+  predicate hasSizedDeallocation() {
+    exists(int form |
+      expr_deallocator(underlyingElement(this), _, form) and
+      form.bitAnd(1) != 0 // Bit zero is the "size" bit
+    )
+  }
+
+  /**
+   * Holds if the deallocation function expects an alignment argument.
+   */
+  predicate hasAlignedDeallocation() {
+    exists(int form |
+      expr_deallocator(underlyingElement(this), _, form) and
+      form.bitAnd(2) != 0 // Bit one is the "alignment" bit
+    )
+  }
+
+  /**
+   * Gets the call to a non-default `operator delete[]` that deallocates storage, if any.
+   *
+   * Despite the predicate's name, the result is a call to a deallocation function.
+   * This will only be present when the type being deleted has a custom `operator delete[]`.
+   */
+  FunctionCall getAllocatorCall() { result = this.getChild(0) }
+
+  /**
+   * Gets the array being deleted.
+   *
+   * This is either child 3 of this expression or, when child 1 is present,
+   * the child of child 1 at index -1.
+   */
+  Expr getExpr() { result = this.getChild(3) or result = this.getChild(1).getChild(-1) }
+}
+
+/**
+ * A compound statement enclosed in parentheses used as an expression (a GNU extension to C/C++).
+ * In the example below, `b` is the return value from the compound statement.
+ * ```
+ * int a = ({ int b = c + d; b; });
+ * ```
+ */
+class StmtExpr extends Expr, @expr_stmt {
+  override string toString() { result = "(statement expression)" }
+
+  /**
+   * Gets the statement enclosed by this `StmtExpr`, that is, the statement
+   * whose parent is this expression.
+   */
+  Stmt getStmt() { result.getParent() = this }
+
+  override string getAPrimaryQlClass() { result = "StmtExpr" }
+
+  /**
+   * Gets the result expression of the enclosed statement. For example,
+   * `a+b` is the result expression in this example:
+   *
+   * ```
+   * x = ({ dosomething(); a+b; });
+   * ```
+   */
+  Expr getResultExpr() { result = getStmtResultExpr(this.getStmt()) }
+}
+
+/**
+ * Gets the result expression of `stmt`: the expression of an expression
+ * statement, or (recursively) the result expression of the last statement
+ * of a block. (Helper predicate for `StmtExpr`.)
+ */
+private Expr getStmtResultExpr(Stmt stmt) {
+  exists(ExprStmt exprStmt | exprStmt = stmt | result = exprStmt.getExpr())
+  or
+  exists(BlockStmt block | block = stmt | result = getStmtResultExpr(block.getLastStmt()))
+}
+
+/**
+ * A C++ `this` expression.
+ */
+class ThisExpr extends Expr, @thisaccess {
+  override string getAPrimaryQlClass() { result = "ThisExpr" }
+
+  override string toString() { result = "this" }
+
+  // A `this` expression is never considered impure, locally or globally.
+  override predicate mayBeGloballyImpure() { none() }
+
+  override predicate mayBeImpure() { none() }
+}
+
+/**
+ * A code block expression, for example:
+ * ```
+ * ^ int (int x, int y) {return x + y;}
+ * ```
+ * Blocks are a language extension supported by Clang, and by Apple's
+ * branch of GCC.
+ */
+class BlockExpr extends Literal {
+  // A literal is a block expression exactly when `code_block` has a row for it.
+  BlockExpr() { code_block(underlyingElement(this), _) }
+
+  /**
+   * Gets the (anonymous) function associated with this code block expression.
+   */
+  Function getFunction() { code_block(underlyingElement(this), unresolveElement(result)) }
+
+  override string toString() { result = "^ { ... }" }
+}
+
+/**
+ * A C++ `throw` expression.
+ * ```
+ * throw Exc(2);
+ * ```
+ */
+class ThrowExpr extends Expr, @throw_expr {
+  override string getAPrimaryQlClass() { result = "ThrowExpr" }
+
+  override string toString() { result = "throw ..." }
+
+  override int getPrecedence() { result = 1 }
+
+  /**
+   * Gets the expression being thrown (child 0), if any. A `ReThrowExpr`
+   * has no such expression, so there is no result in that case.
+   */
+  Expr getExpr() { result = this.getChild(0) }
+}
+
+/**
+ * A C++ `throw` expression without an operand, which re-throws the exception
+ * currently being handled.
+ * ```
+ * throw;
+ * ```
+ */
+class ReThrowExpr extends ThrowExpr {
+  // A re-throw is recognized as a `throw` expression whose type is `void`.
+  ReThrowExpr() { this.getType() instanceof VoidType }
+
+  override string toString() { result = "re-throw exception " }
+
+  override string getAPrimaryQlClass() { result = "ReThrowExpr" }
+}
+
+/**
+ * A C++11 `noexcept` expression, which yields `true` if its subexpression is
+ * guaranteed not to `throw` exceptions. For example:
+ * ```
+ * if (noexcept(func_1() + func_2())) { }
+ * ```
+ */
+class NoExceptExpr extends Expr, @noexceptexpr {
+  override string getAPrimaryQlClass() { result = "NoExceptExpr" }
+
+  override string toString() { result = "noexcept(...)" }
+
+  /** Gets the expression inside this `noexcept` expression (child 0). */
+  Expr getExpr() { result = this.getChild(0) }
+}
+
+/**
+ * A C++17 fold expression. This will only appear in an uninstantiated template; any instantiations
+ * of the template will instead contain the sequence of expressions given by expanding the fold.
+ * ```
+ * template < typename... T >
+ * auto sum ( T... t ) { return ( t + ... + 0 ); }
+ * ```
+ */
+class FoldExpr extends Expr, @foldexpr {
+  // Renders one of four shapes, depending on unary/binary and left/right fold.
+  override string toString() {
+    exists(string op |
+      op = this.getOperatorString() and
+      if this.isUnaryFold()
+      then
+        if this.isLeftFold()
+        then result = "( ... " + op + " pack )"
+        else result = "( pack " + op + " ... )"
+      else
+        if this.isLeftFold()
+        then result = "( init " + op + " ... " + op + " pack )"
+        else result = "( pack " + op + " ... " + op + " init )"
+    )
+  }
+
+  override string getAPrimaryQlClass() { result = "FoldExpr" }
+
+  /** Gets the binary operator used in this fold expression, as a string. */
+  string getOperatorString() { fold(underlyingElement(this), result, _) }
+
+  /** Holds if this is a left-fold expression. */
+  predicate isLeftFold() { fold(underlyingElement(this), _, true) }
+
+  /** Holds if this is a right-fold expression. */
+  predicate isRightFold() { fold(underlyingElement(this), _, false) }
+
+  /** Holds if this is a unary fold expression, that is, it has exactly one child. */
+  predicate isUnaryFold() { this.getNumChild() = 1 }
+
+  /** Holds if this is a binary fold expression, that is, it has exactly two children. */
+  predicate isBinaryFold() { this.getNumChild() = 2 }
+
+  /**
+   * Gets the child expression containing the unexpanded parameter pack.
+   *
+   * For a unary fold this is child 0. For a binary fold it is child 0 of a
+   * right fold and child 1 otherwise.
+   */
+  Expr getPackExpr() {
+    this.isUnaryFold() and
+    result = this.getChild(0)
+    or
+    this.isBinaryFold() and
+    if this.isRightFold() then result = this.getChild(0) else result = this.getChild(1)
+  }
+
+  /**
+   * If this is a binary fold, gets the expression representing the initial value.
+   *
+   * This is child 1 of a right fold and child 0 otherwise.
+   */
+  Expr getInitExpr() {
+    this.isBinaryFold() and
+    if this.isRightFold() then result = this.getChild(1) else result = this.getChild(0)
+  }
+}
+
+/**
+ * Holds if `child` is the `idx`th child of `parent` in an alternative syntax
+ * tree that has `Conversion`s as part of the tree.
+ */
+private predicate convparents(Expr child, int idx, Element parent) {
+  // An AST child of `parent` is replaced by its fully converted form ...
+  exists(Expr astChild |
+    child = astChild.getFullyConverted() and
+    exprparents(unresolveElement(astChild), idx, unresolveElement(parent))
+  )
+  or
+  // ... and an expression is the sole child (index 0) of its own conversion.
+  idx = 0 and
+  child.getConversion() = parent
+}
+
+// Pulled out for performance. See
+// https://github.com/github/codeql-coreql-team/issues/1044.
+// Holds if `e` has no conversion, i.e. `e.hasConversion()` does not hold.
+private predicate hasNoConversions(Expr e) { not e.hasConversion() }
+
+/**
+ * Holds if `e` is a literal of unknown value in a template, or a cast thereof.
+ * Such literals are assumed to be constant.
+ */
+private predicate constantTemplateLiteral(Expr e) {
+  // Recursive case: a cast of an expression that already qualifies.
+  exists(Cast cast | cast = e | constantTemplateLiteral(cast.getExpr()))
+  or
+  // Base case: unknown literals in uninstantiated templates could be enum
+  // constant accesses or pointer-to-member literals.
+  e instanceof Literal and
+  e.isFromUninstantiatedTemplate(_) and
+  not exists(e.getValue())
+}
+
+/**
+ * A C++ three-way comparison operation, also known as the _spaceship
+ * operation_. It is only available in C++20 and later.
+ * ```
+ * auto c = (a <=> b);
+ * ```
+ */
+class SpaceshipExpr extends BinaryOperation, @spaceshipexpr {
+  override string getOperator() { result = "<=>" }
+
+  override int getPrecedence() { result = 11 }
+
+  override string getAPrimaryQlClass() { result = "SpaceshipExpr" }
+}
+
+/**
+ * A C++ `co_await` expression.
+ * ```
+ * co_await foo();
+ * ```
+ */
+class CoAwaitExpr extends UnaryOperation, @co_await {
+  override string getOperator() { result = "co_await" }
+
+  override int getPrecedence() { result = 16 }
+
+  override string getAPrimaryQlClass() { result = "CoAwaitExpr" }
+}
+
+/**
+ * A C++ `co_yield` expression.
+ * ```
+ * co_yield 1;
+ * ```
+ */
+class CoYieldExpr extends UnaryOperation, @co_yield {
+  override string getOperator() { result = "co_yield" }
+
+  override int getPrecedence() { result = 2 }
+
+  override string getAPrimaryQlClass() { result = "CoYieldExpr" }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/Lambda.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/Lambda.qll
new file mode 100644
index 00000000000..8a51001f4d5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/Lambda.qll
@@ -0,0 +1,153 @@
+/**
+ * Provides classes for modeling lambda expressions and their captures.
+ */
+
+import semmle.code.cpp.exprs.Expr
+import semmle.code.cpp.Class
+
+/**
+ * A C++11 lambda expression, for example the expression initializing `a` in
+ * the following code:
+ * ```
+ * auto a = [x, y](int z) -> int {
+ *   return x + y + z;
+ * };
+ * ```
+ *
+ * The type given by `getType()` will be an instance of `Closure`.
+ */
+class LambdaExpression extends Expr, @lambdaexpr {
+  override string toString() { result = "[...](...){...}" }
+
+  override string getAPrimaryQlClass() { result = "LambdaExpression" }
+
+  /**
+   * Gets an implicitly or explicitly captured value of this lambda expression.
+   */
+  LambdaCapture getACapture() { result = this.getCapture(_) }
+
+  /**
+   * Gets the nth implicitly or explicitly captured value of this lambda expression.
+   */
+  LambdaCapture getCapture(int index) {
+    lambda_capture(result, underlyingElement(this), index, _, _, _, _)
+  }
+
+  /**
+   * Gets the default variable capture mode for the lambda expression.
+   *
+   * Will be one of:
+   * - "" if no default was specified, meaning that all captures must be explicit.
+   * - "&" if capture-by-reference is the default for implicit captures.
+   * - "=" if capture-by-value is the default for implicit captures.
+   */
+  string getDefaultCaptureMode() { lambdas(underlyingElement(this), result, _) }
+
+  /**
+   * Holds if the return type (of the call operator of the resulting object) was explicitly specified.
+   */
+  predicate returnTypeIsExplicit() { lambdas(underlyingElement(this), _, true) }
+
+  /**
+   * Gets the function which will be invoked when the resulting object is called.
+   *
+   * Various components of the lambda expression can be obtained from components of this
+   * function, such as:
+   * - The number and type of parameters.
+   * - Whether the mutable keyword was used (iff this function is not const).
+   * - The return type.
+   * - The statements comprising the lambda body.
+   */
+  Operator getLambdaFunction() { result = this.getType().(Closure).getLambdaFunction() }
+
+  /**
+   * Gets the initializer that initializes the captured variables in the closure, if any.
+   * A lambda that does not capture any variables will not have an initializer.
+   */
+  ClassAggregateLiteral getInitializer() { result = this.getChild(0) }
+}
+
+/**
+ * A class written by the compiler to be the type of a C++11 lambda expression.
+ * For example the variable `a` in the following code has a closure type:
+ * ```
+ * auto a = [x, y](int z) -> int {
+ *   return x + y + z;
+ * };
+ * ```
+ */
+class Closure extends Class {
+  // A class is a closure exactly when it is the type of some lambda expression.
+  Closure() { this = any(LambdaExpression lambda).getType() }
+
+  override string getAPrimaryQlClass() { result = "Closure" }
+
+  override string getDescription() { result = "decltype([...](...){...})" }
+
+  /** Gets the lambda expression of which this is the type. */
+  LambdaExpression getLambdaExpression() { this = result.getType() }
+
+  /** Gets the compiler-generated `operator()` of this closure type. */
+  Operator getLambdaFunction() {
+    result.getName() = "operator()" and
+    result = this.getAMember()
+  }
+}
+
+/**
+ * Information about a value captured as part of a lambda expression. For
+ * example in the following code, information about `x` and `y` is captured:
+ * ```
+ * auto a = [x, y](int z) -> int {
+ *   return x + y + z;
+ * };
+ * ```
+ */
+class LambdaCapture extends Locatable, @lambdacapture {
+  // Rendered as the name of the closure field that stores the capture.
+  override string toString() { result = this.getField().getName() }
+
+  override string getAPrimaryQlClass() { result = "LambdaCapture" }
+
+  /**
+   * Holds if this capture was made implicitly.
+   */
+  predicate isImplicit() { lambda_capture(this, _, _, _, _, true, _) }
+
+  /**
+   * Holds if the variable was captured by reference.
+   *
+   * An identifier is captured by reference if:
+   * - It is explicitly captured by reference.
+   * - It is implicitly captured, and the lambda's default capture mode is by-reference.
+   * - The identifier is "this". [Said behaviour is dictated by the C++11 standard, but it
+   *   is actually "*this" being captured rather than "this".]
+   */
+  predicate isCapturedByReference() { lambda_capture(this, _, _, _, true, _, _) }
+
+  /**
+   * Gets the location of the declaration of this capture.
+   *
+   * For explicit captures, this is a location within the "[...]" part of the lambda expression.
+   *
+   * For implicit captures, this is the first location within the "{...}" part of the lambda
+   * expression which accesses the captured variable.
+   */
+  override Location getLocation() { lambda_capture(this, _, _, _, _, _, result) }
+
+  /**
+   * Gets the field of the lambda expression's closure type which is used to store this capture.
+   */
+  MemberVariable getField() { lambda_capture(this, _, _, result, _, _, _) }
+
+  /**
+   * Gets the expression which yields the final captured value.
+   *
+   * In many cases, this will be an instance of VariableAccess.
+   * If a this-pointer is being captured, this will be an instance of ThisExpr.
+   * For by-value captures of non-primitive types, this will be a call to a copy constructor.
+   */
+  Expr getInitializer() {
+    exists(LambdaExpression lambda | this = lambda.getCapture(_) |
+      result = lambda.getInitializer().getFieldExpr(this.getField())
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/Literal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/Literal.qll
new file mode 100644
index 00000000000..31790f85bfb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/Literal.qll
@@ -0,0 +1,346 @@
+/**
+ * Provides classes for modeling literals in the source code such as `0`, `'c'`
+ * or `"string"`.
+ */
+
+import semmle.code.cpp.exprs.Expr
+
+/**
+ * A C/C++ literal.
+ *
+ * This is the QL root class for all literals.
+ */
+class Literal extends Expr, @literal {
+  /**
+   * Gets a textual representation of this literal: its value when one is
+   * available, and the fixed string "Unknown literal" otherwise.
+   */
+  override string toString() {
+    if exists(this.getValue())
+    then result = this.getValue()
+    else result = "Unknown literal"
+  }
+
+  override string getAPrimaryQlClass() { result = "Literal" }
+
+  // A literal is never considered impure, locally or globally.
+  override predicate mayBeGloballyImpure() { none() }
+
+  override predicate mayBeImpure() { none() }
+}
+
+/**
+ * A label literal, that is, a use of the '&&' operator to take the address of a
+ * label for use in a computed goto statement. This is a non-standard C/C++ extension.
+ *
+ * For example:
+ * ```
+ * void *label_ptr = &&myLabel; // &&myLabel is a LabelLiteral
+ * goto *label_ptr; // this is a ComputedGotoStmt
+ * myLabel: // this is a LabelStmt
+ * ```
+ */
+class LabelLiteral extends Literal {
+  // A literal is a label literal exactly when `jumpinfo` has a row for it.
+  LabelLiteral() { jumpinfo(underlyingElement(this), _, _) }
+
+  /** Gets the label statement whose address this literal takes. */
+  LabelStmt getLabel() { jumpinfo(underlyingElement(this), _, unresolveElement(result)) }
+
+  override string getAPrimaryQlClass() { result = "LabelLiteral" }
+}
+
+/** A character literal or a string literal. */
+class TextLiteral extends Literal {
+ TextLiteral() {
+ // String Literal
+ // Note that `AggregateLiteral`s can also have an array type, but they derive from
+ // @aggregateliteral rather than @literal.
+ this.getType() instanceof ArrayType
+ or
+ // Char literal
+ this.getValueText().regexpMatch("(?s)\\s*L?'.*")
+ }
+
+ /** Gets a hex escape sequence that appears in the character or string literal (see [lex.ccon] in the C++ Standard). */
+ string getAHexEscapeSequence(int occurrence, int offset) {
+ result = getValueText().regexpFind("(?= 0 and
+ elementIndex < getArraySize()
+ }
+
+ /**
+ * Holds if the element `elementIndex` is value initialized because it is not
+ * explicitly initialized by this initializer list.
+ *
+ * Value initialization (see [dcl.init]/8) recursively initializes all fields
+ * of an object to `false`, `0`, `nullptr`, or by calling the default
+ * constructor, as appropriate to the type.
+ */
+ bindingset[elementIndex]
+ predicate isValueInitialized(int elementIndex) {
+ isInitialized(elementIndex) and
+ not exists(getElementExpr(elementIndex))
+ }
+}
+
+/**
+ * A C/C++ aggregate literal that initializes an array.
+ * ```
+ * S s[4] = { s_1, s_2, s_3, s_n };
+ * ```
+ */
+class ArrayAggregateLiteral extends ArrayOrVectorAggregateLiteral {
+  // The unspecified type of this literal, which must be an array type.
+  ArrayType arrayType;
+
+  ArrayAggregateLiteral() { this.getUnspecifiedType() = arrayType }
+
+  override string getAPrimaryQlClass() { result = "ArrayAggregateLiteral" }
+
+  override Type getElementType() { result = arrayType.getBaseType() }
+
+  override int getArraySize() { result = arrayType.getArraySize() }
+}
+
+/**
+ * A C/C++ aggregate literal that initializes a GNU vector type.
+ *
+ * As with arrays, a braced initializer list is used.
+ * ```
+ * typedef int v4si __attribute__ (( vector_size(4*sizeof(int)) ));
+ * v4si v = (v4si){ 1, 2, 3, 4 };
+ * typedef float float4 __attribute__((ext_vector_type(4)));
+ * float4 vf = {1.0f, 2.0f, 3.0f, 4.0f};
+ * ```
+ */
+class VectorAggregateLiteral extends ArrayOrVectorAggregateLiteral {
+  // The unspecified type of this literal, which must be a GNU vector type.
+  GNUVectorType vectorType;
+
+  VectorAggregateLiteral() { this.getUnspecifiedType() = vectorType }
+
+  override string getAPrimaryQlClass() { result = "VectorAggregateLiteral" }
+
+  override Type getElementType() { result = vectorType.getBaseType() }
+
+  override int getArraySize() { result = vectorType.getNumElements() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/LogicalOperation.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/LogicalOperation.qll
new file mode 100644
index 00000000000..5b92fbbf2f5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/LogicalOperation.qll
@@ -0,0 +1,131 @@
+/**
+ * Provides classes for modeling logical operations such as `!`, `&&`, `||`, and
+ * the ternary `? :` expression.
+ */
+
+import semmle.code.cpp.exprs.Expr
+
+/**
+ * A C/C++ unary logical operation, such as the logical not expression `!a`.
+ */
+class UnaryLogicalOperation extends UnaryOperation, @un_log_op_expr { }
+
+/**
+ * A C/C++ logical not expression.
+ * ```
+ * c = !a;
+ * ```
+ */
+class NotExpr extends UnaryLogicalOperation, @notexpr {
+ override string getAPrimaryQlClass() { result = "NotExpr" }
+
+ override string getOperator() { result = "!" }
+
+ override int getPrecedence() { result = 16 }
+}
+
+/**
+ * A C/C++ binary logical operation, such as `a && b` or `a || b`.
+ */
+class BinaryLogicalOperation extends BinaryOperation, @bin_log_op_expr {
+ /**
+ * Holds if this binary logical expression having truth value `wholeIsTrue`
+ * implies that the child expression `part` has truth value `partIsTrue`.
+ *
+ * For example, if the binary operation:
+ * ```
+ * x && y
+ * ```
+ * is true, `x` and `y` must also be true, so `impliesValue(x, true, true)` and
+ * `impliesValue(y, true, true)` hold.
+ */
+ predicate impliesValue(Expr part, boolean partIsTrue, boolean wholeIsTrue) { none() } // no results here; overridden in subclasses
+}
+
+/**
+ * A C/C++ logical AND expression.
+ * ```
+ * if (a && b) { }
+ * ```
+ */
+class LogicalAndExpr extends BinaryLogicalOperation, @andlogicalexpr {
+ override string getAPrimaryQlClass() { result = "LogicalAndExpr" }
+
+ override string getOperator() { result = "&&" }
+
+ override int getPrecedence() { result = 5 }
+
+ override predicate impliesValue(Expr part, boolean partIsTrue, boolean wholeIsTrue) {
+ this.getAnOperand().(BinaryLogicalOperation).impliesValue(part, partIsTrue, true) and
+ wholeIsTrue = true
+ or
+ part = this.getAnOperand() and partIsTrue = true and wholeIsTrue = true
+ }
+}
+
+/**
+ * A C/C++ logical OR expression.
+ * ```
+ * if (a || b) { }
+ * ```
+ */
+class LogicalOrExpr extends BinaryLogicalOperation, @orlogicalexpr {
+ override string getAPrimaryQlClass() { result = "LogicalOrExpr" }
+
+ override string getOperator() { result = "||" }
+
+ override int getPrecedence() { result = 4 }
+
+ override predicate impliesValue(Expr part, boolean partIsTrue, boolean wholeIsTrue) {
+ this.getAnOperand().(BinaryLogicalOperation).impliesValue(part, partIsTrue, false) and
+ wholeIsTrue = false
+ or
+ part = this.getAnOperand() and partIsTrue = false and wholeIsTrue = false
+ }
+}
+
+/**
+ * A C/C++ conditional ternary expression.
+ * ```
+ * a = (b > c ? d : e);
+ * ```
+ */
+class ConditionalExpr extends Operation, @conditionalexpr {
+ /** Gets the condition (guard) operand of this conditional expression. */
+ Expr getCondition() { expr_cond_guard(underlyingElement(this), unresolveElement(result)) }
+
+ override string getAPrimaryQlClass() { result = "ConditionalExpr" }
+
+ /** Gets the 'then' expression; in the two operand form this is the condition itself. */
+ Expr getThen() {
+ this.isTwoOperand() and result = this.getCondition()
+ or
+ not this.isTwoOperand() and expr_cond_true(underlyingElement(this), unresolveElement(result))
+ }
+
+ /** Gets the 'else' expression of this conditional expression. */
+ Expr getElse() { expr_cond_false(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Holds if this expression used the two operand form `guard ?: otherwise`.
+ */
+ predicate isTwoOperand() { expr_cond_two_operand(underlyingElement(this)) }
+
+ override string getOperator() { result = "?" }
+
+ override string toString() { result = "... ? ... : ..." }
+
+ override int getPrecedence() { result = 3 }
+
+ override predicate mayBeImpure() {
+ this.getElse().mayBeImpure() or
+ this.getThen().mayBeImpure() or
+ this.getCondition().mayBeImpure()
+ }
+
+ override predicate mayBeGloballyImpure() {
+ this.getElse().mayBeGloballyImpure() or
+ this.getThen().mayBeGloballyImpure() or
+ this.getCondition().mayBeGloballyImpure()
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/ObjectiveC.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/ObjectiveC.qll
new file mode 100644
index 00000000000..c651ae9b153
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/exprs/ObjectiveC.qll
@@ -0,0 +1,297 @@
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ */
+
+import semmle.code.cpp.exprs.Expr
+import semmle.code.cpp.Class
+import semmle.code.cpp.ObjectiveC
+private import semmle.code.cpp.internal.ResolveClass
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C message expression, for example `[myColor changeColorToRed:5.0 green:2.0 blue:6.0]`.
+ */
+deprecated class MessageExpr extends Expr, Call {
+ MessageExpr() { none() }
+
+ override string toString() { none() }
+
+ /**
+ * Gets the selector of this message expression, for example `-changeColorToRed:green:blue:`.
+ */
+ string getSelector() { none() }
+
+ /**
+ * Gets the function invoked by this message expression, as inferred by the compiler.
+ *
+ * If the compiler could infer the type of the receiver, and that type had a method
+ * whose name matched the selector, then the result of this predicate is said method.
+ * Otherwise this predicate has no result.
+ *
+ * In all cases, actual function dispatch isn't performed until runtime, but the
+ * lack of a static target is often cause for concern.
+ */
+ MemberFunction getStaticTarget() { none() }
+
+ /**
+ * Provided for compatibility with `Call`. It is the same as the static target.
+ */
+ override MemberFunction getTarget() { none() }
+
+ /**
+ * Holds if the compiler could infer a function as the target of this message.
+ *
+ * In all cases, actual function dispatch isn't performed until runtime, but the
+ * lack of a static target is often cause for concern.
+ */
+ predicate hasStaticTarget() { none() }
+
+ /**
+ * Gets the number of arguments passed by this message expression.
+ *
+ * In most cases, this equals the number of colons in the selector, but this needn't be the
+ * case for variadic methods like "-initWithFormat:", which can have more than one argument.
+ */
+ override int getNumberOfArguments() { none() }
+
+ /**
+ * Gets an argument passed by this message expression.
+ */
+ override Expr getAnArgument() { none() }
+
+ /**
+ * Gets the nth argument passed by this message expression.
+ *
+ * The range of `n` is [`0` .. `getNumberOfArguments() - 1`].
+ */
+ override Expr getArgument(int n) { none() }
+
+ override int getPrecedence() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C message expression whose receiver is `super`, for example `[super init]`.
+ */
+deprecated class SuperMessageExpr extends MessageExpr {
+ SuperMessageExpr() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C message expression whose receiver is the name of a class, and
+ * is therefore calling a class method rather than an instance method. This occurs
+ * most commonly for the `+alloc`, `+new`, and `+class` selectors.
+ */
+deprecated class ClassMessageExpr extends MessageExpr {
+ ClassMessageExpr() { none() }
+
+ /**
+ * Gets the class which is the receiver of this message.
+ */
+ Type getReceiver() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C message expression whose receiver is an expression (which includes the
+ * common case of the receiver being `self`).
+ */
+deprecated class ExprMessageExpr extends MessageExpr {
+ ExprMessageExpr() { none() }
+
+ /**
+ * Gets the expression which gives the receiver of this message.
+ */
+ Expr getReceiver() { none() }
+
+ /**
+ * Gets the Objective-C class of which the receiving expression is an instance.
+ *
+ * If the receiving expression has type `id` or type `id<P>` for some protocol `P`,
+ * then there will be no result. If the receiving expression has type `C*` or type
+ * `C<P>*` for some protocol `P`, then the result will be the type `C`.
+ */
+ ObjectiveClass getReceiverClass() { none() }
+
+ /**
+ * Gets the Objective-C classes and/or protocols which are statically implemented
+ * by the receiving expression.
+ *
+ * If the receiving expression has type `id`, then there will be no result.
+ * If the receiving expression has type `id<P>`, then `P` will be the sole result.
+ * If the receiving expression has type `C*`, then `C` will be the sole result.
+ * If the receiving expression has type `C<P>*`, then `C` and `P` will both be results.
+ */
+ Class getAReceiverClassOrProtocol() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An access to an Objective-C property using dot syntax.
+ *
+ * Such accesses are de-sugared into a message expression to the property's getter or setter.
+ */
+deprecated class PropertyAccess extends ExprMessageExpr {
+ PropertyAccess() { none() }
+
+ /**
+ * Gets the property being accessed by this expression.
+ */
+ Property getProperty() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@selector` expression, for example `@selector(driveForDistance:)`.
+ */
+deprecated class AtSelectorExpr extends Expr {
+ AtSelectorExpr() { none() }
+
+ override string toString() { none() }
+
+ /**
+ * Gets the selector of this `@selector` expression, for example `driveForDistance:`.
+ */
+ string getSelector() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@protocol` expression, for example `@protocol(SomeProtocol)`.
+ */
+deprecated class AtProtocolExpr extends Expr {
+ AtProtocolExpr() { none() }
+
+ override string toString() { none() }
+
+ /**
+ * Gets the protocol of this `@protocol` expression, for example `SomeProtocol`.
+ */
+ Protocol getProtocol() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@encode` expression, for example `@encode(int *)`.
+ */
+deprecated class AtEncodeExpr extends Expr {
+ AtEncodeExpr() { none() }
+
+ override string toString() { none() }
+
+ /**
+ * Gets the type this `@encode` expression encodes, for example `int *`.
+ */
+ Type getEncodedType() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C throw expression.
+ */
+deprecated class ObjcThrowExpr extends ThrowExpr {
+ ObjcThrowExpr() { none() }
+
+ override string toString() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C throw expression with no argument (which causes the
+ * current exception to be re-thrown).
+ */
+deprecated class ObjcReThrowExpr extends ReThrowExpr, ObjcThrowExpr {
+ ObjcReThrowExpr() { none() }
+
+ override string toString() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@` expression which boxes a single value, such as `@(22)`.
+ */
+deprecated class AtExpr extends UnaryOperation {
+ AtExpr() { none() }
+
+ override string toString() { none() }
+
+ override string getOperator() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@[...]` literal.
+ */
+deprecated class ArrayLiteral extends Expr {
+ ArrayLiteral() { none() }
+
+ /** Gets a textual representation of this array literal. */
+ override string toString() { none() }
+
+ /** Gets the `i`th element of this array literal. */
+ Expr getElement(int i) { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@{...}` literal.
+ */
+deprecated class DictionaryLiteral extends Expr {
+ DictionaryLiteral() { none() }
+
+ /** Gets a textual representation of this dictionary literal. */
+ override string toString() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `@"..."` string literal.
+ */
+deprecated class ObjCLiteralString extends TextLiteral {
+ ObjCLiteralString() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C/C++ overloaded subscripting access expression.
+ *
+ * Either
+ * `obj[idx]`
+ * or
+ * `obj[idx] = expr`
+ */
+deprecated class SubscriptExpr extends Expr {
+ SubscriptExpr() { none() }
+
+ /**
+ * Gets the object expression being subscripted.
+ */
+ Expr getSubscriptBase() { none() }
+
+ /**
+ * Gets the expression giving the index into the object.
+ */
+ Expr getSubscriptIndex() { none() }
+
+ /**
+ * Gets the expression being assigned (if this is an assignment).
+ */
+ Expr getAssignedExpr() { none() }
+
+ override string toString() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * An Objective-C `_cmd` expression.
+ */
+deprecated class CmdExpr extends Expr {
+ CmdExpr() { none() }
+
+ override string toString() { none() }
+
+ override predicate mayBeImpure() { none() }
+
+ override predicate mayBeGloballyImpure() { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/headers/MultipleInclusion.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/headers/MultipleInclusion.qll
new file mode 100644
index 00000000000..5a12af29694
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/headers/MultipleInclusion.qll
@@ -0,0 +1,182 @@
+/**
+ * Provides definitions related to _include guards_: techniques for preventing
+ * multiple inclusion of header files.
+ */
+
+import cpp
+
+/**
+ * A header file with an include guard (see the subclasses for the recognized kinds).
+ */
+abstract class IncludeGuardedHeader extends HeaderFile { }
+
+/**
+ * A header file that uses a non-portable mechanism to prevent multiple
+ * inclusion, for example `#pragma once` (see `PragmaOnceIncludeGuard`).
+ */
+abstract class BadIncludeGuard extends IncludeGuardedHeader {
+ /** Gets the element to blame for this bad include guard pattern. */
+ abstract Element blame();
+}
+
+/**
+ * A header file with the correct include guard: `#ifndef` (or equivalent),
+ * `#define`, and `#endif`.
+ */
+class CorrectIncludeGuard extends IncludeGuardedHeader {
+ CorrectIncludeGuard() { correctIncludeGuard(this, _, _, _, _) }
+
+ /** Gets the `#ifndef` or `#if` directive used to prevent multiple inclusion of this file. */
+ PreprocessorDirective getIfndef() { correctIncludeGuard(this, result, _, _, _) }
+
+ /** Gets the macro (the `#define`) used to prevent multiple inclusion of this file. */
+ Macro getDefine() { correctIncludeGuard(this, _, result, _, _) }
+
+ /** Gets the `#endif` directive closing this file. */
+ PreprocessorEndif getEndif() { correctIncludeGuard(this, _, _, result, _) }
+
+ /** Gets the name of the macro used to prevent multiple inclusion of this file. */
+ string getIncludeGuardName() { correctIncludeGuard(this, _, _, _, result) }
+}
+
+/**
+ * DEPRECATED: no longer useful. The characteristic predicate is `none()`, so this class is empty.
+ */
+deprecated class NotIncludedGuard extends IncludeGuardedHeader {
+ NotIncludedGuard() { none() }
+
+ /** Gets the `#ifndef` directive used to prevent multiple inclusion of this file. */
+ PreprocessorIfndef getIfndef() { result.getFile() = this }
+
+ /** Gets the `#endif` directive closing this file. */
+ PreprocessorEndif getEndif() { result.getFile() = this }
+}
+
+/**
+ * A header file with zero lines of code in it; such a file is treated as include-guarded.
+ */
+class EmptyFile extends IncludeGuardedHeader {
+ EmptyFile() { this.(MetricFile).getNumberOfLinesOfCode() = 0 }
+}
+
+private predicate hasMacro(HeaderFile hf, string name, Macro define) {
+ name = define.getName() and hf = define.getFile()
+}
+
+/**
+ * Holds if `hf` begins with an `#ifndef` or `#if` directive `ifndef`, to test
+ * the macro named `includeGuard`, and ends with the matching `endif`.
+ */
+predicate hasIncludeGuard(
+ HeaderFile hf, PreprocessorDirective ifndef, PreprocessorEndif endif, string includeGuard
+) {
+ ifndef = endif.getIf() and
+ endsWithEndif(hf, endif) and
+ startsWithIfndef(hf, ifndef, includeGuard)
+}
+
+/**
+ * Holds if `hf` uses a valid include guard with the macro named `includeGuard`
+ * and the preprocessor directives `ifndef`, `define`, and `endif`. This
+ * analysis is also exposed in an object-oriented style through the class
+ * `CorrectIncludeGuard`.
+ */
+pragma[noopt]
+predicate correctIncludeGuard(
+ HeaderFile hf, PreprocessorDirective ifndef, Macro define, PreprocessorEndif endif,
+ string includeGuard
+) {
+ hasIncludeGuard(hf, ifndef, endif, includeGuard) and
+ hasMacro(hf, includeGuard, define) and
+ // We already know the `#ifndef` is first and the `#endif` last, so we only need
+ // to check that nothing other than the `#ifndef` comes before the `#define`.
+ not exists(
+ int relevant, Location ifndefLocation, int ifndefLine, Location defineLocation, int defineLine
+ |
+ includeGuardRelevantLine(hf, relevant) and
+ ifndefLocation = ifndef.getLocation() and
+ ifndefLine = ifndefLocation.getStartLine() and
+ relevant != ifndefLine and // the `#ifndef` line itself is allowed before the `#define`
+ defineLocation = define.getLocation() and
+ defineLine = defineLocation.getStartLine() and
+ relevant < defineLine // any other relevant line above the `#define` invalidates the guard
+ )
+}
+
+/**
+ * Holds if `hf` begins with an `#ifndef` or `#if` directive `ifndef`, to test
+ * the macro named `macroName`.
+ */
+predicate startsWithIfndef(HeaderFile hf, PreprocessorDirective ifndef, string macroName) {
+ exists(Location ifndefLoc |
+ ifndefLoc.getStartLine() = min(int l | includeGuardRelevantLine(hf, l)) and
+ ifndefLoc.getFile() = hf and
+ ifndefLoc = ifndef.getLocation()
+ ) and
+ ifndefDirective(ifndef, macroName)
+}
+
+private predicate endifLocation(PreprocessorEndif endif, File f, int line) {
+ line = endif.getLocation().getStartLine() and
+ f = endif.getFile()
+}
+
+private predicate lastEndifLocation(PreprocessorEndif endif, File f, int line) {
+ line = max(int endifLine | endifLocation(_, f, endifLine)) and
+ endifLocation(endif, f, line)
+}
+
+/**
+ * Holds if `hf` ends with `endif`.
+ */
+predicate endsWithEndif(HeaderFile hf, PreprocessorEndif endif) {
+ exists(int lastLine | lastLine = max(int l | includeGuardRelevantLine(hf, l)) |
+ lastEndifLocation(endif, hf, lastLine)
+ )
+}
+
+private predicate includeGuardRelevantLine(HeaderFile hf, int line) {
+ exists(Location loc | loc.getStartLine() = line and loc.getFile() = hf |
+ // most preprocessor directives
+ exists(PreprocessorDirective directive |
+ // included files may be outside the include guards, as they
+ // should contain an include guarding mechanism of their own.
+ not directive instanceof Include and
+ loc = directive.getLocation()
+ )
+ or
+ // any declaration
+ loc = any(Declaration decl).getADeclarationLocation()
+ )
+}
+
+/**
+ * Holds if `ppd` is effectively an `#ifndef` directive that tests `macro`.
+ * This includes `#if !defined(macro)` and `#if !defined macro`.
+ */
+predicate ifndefDirective(PreprocessorDirective ppd, string macro) {
+ ppd instanceof PreprocessorIfndef and macro = ppd.getHead()
+ or
+ ppd instanceof PreprocessorIf and
+ exists(string head | head = ppd.getHead() |
+ macro =
+ head.replaceAll("(", " ") // "(" becomes a space, so `defined(X)` reads as `defined X`
+ .replaceAll(")", "") // closing parentheses are dropped
+ .replaceAll("\t", " ") // tabs are normalized to spaces
+ .regexpCapture("[ ]*![ ]*defined[ ]+([^ ]*)[ ]*", 1) // capture the name after `! defined`
+ .trim()
+ )
+}
+
+/**
+ * A header file with the `#pragma once` include guard.
+ */
+class PragmaOnceIncludeGuard extends BadIncludeGuard {
+ PragmaOnceIncludeGuard() {
+ exists(PreprocessorPragma oncePragma | oncePragma.getHead() = "once" and oncePragma.getFile() = this)
+ }
+
+ override Element blame() {
+ result = any(PreprocessorPragma oncePragma | oncePragma.getHead() = "once" and oncePragma.getFile() = this)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/AddressConstantExpression.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/AddressConstantExpression.qll
new file mode 100644
index 00000000000..436be8384e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/AddressConstantExpression.qll
@@ -0,0 +1,201 @@
+/*
+ * Maintainer note: this file is one of several files that are similar but not
+ * identical. Many changes to this file will also apply to the others:
+ * - AddressConstantExpression.qll
+ * - AddressFlow.qll
+ * - EscapesTree.qll
+ */
+
+private import cpp
+
+predicate addressConstantExpression(Expr e) {
+ // Special case for function pointers, where `fp == *fp`.
+ e.getType() instanceof FunctionPointerType and
+ constantAddressLValue(e)
+ or
+ constantAddressReference(e)
+ or
+ constantAddressPointer(e)
+}
+
+/** Holds if `v` is a constexpr variable initialized to a constant address. */
+private predicate addressConstantVariable(Variable v) {
+ v.isConstexpr() and
+ addressConstantExpression(v.getInitializer().getExpr().getFullyConverted())
+}
+
+/**
+ * Holds if `lvalue` is an lvalue whose address is an _address constant
+ * expression_.
+ */
+private predicate constantAddressLValue(Expr lvalue) {
+ exists(Variable target |
+ target = lvalue.(VariableAccess).getTarget()
+ |
+ target instanceof GlobalOrNamespaceVariable or
+ target.isStatic()
+ )
+ or
+ // There is no `Conversion` for the implicit conversion from a function type
+ // to a function _pointer_ type. Instead, the type of a `FunctionAccess`
+ // tells us how it's going to be used.
+ lvalue.(FunctionAccess).getType() instanceof RoutineType
+ or
+ // Pointer-to-member literals in uninstantiated templates
+ lvalue.isFromUninstantiatedTemplate(_) and
+ not exists(lvalue.getValue()) and
+ lvalue instanceof Literal
+ or
+ // String literals have array types and undergo array-to-pointer conversion.
+ lvalue instanceof StringLiteral
+ or
+ // lvalue -> lvalue
+ exists(Expr prev |
+ lvalueToLvalueStep(prev, lvalue) and
+ constantAddressLValue(prev)
+ )
+ or
+ // pointer -> lvalue
+ exists(Expr prev |
+ pointerToLvalueStep(prev, lvalue) and
+ constantAddressPointer(prev)
+ )
+ or
+ // reference -> lvalue
+ exists(Expr prev |
+ referenceToLvalueStep(prev, lvalue) and
+ constantAddressReference(prev)
+ )
+}
+
+/** Holds if `pointer` is an _address constant expression_ of pointer type. */
+private predicate constantAddressPointer(Expr pointer) {
+ // There is no `Conversion` for the implicit conversion from a function type
+ // to a function _pointer_ type. Instead, the type of a `FunctionAccess`
+ // tells us how it's going to be used.
+ pointer.(FunctionAccess).getType() instanceof FunctionPointerType
+ or
+ // Pointer to member function. These accesses are always pointers even though
+ // their type is `RoutineType`.
+ pointer.(FunctionAccess).getTarget() instanceof MemberFunction
+ or
+ pointer.getType().getUnderlyingType() instanceof PointerType and
+ addressConstantVariable(pointer.(VariableAccess).getTarget())
+ or
+ // pointer -> pointer
+ exists(Expr prev |
+ pointerToPointerStep(prev, pointer) and
+ constantAddressPointer(prev)
+ )
+ or
+ // lvalue -> pointer
+ exists(Expr prev |
+ lvalueToPointerStep(prev, pointer) and
+ constantAddressLValue(prev)
+ )
+}
+
+/** Holds if `reference` is an _address constant expression_ of reference type. */
+private predicate constantAddressReference(Expr reference) {
+ addressConstantVariable(reference.(VariableAccess).getTarget()) and
+ exists(Type refType | refType = reference.getType().getUnderlyingType() |
+ refType instanceof ReferenceType or
+ refType instanceof FunctionReferenceType // not a ReferenceType
+ )
+ or
+ // reference -> reference
+ exists(Expr prev |
+ referenceToReferenceStep(prev, reference) and
+ constantAddressReference(prev)
+ )
+ or
+ // lvalue -> reference
+ exists(Expr prev |
+ lvalueToReferenceStep(prev, reference) and
+ constantAddressLValue(prev)
+ )
+}
+
+private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
+ // Special case for function pointers, where `fp == *fp`.
+ lvalueIn.getType() instanceof FunctionPointerType and
+ lvalueIn = lvalueOut.(PointerDereferenceExpr).getOperand().getFullyConverted()
+ or
+ lvalueOut.(ParenthesisExpr) = lvalueIn.getConversion()
+ or
+ lvalueOut.(DotFieldAccess).getQualifier().getFullyConverted() = lvalueIn
+}
+
+private predicate pointerToLvalueStep(Expr pointerIn, Expr lvalueOut) {
+ exists(ArrayExpr indexing |
+ indexing = lvalueOut and
+ hasConstantValue(indexing.getArrayOffset().getFullyConverted()) and
+ pointerIn = indexing.getArrayBase().getFullyConverted()
+ )
+ or
+ lvalueOut.(PointerFieldAccess).getQualifier().getFullyConverted() = pointerIn
+ or
+ lvalueOut.(PointerDereferenceExpr).getOperand().getFullyConverted() = pointerIn
+}
+
+private predicate lvalueToPointerStep(Expr lvalueIn, Expr pointerOut) {
+ pointerOut.(AddressOfExpr).getOperand().getFullyConverted() = lvalueIn
+ or
+ pointerOut.(ArrayToPointerConversion) = lvalueIn.getConversion()
+}
+
+private predicate pointerToPointerStep(Expr pointerIn, Expr pointerOut) {
+ (
+ pointerOut instanceof PointerAddExpr
+ or
+ pointerOut instanceof PointerSubExpr
+ ) and
+ pointerIn = pointerOut.getAChild().getFullyConverted() and
+ pointerIn.getUnspecifiedType() instanceof PointerType and
+ // The pointer operand is not constant in the sense of `hasConstantValue`, so
+ // the child matched here has to be the integer operand.
+ hasConstantValue(pointerOut.getAChild().getFullyConverted())
+ or
+ pointerIn = pointerOut.(UnaryPlusExpr).getOperand().getFullyConverted() // `+ptr`
+ or
+ pointerIn.getConversion() = pointerOut.(Cast) // a cast applied to the pointer
+ or
+ pointerIn.getConversion() = pointerOut.(ParenthesisExpr) // `(ptr)`
+ or
+ pointerOut =
+ any(ConditionalExpr cond |
+ cond.getCondition().getFullyConverted().getValue().toInt() != 0 and // constant non-zero condition: take 'then'
+ pointerIn = cond.getThen().getFullyConverted()
+ or
+ cond.getCondition().getFullyConverted().getValue().toInt() = 0 and // constant zero condition: take 'else'
+ pointerIn = cond.getElse().getFullyConverted()
+ )
+ or
+ // The comma operator is allowed by C++17 but disallowed by C99. This
+ // disjunct is a compromise that's chosen for being easy to implement.
+ pointerOut =
+ any(CommaExpr comma |
+ hasConstantValue(comma.getLeftOperand()) and
+ pointerIn = comma.getRightOperand().getFullyConverted()
+ )
+}
+
+private predicate lvalueToReferenceStep(Expr lvalueIn, Expr referenceOut) {
+ referenceOut.(ReferenceToExpr) = lvalueIn.getConversion()
+}
+
+private predicate referenceToLvalueStep(Expr referenceIn, Expr lvalueOut) {
+ // Probably unreachable in practice: an expression would have to be converted
+ // to a reference and then dereferenced again with no intermediate variable
+ // assignment in between.
+ lvalueOut.(ReferenceDereferenceExpr) = referenceIn.getConversion()
+}
+
+private predicate referenceToReferenceStep(Expr referenceIn, Expr referenceOut) {
+ referenceOut.(ParenthesisExpr) = referenceIn.getConversion()
+ or
+ referenceOut.(Cast) = referenceIn.getConversion()
+}
+
+/** Holds if `e` is constant according to the database, i.e. `valuebind` has an entry for it. */
+private predicate hasConstantValue(Expr e) { valuebind(_, underlyingElement(e)) }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/QualifiedName.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/QualifiedName.qll
new file mode 100644
index 00000000000..692ce1fee19
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/QualifiedName.qll
@@ -0,0 +1,307 @@
+/**
+ * INTERNAL: Do not use. Provides classes and predicates for getting names of
+ * declarations, especially qualified names. Import this library `private` and
+ * qualified.
+ *
+ * This file contains classes that mirror the standard AST classes for C++, but
+ * these classes are only concerned with naming. The other difference is that
+ * these classes don't use the `ResolveClass.qll` mechanisms like
+ * `unresolveElement` because these classes should eventually be part of the
+ * implementation of `ResolveClass.qll`, allowing it to match up classes when
+ * their qualified names and parameters match.
+ */
+
+private import semmle.code.cpp.Declaration as D
+
+class Namespace extends @namespace {
+ string toString() { result = "QualifiedName Namespace" }
+
+ string getName() { namespaces(this, result) }
+
+ string getQualifiedName() {
+ // Nested namespace: prefix the name with the parent's qualified name.
+ exists(Namespace parent | namespacembrs(parent, this) |
+ result = parent.getQualifiedName() + "::" + this.getName()
+ )
+ or
+ // Namespace without a parent: the plain name is the qualified name.
+ not namespacembrs(_, this) and result = this.getName()
+ }
+
+ /**
+ * Gets a namespace qualifier, like `"namespace1::namespace2"`, through which
+ * the members of this namespace can be named. When `inline namespace` is
+ * used, this predicate may have multiple results.
+ *
+ * This predicate does not take namespace aliases into account. Unlike inline
+ * namespaces, specialization of templates cannot happen through an alias.
+ * Aliases are also local to the compilation unit, while inline namespaces
+ * affect the whole program.
+ */
+ string getAQualifierForMembers() {
+ not namespacembrs(_, this) and
+ result = this.getName()
+ or
+ exists(Namespace parent | namespacembrs(parent, this) |
+ // If this is an inline namespace, its members are also visible in any
+ // namespace where the members of the parent are visible.
+ namespace_inline(this) and
+ result = parent.getAQualifierForMembers()
+ or
+ result = parent.getAQualifierForMembers() + "::" + this.getName()
+ )
+ }
+
+ Declaration getADeclaration() {
+ this.getName() = "" and not namespacembrs(_, result) and result.isTopLevel()
+ or
+ not this.getName() = "" and namespacembrs(this, result)
+ }
+}
+
+class Declaration extends @declaration {
+ string toString() { result = "QualifiedName Declaration" }
+
+ /** Gets the name of this declaration, as reported by the standard `Declaration` class. */
+ final string getName() { result = this.(D::Declaration).getName() }
+
+ string getTypeQualifierWithoutArgs() {
+ exists(UserType owner |
+ owner = this.getDeclaringType()
+ or
+ owner = this.(EnumConstant).getDeclaringEnum()
+ |
+ result = getTypeQualifierForMembersWithoutArgs(owner)
+ )
+ }
+
+ string getTypeQualifierWithArgs() {
+ exists(UserType owner |
+ owner = this.getDeclaringType()
+ or
+ owner = this.(EnumConstant).getDeclaringEnum()
+ |
+ result = getTypeQualifierForMembersWithArgs(owner)
+ )
+ }
+
+ Namespace getNamespace() {
+ // Top level declaration in a namespace ...
+ this = result.getADeclaration()
+ or
+ // ... or nested in another structure.
+ result = this.getDeclaringType().getNamespace()
+ or
+ result = this.(EnumConstant).getDeclaringEnum().getNamespace()
+ }
+
+ predicate hasQualifiedName(string namespaceQualifier, string typeQualifier, string baseName) {
+ declarationHasQualifiedName(baseName, typeQualifier, namespaceQualifier, this)
+ }
+
+ string getQualifiedName() {
+ exists(string nsName, string ownName |
+ this.canHaveQualifiedName() and
+ ownName = this.getName() and
+ nsName = this.getNamespace().getQualifiedName()
+ |
+ exists(string typeQual | typeQual = this.getTypeQualifierWithArgs() |
+ if nsName != "" then result = nsName + "::" + typeQual + "::" + ownName else result = typeQual + "::" + ownName
+ )
+ or
+ not hasTypeQualifier(this) and
+ if nsName != "" then result = nsName + "::" + ownName else result = ownName
+ )
+ }
+
+ predicate canHaveQualifiedName() {
+ this instanceof GlobalOrNamespaceVariable
+ or
+ this instanceof UserType
+ or
+ this instanceof Function
+ or
+ this instanceof EnumConstant
+ or
+ this.hasDeclaringType()
+ }
+
+ predicate isTopLevel() {
+ // A declaration is top-level when none of the exclusions below
+ // applies to it.
+ not this.isMember() and
+ not this instanceof FriendDecl and
+ not this instanceof EnumConstant and
+ not this instanceof Parameter and
+ not this instanceof ProxyClass and
+ not this instanceof LocalVariable and
+ not this instanceof TemplateParameter and
+ not this.(UserType).isLocal()
+ }
+
+ /** Holds if this declaration is a member of a class/struct/union. */
+ predicate isMember() { this.hasDeclaringType() }
+
+ /** Holds if this declaration has a declaring type, i.e. `getDeclaringType()` has a result. */
+ predicate hasDeclaringType() { exists(this.getDeclaringType()) }
+
+ /**
+ * Gets the class where this member is declared, if it is a member.
+ * For templates, both the template itself and all instantiations of
+ * the template are considered to have the same declaring class.
+ */
+ UserType getDeclaringType() { result.getAMember() = this }
+}
+
+class Variable extends Declaration, @variable {
+ VariableDeclarationEntry getADeclarationEntry() { this = result.getDeclaration() }
+}
+
+class TemplateVariable extends Variable {
+ TemplateVariable() { is_variable_template(this) } // only variables recorded in `is_variable_template`
+
+ Variable getAnInstantiation() { variable_instantiation(result, this) } // a variable recorded as instantiated from this one
+}
+
+class LocalScopeVariable extends Variable, @localscopevariable { } // naming-only counterpart; adds no members of its own
+
+class LocalVariable extends LocalScopeVariable, @localvariable { } // naming-only counterpart; adds no members of its own
+
+/**
+ * A particular declaration or definition of a C/C++ variable.
+ */
+class VariableDeclarationEntry extends @var_decl {
+ string toString() { result = "QualifiedName DeclarationEntry" }
+
+ Variable getDeclaration() { result = this.getVariable() }
+
+ /**
+ * Gets the variable that this entry declares or defines.
+ */
+ Variable getVariable() { var_decls(this, result, _, _, _) }
+
+ predicate isDefinition() { var_def(this) }
+
+ string getName() { result != "" and var_decls(this, _, _, result, _) }
+}
+
+class Parameter extends LocalScopeVariable, @parameter {
+ @functionorblock function;
+ int index;
+
+ Parameter() { params(this, function, index, _) }
+}
+
+class GlobalOrNamespaceVariable extends Variable, @globalvariable { }
+
+// Unlike the usual `EnumConstant`, this one doesn't have a
+// `getDeclaringType()`. This simplifies the recursive computation of type
+// qualifier names since it can assume that any declaration with a
+// `getDeclaringType()` should use that type in its type qualifier name.
+class EnumConstant extends Declaration, @enumconstant {
+ UserType getDeclaringEnum() { enumconstants(this, result, _, _, _, _) }
+}
+
+class Function extends Declaration, @function {
+ predicate isConstructedFrom(Function f) { function_instantiation(this, f) }
+
+ Parameter getParameter(int n) { params(result, this, n, _) }
+}
+
+class TemplateFunction extends Function {
+ TemplateFunction() { is_function_template(this) and function_template_argument(this, _, _) }
+
+ Function getAnInstantiation() {
+ function_instantiation(result, this) and
+ not exists(@fun_decl fd | fun_decls(fd, this, _, _, _) and fun_specialized(fd))
+ }
+}
+
+class UserType extends Declaration, @usertype {
+ predicate isLocal() { enclosingfunction(this, _) }
+
+ /** Gets a member of this class, if it's a class; instantiations of a template member are included. */
+ Declaration getAMember() {
+ exists(Declaration d | member(this, _, d) |
+ result = d or
+ result = d.(TemplateClass).getAnInstantiation() or
+ result = d.(TemplateFunction).getAnInstantiation() or
+ result = d.(TemplateVariable).getAnInstantiation()
+ )
+ }
+}
+
+class ProxyClass extends UserType {
+ ProxyClass() { usertypes(this, _, 9) }
+}
+
+class TemplateParameter extends UserType {
+ TemplateParameter() { usertypes(this, _, 7) or usertypes(this, _, 8) }
+}
+
+class TemplateClass extends UserType {
+ TemplateClass() { usertypes(this, _, 6) }
+
+ UserType getAnInstantiation() {
+ class_instantiation(result, this) and
+ class_template_argument(result, _, _)
+ }
+}
+
+class FriendDecl extends Declaration, @frienddecl {
+ UserType getDeclaringClass() { frienddecls(this, result, _, _) }
+}
+
+private string getUserTypeNameWithArgs(UserType t) { usertypes(t, result, _) }
+
+private string getUserTypeNameWithoutArgs(UserType t) {
+ result = getUserTypeNameWithArgs(t).splitAt("<", 0)
+}
+
+private predicate hasTypeQualifier(Declaration d) {
+ d instanceof EnumConstant
+ or
+ d.hasDeclaringType()
+}
+
+private string getTypeQualifierForMembersWithArgs(UserType t) {
+ result = t.getTypeQualifierWithArgs() + "::" + getUserTypeNameWithArgs(t)
+ or
+ not hasTypeQualifier(t) and
+ result = getUserTypeNameWithArgs(t)
+}
+
+private string getTypeQualifierForMembersWithoutArgs(UserType t) {
+ result = t.getTypeQualifierWithoutArgs() + "::" + getUserTypeNameWithoutArgs(t)
+ or
+ not hasTypeQualifier(t) and
+ result = getUserTypeNameWithoutArgs(t)
+}
+
+// The order of parameters on this predicate is chosen to match the most common
+// use case: finding a declaration that has a specific name. The declaration
+// comes last because it's the output.
+cached
+private predicate declarationHasQualifiedName(
+ string baseName, string typeQualifier, string namespaceQualifier, Declaration d
+) {
+ namespaceQualifier = d.getNamespace().getAQualifierForMembers() and
+ (
+ if hasTypeQualifier(d)
+ then typeQualifier = d.getTypeQualifierWithoutArgs()
+ else typeQualifier = ""
+ ) and
+ (
+ baseName = getUserTypeNameWithoutArgs(d)
+ or
+ // If a declaration isn't a `UserType`, there are two ways it can still
+ // contain `<`:
+ // 1. If it's `operator<` or `operator<<`.
+ // 2. If it's a conversion operator like `operator TemplateClass`.
+ // Perhaps these names ought to be fixed up, but we don't do that
+ // currently.
+ not d instanceof UserType and
+ baseName = d.getName()
+ ) and
+ d.canHaveQualifiedName()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/ResolveClass.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/ResolveClass.qll
new file mode 100644
index 00000000000..42568a5c58d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/internal/ResolveClass.qll
@@ -0,0 +1,135 @@
+import semmle.code.cpp.Type
+
+/** For upgraded databases without mangled name info. */
+pragma[noinline]
+private string getTopLevelClassName(@usertype c) {
+ not mangled_name(_, _) and
+ isClass(c) and
+ usertypes(c, result, _) and
+ not namespacembrs(_, c) and // not in a namespace
+ not member(_, _, c) and // not in some structure
+ not class_instantiation(c, _) // not a template instantiation
+}
+
+/**
+ * For upgraded databases without mangled name info.
+ * Holds if `d` is a unique complete class named `name`.
+ */
+pragma[noinline]
+private predicate existsCompleteWithName(string name, @usertype d) {
+ not mangled_name(_, _) and
+ is_complete(d) and
+ name = getTopLevelClassName(d) and
+ onlyOneCompleteClassExistsWithName(name)
+}
+
+/** For upgraded databases without mangled name info. */
+pragma[noinline]
+private predicate onlyOneCompleteClassExistsWithName(string name) {
+ not mangled_name(_, _) and
+ strictcount(@usertype c | is_complete(c) and getTopLevelClassName(c) = name) = 1
+}
+
+/**
+ * For upgraded databases without mangled name info.
+ * Holds if `c` is an incomplete class named `name`.
+ */
+pragma[noinline]
+private predicate existsIncompleteWithName(string name, @usertype c) {
+ not mangled_name(_, _) and
+ not is_complete(c) and
+ name = getTopLevelClassName(c)
+}
+
+/**
+ * For upgraded databases without mangled name info.
+ * Holds if `c` is an incomplete class, and there exists a unique complete class `d`
+ * with the same name.
+ */
+private predicate oldHasCompleteTwin(@usertype c, @usertype d) {
+ not mangled_name(_, _) and
+ exists(string name |
+ existsIncompleteWithName(name, c) and
+ existsCompleteWithName(name, d)
+ )
+}
+
+pragma[noinline]
+private @mangledname getClassMangledName(@usertype c) {
+ isClass(c) and
+ mangled_name(c, result)
+}
+
+/** Holds if `d` is a unique complete class named `name`. */
+pragma[noinline]
+private predicate existsCompleteWithMangledName(@mangledname name, @usertype d) {
+ is_complete(d) and
+ name = getClassMangledName(d) and
+ onlyOneCompleteClassExistsWithMangledName(name)
+}
+
+pragma[noinline]
+private predicate onlyOneCompleteClassExistsWithMangledName(@mangledname name) {
+ strictcount(@usertype c | is_complete(c) and getClassMangledName(c) = name) = 1
+}
+
+/** Holds if `c` is an incomplete class named `name`. */
+pragma[noinline]
+private predicate existsIncompleteWithMangledName(@mangledname name, @usertype c) {
+ not is_complete(c) and
+ name = getClassMangledName(c)
+}
+
+/**
+ * Holds if `c` is an incomplete class, and there exists a unique complete class `d`
+ * with the same name.
+ */
+private predicate hasCompleteTwin(@usertype c, @usertype d) {
+ exists(@mangledname name |
+ existsIncompleteWithMangledName(name, c) and
+ existsCompleteWithMangledName(name, d)
+ )
+}
+
+import Cached
+
+cached
+private module Cached {
+ /**
+ * Gets the class that `c` resolves to: the unique complete class with the same
+ * name when `c` is incomplete and such a class exists, and `c` itself otherwise.
+ */
+ cached
+ @usertype resolveClass(@usertype c) {
+ not hasCompleteTwin(c, _) and
+ not oldHasCompleteTwin(c, _) and
+ result = c
+ or
+ oldHasCompleteTwin(c, result)
+ or
+ hasCompleteTwin(c, result)
+ }
+
+ /**
+ * Holds if `t` is a struct, class, union, or template.
+ * These are the user types whose kind column in `usertypes` is one of
+ * 1, 2, 3, 6, 10, 11 or 12.
+ */
+ cached
+ predicate isClass(@usertype t) {
+ exists(int kind | usertypes(t, _, kind) |
+ kind = [1, 2, 3, 6, 10, 11, 12]
+ )
+ }
+
+ /**
+ * Holds if `t` is not a class according to `isClass`, or is the result of
+ * `resolveClass` for some class.
+ */
+ cached
+ predicate isType(@type t) {
+ t = resolveClass(_)
+ or
+ not isClass(t)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IR.qll
new file mode 100644
index 00000000000..381adad5e41
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IR.qll
@@ -0,0 +1,47 @@
+/**
+ * Provides classes that describe the Intermediate Representation (IR) of the program.
+ *
+ * The IR is a representation of the semantics of the program, with very little dependence on the
+ * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`,
+ * and `++i` all have the same semantic effect, but appear in the AST as three different types of
+ * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental
+ * operations similar to:
+ *
+ * ```
+ * r1(int*) = VariableAddress[i] // Compute the address of variable `i`
+ * r2(int) = Load &:r1, m0 // Load the value of `i`
+ * r3(int) = Constant[1] // An integer constant with the value `1`
+ * r4(int) = Add r2, r3 // Add `1` to the value of `i`
+ * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i`
+ * ```
+ *
+ * This allows IR-based analysis to focus on the fundamental operations, rather than having to be
+ * concerned with the various ways of expressing those operations in source code.
+ *
+ * The key classes in the IR are:
+ *
+ * - `IRFunction` - Contains the IR for an entire function definition, including all of that
+ * function's `Instruction`s, `IRBlock`s, and `IRVariable`s.
+ * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be
+ * performed, the operands that produce the inputs to that operation, and the type of the result
+ * of the operation. Control flows from an `Instruction` to one of a set of successor
+ * `Instruction`s.
+ * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly
+ * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has
+ * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction`
+ * that produces its value (its "definition").
+ * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is
+ * created for each variable directly accessed by the function. In addition, `IRVariable`s are
+ * created to represent certain temporary storage locations that do not have explicitly declared
+ * variables in the source code, such as the return value of the function.
+ * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a
+ * sequence of instructions such that control flow can only enter the block at the first
+ * instruction, and can only leave the block from the last instruction.
+ * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType`
+ * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all
+ * be represented as the `IRType` `uint4`, a four-byte unsigned integer.
+ */
+
+// Most queries should operate on the aliased SSA IR, so that's what we expose
+// publicly as the "IR".
+import implementation.aliased_ssa.IR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IRConfiguration.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IRConfiguration.qll
new file mode 100644
index 00000000000..b8abef8a547
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IRConfiguration.qll
@@ -0,0 +1,5 @@
+/**
+ * Module used to configure the IR generation process.
+ */
+
+import implementation.IRConfiguration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IRConsistency.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IRConsistency.ql
new file mode 100644
index 00000000000..1a1c2e369cc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/IRConsistency.ql
@@ -0,0 +1,8 @@
+/**
+ * @name IR Consistency Check
+ * @description Performs consistency checks on the Intermediate Representation. This query should have no results.
+ * @kind table
+ * @id cpp/ir-consistency-check
+ */
+
+import implementation.aliased_ssa.IRConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/PrintIR.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/PrintIR.ql
new file mode 100644
index 00000000000..1f6f0545840
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/PrintIR.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Print IR
+ * @description Outputs a representation of the IR graph
+ * @id cpp/print-ir
+ * @kind graph
+ */
+
+import implementation.aliased_ssa.PrintIR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/PrintIR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/PrintIR.qll
new file mode 100644
index 00000000000..c4ebf2f1eba
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/PrintIR.qll
@@ -0,0 +1,11 @@
+/**
+ * Outputs a representation of the IR as a control flow graph.
+ *
+ * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small
+ * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most
+ * uses, however, it is better to write a query that imports `PrintIR.qll`, extends
+ * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to
+ * dump.
+ */
+
+import implementation.aliased_ssa.PrintIR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/ValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/ValueNumbering.qll
new file mode 100644
index 00000000000..bd02afc58fb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/ValueNumbering.qll
@@ -0,0 +1 @@
+import implementation.aliased_ssa.gvn.ValueNumbering
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow.qll
new file mode 100644
index 00000000000..2aaa10e0331
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow.qll
@@ -0,0 +1,26 @@
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis: deciding whether data can flow from a _source_ to a
+ * _sink_. This library differs from the one in `semmle.code.cpp.dataflow` in that
+ * this library uses the IR (Intermediate Representation) library, which provides
+ * a more precise semantic representation of the program, whereas the other dataflow
+ * library uses the more syntax-oriented ASTs. This library should provide more accurate
+ * results than the AST-based library in most scenarios.
+ *
+ * Unless configured otherwise, _flow_ means that the exact value of
+ * the source may reach the sink. We do not track flow across pointer
+ * dereferences or array indexing.
+ *
+ * To use global (interprocedural) data flow, extend the class
+ * `DataFlow::Configuration` as documented on that class. To use local
+ * (intraprocedural) data flow between expressions, call
+ * `DataFlow::localExprFlow`. For more general cases of local data flow, call
+ * `DataFlow::localFlow` or `DataFlow::localFlowStep` with arguments of type
+ * `DataFlow::Node`.
+ */
+
+import cpp
+
+module DataFlow {
+ import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow2.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow2.qll
new file mode 100644
index 00000000000..95eb979192d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow2.qll
@@ -0,0 +1,16 @@
+/**
+ * Provides a `DataFlow2` module, which is a copy of the `DataFlow` module. Use
+ * this module when data-flow configurations must depend on each other. Two
+ * classes extending `DataFlow::Configuration` should never depend on each
+ * other, but one of them should instead depend on a
+ * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
+ * `DataFlow4::Configuration`.
+ *
+ * See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
+ */
+
+import cpp
+
+module DataFlow2 {
+ import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl2
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow3.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow3.qll
new file mode 100644
index 00000000000..42529f78e5b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow3.qll
@@ -0,0 +1,16 @@
+/**
+ * Provides a `DataFlow3` module, which is a copy of the `DataFlow` module. Use
+ * this module when data-flow configurations must depend on each other. Two
+ * classes extending `DataFlow::Configuration` should never depend on each
+ * other, but one of them should instead depend on a
+ * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
+ * `DataFlow4::Configuration`.
+ *
+ * See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
+ */
+
+import cpp
+
+module DataFlow3 {
+ import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl3
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow4.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow4.qll
new file mode 100644
index 00000000000..6cd49e39e71
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow4.qll
@@ -0,0 +1,16 @@
+/**
+ * Provides a `DataFlow4` module, which is a copy of the `DataFlow` module. Use
+ * this module when data-flow configurations must depend on each other. Two
+ * classes extending `DataFlow::Configuration` should never depend on each
+ * other, but one of them should instead depend on a
+ * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
+ * `DataFlow4::Configuration`.
+ *
+ * See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
+ */
+
+import cpp
+
+module DataFlow4 {
+ import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl4
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
new file mode 100644
index 00000000000..ece55d181bf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
@@ -0,0 +1,618 @@
+import cpp
+import semmle.code.cpp.security.Security
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
+private import semmle.code.cpp.ir.dataflow.DataFlow3
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.ResolveCall
+private import semmle.code.cpp.controlflow.IRGuards
+private import semmle.code.cpp.models.interfaces.Taint
+private import semmle.code.cpp.models.interfaces.DataFlow
+private import semmle.code.cpp.ir.dataflow.TaintTracking
+private import semmle.code.cpp.ir.dataflow.TaintTracking2
+private import semmle.code.cpp.ir.dataflow.TaintTracking3
+private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
+
+/**
+ * Holds if `instr` is predictable, meaning that an external user can predict
+ * its value: a constant, a string constant, or a unary instruction whose
+ * operand is itself predictable.
+ */
+private predicate predictableInstruction(Instruction instr) {
+ // A unary instruction, such as a conversion on a string literal, inherits predictability.
+ predictableInstruction(instr.(UnaryInstruction).getUnary())
+ or
+ instr instanceof StringConstantInstruction
+ or
+ instr instanceof ConstantInstruction
+}
+
+/**
+ * Functions that we should only allow taint to flow through (to the return
+ * value) if all but the source argument are 'predictable'. This is done to
+ * emulate the old security library's implementation rather than due to any
+ * strong belief that this is the right approach.
+ *
+ * Note that the list itself is not very principled; it consists of all the
+ * functions listed in the old security library's [default] `isPureFunction`
+ * that have more than one argument, but are not in the old taint tracking
+ * library's `returnArgument` predicate.
+ */
+predicate predictableOnlyFlow(string name) {
+ name =
+ [
+ "strcasestr", "strchnul", "strchr", "strchrnul", "strcmp", "strcspn", "strncmp", "strndup",
+ "strnlen", "strrchr", "strspn", "strstr", "strtod", "strtof", "strtol", "strtoll", "strtoq",
+ "strtoul"
+ ]
+}
+
+private DataFlow::Node getNodeForSource(Expr source) {
+ isUserInput(source, _) and
+ result = getNodeForExpr(source)
+}
+
+private DataFlow::Node getNodeForExpr(Expr node) {
+ result = DataFlow::exprNode(node)
+ or
+ // Some of the sources in `isUserInput` are intended to match the value of
+ // an expression, while others (those modeled below) are intended to match
+ // the taint that propagates out of an argument, like the `char *` argument
+ // to `gets`. It's impossible here to tell which is which, but the "access
+ // to argv" source is definitely not intended to match an output argument,
+ // and it causes false positives if we let it.
+ //
+ // This case goes together with the similar (but not identical) rule in
+ // `nodeIsBarrierIn`.
+ result = DataFlow::definitionByReferenceNodeFromArgument(node) and
+ not argv(node.(VariableAccess).getTarget())
+}
+
+private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
+ DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" }
+
+ override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
+
+ override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
+
+ override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
+
+ override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
+}
+
+private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
+ ToGlobalVarTaintTrackingCfg() { this = "GlobalVarTaintTrackingCfg" }
+
+ override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
+
+ override predicate isSink(DataFlow::Node sink) {
+ sink.asVariable() instanceof GlobalOrNamespaceVariable
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
+ writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
+ or
+ readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
+ }
+
+ override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
+
+ override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
+}
+
+private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuration {
+ FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" }
+
+ override predicate isSource(DataFlow::Node source) {
+ // This set of sources should be reasonably small, which is good for
+ // performance since the set of sinks is very large.
+ exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source))
+ }
+
+ override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
+ // Additional step for flow out of variables. There is no flow _into_
+ // variables in this configuration, so this step only serves to take flow
+ // out of a variable that's a source.
+ readsVariable(n2.asInstruction(), n1.asVariable())
+ }
+
+ override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
+
+ override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
+}
+
+private predicate readsVariable(LoadInstruction load, Variable var) {
+ load.getSourceAddress().(VariableAddressInstruction).getASTVariable() = var
+}
+
+private predicate writesVariable(StoreInstruction store, Variable var) {
+ store.getDestinationAddress().(VariableAddressInstruction).getASTVariable() = var
+}
+
+/**
+ * Holds if `var` has any kind of upper-bound check anywhere in the program. This is
+ * biased towards being inclusive because there are a lot of valid ways of doing an
+ * upper-bound check if we don't consider where it occurs, for example:
+ * ```
+ * if (x < 10) { sink(x); }
+ *
+ * if (10 > y) { sink(y); }
+ *
+ * if (z > 10) { z = 10; }
+ * sink(z);
+ * ```
+ */
+// TODO: This coarse overapproximation, ported from the old taint tracking
+// library, could be replaced with an actual semantic check that a particular
+// variable _access_ is guarded by an upper-bound check. We probably don't want
+// to do this right away since it could expose a lot of FPs that were
+// previously suppressed by this predicate by coincidence.
+private predicate hasUpperBoundsCheck(Variable var) {
+ exists(RelationalOperation oper, VariableAccess access |
+ oper.getAnOperand() = access and
+ access.getTarget() = var and
+ // Comparing to 0 is not an upper bound check
+ not oper.getAnOperand().getValue() = "0"
+ )
+}
+
+private predicate nodeIsBarrierEqualityCandidate(
+ DataFlow::Node node, Operand access, Variable checkedVar
+) {
+ readsVariable(node.asInstruction(), checkedVar) and
+ any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true)
+}
+
+cached
+private module Cached {
+ cached
+ predicate nodeIsBarrier(DataFlow::Node node) {
+ exists(Variable checkedVar |
+ readsVariable(node.asInstruction(), checkedVar) and
+ hasUpperBoundsCheck(checkedVar)
+ )
+ or
+ exists(Variable checkedVar, Operand access |
+ /*
+ * This node is guarded by a condition that forces the accessed variable
+ * to equal something else. For example:
+ * ```
+ * x = taintsource()
+ * if (x == 10) {
+ * taintsink(x); // not considered tainted
+ * }
+ * ```
+ */
+
+ nodeIsBarrierEqualityCandidate(node, access, checkedVar) and
+ readsVariable(access.getDef(), checkedVar)
+ )
+ }
+
+ cached
+ predicate nodeIsBarrierIn(DataFlow::Node node) {
+ // don't use dataflow into taint sources, as this leads to duplicate results.
+ exists(Expr source | isUserInput(source, _) |
+ node = DataFlow::exprNode(source)
+ or
+ // This case goes together with the similar (but not identical) rule in
+ // `getNodeForExpr`, which `getNodeForSource` delegates to.
+ node = DataFlow::definitionByReferenceNodeFromArgument(source)
+ )
+ or
+ // don't use dataflow into binary instructions if both operands are unpredictable
+ exists(BinaryInstruction iTo |
+ iTo = node.asInstruction() and
+ not predictableInstruction(iTo.getLeft()) and
+ not predictableInstruction(iTo.getRight()) and
+ // propagate taint from either the pointer or the offset, regardless of predictability
+ not iTo instanceof PointerArithmeticInstruction
+ )
+ or
+ // don't use dataflow through calls to pure functions if two or more operands
+ // are unpredictable
+ exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
+ iTo = node.asInstruction() and
+ isPureFunction(iTo.getStaticCallTarget().getName()) and
+ iFrom1 = iTo.getAnArgument() and
+ iFrom2 = iTo.getAnArgument() and
+ not predictableInstruction(iFrom1) and
+ not predictableInstruction(iFrom2) and
+ iFrom1 != iFrom2
+ )
+ }
+
+ cached
+ Element adjustedSink(DataFlow::Node sink) {
+ // TODO: is it more appropriate to use asConvertedExpr here and avoid
+ // `getConversion*`? Or will that cause us to miss some cases where there's
+ // flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
+ // pretend there was flow to the converted `Expr` for the sake of
+ // compatibility.
+ sink.asExpr().getConversion*() = result
+ or
+ // For compatibility, send flow from arguments to parameters, even for
+ // functions with no body.
+ exists(FunctionCall call, int i |
+ sink.asExpr() = call.getArgument(i) and
+ result = resolveCall(call).getParameter(i)
+ )
+ or
+ // For compatibility, send flow into a `Variable` if there is flow to any
+ // Load or Store of that variable.
+ exists(CopyInstruction copy |
+ copy.getSourceValue() = sink.asInstruction() and
+ (
+ readsVariable(copy, result) or
+ writesVariable(copy, result)
+ ) and
+ not hasUpperBoundsCheck(result)
+ )
+ or
+ // For compatibility, send flow into a `NotExpr` even if it's part of a
+ // short-circuiting condition and thus might get skipped.
+ result.(NotExpr).getOperand() = sink.asExpr()
+ or
+ // Taint postfix and prefix crement operations when their operand is tainted.
+ result.(CrementOperation).getAnOperand() = sink.asExpr()
+ or
+ // Taint `e1 += e2`, `e1 &= e2` and friends when `e1` or `e2` is tainted.
+ result.(AssignOperation).getAnOperand() = sink.asExpr()
+ or
+ result =
+ sink.asOperand()
+ .(SideEffectOperand)
+ .getUse()
+ .(ReadSideEffectInstruction)
+ .getArgumentDef()
+ .getUnconvertedResultExpression()
+ }
+
+ /**
+ * Step to return value of a modeled function when an input taints the
+ * dereference of the return value.
+ */
+ cached
+ predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
+ exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
+ n1.asOperand() = callInput(call, modelIn) and
+ (
+ func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
+ or
+ func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
+ ) and
+ call.getStaticCallTarget() = func and
+ modelOut.isReturnValueDeref() and
+ call = n2.asInstruction()
+ )
+ }
+}
+
+private import Cached
+
+/**
+ * Holds if taint may flow from the user-input expression `source` to `tainted`.
+ *
+ * An element counts as tainted when it is user input itself, or is derived
+ * from user input in such a way that users can probably control the exact
+ * result of the computation.
+ *
+ * Flow through global variables is not covered here; use
+ * `taintedIncludingGlobalVars` when that is needed.
+ */
+cached
+predicate tainted(Expr source, Element tainted) {
+ exists(DataFlow::Node sink |
+ tainted = adjustedSink(sink) and
+ any(DefaultTaintTrackingCfg cfg).hasFlow(getNodeForSource(source), sink)
+ )
+}
+
+/**
+ * Holds if `tainted` may contain taint from `source`, where the taint passed
+ * through a global variable named `globalVar`.
+ *
+ * A tainted expression is either directly user input, or is
+ * computed from user input in a way that users can probably
+ * control the exact output of the computation.
+ *
+ * This version gives the same results as tainted but also includes
+ * data flow through global variables.
+ *
+ * The parameter `globalVar` is the qualified name of the last global variable
+ * used to move the value from source to tainted. If the taint did not pass
+ * through a global variable, then `globalVar = ""`.
+ */
+cached
+predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
+ tainted(source, tainted) and
+ globalVar = ""
+ or
+ exists(
+ ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg,
+ DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink
+ |
+ global = variableNode.getVariable() and
+ toCfg.hasFlow(getNodeForSource(source), variableNode) and
+ fromCfg.hasFlow(variableNode, sink) and
+ tainted = adjustedSink(sink) and
+ global = globalVarFromId(globalVar)
+ )
+}
+
+/**
+ * Gets the global variable whose qualified name is `id`. Use this predicate
+ * together with `taintedIncludingGlobalVars`. Example:
+ *
+ * ```
+ * exists(string varName |
+ * taintedIncludingGlobalVars(source, tainted, varName) and
+ * var = globalVarFromId(varName)
+ * )
+ * ```
+ */
+GlobalOrNamespaceVariable globalVarFromId(string id) { id = result.getQualifiedName() }
+
+/**
+ * Provides definitions for augmenting source/sink pairs with data-flow paths
+ * between them. From a `@kind path-problem` query, import this module in the
+ * global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath`
+ * in place of `tainted`.
+ *
+ * Importing this module will also import the query predicates that contain the
+ * taint paths.
+ */
+module TaintedWithPath {
+ private newtype TSingleton = MkSingleton()
+
+ /**
+ * A taint-tracking configuration that matches sources and sinks in the same
+ * way as the `tainted` predicate.
+ *
+ * Override `isSink` and `taintThroughGlobals` as needed, but do not provide
+ * a characteristic predicate.
+ */
+ class TaintTrackingConfiguration extends TSingleton {
+ /** Holds if `source` is a source. Default: every `Expr` with a `getNodeForSource` node. */
+ predicate isSource(Expr source) { exists(getNodeForSource(source)) }
+
+ /** Holds if `e` is a sink. Abstract: each configuration must define its own sinks. */
+ abstract predicate isSink(Element e);
+
+ /** Holds if `e` is a barrier. Default: `nodeIsBarrier` holds for `getNodeForExpr(e)`. */
+ predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) }
+
+ /**
+ * Holds if taint may flow through global variables. Off by default (`none()`);
+ * override this predicate to `any()` to enable it.
+ */
+ predicate taintThroughGlobals() { none() }
+
+ /** Gets a textual representation of this element. */
+ string toString() { result = "TaintTrackingConfiguration" }
+ }
+
+ private class AdjustedConfiguration extends TaintTracking3::Configuration {
+ AdjustedConfiguration() { this = "AdjustedConfiguration" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(Expr sourceExpr |
+ source = getNodeForExpr(sourceExpr) and any(TaintTrackingConfiguration c).isSource(sourceExpr)
+ )
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ any(TaintTrackingConfiguration c).isSink(adjustedSink(sink))
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
+ additionalTaintStep(n1, n2)
+ or
+ // Hops into and out of global variables, enabled only if a configuration opts in
+ exists(TaintTrackingConfiguration ttCfg | ttCfg.taintThroughGlobals() |
+ readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
+ or
+ writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
+ )
+ }
+
+ override predicate isSanitizer(DataFlow::Node node) {
+ exists(Expr barrier | node = getNodeForExpr(barrier) and any(TaintTrackingConfiguration c).isBarrier(barrier))
+ }
+
+ override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
+ }
+
+ /*
+ * A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the
+ * inverse of) `adjustedSink`. For example, an `Expr` maps to all its
+ * conversions, and a `Variable` maps to all loads and stores from it. Because
+ * the path node is part of the tuple that constitutes the alert, this leads
+ * to duplicate alerts.
+ *
+ * To avoid showing duplicates, we edit the graph to replace the final node
+ * coming from the data-flow library with a node that matches exactly the
+ * `Element` sink that's requested.
+ *
+ * The same is done for sources.
+ */
+
+ private newtype TPathNode =
+ TWrapPathNode(DataFlow3::PathNode n) or
+ // Sources and sinks share one constructor; that keeps the case where an
+ // element is both a source and a sink simple.
+ TEndpointPathNode(Element e) {
+ exists(AdjustedConfiguration flowCfg, DataFlow3::Node src, DataFlow3::Node snk |
+ flowCfg.hasFlow(src, snk)
+ |
+ any(TaintTrackingConfiguration c).isSource(e) and
+ src = getNodeForExpr(e)
+ or
+ any(TaintTrackingConfiguration c).isSink(e) and
+ e = adjustedSink(snk)
+ )
+ }
+
+ /** An opaque type used for the nodes of a data-flow path. */
+ class PathNode extends TPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { none() } // no result at this level; subclasses supply it by overriding
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ none() // no result at this level; subclasses supply it by overriding
+ }
+ }
+
+ private class WrapPathNode extends PathNode, TWrapPathNode {
+ DataFlow3::PathNode inner() { this = TWrapPathNode(result) } // the wrapped library path node
+
+ override string toString() { result = this.inner().toString() } // delegates to the wrapped node
+
+ override predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) // delegates too
+ }
+ }
+
+ private class EndpointPathNode extends PathNode, TEndpointPathNode {
+ /** Gets the source or sink element this node stands for (sinks need not be expressions). */
+ Element inner() { this = TEndpointPathNode(result) }
+
+ override string toString() { result = this.inner().toString() }
+
+ override predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ // Report the location of the underlying element.
+ this.inner().getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+ }
+
+ /** An endpoint node whose element is a source; the same element may be a sink too. */
+ private class InitialPathNode extends EndpointPathNode {
+ InitialPathNode() { any(TaintTrackingConfiguration c).isSource(this.inner()) }
+ }
+
+ /** An endpoint node whose element is a sink; the same element may be a source too. */
+ private class FinalPathNode extends EndpointPathNode {
+ FinalPathNode() { any(TaintTrackingConfiguration c).isSink(this.inner()) }
+ }
+
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PathNode a, PathNode b) {
+ DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner())
+ or
+ // The final library node is _replaced_ by the requested sink element rather
+ // than followed by an extra, trivial-looking step.
+ exists(WrapPathNode libSink |
+ b.(FinalPathNode).inner() = adjustedSink(libSink.inner().getNode()) and
+ DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), libSink.inner())
+ )
+ or
+ // The first library node is replaced in the same way.
+ exists(WrapPathNode libSource |
+ libSource.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and
+ DataFlow3::PathGraph::edges(libSource.inner(), b.(WrapPathNode).inner())
+ )
+ or
+ // A one-step path from a source directly to a sink needs both ends
+ // replaced at once.
+ exists(WrapPathNode libSource, WrapPathNode libSink |
+ libSource.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and
+ b.(FinalPathNode).inner() = adjustedSink(libSink.inner().getNode()) and
+ DataFlow3::PathGraph::edges(libSource.inner(), libSink.inner())
+ )
+ }
+
+ /**
+ * Holds if there is flow from `arg` to `out` across a call that can be summarized by the flow
+ * from `par` to `ret` within it, in the graph of data flow path explanations.
+ */
+ query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
+ DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
+ ret.(WrapPathNode).inner(), out.(WrapPathNode).inner())
+ or
+ // The final library node is _replaced_ by the requested sink element rather
+ // than followed by an extra, trivial-looking step.
+ exists(WrapPathNode libSink |
+ out.(FinalPathNode).inner() = adjustedSink(libSink.inner().getNode()) and
+ DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
+ ret.(WrapPathNode).inner(), libSink.inner())
+ )
+ or
+ // The first library node is replaced in the same way.
+ exists(WrapPathNode libSource |
+ libSource.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and
+ DataFlow3::PathGraph::subpaths(libSource.inner(), par.(WrapPathNode).inner(),
+ ret.(WrapPathNode).inner(), out.(WrapPathNode).inner())
+ )
+ or
+ // A summary whose `arg` is a source and whose `out` is a sink needs both
+ // ends replaced at once.
+ exists(WrapPathNode libSource, WrapPathNode libSink |
+ libSource.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and
+ out.(FinalPathNode).inner() = adjustedSink(libSink.inner().getNode()) and
+ DataFlow3::PathGraph::subpaths(libSource.inner(), par.(WrapPathNode).inner(),
+ ret.(WrapPathNode).inner(), libSink.inner())
+ )
+ }
+
+ /** Holds if `n` is a node of the path-explanation graph, labelled `val` under `key`. */
+ query predicate nodes(PathNode n, string key, string val) {
+ val = n.toString() and key = "semmle.label"
+ }
+
+ /**
+ * Holds if taint may reach `tainted` from `source`. `sourceNode` and
+ * `sinkNode` are the `PathNode`s standing for those two endpoints, for use
+ * in a query that shows path explanations. Extend
+ * `TaintTrackingConfiguration` to use this predicate.
+ *
+ * A tainted expression is either directly user input, or is computed from
+ * user input in a way that users can probably control the exact output of
+ * the computation.
+ */
+ predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
+ exists(AdjustedConfiguration flowCfg, DataFlow3::Node libSource, DataFlow3::Node libSink |
+ flowCfg.hasFlow(libSource, libSink) and
+ libSource = getNodeForExpr(source) and
+ adjustedSink(libSink) = tainted and
+ sourceNode.(InitialPathNode).inner() = source and
+ sinkNode.(FinalPathNode).inner() = tainted
+ )
+ }
+
+ private predicate isGlobalVariablePathNode(WrapPathNode n) {
+ exists(GlobalOrNamespaceVariable gv | gv = n.inner().getNode().asVariable())
+ }
+
+ private predicate edgesWithoutGlobals(PathNode a, PathNode b) {
+ // An edge qualifies only if neither endpoint is a global-variable node.
+ edges(a, b) and
+ not (isGlobalVariablePathNode(a) or isGlobalVariablePathNode(b))
+ }
+
+ /**
+ * Holds if some taint source reaches `tainted` along a path on which no
+ * node is a global variable.
+ */
+ predicate taintedWithoutGlobals(Element tainted) {
+ exists(AdjustedConfiguration flowCfg, WrapPathNode first, FinalPathNode last |
+ flowCfg.isSource(first.inner().getNode()) and
+ last.inner() = tainted and
+ edgesWithoutGlobals+(first, last)
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/ResolveCall.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/ResolveCall.qll
new file mode 100644
index 00000000000..f25386d3ba8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/ResolveCall.qll
@@ -0,0 +1,23 @@
+/**
+ * Provides a predicate for non-contextual virtual dispatch and function
+ * pointer resolution.
+ */
+
+import cpp
+private import semmle.code.cpp.ir.ValueNumbering
+private import internal.DataFlowDispatch
+private import semmle.code.cpp.ir.IR
+
+/**
+ * Gets a function that `call` may invoke.
+ *
+ * The targets are those that `viableCallable` reports for a `CallInstruction`
+ * whose AST is `call`; for calls through function pointers (`ExprCall`) and
+ * calls to virtual member functions this involves simple data flow analysis.
+ */
+Function resolveCall(Call call) {
+ exists(CallInstruction instr |
+ result = viableCallable(instr) and
+ instr.getAST() = call
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking.qll
new file mode 100644
index 00000000000..c95fcd3f574
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking.qll
@@ -0,0 +1,29 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * We define _taint propagation_ informally to mean that a substantial part of
+ * the information from the source is preserved at the sink. For example, taint
+ * propagates from `x` to `x + 100`, but it does not propagate from `x` to `x >
+ * 100` since we consider a single bit of information to be too little.
+ *
+ * To use global (interprocedural) taint tracking, extend the class
+ * `TaintTracking::Configuration` as documented on that class. To use local
+ * (intraprocedural) taint tracking between expressions, call
+ * `TaintTracking::localExprTaint`. For more general cases of local taint
+ * tracking, call `TaintTracking::localTaint` or
+ * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
+ */
+
+import semmle.code.cpp.ir.dataflow.DataFlow
+import semmle.code.cpp.ir.dataflow.DataFlow2
+
+module TaintTracking {
+ import semmle.code.cpp.ir.dataflow.internal.tainttracking1.TaintTrackingImpl
+ private import semmle.code.cpp.ir.dataflow.TaintTracking2
+
+ /**
+ * DEPRECATED: Use `TaintTracking2::Configuration` instead.
+ */
+ deprecated class Configuration2 = TaintTracking2::Configuration;
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking2.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking2.qll
new file mode 100644
index 00000000000..3ef03a3bd2c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking2.qll
@@ -0,0 +1,15 @@
+/**
+ * Provides a `TaintTracking2` module, which is a copy of the `TaintTracking`
+ * module. Use this module when data-flow configurations or taint-tracking
+ * configurations must depend on each other. Two classes extending
+ * `DataFlow::Configuration` should never depend on each other, but one of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The
+ * `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and
+ * `TaintTracking2::Configuration` extends `DataFlow2::Configuration`.
+ *
+ * See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation.
+ */
+module TaintTracking2 {
+ import semmle.code.cpp.ir.dataflow.internal.tainttracking2.TaintTrackingImpl
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking3.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking3.qll
new file mode 100644
index 00000000000..98e1caebf38
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/TaintTracking3.qll
@@ -0,0 +1,15 @@
+/**
+ * Provides a `TaintTracking3` module, which is a copy of the `TaintTracking`
+ * module. Use this module when data-flow configurations or taint-tracking
+ * configurations must depend on each other. Two classes extending
+ * `DataFlow::Configuration` should never depend on each other, but one of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The
+ * `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and
+ * `TaintTracking2::Configuration` extends `DataFlow2::Configuration`.
+ *
+ * See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation.
+ */
+module TaintTracking3 {
+ import semmle.code.cpp.ir.dataflow.internal.tainttracking3.TaintTrackingImpl
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll
new file mode 100644
index 00000000000..4ebd8cbf758
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll
@@ -0,0 +1,265 @@
+private import cpp
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
+private import DataFlowImplCommon as DataFlowImplCommon
+
+/**
+ * Gets a function that might be called by `call`.
+ */
+cached
+Function viableCallable(CallInstruction call) {
+ DataFlowImplCommon::forceCachingInSameStage() and // NOTE(review): presumably only ties this predicate to the shared cache stage - confirm
+ result = call.getStaticCallTarget() // the statically known target
+ or
+ // If the target of the call does not have a body in the snapshot, it might
+ // be because the target is just a header declaration, and the real target
+ // will be determined at run time when the caller and callee are linked
+ // together by the operating system's dynamic linker. In case a _unique_
+ // function with the right signature is present in the database, we return
+ // that as a potential callee.
+ exists(string qualifiedName, int nparams |
+ callSignatureWithoutBody(qualifiedName, nparams, call) and
+ functionSignatureWithBody(qualifiedName, nparams, result) and
+ strictcount(Function other | functionSignatureWithBody(qualifiedName, nparams, other)) = 1 // unique match only
+ )
+ or
+ // Virtual dispatch
+ result = call.(VirtualDispatch::DataSensitiveCall).resolve()
+}
+
+/**
+ * Provides virtual dispatch support compatible with the original
+ * implementation of `semmle.code.cpp.security.TaintTracking`.
+ */
+private module VirtualDispatch {
+ /** A call that may dispatch differently depending on the qualifier value. */
+ abstract class DataSensitiveCall extends DataFlowCall {
+ /**
+ * Gets the node whose value determines the target of this call. This node
+ * could be the qualifier of a virtual dispatch or the function-pointer
+ * expression in a call to a function pointer. What they have in common is
+ * that we need to find out which data flows there, and then it's up to the
+ * `resolve` predicate to stitch that information together and resolve the
+ * call.
+ */
+ abstract DataFlow::Node getDispatchValue();
+
+ /** Gets a candidate target for this call. */
+ abstract Function resolve();
+
+ /**
+ * Holds if `src` can flow to the dispatch value of this call.
+ *
+ * Searches backwards from `getDispatchValue()` to `src`. The `allowFromArg`
+ * parameter is true when the search is allowed to continue backwards into
+ * a parameter; non-recursive callers should pass `_` for `allowFromArg`.
+ */
+ predicate flowsFrom(DataFlow::Node src, boolean allowFromArg) {
+ src = this.getDispatchValue() and allowFromArg = true // base case: the dispatch value itself
+ or
+ exists(DataFlow::Node other, boolean allowOtherFromArg |
+ this.flowsFrom(other, allowOtherFromArg)
+ |
+ // Call argument
+ exists(DataFlowCall call, int i |
+ other.(DataFlow::ParameterNode).isParameterOf(call.getStaticCallTarget(), i) and
+ src.(ArgumentNode).argumentOf(call, i)
+ ) and
+ allowOtherFromArg = true and // stepping out to an argument requires that `other` still permits it
+ allowFromArg = true
+ or
+ // Call return
+ exists(DataFlowCall call, ReturnKind returnKind |
+ other = getAnOutNode(call, returnKind) and
+ returnNodeWithKindAndEnclosingCallable(src, returnKind, call.getStaticCallTarget())
+ ) and
+ allowFromArg = false // once inside a callee via its return, do not leave through its parameters
+ or
+ // Local flow
+ DataFlow::localFlowStep(src, other) and
+ allowFromArg = allowOtherFromArg
+ or
+ // Flow from global variable to load.
+ exists(LoadInstruction load, GlobalOrNamespaceVariable var |
+ var = src.asVariable() and
+ other.asInstruction() = load and
+ addressOfGlobal(load.getSourceAddress(), var) and
+ // The `allowFromArg` concept doesn't play a role when `src` is a
+ // global variable, so we just set it to a single arbitrary value for
+ // performance.
+ allowFromArg = true
+ )
+ or
+ // Flow from store to global variable.
+ exists(StoreInstruction store, GlobalOrNamespaceVariable var |
+ var = other.asVariable() and
+ store = src.asInstruction() and
+ storeIntoGlobal(store, var) and
+ // Setting `allowFromArg` to `true` like in the base case means we
+ // treat a store to a global variable like the dispatch itself: flow
+ // may come from anywhere.
+ allowFromArg = true
+ )
+ )
+ }
+ }
+
+ pragma[noinline]
+ private predicate storeIntoGlobal(StoreInstruction store, GlobalOrNamespaceVariable var) {
+ addressOfGlobal(store.getDestinationAddress(), var) // the store's destination is the address of `var`
+ }
+
+ /** Holds if the instruction `addressInstr` computes the address of the global `var`. */
+ private predicate addressOfGlobal(Instruction addressInstr, GlobalOrNamespaceVariable var) {
+ // Address of a field of a global union: attributed to the union variable
+ exists(FieldAddressInstruction fieldAddr |
+ addressInstr = fieldAddr and
+ fieldAddr.getField().getDeclaringType() instanceof Union and
+ var = fieldAddr.getObjectAddress().(VariableAddressInstruction).getASTVariable()
+ )
+ or
+ // Plain address of the global variable itself
+ var = addressInstr.(VariableAddressInstruction).getASTVariable()
+ }
+
+ /**
+ * Holds if `node` is a `ReturnNode` of kind `kind` inside `callable`.
+ *
+ * Exists as a separate predicate to fix a join ordering issue in `flowsFrom`.
+ */
+ private predicate returnNodeWithKindAndEnclosingCallable(
+ ReturnNode node, ReturnKind kind, DataFlowCallable callable
+ ) {
+ callable = node.getEnclosingCallable() and
+ kind = node.getKind()
+ }
+
+ /** Call through a function pointer. */
+ private class DataSensitiveExprCall extends DataSensitiveCall {
+ DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) } // i.e. no statically known target
+
+ override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getCallTarget() }
+
+ override Function resolve() {
+ exists(FunctionInstruction fi |
+ this.flowsFrom(DataFlow::instructionNode(fi), _) and
+ result = fi.getFunctionSymbol()
+ ) and
+ (
+ // NOTE(review): the `<=`/`>=` pair is logically `=`; presumably written this way on purpose - confirm before simplifying
+ this.getNumberOfArguments() <= result.getEffectiveNumberOfParameters() and
+ this.getNumberOfArguments() >= result.getEffectiveNumberOfParameters()
+ or
+ result.isVarargs() // varargs targets are accepted regardless of argument count
+ )
+ }
+ }
+
+ /** Call to a virtual function that has at least one overriding function. */
+ private class DataSensitiveOverriddenFunctionCall extends DataSensitiveCall {
+ DataSensitiveOverriddenFunctionCall() {
+ exists(this.getStaticCallTarget().(VirtualFunction).getAnOverridingFunction())
+ }
+
+ override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getThisArgument() }
+
+ override MemberFunction resolve() {
+ exists(Class overridingClass |
+ this.overrideMayAffectCall(overridingClass, result) and
+ this.hasFlowFromCastFrom(overridingClass)
+ )
+ }
+
+ /**
+ * Holds if `this` is a virtual function call whose static target is
+ * overridden (directly or transitively) by `overridingFunction`, which is
+ * declared in `overridingClass`.
+ */
+ pragma[noinline]
+ private predicate overrideMayAffectCall(Class overridingClass, MemberFunction overridingFunction) {
+ overridingFunction.getAnOverriddenFunction+() = this.getStaticCallTarget().(VirtualFunction) and
+ overridingFunction.getDeclaringType() = overridingClass
+ }
+
+ /**
+ * Holds if the qualifier of `this` has flow from an upcast from
+ * `derivedClass`.
+ */
+ pragma[noinline]
+ private predicate hasFlowFromCastFrom(Class derivedClass) {
+ exists(ConvertToBaseInstruction toBase |
+ this.flowsFrom(DataFlow::instructionNode(toBase), _) and
+ derivedClass = toBase.getDerivedClass()
+ )
+ }
+ }
+}
+
+/**
+ * Holds if `f` has a body and, per `functionSignature`, is named
+ * `qualifiedName` and takes `nparams` parameters.
+ */
+private predicate functionSignatureWithBody(string qualifiedName, int nparams, Function f) {
+ exists(f.getBlock()) and
+ functionSignature(f, qualifiedName, nparams)
+}
+
+/**
+ * Holds if `call` statically targets a function _with no definition_ that,
+ * per `functionSignature`, is named `qualifiedName` and takes `nparams` parameters.
+ */
+pragma[noinline]
+private predicate callSignatureWithoutBody(string qualifiedName, int nparams, CallInstruction call) {
+ exists(Function callee |
+ functionSignature(callee, qualifiedName, nparams) and
+ not exists(callee.getBlock()) and
+ callee = call.getStaticCallTarget()
+ )
+}
+
+/**
+ * Holds if the non-static function `f` is named `qualifiedName` and declares
+ * `nparams` parameters. This approximates a signature well enough to match
+ * functions that might be the same across link targets.
+ */
+private predicate functionSignature(Function f, string qualifiedName, int nparams) {
+ not f.isStatic() and
+ f.getQualifiedName() = qualifiedName and
+ f.getNumberOfParameters() = nparams
+}
+
+/**
+ * Holds if knowing the call context might narrow down the set of viable
+ * implementations that `call`, which occurs in `f`, can invoke.
+ */
+predicate mayBenefitFromCallContext(CallInstruction call, Function f) {
+ exists(int arg | mayBenefitFromCallContext(call, f, arg))
+}
+
+/**
+ * Holds if `call` is a call through a function pointer inside `f`, and the
+ * pointer value is given as the `arg`'th argument to `f`.
+ */
+private predicate mayBenefitFromCallContext(
+ VirtualDispatch::DataSensitiveCall call, Function f, int arg
+) {
+ f = pragma[only_bind_out](call).getEnclosingCallable() and
+ exists(InitializeParameterInstruction init |
+ not exists(call.getStaticCallTarget()) and // no static target, i.e. a function-pointer call
+ init.getEnclosingFunction() = f and
+ call.flowsFrom(DataFlow::instructionNode(init), _) and // the parameter reaches the dispatch value
+ init.getParameter().getIndex() = arg
+ )
+}
+
+/**
+ * Gets a viable dispatch target of `call` in the context `ctx`. This is
+ * restricted to those `call`s for which a context might make a difference.
+ */
+Function viableImplInCallContext(CallInstruction call, CallInstruction ctx) {
+ result = viableCallable(call) and
+ exists(int i, Function f |
+ mayBenefitFromCallContext(pragma[only_bind_into](call), f, i) and
+ f = ctx.getStaticCallTarget() and // `ctx` is a call to the function that contains `call`
+ result = ctx.getArgument(i).getUnconvertedResultExpression().(FunctionAccess).getTarget() // function named by argument `i`
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll
new file mode 100644
index 00000000000..4ca06c93362
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll
@@ -0,0 +1,4559 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isBarrier`.
+ * // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+ bindingset[this]
+ Configuration() { any() } // accepts any string; concrete subclasses pin `this` to one literal
+
+ /**
+ * Holds if `source` is a relevant data flow source.
+ */
+ abstract predicate isSource(Node source);
+
+ /**
+ * Holds if `sink` is a relevant data flow sink.
+ */
+ abstract predicate isSink(Node sink);
+
+ /**
+ * Holds if data flow through `node` is prohibited. This completely removes
+ * `node` from the data flow graph.
+ */
+ predicate isBarrier(Node node) { none() }
+
+ /** Holds if data flow into `node` is prohibited. */
+ predicate isBarrierIn(Node node) { none() }
+
+ /** Holds if data flow out of `node` is prohibited. */
+ predicate isBarrierOut(Node node) { none() }
+
+ /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+ predicate isBarrierGuard(BarrierGuard guard) { none() }
+
+ /**
+ * Holds if the additional flow step from `node1` to `node2` must be taken
+ * into account in the analysis.
+ */
+ predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
+
+ /**
+ * Holds if an arbitrary number of implicit read steps of content `c` may be
+ * taken at `node`.
+ */
+ predicate allowImplicitRead(Node node, Content c) { none() }
+
+ /**
+ * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a
+ * value of 0 disables field flow), or a larger value to get more results.
+ */
+ int fieldFlowBranchLimit() { result = 2 }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ */
+ predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ *
+ * The corresponding paths are generated from the end-points and the graph
+ * included in the module `PathGraph`.
+ */
+ predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowTo(Node sink) { hasFlow(_, sink) }
+
+ /**
+ * Holds if data may flow from some source to the expression `sink` for this configuration.
+ */
+ predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
+
+ /**
+ * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+ * measured in approximate number of interprocedural steps.
+ */
+ int explorationLimit() { none() } // no value by default, which leaves partial flow disabled
+
+ /**
+ * Holds if there is a partial data flow path from `source` to `node`. The
+ * approximate distance between `node` and the closest source is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards sink definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sources is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */
+ final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+ partialFlow(source, node, this) and
+ dist = node.getSourceDistance()
+ }
+
+ /**
+ * Holds if there is a partial data flow path from `node` to `sink`. The
+ * approximate distance between `node` and the closest sink is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards source definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sinks is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ *
+ * Note that reverse flow has slightly lower precision than the corresponding
+ * forward flow, as reverse flow disregards type pruning among other features.
+ */
+ final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+ revPartialFlow(node, sink, this) and
+ dist = node.getSinkDistance()
+ }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+ bindingset[this]
+ ConfigurationRecursionPrevention() { any() }
+
+ override predicate hasFlow(Node source, Node sink) {
+ strictcount(Node n | this.isSource(n)) < 0 // never holds: a `strictcount` result is at least 1
+ or
+ strictcount(Node n | this.isSink(n)) < 0 // NOTE(review): presumably here only to reference `isSink` inside an aggregate - confirm
+ or
+ strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 // likewise never holds
+ or
+ super.hasFlow(source, sink) // the only disjunct that can produce results
+ }
+}
+
+private newtype TNodeEx =
+ TNodeNormal(Node n) or
+ TNodeImplicitRead(Node n, boolean hasRead) {
+ hasRead = [false, true] and any(Configuration cfg).allowImplicitRead(n, _)
+ }
+
+/**
+ * A node of the extended flow graph: either a wrapper around an ordinary
+ * `Node` or an implicit-read node attached to one.
+ */
+private class NodeEx extends TNodeEx {
+ /** Gets a textual representation of this node; implicit-read nodes get the suffix " [Ext]". */
+ string toString() {
+ result = this.asNode().toString()
+ or
+ exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]")
+ }
+
+ /** Gets the wrapped `Node` if this is a normal node; implicit-read nodes have no result. */
+ Node asNode() { this = TNodeNormal(result) }
+
+ /** Holds if this is the implicit-read node for `n` with flag `hasRead`. */
+ predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) }
+
+ /** Gets the `Node` this node is built from, for normal and implicit-read nodes alike. */
+ Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) }
+
+ /** Helper for `getEnclosingCallable`: looks the callable up via `nodeEnclosingCallable`. */
+ pragma[nomagic]
+ private DataFlowCallable getEnclosingCallable0() {
+ nodeEnclosingCallable(this.projectToNode(), result)
+ }
+
+ /** Gets the callable enclosing the projected node (inlined wrapper around the helper above). */
+ pragma[inline]
+ DataFlowCallable getEnclosingCallable() {
+ pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result)
+ }
+
+ /** Helper for `getDataFlowType`; uses `asNode()`, so it has a result for normal nodes only. */
+ pragma[nomagic]
+ private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) }
+
+ /** Gets the data-flow type of the wrapped node (inlined wrapper around the helper above). */
+ pragma[inline]
+ DataFlowType getDataFlowType() {
+ pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result)
+ }
+
+ /** Holds if the projected node has the given location information. */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/** A normal node whose wrapped `Node` is an `ArgNode`. */
+private class ArgNodeEx extends NodeEx {
+ ArgNodeEx() { exists(ArgNode argument | argument = this.asNode()) }
+}
+
+/** A normal node whose wrapped `Node` is a `ParamNode`. */
+private class ParamNodeEx extends NodeEx {
+ ParamNodeEx() { exists(ParamNode param | param = this.asNode()) }
+
+ /** Holds if the wrapped parameter node is the parameter of `c` at position `i`. */
+ predicate isParameterOf(DataFlowCallable c, int i) {
+ exists(ParamNode param | param = this.asNode() | param.isParameterOf(c, i))
+ }
+
+ /** Gets a position at which the wrapped node is a parameter of some callable. */
+ int getPosition() { exists(DataFlowCallable owner | this.isParameterOf(owner, result)) }
+}
+
+/** A normal node whose wrapped `Node` is a `ReturnNodeExt`. */
+private class RetNodeEx extends NodeEx {
+ RetNodeEx() { exists(ReturnNodeExt ret | ret = this.asNode()) }
+
+ /** Gets the return position of the wrapped return node. */
+ ReturnPosition getReturnPosition() {
+ exists(ReturnNodeExt ret | ret = this.asNode() | result = getReturnPosition(ret))
+ }
+
+ /** Gets the return kind of the wrapped return node. */
+ ReturnKindExt getKind() {
+ exists(ReturnNodeExt ret | ret = this.asNode() | result = ret.getKind())
+ }
+}
+
+/**
+ * Holds if `node` wraps a `Node` that `config` declares both a source and an
+ * in-barrier.
+ */
+private predicate inBarrier(NodeEx node, Configuration config) {
+ exists(Node inner | inner = node.asNode() |
+ config.isSource(inner) and
+ config.isBarrierIn(inner)
+ )
+}
+
+/**
+ * Holds if `node` wraps a `Node` that `config` declares both a sink and an
+ * out-barrier.
+ */
+private predicate outBarrier(NodeEx node, Configuration config) {
+ exists(Node inner | inner = node.asNode() |
+ config.isSink(inner) and
+ config.isBarrierOut(inner)
+ )
+}
+
+/**
+ * Holds if `node` wraps a `Node` that blocks flow completely in `config`: it
+ * is a barrier, a node guarded by one of the configuration's barrier guards,
+ * an in-barrier that is not a source, or an out-barrier that is not a sink.
+ */
+private predicate fullBarrier(NodeEx node, Configuration config) {
+ exists(Node inner |
+ inner = node.asNode() and
+ (
+ exists(BarrierGuard guard | config.isBarrierGuard(guard) | inner = guard.getAGuardedNode())
+ or
+ config.isBarrierOut(inner) and not config.isSink(inner)
+ or
+ config.isBarrierIn(inner) and not config.isSource(inner)
+ or
+ config.isBarrier(inner)
+ )
+ )
+}
+
+/** Holds if `node` wraps a `Node` that `config` declares to be a source. */
+pragma[nomagic]
+private predicate sourceNode(NodeEx node, Configuration config) {
+ exists(Node inner | inner = node.asNode() | config.isSource(inner))
+}
+
+/** Holds if `node` wraps a `Node` that `config` declares to be a sink. */
+pragma[nomagic]
+private predicate sinkNode(NodeEx node, Configuration config) {
+ exists(Node inner | inner = node.asNode() | config.isSink(inner))
+}
+
+/**
+ * Holds if `node1` flows to `node2` in a single local step: either a
+ * `simpleLocalFlowStepExt` step between the wrapped nodes that no barrier of
+ * `config` blocks, or the step from a node to its implicit-read node with
+ * `hasRead = false`.
+ */
+private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node origin, Node target |
+ origin = node1.asNode() and
+ target = node2.asNode()
+ |
+ simpleLocalFlowStepExt(origin, target) and
+ not (fullBarrier(node1, config) or fullBarrier(node2, config)) and
+ not (outBarrier(node1, config) or inBarrier(node2, config))
+ )
+ or
+ exists(Node base |
+ node2.isImplicitReadNode(base, false) and
+ base = node1.asNode() and
+ config.allowImplicitRead(base, _)
+ )
+}
+
+/**
+ * Holds if `node1` steps to `node2` without leaving the enclosing callable,
+ * either through an additional flow step of `config` that no barrier blocks,
+ * or from the implicit-read node with `hasRead = true` back to its node.
+ */
+private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node origin, Node target |
+ origin = node1.asNode() and
+ target = node2.asNode()
+ |
+ config.isAdditionalFlowStep(origin, target) and
+ getNodeEnclosingCallable(origin) = getNodeEnclosingCallable(target) and
+ not (fullBarrier(node1, config) or fullBarrier(node2, config)) and
+ not (outBarrier(node1, config) or inBarrier(node2, config))
+ )
+ or
+ exists(Node base |
+ node2.asNode() = base and
+ node1.isImplicitReadNode(base, true) and
+ config.allowImplicitRead(base, _)
+ )
+}
+
+/**
+ * Holds if `node1` flows to `node2` through a `jumpStepCached` step, that is,
+ * a step that discards call contexts, and no barrier of `config` blocks it.
+ */
+private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node origin, Node target |
+ origin = node1.asNode() and
+ target = node2.asNode()
+ |
+ jumpStepCached(origin, target) and
+ not (fullBarrier(node1, config) or fullBarrier(node2, config)) and
+ not (outBarrier(node1, config) or inBarrier(node2, config))
+ )
+}
+
+/**
+ * Holds if `config` defines an additional flow step from `node1` to `node2`
+ * whose end points lie in different callables, and no barrier blocks it.
+ */
+private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node origin, Node target |
+ origin = node1.asNode() and
+ target = node2.asNode()
+ |
+ config.isAdditionalFlowStep(origin, target) and
+ getNodeEnclosingCallable(origin) != getNodeEnclosingCallable(target) and
+ not (fullBarrier(node1, config) or fullBarrier(node2, config)) and
+ not (outBarrier(node1, config) or inBarrier(node2, config))
+ )
+}
+
+/**
+ * Holds if content `c` is read in the step from `node1` to `node2`: either an
+ * ordinary read between the wrapped nodes, or an implicit read allowed by
+ * `config` that goes from either implicit-read node of a `Node` to the one
+ * with `hasRead = true`.
+ */
+private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Node base | config.allowImplicitRead(base, c) |
+ node1.isImplicitReadNode(base, _) and
+ node2.isImplicitReadNode(base, true)
+ )
+ or
+ read(node1.asNode(), c, node2.asNode())
+}
+
+/**
+ * Holds if `node1` is stored into `node2` under typed content `tc` with
+ * content type `contentType`, and the content of `tc` is read by some read
+ * step of `config`.
+ */
+private predicate store(
+ NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
+) {
+ exists(Content stored | stored = tc.getContent() | read(_, stored, _, config)) and
+ store(node1.asNode(), tc, node2.asNode(), contentType)
+}
+
+/** Lifts `viableReturnPosOut` to `NodeEx`: `out` must wrap the out node. */
+pragma[nomagic]
+private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) {
+ exists(Node target | target = out.asNode() | viableReturnPosOut(call, pos, target))
+}
+
+/** Lifts `viableParamArg` to `NodeEx`: `p` and `arg` must wrap the parameter and argument. */
+pragma[nomagic]
+private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) {
+ exists(Node param, Node argument |
+ param = p.asNode() and
+ argument = arg.asNode()
+ |
+ viableParamArg(call, param, argument)
+ )
+}
+
+/**
+ * Holds if `config` should track flow through fields, which is the case when
+ * its field flow branch limit is at least 1.
+ */
+private predicate useFieldFlow(Configuration config) { 1 <= config.fieldFlowBranchLimit() }
+
+private module Stage1 {
+ // All access-path types of this stage are `Unit`, so they carry no information.
+ class ApApprox = Unit;
+
+ class Ap = Unit;
+
+ class ApOption = Unit;
+
+ // The call context of this stage is a plain Boolean (see `fwdFlow`).
+ class Cc = boolean;
+
+ /* Begin: Stage 1 logic. */
+ /**
+ * Holds if `node` is reachable from a source in the configuration `config`.
+ *
+ * The Boolean `cc` records whether the node is reached through an
+ * argument in a call.
+ */
+ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) {
+ // A full barrier is never reached, whatever step leads to it.
+ not fullBarrier(node, config) and
+ (
+ // source
+ sourceNode(node, config) and
+ cc = false
+ or
+ // local step: keeps the call context
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ localFlowStep(mid, node, config)
+ )
+ or
+ // additional local step: keeps the call context
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ additionalLocalFlowStep(mid, node, config)
+ )
+ or
+ // jump step: the call context of `mid` is dropped
+ exists(NodeEx mid |
+ fwdFlow(mid, _, config) and
+ jumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // additional jump step: the call context of `mid` is dropped
+ exists(NodeEx mid |
+ fwdFlow(mid, _, config) and
+ additionalJumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // store
+ exists(NodeEx mid |
+ useFieldFlow(config) and
+ fwdFlow(mid, cc, config) and
+ store(mid, _, node, _, config) and
+ not outBarrier(mid, config)
+ )
+ or
+ // read
+ exists(Content c |
+ fwdFlowRead(c, node, cc, config) and
+ fwdFlowConsCand(c, config) and
+ not inBarrier(node, config)
+ )
+ or
+ // flow into a callable
+ exists(NodeEx arg |
+ fwdFlow(arg, _, config) and
+ viableParamArgEx(_, node, arg) and
+ cc = true
+ )
+ or
+ // flow out of a callable
+ exists(DataFlowCall call |
+ // the return node was reached with `cc = false`
+ fwdFlowOut(call, node, false, config) and
+ cc = false
+ or
+ // the return node was reached with `cc = true`: continue with the
+ // context in which an argument of `call` was reached
+ fwdFlowOutFromArg(call, node, config) and
+ fwdFlowIsEntered(call, cc, config)
+ )
+ )
+ }
+
+ /** Holds if `node` is covered by `fwdFlow` under some call context. */
+ private predicate fwdFlow(NodeEx node, Configuration config) {
+ exists(Cc anyCc | fwdFlow(node, anyCc, config))
+ }
+
+ /**
+ * Holds if `node` is the target of a read of `c` from a node that is covered
+ * by `fwdFlow` with call context `cc`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
+ exists(NodeEx origin | read(origin, c, node, config) | fwdFlow(origin, cc, config))
+ }
+
+ /**
+ * Holds if `c` is the content of a store step that starts at a node covered
+ * by `fwdFlow` and ends at a node that is not a full barrier. Requires field
+ * flow to be enabled for `config`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Content c, Configuration config) {
+ useFieldFlow(config) and
+ exists(NodeEx origin, NodeEx target, TypedContent tc |
+ store(origin, tc, target, _, config) and
+ tc.getContent() = c
+ |
+ fwdFlow(origin, _, config) and
+ not fullBarrier(target, config)
+ )
+ }
+
+ /** Holds if a return node at position `pos` is covered by `fwdFlow` with call context `cc`. */
+ pragma[nomagic]
+ private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+ exists(RetNodeEx ret | pos = ret.getReturnPosition() | fwdFlow(ret, cc, config))
+ }
+
+ /**
+ * Holds if `out` is a viable out node of `call` for a return position that
+ * is covered by `fwdFlow` with call context `cc`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) {
+ exists(ReturnPosition pos | viableReturnPosOutEx(call, pos, out) |
+ fwdFlowReturnPosition(pos, cc, config)
+ )
+ }
+
+ /** Holds if `fwdFlowOut` holds for `call` and `out` with the call context `true`. */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) {
+ exists(Cc viaArg | viaArg = true | fwdFlowOut(call, out, viaArg, config))
+ }
+
+ /**
+ * Holds if some argument of `call` is covered by `fwdFlow` with call context
+ * `cc`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+ exists(ArgNodeEx arg | viableParamArgEx(call, _, arg) | fwdFlow(arg, cc, config))
+ }
+
+ /**
+ * Holds if `node` is part of a path from a source to a sink in the
+ * configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from
+ * the enclosing callable in order to reach a sink.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+ // `revFlow0` performs the backwards traversal from sinks; the result is
+ // restricted to nodes that forward flow reaches as well.
+ revFlow0(node, toReturn, config) and
+ fwdFlow(node, config)
+ }
+
+ /**
+ * Candidate relation behind `revFlow`, before the intersection with
+ * `fwdFlow`: holds if `node` is a sink covered by forward flow, or if `node`
+ * can take one step (local, jump, store, read, call or return) to a node in
+ * `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) {
+ // sink
+ fwdFlow(node, config) and
+ sinkNode(node, config) and
+ toReturn = false
+ or
+ // local step: keeps `toReturn`
+ exists(NodeEx mid |
+ localFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ // additional local step: keeps `toReturn`
+ exists(NodeEx mid |
+ additionalLocalFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ // jump step: the `toReturn` flag of `mid` is dropped
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // additional jump step: the `toReturn` flag of `mid` is dropped
+ exists(NodeEx mid |
+ additionalJumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // store
+ exists(Content c |
+ revFlowStore(c, node, toReturn, config) and
+ revFlowConsCand(c, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Content c |
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(mid, toReturn, pragma[only_bind_into](config))
+ )
+ or
+ // flow into a callable
+ exists(DataFlowCall call |
+ // the parameter reaches a sink without returning
+ revFlowIn(call, node, false, config) and
+ toReturn = false
+ or
+ // the parameter must be returned: continue with the flag of an output of `call`
+ revFlowInToReturn(call, node, config) and
+ revFlowIsReturned(call, toReturn, config)
+ )
+ or
+ // flow out of a callable
+ exists(ReturnPosition pos |
+ revFlowOut(pos, config) and
+ node.(RetNodeEx).getReturnPosition() = pos and
+ toReturn = true
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+ *
+ * More precisely: a node covered by `fwdFlow` reads `c` into a node covered
+ * by `revFlow`, and `c` is also a forward store candidate
+ * (`fwdFlowConsCand`).
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node |
+ fwdFlow(node, pragma[only_bind_into](config)) and
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `node` is stored, under a typed content whose content is `c`,
+ * into a node that is in `revFlow` with flag `toReturn`, and `c` is a
+ * forward store candidate (`fwdFlowConsCand`).
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) {
+ exists(NodeEx mid, TypedContent tc |
+ revFlow(mid, toReturn, pragma[only_bind_into](config)) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ store(node, tc, mid, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ /**
+ * Holds if, within the flow covered by `revFlow`, content `c` is the target
+ * of a store as well as of a read.
+ */
+ private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+ revFlowStore(c, _, _, conf) and
+ revFlowConsCand(c, conf)
+ }
+
+ /**
+ * Holds if `out` is a viable out node of `call` for return position `pos`,
+ * and a return node at `pos` is covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ predicate viableReturnPosOutNodeCandFwd1(
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+ ) {
+ viableReturnPosOutEx(call, pos, out) and
+ exists(Cc anyCc | fwdFlowReturnPosition(pos, anyCc, config))
+ }
+
+ /**
+ * Holds if return position `pos` has a viable out node, for some call, that
+ * is in `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+ exists(DataFlowCall call, NodeEx out |
+ viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+ |
+ revFlow(out, _, config)
+ )
+ }
+
+ /**
+ * Holds if `arg` is a viable argument for parameter `p` at `call` and `arg`
+ * is covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ predicate viableParamArgNodeCandFwd1(
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+ ) {
+ fwdFlow(arg, config) and
+ viableParamArgEx(call, p, arg)
+ }
+
+ /**
+ * Holds if `arg` is a forward-reachable argument of `call` whose parameter is
+ * in `revFlow` with flag `toReturn`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIn(
+ DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+ ) {
+ exists(ParamNodeEx p | viableParamArgNodeCandFwd1(call, p, arg, config) |
+ revFlow(p, toReturn, config)
+ )
+ }
+
+ /** Holds if `revFlowIn` holds for `call` and `arg` with the flag `true`. */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) {
+ exists(boolean mustReturn | mustReturn = true | revFlowIn(call, arg, mustReturn, config))
+ }
+
+ /**
+ * Holds if some output of `call` is in `revFlow` with flag `toReturn`, and
+ * forward flow reaches that output after passing through the callee from an
+ * argument, so that reverse flow may arrive at an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+ exists(NodeEx out | fwdFlowOutFromArg(call, out, config) | revFlow(out, toReturn, config))
+ }
+
+ /**
+ * Holds if `node1` is stored into `node2` under `tc` with content type
+ * `contentType`, `node2` is covered by `revFlow`, and the content of `tc` is
+ * both read and stored in the flow covered by `revFlow`. `ap1` is not
+ * constrained beyond having to exist.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Content c |
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(node2, pragma[only_bind_into](config)) and
+ store(node1, tc, node2, contentType, config) and
+ c = tc.getContent() and
+ exists(ap1)
+ )
+ }
+
+ /**
+ * Holds if `c` is read in the step from `n1` to `n2`, `n2` is covered by
+ * `revFlow`, and `c` is both read and stored in the flow covered by
+ * `revFlow`.
+ */
+ pragma[nomagic]
+ predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(n2, pragma[only_bind_into](config)) and
+ read(n1, c, n2, pragma[only_bind_into](config))
+ }
+
+ /** Holds if `node` is in `revFlow` for some value of the `toReturn` flag. */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, Configuration config) {
+ exists(boolean anyToReturn | revFlow(node, anyToReturn, config))
+ }
+
+ /**
+ * Five-column variant of `revFlow`; `returnAp` and `ap` only have to exist
+ * and are otherwise unconstrained.
+ */
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ exists(ap) and
+ exists(returnAp) and
+ revFlow(node, toReturn, config)
+ }
+
+ /**
+ * Holds if `node` is reached forwards through a call argument, must be
+ * returned to reach a sink, and is neither an in-barrier nor an out-barrier.
+ */
+ private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
+ fwdFlow(node, true, config) and
+ revFlow(node, true, config) and
+ not (inBarrier(node, config) or outBarrier(node, config))
+ }
+
+ /**
+ * Holds if `callable` contains a return node of kind `kind` that is a
+ * through-flow candidate, so flow may return from `callable`.
+ */
+ pragma[nomagic]
+ private predicate returnFlowCallableNodeCand(
+ DataFlowCallable callable, ReturnKindExt kind, Configuration config
+ ) {
+ exists(RetNodeEx ret |
+ kind = ret.getKind() and
+ callable = ret.getEnclosingCallable()
+ |
+ throughFlowNodeCand(ret, config)
+ )
+ }
+
+ /**
+ * Holds if `p` is a parameter of `c` through which flow may enter and then
+ * reach a return node of `c`, which makes `p` a candidate for the origin of
+ * a summary. `ap` only has to exist.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(ap) and
+ throughFlowNodeCand(p, config) and
+ c = p.getEnclosingCallable() and
+ exists(ReturnKindExt kind | returnFlowCallableNodeCand(c, kind, config) |
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition()
+ )
+ }
+
+ /**
+ * Holds if reverse flow may pass through `call`: an argument of `call` is in
+ * `revFlow` with some flag, its parameter must be returned, and an output of
+ * `call` is in `revFlow` with that same flag.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(ArgNodeEx arg, boolean toReturn |
+ revFlowInToReturn(call, arg, config) and
+ revFlowIsReturned(call, toReturn, config)
+ |
+ revFlow(arg, toReturn, config)
+ )
+ }
+
+ /**
+ * Reports the size of this stage for `config`: the number of nodes, content
+ * candidates and tuples of forward flow (`fwd = true`) or reverse flow
+ * (`fwd = false`). `conscand` is always -1.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ conscand = -1 and
+ (
+ fwd = false and
+ tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) and
+ fields = count(Content f0 | revFlowConsCand(f0, config)) and
+ nodes = count(NodeEx node | revFlow(node, _, config))
+ or
+ fwd = true and
+ tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) and
+ fields = count(Content f0 | fwdFlowConsCand(f0, config)) and
+ nodes = count(NodeEx node | fwdFlow(node, config))
+ )
+ }
+ /* End: Stage 1 logic. */
+}
+
+/** Holds if `node1` takes a local flow step to `node2` and `node2` survived stage 1. */
+pragma[noinline]
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ localFlowStep(node1, node2, config) and
+ Stage1::revFlow(node2, config)
+}
+
+/** Holds if `node1` takes an additional local step to `node2` and `node2` survived stage 1. */
+pragma[noinline]
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ additionalLocalFlowStep(node1, node2, config) and
+ Stage1::revFlow(node2, config)
+}
+
+/**
+ * Holds if `out` is a viable out node of `call` for return position `pos`
+ * according to the stage 1 forward candidates, and `out` survived stage 1.
+ */
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1(
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+ Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) and
+ Stage1::revFlow(out, config)
+}
+
+/**
+ * Holds if flow can leave `call` by going from `ret` to `out`, through a
+ * `ReturnNode` or through a mutated argument, as part of a path from a
+ * source to a sink, without being blocked by an out-barrier at `ret` or an
+ * in-barrier at `out`.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+ Stage1::revFlow(ret, config) and
+ exists(ReturnPosition pos | pos = ret.getReturnPosition() |
+ viableReturnPosOutNodeCand1(call, pos, out, config)
+ ) and
+ not (outBarrier(ret, config) or inBarrier(out, config))
+}
+
+/**
+ * Holds if `arg` is a viable argument for `p` at `call` according to the
+ * stage 1 forward candidates, and `arg` survived stage 1.
+ */
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+ Stage1::revFlow(arg, config) and
+ Stage1::viableParamArgNodeCandFwd1(call, p, arg, config)
+}
+
+/**
+ * Holds if flow can enter `call` by going from `arg` to `p` as part of a
+ * path from a source to a sink, without being blocked by an out-barrier at
+ * `arg` or an in-barrier at `p`.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+ Stage1::revFlow(p, config) and
+ viableParamArgNodeCand1(call, p, arg, config) and
+ not (outBarrier(arg, config) or inBarrier(p, config))
+}
+
+/**
+ * Gets the number of distinct targets of cross-call edges (into or out of a
+ * call) that start at `n1`, in the graph of source-to-sink paths that
+ * ignores call contexts. Has no result when there is no such edge.
+ */
+private int branch(NodeEx n1, Configuration conf) {
+ result =
+ strictcount(NodeEx target |
+ flowIntoCallNodeCand1(_, n1, target, conf) or flowOutOfCallNodeCand1(_, n1, target, conf)
+ )
+}
+
+/**
+ * Gets the number of distinct origins of cross-call edges (into or out of a
+ * call) that end at `n2`, in the graph of source-to-sink paths that ignores
+ * call contexts. Has no result when there is no such edge.
+ */
+private int join(NodeEx n2, Configuration conf) {
+ result =
+ strictcount(NodeEx origin |
+ flowIntoCallNodeCand1(_, origin, n2, conf) or flowOutOfCallNodeCand1(_, origin, n2, conf)
+ )
+}
+
+/**
+ * Holds if flow can leave `call` by going from `ret` to `out`, through a
+ * `ReturnNode` or through a mutated argument, as part of a path from a
+ * source to a sink. `allowsFieldFlow` is `true` exactly when the smaller of
+ * the forward branching at `ret` and the backward branching at `out` is
+ * within the field flow branch limit of the configuration.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, ret, out, config) and
+ exists(int fanOut, int fanIn |
+ fanOut = branch(ret, config) and
+ fanIn = join(out, config)
+ |
+ fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit() and
+ allowsFieldFlow = true
+ or
+ not (fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit()) and
+ allowsFieldFlow = false
+ )
+}
+
+/**
+ * Holds if flow can enter `call` by going from `arg` to `p` as part of a
+ * path from a source to a sink. `allowsFieldFlow` is `true` exactly when the
+ * smaller of the forward branching at `arg` and the backward branching at
+ * `p` is within the field flow branch limit of the configuration.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+ flowIntoCallNodeCand1(call, arg, p, config) and
+ exists(int fanOut, int fanIn |
+ fanOut = branch(arg, config) and
+ fanIn = join(p, config)
+ |
+ fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit() and
+ allowsFieldFlow = true
+ or
+ not (fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit()) and
+ allowsFieldFlow = false
+ )
+}
+
+private module Stage2 {
+ // The stage whose results this stage builds on.
+ module PrevStage = Stage1;
+
+ // The approximation of an access path is the access path type of the previous stage.
+ class ApApprox = PrevStage::Ap;
+
+ // Access paths are Booleans in this stage: `ApNil` is `false` and `apCons`
+ // always yields `true`.
+ class Ap = boolean;
+
+ /** The nil access path of this stage, which is the Boolean `false`. */
+ class ApNil extends Ap {
+ ApNil() { this = false }
+ }
+
+ /**
+ * Relates an access path of this stage to its approximation. The body is
+ * `any()`, so every given pair of `ap` and `result` is accepted; both must be
+ * bound by the caller.
+ */
+ bindingset[result, ap]
+ private ApApprox getApprox(Ap ap) { any() }
+
+ /** Gets the nil access path for `node`, provided `node` survived the previous stage. */
+ private ApNil getApNil(NodeEx node) { exists(result) and PrevStage::revFlow(node, _) }
+
+ /** Gets the access path that results from pushing `tc` onto `tail`; this is always `true`. */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { exists(tail) and exists(tc) and result = true }
+
+ /** Gets a possible head content of `ap`: any content at all when `ap` is `true`, none otherwise. */
+ pragma[inline]
+ private Content getHeadContent(Ap ap) { ap = true and exists(result) }
+
+ // An optional access path, represented by `BooleanOption`.
+ class ApOption = BooleanOption;
+
+ /** Gets the absent option. */
+ ApOption apNone() { result = TBooleanNone() }
+
+ /** Gets the option that holds the access path `ap`. */
+ ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+ // Call contexts of this stage are the shared `CallContext` classes.
+ class Cc = CallContext;
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ /** Gets the call context used where no call context is known (a `CallContextAny`). */
+ Cc ccNone() { result instanceof CallContextAny }
+
+ // Local call contexts carry no information in this stage.
+ private class LocalCc = Unit;
+
+ /**
+ * Gets the call context to use inside `c` when it is entered through `call`
+ * from a node with call context `outercc`: the specific call if call-site
+ * dispatch is recorded for `call` and `c`, and "some call" otherwise.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ (
+ recordDataFlowCallSiteDispatch(call, c) and result = TSpecificCall(call)
+ or
+ not recordDataFlowCallSiteDispatch(call, c) and result = TSomeCall()
+ )
+ }
+
+ /**
+ * Gets the call context to use after returning from `c` to `call`, given
+ * that the return node had call context `innercc`: a return context if
+ * `reducedViableImplInReturn` holds for `c` and `call`, and `ccNone()`
+ * otherwise.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ (
+ reducedViableImplInReturn(c, call) and result = TReturn(c, call)
+ or
+ not reducedViableImplInReturn(c, call) and result = ccNone()
+ )
+ }
+
+ /**
+ * Gets the local call context for `node` under `cc`. The body is `any()`, so
+ * every `LocalCc` value is a result and none of the arguments is inspected.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
+
+ /**
+ * Holds if `node1` takes a local step to `node2` that survived stage 1.
+ * `preservesValue` is `true` for ordinary local flow steps and `false` for
+ * additional local steps. `ap` and `lcc` only have to exist.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ exists(lcc) and
+ exists(ap) and
+ (
+ additionalLocalFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = false
+ or
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true
+ )
+ }
+
+ // Call edges of this stage are the five-column stage 1 candidates, which
+ // carry the `allowsFieldFlow` flag.
+ private predicate flowOutOfCall = flowOutOfCallNodeCand1/5;
+
+ private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+ /** Type check for store steps; the body is `any()`, so it always succeeds in this stage. */
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 2 logic. */
+ /** Holds if `node` with approximate access path `apa` survived the previous stage. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ exists(boolean anyToReturn | PrevStage::revFlow(node, anyToReturn, _, apa, config))
+ }
+
+ /**
+ * Holds if `flowOutOfCall` holds for these arguments and the previous stage
+ * found that flow may pass through `call` (`callMayFlowThroughRev`) and that
+ * some parameter of the callable enclosing `ret` may flow through it.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source: must have survived the previous stage
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local step: a value-preserving step keeps the access path, an additional
+ // step requires a nil access path before the step
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: call context and argument access path are reset
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only taken with a nil access path
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ // remember the argument's access path only if the parameter may flow through
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow out of a callable that was entered through an argument of `call`:
+ // continue with the context in which that argument was reached
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` (in context
+ * `cc`, `argAp`) and `node1` is stored into `node2` under `tc` by a store
+ * step that survived the previous stage and passes the type check.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType, ApApprox apa |
+ apa = getApprox(ap1) and
+ PrevStage::storeStepCand(node1, apa, tc, node2, contentType, config)
+ |
+ typecheckStore(ap1, contentType) and
+ fwdFlow(node1, cc, argAp, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if a store of content `c` is reached by forward flow with access
+ * path `tail`, and `cons` is the access path that results from that store.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config)
+ |
+ cons = apCons(tc, tail) and
+ c = tc.getContent()
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap` (in context
+ * `cc`, `argAp`), `c` is a head content of `ap`, and `c` is read in a step
+ * from `node1` to `node2` that survived the previous stage.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ c = getHeadContent(ap) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ fwdFlow(node1, cc, argAp, ap, config)
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap`
+ * in context `outercc`, `argAp`, and flows from there into parameter `p`,
+ * where the call context becomes `innercc`. A non-nil access path may only
+ * enter when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and
+ (allowsFieldFlow = true or ap instanceof ApNil)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * `CcNoCall` context and flows from there out of a call to `out`, where the
+ * call context becomes `ccOut`. A non-nil access path may only leave when the
+ * call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc |
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ ccOut = getCallContextReturn(ret.getEnclosingCallable(), call, innercc) and
+ (allowsFieldFlow = true or ap instanceof ApNil)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * call context that matches `call`, having entered through an argument with
+ * access path `argAp`, and flows from there through `call` to `out`. A
+ * non-nil access path may only leave when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call) and
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ (allowsFieldFlow = true or ap instanceof ApNil)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap`
+ * in context `cc`, `argAp`, and the parameter it flows into may flow through
+ * the callee and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, ApApprox apa |
+ apa = getApprox(ap) and
+ PrevStage::parameterMayFlowThrough(p, _, apa, config)
+ |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow takes a store step from `node1` with access path
+ * `ap1` to `node2` under `tc`, giving access path `ap2`, and forward flow
+ * also reads the content of `tc` from access path `ap2` somewhere.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ ap2 = apCons(tc, ap1) and
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ exists(Content c | c = tc.getContent() | fwdFlowRead(ap2, c, _, _, _, _, config))
+ }
+
+ /**
+ * Holds if forward flow reads `c` from `n1` with access path `ap1` into
+ * `n2`, and `ap1` is the result of storing `c` onto access path `ap2`.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowConsCand(ap1, c, ap2, config) and
+ fwdFlowRead(ap1, c, n1, n2, _, _, config)
+ }
+
+ /**
+ * Holds if forward flow may pass through `call`: some output of `call` is
+ * reached by forward flow after the callee was entered through an argument
+ * of `call` and left again, with matching call context and access paths.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` holds for these arguments, `arg` is reached by
+ * forward flow, `p` may flow through according to the previous stage, and
+ * forward flow may pass through `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ // `revFlow0` performs the backwards traversal from sinks; the result is
+ // restricted to node/access-path pairs that forward flow reaches as well.
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ /**
+ * Candidate relation behind `revFlow`, before the intersection with
+ * `fwdFlow`: holds if `node` is a sink reached by forward flow with a nil
+ * access path, or if `node` can take one step (local, jump, store, read,
+ * call or return) to a node in `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local step: keeps the access path
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // additional local step: nil access path on both sides
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: the return information of `mid` is dropped
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: nil access path on both sides
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow into a callable whose parameter must be returned: continue with the
+ // return information of an output of `call`
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ // record the returned access path only if forward flow reached `node`
+ // inside a call with a known argument access path
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if `node` with access path `ap` takes a forward store step under
+ * `tc`, whose content is `c`, to `mid` with access path `ap0`, and `mid` is
+ * in `revFlow` with `ap0`, `toReturn` and `returnAp`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ c = tc.getContent() and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ revFlow(mid, toReturn, returnAp, ap0, config)
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches the target of a read
+ * of `c`, where the read source has access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and // `tail0` equals `tail`; the pragma only steers binding order
+ readStepFwd(_, cons, c, mid, tail0, config) // some read of `c` into `mid` turns `cons` into `tail0`
+ )
+ }
+
+ pragma[nomagic] // Steps reverse flow from an out node of `call` back to the return node `ret`.
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and // context values are those of the out node
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ pragma[nomagic] // Steps reverse flow from a parameter with `toReturn = false` back to an argument `arg`.
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config) // any call connecting `arg` to `p`
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ pragma[nomagic] // Steps reverse flow from a parameter with `toReturn = true` and return path `returnAp` back to `arg` of `call`.
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) // declared outside this excerpt
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and // `toReturn`/`returnAp` are those of the out node
+ fwdFlow(ret, ccc, apSome(_), ap, config) and // forward flow reached `ret` in a call context with some argument path
+ ccc.matchesCall(call) // that call context must be compatible with `call`
+ )
+ }
+
+ pragma[nomagic] // Holds if the store of `tc` from `node1` (path `ap1`) into `node2` is retained by this stage.
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and // the underlying store step, which also supplies `contentType`
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and // the step is covered by reverse flow
+ revFlowConsCand(ap2, c, ap1, config) // and reverse flow also contains a matching read of `c`
+ )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { // read of `c` from `node1` into `node2` retained by this stage
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and // the read target is in reverse flow
+ readStepFwd(node1, ap1, c, node2, ap2, config) and // read relation of the forward pass (declared outside this excerpt)
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, // and reverse flow contains a matching store of `c`
+ pragma[only_bind_into](config))
+ )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } // projection: `node` is in reverse flow for some context and access path
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto path `ap` in the forward pass
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto path `ap` by a store step retained by this stage
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ pragma[noinline] // Parameter `p` of callable `c` is in reverse flow with path `ap`, `toReturn = true` and return path `ap0`.
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | // `p` (path `ap`) may reach a return node `ret` of the same callable `c`
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and // forward flow reached `ret` with argument path `ap`
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // exclude returns that update the same parameter position: a parameter is not expected to be returned stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ pragma[nomagic] // Holds if reverse flow passes through `call`: from one of its outputs back to one of its arguments.
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and // `arg` is reached from a parameter that must be returned with `returnAp0`
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) // and `returnAp0` matches an output of `call`
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and // counts for the forward pass
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and // distinct nodes
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and // distinct typed contents
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and // (typed content, access path) pairs
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) // size of the full relation
+ or
+ fwd = false and // the same counts for the reverse pass
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 2 logic. */
+}
+
+pragma[nomagic] // Restricts `flowOutOfCallNodeCand1` to steps whose both end points are in `Stage2::revFlow`.
+private predicate flowOutOfCallNodeCand2(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+pragma[nomagic] // Restricts `flowIntoCallNodeCand1` to steps whose both end points are in `Stage2::revFlow`.
+private predicate flowIntoCallNodeCand2(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+) {
+ flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+private module LocalFlowBigStep {
+ /**
+ * A node where some checking is required, and which the big-step relation
+ * is therefore not allowed to step over.
+ */
+ private class FlowCheckNode extends NodeEx {
+ FlowCheckNode() {
+ castNode(this.asNode()) or // cast nodes, per the helper declared outside this excerpt
+ clearsContentCached(this.asNode(), _) // presumably: some content is cleared at this node (confirm against the helper)
+ }
+ }
+
+ /**
+ * Holds if `node` can be the first node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ predicate localFlowEntry(NodeEx node, Configuration config) {
+ Stage2::revFlow(node, config) and // only nodes retained by Stage 2 are considered
+ (
+ sourceNode(node, config) or
+ jumpStep(_, node, config) or // target of a jump step
+ additionalJumpStep(_, node, config) or
+ node instanceof ParamNodeEx or
+ node.asNode() instanceof OutNodeExt or
+ store(_, _, node, _, config) or // target of a store step
+ read(_, _, node, config) or // target of a read step
+ node instanceof FlowCheckNode // check nodes always start a new big step
+ )
+ }
+
+ /**
+ * Holds if `node` can be the last node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ private predicate localFlowExit(NodeEx node, Configuration config) {
+ exists(NodeEx next | Stage2::revFlow(next, config) | // `node` has a non-local step to a node retained by Stage 2
+ jumpStep(node, next, config) or
+ additionalJumpStep(node, next, config) or
+ flowIntoCallNodeCand1(_, node, next, config) or
+ flowOutOfCallNodeCand1(_, node, next, config) or
+ store(node, _, next, _, config) or
+ read(node, _, next, config)
+ )
+ or
+ node instanceof FlowCheckNode // check nodes always end a big step
+ or
+ sinkNode(node, config)
+ }
+
+ pragma[noinline] // Restricts `additionalLocalFlowStepNodeCand1` using the Stage 2 reverse-flow results.
+ private predicate additionalLocalFlowStepNodeCand2(
+ NodeEx node1, NodeEx node2, Configuration config
+ ) {
+ additionalLocalFlowStepNodeCand1(node1, node2, config) and
+ Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and // access path argument fixed to `false`; presumably the empty path, cf. `toBoolNonEmpty`
+ Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+ * subsequence of local flow steps in a dataflow path.
+ *
+ * This is the transitive closure of `[additional]localFlowStep` beginning
+ * at `localFlowEntry`.
+ */
+ pragma[nomagic]
+ private predicate localFlowStepPlus(
+ NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+ LocalCallContext cc
+ ) {
+ not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and // applies to every case below
+ (
+ localFlowEntry(node1, pragma[only_bind_into](config)) and // base case: a single step out of an entry node
+ (
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true and
+ t = node1.getDataFlowType() // irrelevant dummy value
+ or
+ additionalLocalFlowStepNodeCand2(node1, node2, config) and
+ preservesValue = false and
+ t = node2.getDataFlowType() // type of the target of the additional step
+ ) and
+ node1 != node2 and
+ cc.relevantFor(node1.getEnclosingCallable()) and
+ not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ or
+ exists(NodeEx mid | // extend by an ordinary local step; `preservesValue` and `t` carry over
+ localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and
+ localFlowStepNodeCand1(mid, node2, config) and
+ not mid instanceof FlowCheckNode and // never step over a check node
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ or
+ exists(NodeEx mid | // extend by an additional step; the path no longer preserves the value
+ localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and
+ additionalLocalFlowStepNodeCand2(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ preservesValue = false and
+ t = node2.getDataFlowType() and // `t` is reset to the type of the latest additional-step target
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ )
+ }
+
+ /**
+ * Holds if `node1` can step to `node2` in one or more local steps and this
+ * path can occur as a maximal subsequence of local steps in a dataflow path.
+ */
+ pragma[nomagic]
+ predicate localFlowBigStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf,
+ Configuration config, LocalCallContext callContext
+ ) {
+ localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and // `apf` carries the type computed along the path
+ localFlowExit(node2, config) // the path must end at an exit node to be maximal
+ }
+}
+
+private import LocalFlowBigStep
+
+private module Stage3 {
+ module PrevStage = Stage2; // this stage refines the results of Stage 2
+
+ class ApApprox = PrevStage::Ap; // access-path type of the previous stage, used as an approximation here
+
+ class Ap = AccessPathFront; // access paths of this stage are access path fronts
+
+ class ApNil = AccessPathFrontNil; // the nil access paths of this stage
+
+ private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } // maps an access path of this stage to the previous stage's approximation
+
+ private ApNil getApNil(NodeEx node) { // nil access path typed by `node`, only for nodes retained by the previous stage
+ PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType())
+ }
+
+ bindingset[tc, tail] // both arguments must be bound by the caller
+ private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } // only the head is recorded; `tail` merely has to exist
+
+ pragma[noinline] // Gets the content of the head typed content of `ap`, if `ap` has a head.
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ class ApOption = AccessPathFrontOption; // optional access path, used for `argAp` and `returnAp`
+
+ ApOption apNone() { result = TAccessPathFrontNone() } // the absent option
+
+ ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } // the option wrapping `ap`
+
+ class Cc = boolean; // call contexts of this stage are plain booleans
+
+ class CcCall extends Cc { // `true`: the node is reached through a call
+ CcCall() { this = true }
+
+ /** Holds if this call context may be `call`. This stage does not track the call, so any `call` matches. */
+ predicate matchesCall(DataFlowCall call) { any() }
+ }
+
+ class CcNoCall extends Cc { // `false`: no call context
+ CcNoCall() { this = false }
+ }
+
+ Cc ccNone() { result = false } // the "no call" context, i.e. the single `CcNoCall` value
+
+ private class LocalCc = Unit; // this stage keeps no information in local call contexts
+
+ bindingset[call, c, outercc] // arguments are only required to be bound; the body does not constrain them
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } // any `CcCall`
+
+ bindingset[call, c, innercc] // arguments are only required to be bound; the body does not constrain them
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } // any `CcNoCall`
+
+ bindingset[node, cc, config] // arguments are only required to be bound; the body does not constrain them
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // any `LocalCc`
+
+ private predicate localStep( // local steps of this stage are big steps taken in any local call context
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) // `lcc` is unconstrained
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; // call-return edges already restricted by Stage 2
+
+ private predicate flowIntoCall = flowIntoCallNodeCand2/5; // call-entry edges already restricted by Stage 2
+
+ pragma[nomagic] // Holds if access path `ap` is cleared at `node`, as decided by `ap.isClearedAt`.
+ private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
+
+ pragma[nomagic] // Holds if the underlying node of `node` is a `CastingNode`.
+ private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
+
+ bindingset[node, ap] // pure check on already-bound values
+ private predicate filter(NodeEx node, Ap ap) {
+ not clear(node, ap) and // reject access paths that are cleared at `node`
+ if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any() // type check only at casting nodes
+ }
+
+ bindingset[ap, contentType] // pure check on already-bound values
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) {
+ // Stores must be type-checked here, because reverse flow through a getter
+ // may see a different type at this point than inside the getter.
+ compatibleTypes(ap.getType(), contentType)
+ }
+
+ /* Begin: Stage 3 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { // `node` with approximation `apa` was retained by the previous stage
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ bindingset[result, apa] // both sides must be bound elsewhere
+ private ApApprox unbindApa(ApApprox apa) { // identity on `apa`, written so that neither side binds the other
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ pragma[nomagic] // Call-return edges that the previous stage considers part of flow through a callable.
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and // `call` has flow-through in the previous stage
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, // some parameter of the callable of `ret` may flow through
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and // the actual step relation
+ flowCand(node, unbindApa(getApprox(ap)), config) and // prune by the previous stage's result for the approximated path
+ filter(node, ap) // clearing and casting checks
+ }
+
+ pragma[nomagic] // Forward-flow step relation without the pruning and filtering applied by `fwdFlow`.
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: a source retained by the previous stage
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step from `mid`; `cc` and `argAp` are unchanged
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving: the access path is kept
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // not value-preserving: nil path before, nil path `ap` after
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, call context and argument path reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from a nil path, yielding a fresh nil path
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store: push `tc` onto the access path `ap0` of the stored value
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read: pop `c` from `ap0`; the resulting `ap` is a tail recorded by a matching store
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config) // record the argument path only if flow-through is possible
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable that was not entered through an argument
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow through `call`: restore the context from before the call was entered
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ pragma[nomagic] // Forward flow at `node1` with path `ap1` can take a store step of `tc` into `node2`.
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and // store retained by the previous stage
+ typecheckStore(ap1, contentType) // the stored path's type must be compatible with the content type
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and // `c` is the content carried by `tc`
+ cons = apCons(tc, tail)
+ )
+ }
+
+ pragma[nomagic] // Forward flow at `node1` with path `ap` can take a read step of `c` into `node2`.
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and // read retained by the previous stage
+ getHeadContent(ap) = c // the head of `ap` must be the content being read
+ }
+
+ pragma[nomagic] // Forward flow from an argument of `call` into parameter `p`.
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and // `outercc`/`argAp` describe the argument, i.e. the caller side
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ pragma[nomagic] // Forward flow out of a callable from a return node that was reached in a `CcNoCall` context.
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and // `argAp` is carried over unchanged
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ pragma[nomagic] // Forward flow out of `call` from a return node that was reached from an argument with path `argAp`.
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call) // the call context of `ret` must be compatible with `call`
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and // `cc`/`argAp` are the caller-side context of the argument
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) // flow-through is possible per the previous stage
+ )
+ }
+
+ pragma[nomagic] // A forward-flow store step `node1` -> `node2` whose resulting path `ap2` is also read somewhere.
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // some forward read of the same content with path `ap2`
+ }
+
+ private predicate readStepFwd( // a forward-flow read step `n1` -> `n2` of `c`, turning path `ap1` into `ap2`
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config) // `ap2` is a tail onto which `c` was stored to give `ap1`
+ }
+
+ pragma[nomagic] // Holds if forward flow passes through `call`: in through an argument and back out at an output.
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and // `out` is reached from an argument with path `argAp0`
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), // and `call` was entered with that path in context `cc`/`argAp`
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic] // Call-entry edges that can be part of flow through a callable according to the forward pass.
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and // the argument is reached by forward flow
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and // `p` may flow through per the previous stage
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and // the actual reverse step relation
+ fwdFlow(node, _, _, ap, config) // restricted to what forward flow reached with the same path
+ }
+
+ pragma[nomagic] // Reverse-flow step relation without the forward-flow restriction applied by `revFlow`.
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // base case: a sink reached by forward flow with a nil path
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ exists(NodeEx mid | // value-preserving local step: the access path is kept
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // non-value-preserving local step: nil path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, return context reset
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: nil path on both sides, return context reset
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store: going backwards over a store, provided reverse flow also contains a matching read
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read: going backwards over a read from `node` (path `ap`) into `mid` (path `ap0`)
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable, seen from a parameter that need not be returned
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow through `call`: restore the context of the call's output
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable: `node` is a return node and must be returned
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) // record the return path only if forward flow entered via an argument
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic] // Reverse-flow view of a store step from `node` (path `ap`) into `mid` (path `ap0`).
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and // the store target `mid` is in reverse flow
+ storeStepFwd(node, ap, tc, mid, ap0, config) and // the store was taken by forward flow and is matched by a read
+ tc.getContent() = c // `c` is the content carried by the typed content `tc`
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches the target of a read
+ * of `c`, where the read source has access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and // `tail0` equals `tail`; the pragma only steers binding order
+ readStepFwd(_, cons, c, mid, tail0, config) // some read of `c` into `mid` turns `cons` into `tail0`
+ )
+ }
+
+ pragma[nomagic] // Steps reverse flow from an out node of `call` back to the return node `ret`.
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and // context values are those of the out node
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ pragma[nomagic] // Steps reverse flow from a parameter with `toReturn = false` back to an argument `arg`.
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config) // any call connecting `arg` to `p`
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ pragma[nomagic] // Steps reverse flow from a parameter with `toReturn = true` and return path `returnAp` back to `arg` of `call`.
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) // only call-entry edges that can be part of flow-through
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil `ap` is only accepted if `allowsFieldFlow` is true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and // `toReturn`/`returnAp` are those of the out node
+ fwdFlow(ret, ccc, apSome(_), ap, config) and // forward flow reached `ret` in a call context with some argument path
+ ccc.matchesCall(call) // that call context must be compatible with `call`
+ )
+ }
+
+ pragma[nomagic] // Holds if the store of `tc` from `node1` (path `ap1`) into `node2` is retained by this stage.
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and // the underlying store step, which also supplies `contentType`
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and // the step is covered by reverse flow
+ revFlowConsCand(ap2, c, ap1, config) // and reverse flow also contains a matching read of `c`
+ )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { // read of `c` from `node1` into `node2` retained by this stage
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and // the read target is in reverse flow
+ readStepFwd(node1, ap1, c, node2, ap2, config) and // the read was taken by forward flow
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, // and reverse flow contains a matching store of `c`
+ pragma[only_bind_into](config))
+ )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } // projection: `node` is in reverse flow for some context and access path
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto path `ap` by a forward store step that is also read
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto path `ap` by a store step retained by this stage
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ pragma[noinline] // Parameter `p` of callable `c` is in reverse flow with path `ap`, `toReturn = true` and return path `ap0`.
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | // `p` (path `ap`) may reach a return node `ret` of the same callable `c`
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and // forward flow reached `ret` with argument path `ap`
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // exclude returns that update the same parameter position: a parameter is not expected to be returned stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ pragma[nomagic] // Holds if reverse flow passes through `call`: from one of its outputs back to one of its arguments.
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and // `arg` is reached from a parameter that must be returned with `returnAp0`
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) // and `returnAp0` matches an output of `call`
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and // counts for the forward pass
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and // distinct nodes
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and // distinct typed contents
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and // (typed content, access path) pairs
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) // size of the full relation
+ or
+ fwd = false and // the same counts for the reverse pass
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and remains relevant for the following pruning stage.
+ */
+private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
+ exists(AccessPathFront apf |
+ Stage3::revFlow(node, true, _, apf, config) and // `node` must be returned in order to reach a sink
+ Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) // and was reached from an argument with front `argApf`
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) {
+ exists(int tails, int nodes, int apLimit, int tupleLimit |
+ tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and // number of fronts that `tc` is stored onto
+ nodes =
+ strictcount(NodeEx n | // nodes that carry a front headed by `tc`, either as path or as summary context
+ Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ or
+ flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ ) and
+ accessPathApproxCostLimits(apLimit, tupleLimit) and // limits come from a helper declared outside this excerpt
+ apLimit < tails and // expensive only if both limits are exceeded
+ tupleLimit < (tails - 1) * nodes and
+ not tc.forceHighPrecision() // contents that force high precision are never treated as expensive
+ )
+}
+
+private newtype TAccessPathApprox =
+ TNil(DataFlowType t) or // empty path; only the type is kept
+ TConsNil(TypedContent tc, DataFlowType t) { // length-1 path: head `tc` followed by type `t`
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ not expensiveLen2unfolding(tc, _)
+ } or
+ TConsCons(TypedContent tc1, TypedContent tc2, int len) { // first two elements plus total length
+ Stage3::consCand(tc1, TFrontHead(tc2), _) and
+ len in [2 .. accessPathLimit()] and
+ not expensiveLen2unfolding(tc1, _)
+ } or
+ TCons1(TypedContent tc, int len) { // head and length only, used where unfolding to two elements is expensive
+ len in [1 .. accessPathLimit()] and
+ expensiveLen2unfolding(tc, _)
+ }
+
+/**
+ * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only
+ * the first two elements of the list and its length are tracked. If data flows
+ * from a source to a given node with a given `AccessPathApprox`, this indicates
+ * the sequence of dereference operations needed to get from the value in the node
+ * to the tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPathApprox extends TAccessPathApprox {
+ abstract string toString(); // textual representation of this approximation
+
+ abstract TypedContent getHead(); // first element of the list, if any
+
+ abstract int len(); // number of elements in the conceptual list
+
+ abstract DataFlowType getType(); // type associated with this approximation
+
+ abstract AccessPathFront getFront(); // the corresponding access path front
+
+ /** Gets the access path obtained by popping `head` from this path, if any. */
+ abstract AccessPathApprox pop(TypedContent head);
+}
+
+private class AccessPathApproxNil extends AccessPathApprox, TNil { // the empty access path, carrying only a type
+ private DataFlowType t;
+
+ AccessPathApproxNil() { this = TNil(t) }
+
+ override string toString() { result = concat(": " + ppReprType(t)) } // `concat` yields "" if `ppReprType` has no result
+
+ override TypedContent getHead() { none() } // an empty path has no head
+
+ override int len() { result = 0 }
+
+ override DataFlowType getType() { result = t }
+
+ override AccessPathFront getFront() { result = TFrontNil(t) }
+
+ override AccessPathApprox pop(TypedContent head) { none() } // nothing can be popped from an empty path
+}
+
+abstract private class AccessPathApproxCons extends AccessPathApprox { } // common supertype of the ConsNil, ConsCons and Cons1 approximations
+
+private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { // a path of length 1: `tc` followed by type `t`
+ private TypedContent tc;
+ private DataFlowType t;
+
+ AccessPathApproxConsNil() { this = TConsNil(tc, t) }
+
+ override string toString() {
+ // The `concat` becomes "" if `ppReprType` has no result.
+ result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t))
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = 1 }
+
+ override DataFlowType getType() { result = tc.getContainerType() } // the type is the container type of the head, not `t`
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } // popping the only element leaves the nil path of type `t`
+}
+
+private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { // first two elements `tc1`, `tc2` and total length `len`
+ private TypedContent tc1;
+ private TypedContent tc2;
+ private int len;
+
+ AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) }
+
+ override string toString() {
+ if len = 2
+ then result = "[" + tc1.toString() + ", " + tc2.toString() + "]"
+ else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" // longer paths also show the length
+ }
+
+ override TypedContent getHead() { result = tc1 }
+
+ override int len() { result = len }
+
+ override DataFlowType getType() { result = tc1.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc1) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc1 and
+ (
+ result = TConsCons(tc2, _, len - 1) // the element after `tc2` is not tracked, so every existing candidate is a result
+ or
+ len = 2 and
+ result = TConsNil(tc2, _) // likewise, the final type is not tracked here
+ or
+ result = TCons1(tc2, len - 1) // used when `tc2` is only represented in the coarser `TCons1` form
+ )
+ }
+}
+
+private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { // only the head `tc` and the length `len` are tracked
+ private TypedContent tc;
+ private int len;
+
+ AccessPathApproxCons1() { this = TCons1(tc, len) }
+
+ override string toString() {
+ if len = 1
+ then result = "[" + tc.toString() + "]"
+ else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" // longer paths also show the length
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = len }
+
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc and
+ (
+ exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | // the second element is not tracked; candidates come from Stage 3
+ result = TConsCons(tc2, _, len - 1)
+ or
+ len = 2 and
+ result = TConsNil(tc2, _)
+ or
+ result = TCons1(tc2, len - 1)
+ )
+ or
+ exists(DataFlowType t | // popping the only element: candidate types also come from Stage 3
+ len = 1 and
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ result = TNil(t)
+ )
+ )
+ }
+}
+
+/** Gets the access path approximation obtained by popping `tc` from `apa`, if any. */
+private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { apa.pop(tc) = result }
+
+/**
+ * Gets an access path approximation obtained by pushing `tc` onto `apa`, that
+ * is, one from which popping `tc` yields `apa`.
+ */
+private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { result.pop(tc) = apa }
+
+/** An optional `AccessPathApprox`: either absent, or wrapping one approximation. */
+private newtype TAccessPathApproxOption =
+ TAccessPathApproxNone() or
+ TAccessPathApproxSome(AccessPathApprox apa)
+
+/** An optional access path approximation. */
+private class AccessPathApproxOption extends TAccessPathApproxOption {
+ /**
+ * Gets a textual representation: that of the wrapped approximation, or the
+ * empty string when there is none.
+ */
+ string toString() {
+ exists(AccessPathApprox wrapped |
+ this = TAccessPathApproxSome(wrapped) and
+ result = wrapped.toString()
+ )
+ or
+ result = "" and this = TAccessPathApproxNone()
+ }
+}
+
+private module Stage4 {
+ /** The stage whose results are used to prune this stage (see `flowCand`). */
+ module PrevStage = Stage3;
+
+ /** Access paths of the previous stage; `getApprox` maps an `Ap` to one of these. */
+ class ApApprox = PrevStage::Ap;
+
+ /** Access paths tracked by this stage. */
+ class Ap = AccessPathApprox;
+
+ /** The empty access paths of this stage. */
+ class ApNil = AccessPathApproxNil;
+
+ /** Gets the previous-stage approximation of `ap`, which is its front. */
+ private ApApprox getApprox(Ap ap) { ap.getFront() = result }
+
+ /**
+ * Gets the empty access path carrying the data-flow type of `node`, provided
+ * that `node` is in the previous stage's `revFlow`.
+ */
+ private ApNil getApNil(NodeEx node) {
+ result = TNil(node.getDataFlowType()) and
+ PrevStage::revFlow(node, _)
+ }
+
+ /** Gets an access path with head `tc` and tail `tail`. */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { push(tc, tail) = result }
+
+ /** Gets the `Content` of the head of `ap`, if `ap` has a head. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) {
+ exists(TypedContent head | head = ap.getHead() | result = head.getContent())
+ }
+
+ /** An optional access path of this stage. */
+ class ApOption = AccessPathApproxOption;
+
+ /** Gets the absent access path option. */
+ ApOption apNone() { TAccessPathApproxNone() = result }
+
+ /** Gets the option wrapping `ap`. */
+ ApOption apSome(Ap ap) { TAccessPathApproxSome(ap) = result }
+
+ /** The call contexts tracked by this stage. */
+ class Cc = CallContext;
+
+ /** The call contexts produced when entering a callable (see `getCallContextCall`). */
+ class CcCall = CallContextCall;
+
+ /** The call contexts produced when leaving a callable (see `getCallContextReturn`). */
+ class CcNoCall = CallContextNoCall;
+
+ /**
+ * Gets a call context that is a `CallContextAny`. This is the context used
+ * for sources and after jump steps in `fwdFlow0`.
+ */
+ Cc ccNone() { result instanceof CallContextAny }
+
+ /** The local call contexts used when taking local steps (see `getLocalCc`). */
+ private class LocalCc = LocalCallContext;
+
+ /**
+ * Gets the call context to use inside callable `c` when it is entered
+ * through `call` from a caller whose context is `outercc`. The result is
+ * specific to `call` when `recordDataFlowCallSite(call, c)` holds, and the
+ * generic `TSomeCall()` otherwise.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ (
+ recordDataFlowCallSite(call, c) and result = TSpecificCall(call)
+ or
+ not recordDataFlowCallSite(call, c) and result = TSomeCall()
+ )
+ }
+
+ /**
+ * Gets the call context to use after returning from callable `c`, whose
+ * context was `innercc`, out through `call`. The result is `TReturn(c, call)`
+ * when `reducedViableImplInReturn(c, call)` holds, and `ccNone()` otherwise.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ (
+ reducedViableImplInReturn(c, call) and result = TReturn(c, call)
+ or
+ not reducedViableImplInReturn(c, call) and result = ccNone()
+ )
+ }
+
+ /**
+ * Gets the local call context derived from `cc` and the enclosing callable
+ * of `node`, provided that `node` is a local flow entry in `config`.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
+ exists(DataFlowCallable enclosing | enclosing = node.getEnclosingCallable() |
+ result = getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), enclosing)
+ ) and
+ localFlowEntry(node, config)
+ }
+
+ /**
+ * Holds if there is a local big-step from `node1` to `node2` in `config`
+ * under local call context `lcc`, where the front of the empty access path
+ * `ap` is the one associated with the step.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ exists(AccessPathFront front | front = ap.getFront() |
+ localFlowBigStep(node1, node2, preservesValue, front, config, lcc)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCallNodeCand2` relates return node `node1` to `node2`
+ * at `call`, and both nodes are in the previous stage's `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate flowOutOfCall(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+ ) {
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config)
+ }
+
+ /**
+ * Holds if `flowIntoCallNodeCand2` relates argument `node1` to parameter
+ * `node2` at `call`, and both nodes are in the previous stage's `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate flowIntoCall(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+ ) {
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config)
+ }
+
+ /**
+ * Holds for every `node` and `ap`: this stage applies no extra filtering of
+ * (node, access path) pairs in `fwdFlow`.
+ */
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { any() }
+
+ // Always holds: type checking is not necessary here as it has already been
+ // done in stage 3.
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 4 logic. */
+ /**
+ * Holds if the previous stage's `revFlow` contains `node` with access path
+ * `apa` in `config`; used to restrict the flow computed by this stage.
+ */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets `apa` itself: both `apa` and `result` are equated to the same `apa0`.
+ *
+ * NOTE(review): the `only_bind_into` pragmas are optimizer hints; presumably
+ * they stop the equality from being used to bind either side. Confirm
+ * against the QL annotation documentation.
+ */
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCall` relates `ret` to `out` at `call`, and according
+ * to the previous stage `call` may have flow through and some parameter of
+ * the callable enclosing `ret` may flow through.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if a source in configuration `config` can reach `node` with access
+ * path `ap`.
+ *
+ * `cc` is the call context, which records whether `node` was reached
+ * through an argument of a call; when it was, `argAp` holds the access path
+ * of that argument.
+ *
+ * The raw flow from `fwdFlow0` is restricted to pairs that the previous
+ * stage accepts for the approximation of `ap`, and that pass `filter`.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ filter(node, ap) and
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ fwdFlow0(node, cc, argAp, ap, config)
+ }
+
+ /**
+ * Holds if `node` is reached with access path `ap`, call context `cc`, and
+ * argument access path `argAp` by one of the step kinds below. This is the
+ * unfiltered body of `fwdFlow`, with which it is mutually recursive.
+ */
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // base case: a source starts with no call context and an empty access path
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local step: a value-preserving step keeps the access path; any other
+ // step requires the incoming access path to be empty
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: keeps the access path but resets the call context
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from an empty access path; resets the call
+ // context and continues with the empty access path of `node`
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store: push `tc` onto the access path of the stored value
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read: `ap` is a tail that some store of `c` extended into `ap0`
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable: record the argument access path only if the
+ // previous stage says the parameter may flow through
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow through a callable: restore the context in which `call` was entered
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` (in context
+ * `cc`/`argAp`) and the previous stage has a store-step candidate from
+ * `node1` to `node2` storing into `tc`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType storedType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, storedType, config)
+ |
+ typecheckStore(ap1, storedType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store into a
+ * typed content whose content is `c`, and `cons` is the access path that
+ * results from that store.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent stored |
+ c = stored.getContent() and
+ fwdFlowStore(_, tail, stored, _, _, _, config) and
+ apCons(stored, tail) = cons
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with an access path `ap` whose head
+ * content is `c`, and the previous stage has a read-step candidate of `c`
+ * from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ c = getHeadContent(ap) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ fwdFlow(node1, cc, argAp, ap, config)
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap`
+ * in context `outercc`/`argAp`, and that argument flows into parameter `p`,
+ * where `innercc` is the resulting call context inside the callee. A
+ * non-empty access path is only passed when the step allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean fieldFlowOk |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, fieldFlowOk, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and
+ (
+ fieldFlowOk = true
+ or
+ ap instanceof ApNil
+ )
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node in a `CcNoCall` context with
+ * access path `ap`, and that return node flows out to `out`, where `ccOut`
+ * is the call context after returning. A non-empty access path is only
+ * passed when the step allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(DataFlowCall call, RetNodeEx ret, boolean fieldFlowOk, CcNoCall innercc |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, fieldFlowOk, config) and
+ ccOut = getCallContextReturn(ret.getEnclosingCallable(), call, innercc) and
+ (
+ fieldFlowOk = true
+ or
+ ap instanceof ApNil
+ )
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * call context matching `call` and with recorded argument access path
+ * `argAp`, and that return node flows through out of `call` to `out`. A
+ * non-empty access path is only passed when the step allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean fieldFlowOk, CcCall callCtx |
+ callCtx.matchesCall(call) and
+ fwdFlow(ret, callCtx, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, fieldFlowOk, config) and
+ (
+ fieldFlowOk = true
+ or
+ ap instanceof ApNil
+ )
+ )
+ }
+
+ /**
+ * Holds if the flow covered by `fwdFlow` reaches an argument to `call` with
+ * access path `ap`, and data might flow through the target callable and
+ * back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx param |
+ PrevStage::parameterMayFlowThrough(param, _, unbindApa(getApprox(ap)), config) and
+ fwdFlowIn(call, param, cc, _, argAp, ap, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow takes a store step from `node1` (access path `ap1`)
+ * to `node2` storing into `tc`, giving access path `ap2`, and forward flow
+ * also reaches a read of the same content from `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ exists(Content c | c = tc.getContent() |
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ apCons(tc, ap1) = ap2 and
+ fwdFlowRead(ap2, c, _, _, _, _, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow takes a read step of `c` from `n1` (access path
+ * `ap1`) to `n2`, and `ap2` is a tail that some store of `c` extended into
+ * `ap1`.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowConsCand(ap1, c, ap2, config) and
+ fwdFlowRead(ap1, c, n1, n2, _, _, config)
+ }
+
+ /**
+ * Holds if forward flow passes through `call`: some node `out` is reached
+ * by flow out of `call` that originated from an argument (`argAp0`), and
+ * `call` was entered with that same argument access path in the context
+ * `cc`/`argAp` with which `out` is reached.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` relates `arg` to `p` at `call`, forward flow
+ * reaches `arg`, the previous stage says `p` may flow through, and forward
+ * flow passes through `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config)
+ }
+
+ /**
+ * Holds if `node` with access path `ap` lies on a path from a source to a
+ * sink in configuration `config`.
+ *
+ * `toReturn` is a Boolean recording whether the node must be returned from
+ * the enclosing callable in order to reach a sink; when it must, `returnAp`
+ * holds the access path of the returned value.
+ *
+ * The raw reverse flow from `revFlow0` is restricted to pairs that are also
+ * in `fwdFlow`.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ fwdFlow(node, _, _, ap, config) and
+ revFlow0(node, toReturn, returnAp, ap, config)
+ }
+
+ /**
+ * Holds if `node` with access path `ap` can reach a sink by one of the step
+ * kinds below, taken backwards. This is the unfiltered body of `revFlow`,
+ * with which it is mutually recursive.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // base case: a sink reached by forward flow with an empty access path
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local step: the access path is unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // non-value-preserving local step: both ends have an empty access path
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: keeps the access path but resets the return state
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: both ends have an empty access path
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store: going backwards, `ap` is the tail that a matching read produced from `ap0`
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read: going backwards, `ap` is the access path before the read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow through a callable: restore the return state of the output of `call`
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable: record the returned access path only if forward
+ // flow reached `node` from an argument
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if reverse flow reaches `mid` with access path `ap0`, and forward
+ * flow has a store step from `node` (access path `ap`) to `mid` storing into
+ * `tc`, whose content is `c`, that produces `ap0`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ c = tc.getContent() and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ revFlow(mid, toReturn, returnAp, ap0, config)
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches the target of a
+ * read of `c`, where `cons` is the access path before that read.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx target, Ap readTail |
+ revFlow(target, _, _, tail, config) and
+ tail = pragma[only_bind_into](readTail) and
+ readStepFwd(_, cons, c, target, readTail, config)
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches some node with access path `ap` (and return
+ * state `toReturn`/`returnAp`), and return node `ret` flows out of `call` to
+ * that node. A non-empty access path is only passed when the step allows
+ * field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean fieldFlowOk |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, fieldFlowOk, config) and
+ (
+ fieldFlowOk = true
+ or
+ ap instanceof ApNil
+ )
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with access path `ap` and
+ * `toReturn = false`, and `arg` flows into that parameter. A non-empty
+ * access path is only passed when the step allows field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx param, boolean fieldFlowOk |
+ revFlow(param, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, param, fieldFlowOk, config) and
+ (
+ fieldFlowOk = true
+ or
+ ap instanceof ApNil
+ )
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with access path `ap`,
+ * `toReturn = true`, and recorded return access path `returnAp`, and `arg`
+ * flows through into that parameter at `call`. A non-empty access path is
+ * only passed when the step allows field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx param, boolean fieldFlowOk |
+ revFlow(param, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, param, fieldFlowOk, config) and
+ (
+ fieldFlowOk = true
+ or
+ ap instanceof ApNil
+ )
+ )
+ }
+
+ /**
+ * Holds if the flow covered by `revFlow` reaches an output from `call` with
+ * access path `ap`, and data might flow through the target callable, so that
+ * reverse flow reaches an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall callCtx |
+ callCtx.matchesCall(call) and
+ fwdFlow(ret, callCtx, apSome(_), ap, config) and
+ revFlowOut(call, ret, toReturn, returnAp, ap, config)
+ )
+ }
+
+ /**
+ * Holds if there is a store step from `node1` to `node2` storing into `tc`
+ * with content type `contentType`, where `ap1` is the access path at `node1`
+ * and the step is confirmed by both `revFlowStore` and `revFlowConsCand`.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap consAp, Content content |
+ revFlowConsCand(consAp, content, ap1, config) and
+ revFlowStore(consAp, content, ap1, node1, tc, node2, _, _, config) and
+ store(node1, tc, node2, contentType, config)
+ )
+ }
+
+ /**
+ * Holds if there is a read step of `c` from `node1` to `node2` that is in
+ * the forward flow, whose target is in the reverse flow, and that is matched
+ * by a store of `c` in `revFlowStore`.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap before, Ap after |
+ revFlow(node2, _, _, pragma[only_bind_into](after), pragma[only_bind_into](config)) and
+ readStepFwd(node1, before, c, node2, after, config) and
+ revFlowStore(before, c, pragma[only_bind_into](after), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is in `revFlow` for `config` with some access path and return state. */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /** Holds if `storeStepFwd` has a store into `tc` from access path `ap`; used by `stats`. */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /** Holds if `storeStepCand` has a store into `tc` from access path `ap`. */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c` is in `revFlow` with access path
+ * `ap`, `toReturn = true`, and recorded return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ p.getEnclosingCallable() = c and
+ revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, entered with access path `ap`, may
+ * flow to a return node of `c` according to both `fwdFlow` and `revFlow`.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap retAp, ReturnKindExt retKind, int paramPos |
+ paramPos = p.getPosition() and
+ retKind = ret.getKind() and
+ ret.getEnclosingCallable() = c and
+ parameterFlow(p, ap, retAp, c, config) and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](retAp),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), retAp, config) and
+ // we don't expect a parameter to return stored in itself
+ not retKind.(ParamUpdateReturnKind).getPosition() = paramPos
+ )
+ }
+
+ /**
+ * Holds if reverse flow passes through `call`: some argument is reached by
+ * reverse flow that entered the callee to be returned, and the matching
+ * output of `call` is in the reverse flow with the same return state.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap calleeReturnAp, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap argAp |
+ revFlow(arg, toReturn, returnAp, argAp, config) and
+ revFlowInToReturn(call, arg, calleeReturnAp, argAp, config) and
+ revFlowIsReturned(call, toReturn, returnAp, calleeReturnAp, config)
+ )
+ }
+
+ /**
+ * Holds if the forward (`fwd = true`) or reverse (`fwd = false`) flow for
+ * `config` covers `nodes` distinct nodes, `fields` distinct typed contents
+ * in cons candidates, `conscand` cons-candidate pairs, and `tuples` flow
+ * tuples.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = false and
+ nodes = count(NodeEx n | revFlow(n, _, _, _, config)) and
+ fields = count(TypedContent tc | consCand(tc, _, config)) and
+ conscand = count(TypedContent tc, Ap ap | consCand(tc, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ or
+ fwd = true and
+ nodes = count(NodeEx n | fwdFlow(n, _, _, _, config)) and
+ fields = count(TypedContent tc | fwdConsCand(tc, _, config)) and
+ conscand = count(TypedContent tc, Ap ap | fwdConsCand(tc, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ }
+ /* End: Stage 4 logic. */
+}
+
+/**
+ * Gets `conf` itself: both `conf` and `result` are equated to the same `c`.
+ *
+ * NOTE(review): the `only_bind_into` pragmas are optimizer hints; presumably
+ * they stop the equality from being used to bind either side. Confirm
+ * against the QL annotation documentation.
+ */
+bindingset[conf, result]
+private Configuration unbindConf(Configuration conf) {
+ exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
+}
+
+/**
+ * Holds if `n` is in the stage 4 reverse flow with `toReturn = true`, is
+ * reached by forward flow in a call context with recorded argument access
+ * path `apa`, and some parameter of the enclosing callable of `n` may flow
+ * through with access path `apa`.
+ */
+private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) {
+ exists(DataFlowCallable callable, AccessPathApprox nodeApa |
+ callable = n.getEnclosingCallable() and
+ Stage4::parameterMayFlowThrough(_, callable, apa, _) and
+ Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), nodeApa, config) and
+ Stage4::revFlow(n, true, _, nodeApa, config)
+ )
+}
+
+/**
+ * A summary context: either none, or a parameter `p` paired with an access
+ * path `ap` whose approximation stage 4 says may flow through from `p`.
+ */
+private newtype TSummaryCtx =
+ TSummaryCtxNone() or
+ TSummaryCtxSome(ParamNodeEx p, AccessPath ap) {
+ Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _)
+ }
+
+/**
+ * A context for generating flow summaries. This represents flow entry through
+ * a specific parameter with an access path of a specific shape.
+ *
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+ /** Gets a textual representation of this summary context. */
+ abstract string toString();
+}
+
+/** A summary context from which no flow summary can be generated. */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+ /** Gets the empty string as the textual representation. */
+ override string toString() { result = "" }
+}
+
+/**
+ * A summary context from which a flow summary can be generated; it consists
+ * of the entry parameter and the access path at entry.
+ */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+ private ParamNodeEx param;
+ private AccessPath path;
+
+ SummaryCtxSome() { this = TSummaryCtxSome(param, path) }
+
+ /** Gets the position of the entry parameter. */
+ int getParameterPos() { param.isParameterOf(_, result) }
+
+ /** Holds if the entry parameter is at the specified location. */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ param.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ override string toString() { result = param + ": " + path }
+}
+
+/**
+ * Gets the number of length 2 access path approximations that correspond to
+ * `apa`, counted as the number of distinct fronts among the stage 4 cons
+ * candidates for the head of `apa` whose length is one less than `apa`'s.
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+ exists(TypedContent head, int apaLen |
+ head = apa.getHead() and
+ apaLen = apa.len() and
+ result =
+ strictcount(AccessPathFront front |
+ exists(AccessPathApprox tail |
+ tail.getFront() = front and
+ tail.len() = apaLen - 1 and
+ Stage4::consCand(head, tail, config)
+ )
+ )
+ )
+}
+
+/**
+ * Gets the number of nodes that are in the stage 4 reverse flow with access
+ * path `apa` or that may use a summary with `apa`. Has no result if there
+ * are no such nodes.
+ */
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+ result =
+ strictcount(NodeEx n |
+ nodeMayUseSummary(n, apa, config)
+ or
+ Stage4::revFlow(n, _, _, apa, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation matching `apa` is expected
+ * to be expensive, that is, both the number of unfolded approximations and
+ * the estimated number of extra tuples exceed the limits given by
+ * `accessPathCostLimits`.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+ exists(int unfolded, int nodeCount, int apLimit, int tupleLimit |
+ accessPathCostLimits(apLimit, tupleLimit) and
+ unfolded = count1to2unfold(apa, config) and
+ nodeCount = countNodesUsingAccessPath(apa, config)
+ |
+ apLimit < unfolded and
+ tupleLimit < (unfolded - 1) * nodeCount
+ )
+}
+
+/**
+ * Gets a tail of `apa`: an approximation obtained by popping the head of
+ * `apa` that is also a stage 4 cons candidate for that head.
+ */
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+ exists(TypedContent popped |
+ Stage4::consCand(popped, result, config) and
+ result = apa.pop(popped)
+ )
+}
+
+/**
+ * Holds with `unfold = false` if a precise head-tail representation of `apa` is
+ * expected to be expensive. Holds with `unfold = true` otherwise.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+ // A head that forces high precision is always unfolded, regardless of cost.
+ if apa.getHead().forceHighPrecision()
+ then unfold = true
+ else
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = countPotentialAps(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ // Expensive only when both limits are exceeded: the number of precise
+ // access paths and the `(aps - 1) * nodes` tuple estimate.
+ if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
+ )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`: the full
+ * head-tail count when `apa` is unfolded, and otherwise either the length 2
+ * unfolding count or 1.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+ // Unfolded: one access path per potential precise representation.
+ evalUnfold(apa, true, config) and
+ result = countPotentialAps(apa, config)
+ or
+ // Not unfolded, but the length 2 unfolding is affordable.
+ evalUnfold(apa, false, config) and
+ not expensiveLen1to2unfolding(apa, config) and
+ result = count1to2unfold(apa, config)
+ or
+ // Not unfolded, and either not a `Cons1` or too expensive to unfold to length 2.
+ evalUnfold(apa, false, config) and
+ (expensiveLen1to2unfolding(apa, config) or not apa instanceof AccessPathApproxCons1) and
+ result = 1
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ */
+language[monotonicAggregates]
+private int countPotentialAps(AccessPathApprox apa, Configuration config) {
+ // An empty approximation corresponds to exactly one access path.
+ apa instanceof AccessPathApproxNil and result = 1
+ or
+ // Otherwise add up the access path counts of every tail of `apa`.
+ result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config))
+}
+
+/**
+ * The representations of an access path. Which one is used for a given
+ * approximation depends on `evalUnfold` and `expensiveLen1to2unfolding`.
+ */
+private newtype TAccessPath =
+ // The empty access path, carrying a type.
+ TAccessPathNil(DataFlowType t) or
+ // Precise head-tail form, for approximations that are not marked `unfold = false`.
+ TAccessPathCons(TypedContent head, AccessPath tail) {
+ exists(AccessPathApproxCons apa |
+ not evalUnfold(apa, false, _) and
+ head = apa.getHead() and
+ tail.getApprox() = getATail(apa, _)
+ )
+ } or
+ // Two heads plus a length, for approximations that are not unfolded but
+ // whose length 2 unfolding is not expensive.
+ TAccessPathCons2(TypedContent head1, TypedContent head2, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ not expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head1 = apa.getHead() and
+ head2 = getATail(apa, _).getHead()
+ )
+ } or
+ // One head plus a length, for approximations that are not unfolded and
+ // whose length 2 unfolding is expensive.
+ TAccessPathCons1(TypedContent head, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head = apa.getHead()
+ )
+ }
+
+/**
+ * The nodes of the path graph: intermediate nodes carrying a call context,
+ * summary context, and access path, and sink nodes carrying only the
+ * configuration.
+ */
+private newtype TPathNode =
+ TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) {
+ // A PathNode is introduced by a source ...
+ Stage4::revFlow(node, config) and
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // ... or a step from an existing PathNode to another node.
+ exists(PathNodeMid mid |
+ pathStep(mid, node, cc, sc, ap) and
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ // The step target must be in the stage 4 reverse flow for this access path.
+ Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config))
+ )
+ } or
+ TPathNodeSink(NodeEx node, Configuration config) {
+ sinkNode(node, pragma[only_bind_into](config)) and
+ Stage4::revFlow(node, pragma[only_bind_into](config)) and
+ (
+ // A sink that is also a source ...
+ sourceNode(node, config)
+ or
+ // ... or a sink that can be reached from a source
+ // (only with an empty access path).
+ exists(PathNodeMid mid |
+ pathStep(mid, node, _, _, TAccessPathNil(_)) and
+ pragma[only_bind_into](config) = mid.getConfiguration()
+ )
+ )
+ }
+
+/**
+ * A sequence of `TypedContent`s terminated by a `DataFlowType`. When data
+ * flows from a source to a node with a given `AccessPath`, the path lists the
+ * dereference operations needed to get from the value in the node to the
+ * tracked object, and the final type is the type of the tracked object.
+ */
+abstract private class AccessPath extends TAccessPath {
+ /** Gets the first `TypedContent` of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the access path that remains after the head, if any. */
+ abstract AccessPath getTail();
+
+ /** Gets the front of this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the approximation of this access path. */
+ abstract AccessPathApprox getApprox();
+
+ /** Gets the number of `TypedContent`s in this access path. */
+ abstract int length();
+
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /**
+ * Gets the access path obtained by popping `tc` from this access path, if
+ * any; this requires `tc` to be the head.
+ */
+ final AccessPath pop(TypedContent tc) {
+ this.getHead() = tc and
+ this.getTail() = result
+ }
+
+ /** Gets the access path obtained by pushing `tc` onto this access path. */
+ final AccessPath push(TypedContent tc) { result.pop(tc) = this }
+}
+
+/** The empty access path, which carries only a type. */
+private class AccessPathNil extends AccessPath, TAccessPathNil {
+ private DataFlowType type;
+
+ AccessPathNil() { this = TAccessPathNil(type) }
+
+ /** Gets the type carried by this empty access path. */
+ DataFlowType getType() { type = result }
+
+ override int length() { result = 0 }
+
+ override TypedContent getHead() { none() }
+
+ override AccessPath getTail() { none() }
+
+ override AccessPathApproxNil getApprox() { TNil(type) = result }
+
+ override AccessPathFrontNil getFront() { TFrontNil(type) = result }
+
+ override string toString() {
+ exists(string typeText | typeText = concat(": " + ppReprType(type)) | result = typeText)
+ }
+}
+
+/** An access path in precise head-tail form. */
+private class AccessPathCons extends AccessPath, TAccessPathCons {
+ private TypedContent first;
+ private AccessPath rest;
+
+ AccessPathCons() { this = TAccessPathCons(first, rest) }
+
+ override TypedContent getHead() { first = result }
+
+ override AccessPath getTail() { rest = result }
+
+ override AccessPathFrontHead getFront() { TFrontHead(first) = result }
+
+ override int length() { result = 1 + rest.length() }
+
+ override AccessPathApproxCons getApprox() {
+ result = TCons1(first, this.length())
+ or
+ result = TConsCons(first, rest.getHead(), this.length())
+ or
+ result = TConsNil(first, rest.(AccessPathNil).getType())
+ }
+
+ /**
+ * Gets the textual representation without the leading "[". When
+ * `needsSuffix` is true the result ends in ", ... (" and the caller must
+ * append the length and ")]"; otherwise the result is already complete.
+ */
+ private string toStringImpl(boolean needsSuffix) {
+ // Tail in single-head form: print its head, elide the rest if there is any.
+ exists(TypedContent second, int restLen | rest = TAccessPathCons1(second, restLen) |
+ result = first + ", " + second + "]" and restLen = 1 and needsSuffix = false
+ or
+ result = first + ", " + second + ", ... (" and restLen > 1 and needsSuffix = true
+ )
+ or
+ // Tail in two-head form: print both heads, elide the rest if there is any.
+ exists(TypedContent second, TypedContent third, int restLen |
+ rest = TAccessPathCons2(second, third, restLen)
+ |
+ result = first + ", " + second + ", " + third + "]" and restLen = 2 and needsSuffix = false
+ or
+ result = first + ", " + second + ", " + third + ", ... (" and
+ restLen > 2 and
+ needsSuffix = true
+ )
+ or
+ // Tail in head-tail form: recurse.
+ result = first + ", " + rest.(AccessPathCons).toStringImpl(needsSuffix)
+ or
+ // Empty tail: close the list and append the type, if it has a representation.
+ exists(DataFlowType tailType |
+ rest = TAccessPathNil(tailType) and
+ needsSuffix = false and
+ result = first.toString() + "]" + concat(" : " + ppReprType(tailType))
+ )
+ }
+
+ override string toString() {
+ result = "[" + this.toStringImpl(false)
+ or
+ result = "[" + this.toStringImpl(true) + this.length().toString() + ")]"
+ }
+}
+
+/** An access path represented by its two leading `TypedContent`s and its length. */
+private class AccessPathCons2 extends AccessPath, TAccessPathCons2 {
+ private TypedContent first;
+ private TypedContent second;
+ private int size;
+
+ AccessPathCons2() { this = TAccessPathCons2(first, second, size) }
+
+ override TypedContent getHead() { first = result }
+
+ override AccessPathFrontHead getFront() { TFrontHead(first) = result }
+
+ override int length() { size = result }
+
+ override AccessPath getTail() {
+ // Any access path headed by `second`, one shorter than this one, whose
+ // approximation is a stage 4 cons candidate for `first`.
+ result.length() = size - 1 and
+ result.getHead() = second and
+ Stage4::consCand(first, result.getApprox(), _)
+ }
+
+ override AccessPathApproxCons getApprox() {
+ result = TCons1(first, size) or
+ result = TConsCons(first, second, size)
+ }
+
+ override string toString() {
+ exists(string heads | heads = "[" + first.toString() + ", " + second.toString() |
+ size = 2 and result = heads + "]"
+ or
+ size != 2 and result = heads + ", ... (" + size.toString() + ")]"
+ )
+ }
+}
+
+/** An access path represented by its leading `TypedContent` and its length only. */
+private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
+ private TypedContent first;
+ private int size;
+
+ AccessPathCons1() { this = TAccessPathCons1(first, size) }
+
+ override TypedContent getHead() { first = result }
+
+ override AccessPathFrontHead getFront() { TFrontHead(first) = result }
+
+ override int length() { size = result }
+
+ override AccessPathApproxCons getApprox() { TCons1(first, size) = result }
+
+ override AccessPath getTail() {
+ // Any access path one shorter than this one whose approximation is a
+ // stage 4 cons candidate for `first`.
+ result.length() = size - 1 and
+ Stage4::consCand(first, result.getApprox(), _)
+ }
+
+ override string toString() {
+ size = 1 and result = "[" + first.toString() + "]"
+ or
+ size != 1 and result = "[" + first.toString() + ", ... (" + size.toString() + ")]"
+ }
+}
+
+/**
+ * A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
+ * Only those `PathNode`s that are reachable from a source are generated.
+ */
+class PathNode extends TPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { none() }
+
+ /**
+ * Gets a textual representation of this element that also includes a
+ * textual representation of the call context.
+ */
+ string toStringWithContext() { none() }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ none()
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { result = this.(PathNodeImpl).getNodeEx().projectToNode() }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() }
+
+ /** Gets a direct successor of this node, provided that this node is hidden. */
+ private PathNode getASuccessorIfHidden() {
+ exists(PathNodeImpl impl | impl = this |
+ impl.isHidden() and
+ result = impl.getASuccessorImpl()
+ )
+ }
+
+ /**
+ * Gets a successor of this node, if any. Hidden nodes are skipped: neither
+ * this node nor the result is hidden, and any nodes in between are.
+ */
+ final PathNode getASuccessor() {
+ not this.(PathNodeImpl).isHidden() and
+ not result.(PathNodeImpl).isHidden() and
+ result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*()
+ }
+
+ /** Holds if this node is a source. */
+ predicate isSource() { none() }
+}
+
+/**
+ * The internal implementation view of a `PathNode`. It exposes the underlying
+ * `NodeEx` and the raw successor relation (which still includes hidden nodes),
+ * and supplies the string and location predicates declared on `PathNode`.
+ */
+abstract private class PathNodeImpl extends PathNode {
+ /** Gets a direct successor of this node, including hidden ones. */
+ abstract PathNode getASuccessorImpl();
+
+ /** Gets the underlying extended node. */
+ abstract NodeEx getNodeEx();
+
+ /**
+ * Holds if this node should be left out of the reported path graph: either
+ * its underlying node is a `hiddenNode` and this is neither a source nor a
+ * sink path node, or its `NodeEx` is an implicit-read node.
+ */
+ predicate isHidden() {
+ hiddenNode(this.getNodeEx().asNode()) and
+ not this.isSource() and
+ not this instanceof PathNodeSink
+ or
+ this.getNodeEx() instanceof TNodeImplicitRead
+ }
+
+ /**
+ * Gets the access-path suffix used in `toString`: empty for sink nodes; for
+ * intermediate nodes the access path's string, preceded by a space unless
+ * that string is empty.
+ */
+ private string ppAp() {
+ this instanceof PathNodeSink and result = ""
+ or
+ exists(string s | s = this.(PathNodeMid).getAp().toString() |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ /**
+ * Gets the call-context suffix used in `toStringWithContext`: empty for sink
+ * nodes; for intermediate nodes the call context wrapped in ` <...>`.
+ */
+ private string ppCtx() {
+ this instanceof PathNodeSink and result = ""
+ or
+ result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
+ }
+
+ override string toString() { result = this.getNodeEx().toString() + ppAp() }
+
+ override string toStringWithContext() { result = this.getNodeEx().toString() + ppAp() + ppCtx() }
+
+ // The location is delegated to the underlying `NodeEx`.
+ override predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/** Holds if a sink is reachable from `n` in zero or more successor steps. */
+private predicate directReach(PathNode n) {
+ exists(PathNode next | next = n.getASuccessor() | directReach(next))
+ or
+ n instanceof PathNodeSink
+}
+
+/**
+ * Holds if `n` either reaches a sink (`directReach`) or reaches a return node
+ * of a summarized subpath (`Subpaths::retReach`).
+ */
+private predicate reach(PathNode n) {
+ Subpaths::retReach(n)
+ or
+ directReach(n)
+}
+
+/** Holds if `n2` is a successor of `n1` and a sink is reachable from `n2`. */
+private predicate pathSucc(PathNode n1, PathNode n2) {
+ directReach(n2) and
+ n2 = n1.getASuccessor()
+}
+
+/** Holds if `n2` can be reached from `n1` by one or more `pathSucc` steps (transitive closure via `fastTC`). */
+private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
+
+/**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ * Only nodes satisfying `reach` (and edges leading to such nodes) are included.
+ */
+module PathGraph {
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
+
+ /** Holds if `n` is a node in the graph of data flow path explanations. */
+ query predicate nodes(PathNode n, string key, string val) {
+ reach(n) and key = "semmle.label" and val = n.toString()
+ }
+
+ /** Holds for the subpath tuples of the graph; this is an alias of `Subpaths::subpaths`. */
+ query predicate subpaths = Subpaths::subpaths/4;
+}
+
+/**
+ * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`,
+ * a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`.
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+ NodeEx node;
+ CallContext cc;
+ SummaryCtx sc;
+ AccessPath ap;
+ Configuration config;
+
+ PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ /** Gets the call context of this node. */
+ CallContext getCallContext() { result = cc }
+
+ /** Gets the summary context of this node. */
+ SummaryCtx getSummaryCtx() { result = sc }
+
+ /** Gets the access path of this node. */
+ AccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ /**
+ * Gets an intermediate node that is reached from this node by one `pathStep`
+ * and that belongs to the same configuration.
+ */
+ private PathNodeMid getSuccMid() {
+ pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(),
+ result.getAp()) and
+ result.getConfiguration() = unbindConf(this.getConfiguration())
+ }
+
+ override PathNodeImpl getASuccessorImpl() {
+ // an intermediate step to another intermediate node
+ result = getSuccMid()
+ or
+ // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+ exists(PathNodeMid mid, PathNodeSink sink |
+ mid = getSuccMid() and
+ mid.getNodeEx() = sink.getNodeEx() and
+ mid.getAp() instanceof AccessPathNil and
+ sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+ result = sink
+ )
+ }
+
+ /**
+ * Holds if this node is a source: its node is a source node of the
+ * configuration and it carries the initial state (any call context, no
+ * summary context, empty access path).
+ */
+ override predicate isSource() {
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap instanceof AccessPathNil
+ }
+}
+
+/**
+ * A flow graph node corresponding to a sink. This is disjoint from the
+ * intermediate nodes in order to uniquely correspond to a given sink by
+ * excluding the `CallContext`.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+ NodeEx node;
+ Configuration config;
+
+ PathNodeSink() { this = TPathNodeSink(node, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ override Configuration getConfiguration() { result = config }
+
+ // A sink node is always the end of a path.
+ override PathNode getASuccessorImpl() { none() }
+
+ // A sink node also counts as a source when its node is a source node of the same configuration.
+ override predicate isSource() { sourceNode(node, config) }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`. The last step in or out of
+ * a callable is recorded by `cc`.
+ *
+ * `cc`, `sc`, and `ap` are the call context, summary context, and access path
+ * that hold at `node` after the step. Each disjunct covers one kind of step.
+ */
+private predicate pathStep(
+ PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+ // Local flow: the call context and summary context are carried over from `mid`.
+ exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC |
+ midnode = mid.getNodeEx() and
+ conf = mid.getConfiguration() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ localCC =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ midnode.getEnclosingCallable()) and
+ ap0 = mid.getAp()
+ |
+ // Big step with flag `true`: the access path is unchanged.
+ localFlowBigStep(midnode, node, true, _, conf, localCC) and
+ ap = ap0
+ or
+ // Big step with flag `false`: only from an empty access path; the front of
+ // the new access path is the one reported by the step.
+ localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and
+ ap0 instanceof AccessPathNil
+ )
+ or
+ // Jump step: call and summary contexts are reset, the access path is kept.
+ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = mid.getAp()
+ or
+ // Additional jump step: only from an empty access path; contexts are reset
+ // and the new (empty) access path is typed by `node`.
+ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ mid.getAp() instanceof AccessPathNil and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // Store step: popping `tc` from the new access path yields the one at `mid`.
+ exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ // Read step: pushing `tc` onto the new access path yields the one at `mid`.
+ exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ // Flow into a callable: `cc` and `sc` are the contexts inside the callee.
+ pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp()
+ or
+ // Flow out of a callable: no summary context applies after returning.
+ pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone
+ or
+ // Flow through a callable: the summary context of `mid` is kept.
+ pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx()
+}
+
+/**
+ * Holds if there is a candidate read step (per `Stage4::readStepCand`) from
+ * the node of `mid` to `node` that reads the content of `tc`, where `ap0` is
+ * the access path at `mid`, `tc` is its head, and `cc` is the call context
+ * of `mid`.
+ */
+pragma[nomagic]
+private predicate pathReadStep(
+ PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+ ap0 = mid.getAp() and
+ tc = ap0.getHead() and
+ Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and
+ cc = mid.getCallContext()
+}
+
+/**
+ * Holds if there is a candidate store step (per `Stage4::storeStepCand`) from
+ * the node of `mid` to `node` that stores into `tc`, where `ap0` is the access
+ * path at `mid` and `cc` is the call context of `mid`.
+ */
+pragma[nomagic]
+private predicate pathStoreStep(
+ PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+ ap0 = mid.getAp() and
+ Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and
+ cc = mid.getCallContext()
+}
+
+/**
+ * Holds if `mid` is at a return node with return position `pos`, its call
+ * context `innercc` is a `CallContextNoCall`, `apa` approximates its access
+ * path, and `config` is its configuration.
+ */
+private predicate pathOutOfCallable0(
+ PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+ Configuration config
+) {
+ pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+ innercc = mid.getCallContext() and
+ innercc instanceof CallContextNoCall and
+ apa = mid.getAp().getApprox() and
+ config = mid.getConfiguration()
+}
+
+/**
+ * Holds if `mid` is at a return of kind `kind` that may return to `call`
+ * (per `resolveReturn`), with `apa` and `config` as in `pathOutOfCallable0`.
+ * The resulting call context `cc` is `TReturn(c, call)` when
+ * `reducedViableImplInReturn(c, call)` holds, and the "any" context otherwise.
+ */
+pragma[nomagic]
+private predicate pathOutOfCallable1(
+ PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ pathOutOfCallable0(mid, pos, innercc, apa, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ |
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+ )
+}
+
+/**
+ * Gets an out node of `call` for return kind `kind` that is also present in
+ * `Stage4::revFlow` with access path approximation `apa` under `config`.
+ */
+pragma[noinline]
+private NodeEx getAnOutNodeFlow(
+ ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+ result.asNode() = kind.getAnOutNode(call) and
+ Stage4::revFlow(result, _, _, apa, config)
+}
+
+/**
+ * Holds if data may flow from `mid` to `out`. The last step of this path
+ * is a return from a callable and is recorded by `cc`, if needed.
+ *
+ * This combines `pathOutOfCallable1` (return side) with `getAnOutNodeFlow`
+ * (call side) on the shared `kind`, `call`, `apa`, and `config`.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+ exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config |
+ pathOutOfCallable1(mid, call, kind, cc, apa, config) and
+ out = getAnOutNodeFlow(kind, call, apa, config)
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`.
+ *
+ * `ap` is the access path at `mid` and `apa` is its approximation.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+ PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ apa = ap.getApprox()
+ )
+}
+
+/**
+ * Holds if the `i`th parameter of `callable` is present in `Stage4::revFlow`
+ * with access path approximation `apa` under `config`.
+ */
+pragma[noinline]
+private predicate parameterCand(
+ DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+) {
+ exists(ParamNodeEx p |
+ Stage4::revFlow(p, _, _, apa, config) and
+ p.isParameterOf(callable, i)
+ )
+}
+
+/**
+ * Holds if `mid` is the `i`th argument of `call` in context `outercc` with
+ * access path `ap`, `callable` is a target of `call` in that context, and the
+ * `i`th parameter of `callable` is a candidate (`parameterCand`) for the
+ * approximation of `ap`.
+ */
+pragma[nomagic]
+private predicate pathIntoCallable0(
+ PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+ AccessPath ap
+) {
+ exists(AccessPathApprox apa |
+ pathIntoArg(mid, i, outercc, call, ap, apa) and
+ callable = resolveCall(call, outercc) and
+ // `any(int j | j <= i and j >= i)` has the same value as `i`.
+ // NOTE(review): presumably written this way to influence binding/join order - confirm.
+ parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration())
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to `p` through `call`. The contexts
+ * before and after entering the callable are `outercc` and `innercc`,
+ * respectively.
+ *
+ * `sc` is the summary context inside the callee: `TSummaryCtxSome(p, ap)` when
+ * that value exists, and `TSummaryCtxNone()` otherwise.
+ */
+private predicate pathIntoCallable(
+ PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+ DataFlowCall call
+) {
+ exists(int i, DataFlowCallable callable, AccessPath ap |
+ pathIntoCallable0(mid, callable, i, outercc, call, ap) and
+ p.isParameterOf(callable, i) and
+ (
+ sc = TSummaryCtxSome(p, ap)
+ or
+ not exists(TSummaryCtxSome(p, ap)) and
+ sc = TSummaryCtxNone()
+ )
+ |
+ // Record the specific call site only when `recordDataFlowCallSite` asks for it.
+ if recordDataFlowCallSite(call, callable)
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+}
+
+/**
+ * Holds if data may flow from a parameter given by `sc` to a return of kind `kind`.
+ *
+ * There must be a path node at a return node of that kind with call context
+ * `cc`, summary context `sc`, access path `ap` (approximated by `apa`), and
+ * configuration `config`. Returns that merely update the very parameter
+ * named by `sc` are excluded.
+ */
+pragma[nomagic]
+private predicate paramFlowsThrough(
+ ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(PathNodeMid mid, RetNodeEx ret, int pos |
+ mid.getNodeEx() = ret and
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp() and
+ apa = ap.getApprox() and
+ pos = sc.getParameterPos() and
+ // Exclude a parameter-update return whose position is that of the entry parameter.
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+}
+
+/**
+ * Holds if `mid` flows into a callable via `call` from context `cc`, and the
+ * entered parameter flows through to a return of kind `kind` with access path
+ * `ap` (approximated by `apa`) in the configuration of `mid`.
+ */
+pragma[nomagic]
+private predicate pathThroughCallable0(
+ DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+ AccessPathApprox apa
+) {
+ exists(CallContext innercc, SummaryCtx sc |
+ pathIntoCallable(mid, _, cc, innercc, sc, call) and
+ paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` through a callable to the node `out`.
+ * The context `cc` is restored to its value prior to entering the callable.
+ *
+ * `ap` is the access path at `out`, i.e. the one that holds at the return
+ * inside the callable.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+ exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa |
+ pathThroughCallable0(call, mid, kind, cc, ap, apa) and
+ out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+/**
+ * Provides the `subpaths` relation, which links an edge that summarizes flow
+ * through a callable (`arg -> out`) to the path inside the callable
+ * (`par ... ret`), and `retReach`, which identifies nodes leading to such
+ * `ret` nodes.
+ */
+private module Subpaths {
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ */
+ pragma[nomagic]
+ private predicate subpaths01(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
+ pathIntoCallable(arg, par, _, innercc, sc, _) and
+ paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+ unbindConf(arg.getConfiguration()))
+ }
+
+ /**
+ * Holds if `subpaths01(arg, par, sc, innercc, kind, out, apout)` holds and,
+ * in addition, `out` is an out node of kind `kind` for some call.
+ */
+ pragma[nomagic]
+ private predicate subpaths02(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ subpaths01(arg, par, sc, innercc, kind, out, apout) and
+ out.asNode() = kind.getAnOutNode(_)
+ }
+
+ /** Gets the configuration of `n`; kept as a separate `nomagic` predicate. */
+ pragma[nomagic]
+ private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
+ *
+ * `ret` is a non-hidden path node at a return node of the right kind whose
+ * call context, summary context, access path, and configuration match those
+ * determined by `subpaths02`.
+ */
+ pragma[nomagic]
+ private predicate subpaths03(
+ PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+ ) {
+ exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+ subpaths02(arg, par, sc, innercc, kind, out, apout) and
+ ret.getNodeEx() = retnode and
+ kind = retnode.getKind() and
+ innercc = ret.getCallContext() and
+ sc = ret.getSummaryCtx() and
+ ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+ apout = ret.getAp() and
+ not ret.isHidden()
+ )
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+ * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+ * `ret -> out` is summarized as the edge `arg -> out`.
+ */
+ predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+ exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+ // Both `par` and `out` must be successors of `arg` in the reported path graph.
+ pragma[only_bind_into](arg).getASuccessor() = par and
+ pragma[only_bind_into](arg).getASuccessor() = out and
+ subpaths03(arg, p, ret, o, apout) and
+ par.getNodeEx() = p and
+ out.getNodeEx() = o and
+ out.getAp() = apout
+ )
+ }
+
+ /**
+ * Holds if `n` can reach a return node in a summarized subpath.
+ *
+ * The backwards traversal stops at nodes that occur as the `par` component
+ * of a subpath-tuple: predecessors of such nodes are not added.
+ */
+ predicate retReach(PathNode n) {
+ subpaths(_, _, n, _)
+ or
+ exists(PathNode mid |
+ retReach(mid) and
+ n.getASuccessor() = mid and
+ not subpaths(_, mid, _, _)
+ )
+ }
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ *
+ * `flowsource` is a source path node for `source` in `configuration`, and
+ * `flowsink` is the sink path node for `sink`. The two are either the same
+ * node or connected by one or more `pathSucc` steps.
+ */
+private predicate flowsTo(
+ PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+ flowsource.isSource() and
+ flowsource.getConfiguration() = configuration and
+ flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
+ (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
+ flowsink.getNodeEx().asNode() = sink
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ *
+ * This is the node-level projection of the private five-argument `flowsTo`,
+ * with the path nodes left unconstrained.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+ flowsTo(_, _, source, sink, configuration)
+}
+
+/**
+ * Holds if `nodes`, `fields`, `conscand`, and `tuples` are counts over the
+ * path nodes: distinct underlying nodes, distinct access path heads, distinct
+ * access paths, and path nodes, respectively.
+ *
+ * With `fwd = true` all path nodes are counted; with `fwd = false` only those
+ * satisfying `reach` are counted. The counts are not split by configuration.
+ */
+private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) {
+ fwd = true and
+ nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and
+ fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
+ conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
+ tuples = count(PathNode pn)
+ or
+ fwd = false and
+ nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
+ fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
+ conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
+ tuples = count(PathNode pn | reach(pn))
+}
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Calculates per-stage metrics for data flow.
+ *
+ * `stage` is a label such as `"1 Fwd"` and `n` is a matching number
+ * (10, 15, ..., 55) that increases with the stage order. Stages 1 to 4 report
+ * the statistics of the corresponding `StageN` module for `config`. Stage 5
+ * reports `finalStats`, which takes no configuration argument, so its figures
+ * are not specific to `config`.
+ */
+predicate stageStats(
+ int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+ stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples)
+ or
+ stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples)
+}
+
+private module FlowExploration {
+ /**
+ * Holds if there is an inter-callable step from a node in `c1` to a node in
+ * a different callable `c2`: a jump step or additional jump step of `config`,
+ * an argument-to-parameter step, or a return-to-out-node step.
+ */
+ private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) {
+ exists(NodeEx node1, NodeEx node2 |
+ jumpStep(node1, node2, config)
+ or
+ additionalJumpStep(node1, node2, config)
+ or
+ // flow into callable
+ viableParamArgEx(_, node2, node1)
+ or
+ // flow out of a callable
+ viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2)
+ |
+ c1 = node1.getEnclosingCallable() and
+ c2 = node2.getEnclosingCallable() and
+ c1 != c2
+ )
+ }
+
+ /**
+ * Holds if `c` encloses a source of `config`, or can be reached by
+ * `callableStep`s from a callable that does.
+ */
+ private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+ c = getNodeEnclosingCallable(any(Node src | config.isSource(src)))
+ or
+ exists(DataFlowCallable prev |
+ callableStep(prev, c, config) and
+ interestingCallableSrc(prev, config)
+ )
+ }
+
+ /**
+ * Holds if `c` encloses a sink of `config`, or can reach a callable that
+ * does by `callableStep`s.
+ */
+ private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+ c = getNodeEnclosingCallable(any(Node snk | config.isSink(snk)))
+ or
+ exists(DataFlowCallable next |
+ callableStep(c, next, config) and
+ interestingCallableSink(next, config)
+ )
+ }
+
+ /**
+ * The nodes of the extended callable graph used for distance computation:
+ * one node per interesting (callable, configuration) pair, plus one virtual
+ * source node and one virtual sink node.
+ */
+ private newtype TCallableExt =
+ TCallable(DataFlowCallable c, Configuration config) {
+ interestingCallableSrc(c, config) or
+ interestingCallableSink(c, config)
+ } or
+ TCallableSrc() or
+ TCallableSink()
+
+ /** Holds for the virtual source node; used as the start set of `distSrcExt`. */
+ private predicate callableExtSrc(TCallableSrc src) { any() }
+
+ /** Holds for the virtual sink node; used as the start set of `distSinkExt`. */
+ private predicate callableExtSink(TCallableSink sink) { any() }
+
+ /**
+ * Holds if `(ce1, ce2)` is a forward edge of the extended callable graph:
+ * a `callableStep` between two callables of the same configuration, an edge
+ * from the virtual source to a callable enclosing a source, or an edge from
+ * a callable enclosing a sink to the virtual sink.
+ */
+ private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) {
+ exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config |
+ callableStep(c1, c2, config) and
+ ce1 = TCallable(c1, pragma[only_bind_into](config)) and
+ ce2 = TCallable(c2, pragma[only_bind_into](config))
+ )
+ or
+ // virtual source -> callable containing a source
+ exists(Node n, Configuration config |
+ ce1 = TCallableSrc() and
+ config.isSource(n) and
+ ce2 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ or
+ // callable containing a sink -> virtual sink
+ exists(Node n, Configuration config |
+ ce2 = TCallableSink() and
+ config.isSink(n) and
+ ce1 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ }
+
+ /** Holds if `(ce2, ce1)` is a forward edge, i.e. this is `callableExtStepFwd` reversed. */
+ private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+ callableExtStepFwd(ce2, ce1)
+ }
+
+ /**
+ * Gets the shortest distance from the virtual source node to `c` along
+ * `callableExtStepFwd` edges.
+ */
+ private int distSrcExt(TCallableExt c) =
+ shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result)
+
+ /**
+ * Gets the shortest distance from the virtual sink node to `c` along
+ * `callableExtStepRev` edges.
+ */
+ private int distSinkExt(TCallableExt c) =
+ shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result)
+
+ /**
+ * Gets the distance of `c` from the nearest source of `config`. This is the
+ * extended-graph distance minus one, which discounts the edge leaving the
+ * virtual source node.
+ */
+ private int distSrc(DataFlowCallable c, Configuration config) {
+ exists(int extDist | extDist = distSrcExt(TCallable(c, config)) | result = extDist - 1)
+ }
+
+ /**
+ * Gets the distance of `c` to the nearest sink of `config`. This is the
+ * extended-graph distance minus one, which discounts the edge entering the
+ * virtual sink node.
+ */
+ private int distSink(DataFlowCallable c, Configuration config) {
+ exists(int extDist | extDist = distSinkExt(TCallable(c, config)) | result = extDist - 1)
+ }
+
+ /**
+ * The representation of forward partial access paths: either empty with a
+ * type, or a head element with a length between 1 and `accessPathLimit()`.
+ */
+ private newtype TPartialAccessPath =
+ TPartialNil(DataFlowType t) or
+ TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] }
+
+ /**
+ * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first
+ * element of the list and its length are tracked. If data flows from a source to
+ * a given node with a given `AccessPath`, this indicates the sequence of
+ * dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+ private class PartialAccessPath extends TPartialAccessPath {
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /** Gets the first element of this access path, if it is non-empty. */
+ TypedContent getHead() { this = TPartialCons(result, _) }
+
+ /** Gets the length of this access path; 0 for the empty path. */
+ int len() {
+ this = TPartialNil(_) and result = 0
+ or
+ this = TPartialCons(_, result)
+ }
+
+ /**
+ * Gets the type associated with this access path: the stored type for the
+ * empty path, and the container type of the head otherwise.
+ */
+ DataFlowType getType() {
+ this = TPartialNil(result)
+ or
+ exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType())
+ }
+ }
+
+ /** An empty forward partial access path. */
+ private class PartialAccessPathNil extends PartialAccessPath, TPartialNil {
+ /**
+ * Gets `": "` followed by the printed representation of the type. Because
+ * the value is built with `concat`, the result is the empty string when
+ * `ppReprType(t)` has no value.
+ */
+ override string toString() {
+ exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t)))
+ }
+ }
+
+ /** A non-empty forward partial access path. */
+ private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+ /**
+ * Gets a textual representation showing the head. When the length is not
+ * exactly 1, an ellipsis and the total length are appended.
+ */
+ override string toString() {
+ exists(TypedContent tc, int len, string start |
+ this = TPartialCons(tc, len) and
+ start = "[" + tc.toString()
+ |
+ len = 1 and result = start + "]"
+ or
+ not len = 1 and result = start + ", ... (" + len.toString() + ")]"
+ )
+ }
+ }
+
+ /**
+ * The representation of reverse partial access paths: either empty, or a
+ * head `Content` with a length between 1 and `accessPathLimit()`.
+ */
+ private newtype TRevPartialAccessPath =
+ TRevPartialNil() or
+ TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] }
+
+ /**
+ * Conceptually a list of `Content`s, but only the first
+ * element of the list and its length are tracked.
+ */
+ private class RevPartialAccessPath extends TRevPartialAccessPath {
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /** Gets the first element of this access path, if it is non-empty. */
+ Content getHead() { this = TRevPartialCons(result, _) }
+
+ /** Gets the length of this access path; 0 for the empty path. */
+ int len() {
+ this = TRevPartialNil() and result = 0
+ or
+ this = TRevPartialCons(_, result)
+ }
+ }
+
+ /** An empty reverse partial access path; it is rendered as the empty string. */
+ private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil {
+ override string toString() { result = "" }
+ }
+
+ /** A non-empty reverse partial access path. */
+ private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+ /**
+ * Gets a textual representation showing the head. When the length is not
+ * exactly 1, an ellipsis and the total length are appended.
+ */
+ override string toString() {
+ exists(Content c, int len, string start |
+ this = TRevPartialCons(c, len) and
+ start = "[" + c.toString()
+ |
+ len = 1 and result = start + "]"
+ or
+ not len = 1 and result = start + ", ... (" + len.toString() + ")]"
+ )
+ }
+ }
+
+ /** First component of a forward summary context: none, or the parameter node that was entered. */
+ private newtype TSummaryCtx1 =
+ TSummaryCtx1None() or
+ TSummaryCtx1Param(ParamNodeEx p)
+
+ /** Second component of a forward summary context: none, or a partial access path. */
+ private newtype TSummaryCtx2 =
+ TSummaryCtx2None() or
+ TSummaryCtx2Some(PartialAccessPath ap)
+
+ /** First component of a reverse summary context: none, or a return position. */
+ private newtype TRevSummaryCtx1 =
+ TRevSummaryCtx1None() or
+ TRevSummaryCtx1Some(ReturnPosition pos)
+
+ /** Second component of a reverse summary context: none, or a reverse partial access path. */
+ private newtype TRevSummaryCtx2 =
+ TRevSummaryCtx2None() or
+ TRevSummaryCtx2Some(RevPartialAccessPath ap)
+
+ /**
+ * The nodes of the partial-flow exploration graph. Forward nodes start at
+ * sources and reverse nodes start at sinks; both are only generated for
+ * configurations that define `explorationLimit()` and only within that
+ * distance (in callables) of a source or sink, respectively.
+ */
+ private newtype TPartialPathNode =
+ TPartialPathNodeFwd(
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ // Initial forward node: a source that is not a barrier, with empty contexts.
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = TPartialNil(node.getDataFlowType()) and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit())
+ or
+ // A forward step from an existing node, within the exploration limit.
+ partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and
+ distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ } or
+ TPartialPathNodeRev(
+ NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ // Initial reverse node: a sink that is not a barrier, with empty contexts.
+ sinkNode(node, config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil() and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit())
+ or
+ // A reverse step from an existing node, within the exploration limit.
+ exists(PartialPathNodeRev mid |
+ revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+ not clearsContentCached(node.asNode(), ap.getHead()) and
+ not fullBarrier(node, config) and
+ distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ )
+ }
+
+ /**
+ * Holds if the given state is reached by one `partialPathStep` from an
+ * existing forward node, `node` is not a barrier, `node` does not clear the
+ * content at the head of `ap`, and, if `node` is a casting node, its type is
+ * compatible with the type of `ap`.
+ */
+ pragma[nomagic]
+ private predicate partialPathNodeMk0(
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStep(mid, node, cc, sc1, sc2, ap, config) and
+ not fullBarrier(node, config) and
+ not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
+ if node.asNode() instanceof CastingNode
+ then compatibleTypes(node.getDataFlowType(), ap.getType())
+ else any()
+ )
+ }
+
+ /**
+ * A `Node` augmented with a call context, an access path, and a configuration.
+ *
+ * A partial path node is either a forward node (`PartialPathNodeFwd`) or a
+ * reverse node (`PartialPathNodeRev`). Reverse nodes carry no call context.
+ */
+ class PartialPathNode extends TPartialPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context. For reverse-flow nodes, which have no
+ * call context, this is the same as `toString()`.
+ */
+ string toStringWithContext() {
+ result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.getNodeEx().projectToNode() = result }
+
+ /** Gets the underlying extended node of this forward or reverse node. */
+ private NodeEx getNodeEx() {
+ result = this.(PartialPathNodeFwd).getNodeEx() or
+ result = this.(PartialPathNodeRev).getNodeEx()
+ }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() }
+
+ /** Gets a successor of this node, if any. */
+ PartialPathNode getASuccessor() { none() }
+
+ /**
+ * Gets the approximate distance to the nearest source measured in number
+ * of interprocedural steps.
+ */
+ int getSourceDistance() {
+ result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ /**
+ * Gets the approximate distance to the nearest sink measured in number
+ * of interprocedural steps.
+ */
+ int getSinkDistance() {
+ result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ /**
+ * Gets the access-path suffix used in the string representations: the
+ * access path's string, preceded by a space unless that string is empty.
+ */
+ private string ppAp() {
+ exists(string s |
+ s = this.(PartialPathNodeFwd).getAp().toString() or
+ s = this.(PartialPathNodeRev).getAp().toString()
+ |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ /**
+ * Gets the call-context suffix used in `toStringWithContext`: the call
+ * context wrapped in ` <...>` for forward nodes, and the empty string for
+ * reverse nodes.
+ */
+ private string ppCtx() {
+ result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+ or
+ // Reverse nodes have no call context. Without this case `toStringWithContext`
+ // would have no result for them; `PathNodeImpl.ppCtx` treats sinks likewise.
+ this instanceof PartialPathNodeRev and result = ""
+ }
+
+ /** Holds if this is a source in a forward-flow path. */
+ predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+ /** Holds if this is a sink in a reverse-flow path. */
+ predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+ }
+
+ /**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ * Every successor edge between partial path nodes is included.
+ */
+ module PartialPathGraph {
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b }
+ }
+
+ /**
+ * A forward partial path node: a node together with a call context, a
+ * two-part summary context, a partial access path, and a configuration.
+ */
+ private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd {
+ NodeEx node;
+ CallContext cc;
+ TSummaryCtx1 sc1;
+ TSummaryCtx2 sc2;
+ PartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) }
+
+ /** Gets the underlying extended node. */
+ NodeEx getNodeEx() { result = node }
+
+ /** Gets the call context. */
+ CallContext getCallContext() { result = cc }
+
+ /** Gets the first component of the summary context. */
+ TSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ /** Gets the second component of the summary context. */
+ TSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ /** Gets the partial access path. */
+ PartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ // A successor is any existing forward node whose state is produced by one `partialPathStep`.
+ override PartialPathNodeFwd getASuccessor() {
+ partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+ result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+ }
+
+ /**
+ * Holds if this node is a source node of the configuration carrying the
+ * initial state (any call context, no summary context, empty access path).
+ */
+ predicate isSource() {
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap instanceof TPartialNil
+ }
+ }
+
+ /**
+ * A reverse partial path node: a node together with a two-part reverse
+ * summary context, a reverse partial access path, and a configuration.
+ */
+ private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev {
+ NodeEx node;
+ TRevSummaryCtx1 sc1;
+ TRevSummaryCtx2 sc2;
+ RevPartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) }
+
+ /** Gets the underlying extended node. */
+ NodeEx getNodeEx() { result = node }
+
+ /** Gets the first component of the reverse summary context. */
+ TRevSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ /** Gets the second component of the reverse summary context. */
+ TRevSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ /** Gets the reverse partial access path. */
+ RevPartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ // Edges still point in flow direction: the successor `result` is the node
+ // from which this node was derived by a reverse step.
+ override PartialPathNodeRev getASuccessor() {
+ revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+ this.getAp(), this.getConfiguration())
+ }
+
+ /**
+ * Holds if this node is a sink node of the configuration carrying the
+ * initial state (no summary context, empty access path).
+ */
+ predicate isSink() {
+ sinkNode(node, config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil()
+ }
+ }
+
+ /**
+ * Holds if there is a forward exploration step from `mid` to `node`, after
+ * which the call context is `cc`, the summary context is (`sc1`, `sc2`), the
+ * partial access path is `ap`, and the configuration is `config`. Each
+ * disjunct covers one kind of step.
+ */
+ private predicate partialPathStep(
+ PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ // Local steps. Only these are additionally required not to be unreachable
+ // in the specific call of the call context.
+ not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and
+ (
+ // Local flow step: all state is carried over.
+ localFlowStep(mid.getNodeEx(), node, config) and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ // Additional local flow step: only from an empty access path, which is
+ // re-typed by `node`.
+ additionalLocalFlowStep(mid.getNodeEx(), node, config) and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ )
+ or
+ // Jump step: contexts are reset, the access path is kept.
+ jumpStep(mid.getNodeEx(), node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ // Additional jump step: only from an empty access path; contexts are reset.
+ additionalJumpStep(mid.getNodeEx(), node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ or
+ // Store step: `ap` is the extended access path; contexts are kept.
+ partialPathStoreStep(mid, _, _, node, ap) and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ // Read step: `ap` is an access path that some store step extended with `tc`
+ // to obtain the access path `ap0` at `mid`.
+ exists(PartialAccessPath ap0, TypedContent tc |
+ partialPathReadStep(mid, ap0, tc, node, cc, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsFwd(ap, tc, ap0, config)
+ )
+ or
+ // Flow into a callable.
+ partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config)
+ or
+ // Flow out of a callable: no summary context applies after returning.
+ partialPathOutOfCallable(mid, node, cc, ap, config) and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None()
+ or
+ // Flow through a callable: the summary context of `mid` is kept.
+ partialPathThroughCallable(mid, node, cc, ap, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ /**
+ * Gets an integer equal to `i`, expressed as a pair of inequalities. Both
+ * `result` and `i` must be bound by the caller, so this only checks equality.
+ */
+ bindingset[result, i]
+ private int unbindInt(int i) { result >= i and result <= i }
+
+ /**
+ * Holds if there is a store of `tc` from the node of `mid` to `node`, where
+ * `ap1` is the access path at `mid` and `ap2` is an access path with head `tc`
+ * that is one element longer than `ap1`. The type of `ap1` must be compatible
+ * with the content type of the store.
+ */
+ pragma[inline]
+ private predicate partialPathStoreStep(
+ PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node,
+ PartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode, DataFlowType contentType |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and
+ store(midNode, tc, node, contentType, mid.getConfiguration()) and
+ ap2.getHead() = tc and
+ ap2.len() = unbindInt(ap1.len() + 1) and
+ compatibleTypes(ap1.getType(), contentType)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate apConsFwd( // holds if some forward node of `config` has a store step of `tc` taking `ap1` to `ap2`
+ PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStoreStep(mid, ap1, tc, _, ap2) and // the target node of the store is not relevant here
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathReadStep( // read step from `mid`'s node to `node` reading the content of `tc`
+ PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc,
+ Configuration config
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and // `ap` is the access path before the read
+ read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and
+ ap.getHead() = tc and // the read only applies when `tc` is the head of `mid`'s access path
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ cc = mid.getCallContext() // `cc` is `mid`'s own call context
+ )
+ }
+
+ private predicate partialPathOutOfCallable0( // `mid` is at a return node with position `pos`; exposes its context and access path
+ PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap,
+ Configuration config
+ ) {
+ pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+ innercc = mid.getCallContext() and
+ innercc instanceof CallContextNoCall and // only call contexts of type `CallContextNoCall` are considered
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ }
+
+ pragma[nomagic]
+ private predicate partialPathOutOfCallable1( // relates return-node `mid` to a `call` given by `resolveReturn`, the return `kind`, and outer context `cc`
+ PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ partialPathOutOfCallable0(mid, pos, innercc, ap, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ |
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() // outer context is `TReturn(c, call)` only when `reducedViableImplInReturn` holds
+ )
+ }
+
+ private predicate partialPathOutOfCallable( // flow from return-node `mid` out of its callable to `out`, with outer context `cc`
+ PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnKindExt kind, DataFlowCall call |
+ partialPathOutOfCallable1(mid, call, kind, cc, ap, config)
+ |
+ out.asNode() = kind.getAnOutNode(call) // `out` is an out node of `call` for the return kind
+ )
+ }
+
+ pragma[noinline]
+ private predicate partialPathIntoArg( // `mid`'s node is an argument of `call` at position `i`; exposes `mid`'s context and access path
+ PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathIntoCallable0( // as `partialPathIntoArg`, plus the `callable` that `call` resolves to in `outercc`
+ PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc,
+ DataFlowCall call, PartialAccessPath ap, Configuration config
+ ) {
+ partialPathIntoArg(mid, i, outercc, call, ap, config) and
+ callable = resolveCall(call, outercc) // call target is resolved using the outer call context
+ }
+
+ private predicate partialPathIntoCallable( // flow from argument node `mid` into parameter `p` of a callable targeted by `call`
+ PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc,
+ TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(int i, DataFlowCallable callable |
+ partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and
+ p.isParameterOf(callable, i) and // parameter position matches the argument position `i`
+ sc1 = TSummaryCtx1Param(p) and // the new summary context records the entered parameter...
+ sc2 = TSummaryCtx2Some(ap) // ...and the access path at the point of entry
+ |
+ if recordDataFlowCallSite(call, callable) // decides whether the inner context names the specific call
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate paramFlowsThroughInPartialPath( // some forward node sits on a return node of `kind` with the given contexts and access path
+ ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid, RetNodeEx ret |
+ mid.getNodeEx() = ret and
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp() // access path at the return node
+ )
+ }
+
+ pragma[noinline]
+ private predicate partialPathThroughCallable0( // `mid` enters a callable via `call` and a return node of `kind` is reached under the same inner contexts
+ DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 |
+ partialPathIntoCallable(mid, _, cc, innercc, sc1, sc2, call, _, config) and // entry: yields the inner call context and summary context
+ paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) // exit: `ap` is the access path at the return node
+ )
+ }
+
+ private predicate partialPathThroughCallable( // flow from argument node `mid` through a callable to `out`, an out node of the same call
+ PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, ReturnKindExt kind |
+ partialPathThroughCallable0(call, mid, kind, cc, ap, config) and
+ out.asNode() = kind.getAnOutNode(call) // `out` is an out node of `call` for the return kind
+ )
+ }
+
+ private predicate revPartialPathStep( // one reverse step: data flows from `node` to `mid`'s node; the other columns are the state at `node`
+ PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
+ RevPartialAccessPath ap, Configuration config
+ ) {
+ localFlowStep(node, mid.getNodeEx(), config) and // local step: summary context and access path are carried over
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(node, mid.getNodeEx(), config) and // additional local step: only with an empty access path
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ jumpStep(node, mid.getNodeEx(), config) and // jump step: summary context is reset to None
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(node, mid.getNodeEx(), config) and // additional jump step: context reset, empty access path only
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ revPartialPathReadStep(mid, _, _, node, ap) and // read step from `node` to `mid`: `ap` is `mid`'s path extended by the read content
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(RevPartialAccessPath ap0, Content c | // store step from `node` to `mid`: head `c` of `mid`'s path `ap0` is removed
+ revPartialPathStoreStep(mid, ap0, c, node, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsRev(ap, c, ap0, config) // `ap` is a path that some read step of `c` extended to `ap0`
+ )
+ or
+ exists(ParamNodeEx p | // from a parameter back to a viable argument
+ mid.getNodeEx() = p and
+ viableParamArgEx(_, p, node) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ sc1 = TRevSummaryCtx1None() and // with the two lines above: only applies when `mid` has no summary context
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ or
+ exists(ReturnPosition pos | // from an out node of a call back to a return node at `pos`
+ revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and
+ pos = getReturnPosition(node.asNode())
+ )
+ or
+ revPartialPathThroughCallable(mid, node, ap, config) and // from an out node of a call, through the callable, back to an argument
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ pragma[inline]
+ private predicate revPartialPathReadStep( // reverse traversal of a read of `c` from `node` to `mid`'s node, taking `ap1` to `ap2`
+ PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node,
+ RevPartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and
+ read(node, c, midNode, mid.getConfiguration()) and // the read goes from `node` to `mid`'s node
+ ap2.getHead() = c and // the read content is the head of the resulting access path
+ ap2.len() = unbindInt(ap1.len() + 1) // the resulting access path is exactly one longer than `ap1`
+ )
+ }
+
+ pragma[nomagic]
+ private predicate apConsRev( // holds if some reverse node of `config` has a read step of `c` taking `ap1` to `ap2`
+ RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config
+ ) {
+ exists(PartialPathNodeRev mid |
+ revPartialPathReadStep(mid, ap1, c, _, ap2) and // the source node of the read is not relevant here
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathStoreStep( // reverse traversal of a store from `node` into `mid`'s node whose content `c` heads `mid`'s access path
+ PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config
+ ) {
+ exists(NodeEx midNode, TypedContent tc |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and // `ap` is the access path at `mid`, i.e. before the head is removed
+ store(node, tc, midNode, _, config) and
+ ap.getHead() = c and
+ config = mid.getConfiguration() and
+ tc.getContent() = c // reverse access paths use plain `Content`, so the typed content is projected
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathIntoReturn( // `mid` is at an out node of `call` for return position `pos`; yields the summary context for entering the callee
+ PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2,
+ DataFlowCall call, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(NodeEx out |
+ mid.getNodeEx() = out and
+ viableReturnPosOutEx(call, pos, out) and
+ sc1 = TRevSummaryCtx1Some(pos) and // the new summary context records the return position...
+ sc2 = TRevSummaryCtx2Some(ap) and // ...and the access path at the point of entry
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathFlowsThrough( // some reverse node sits on a parameter at position `pos` with the given summary context and access path
+ int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeRev mid, ParamNodeEx p |
+ mid.getNodeEx() = p and
+ p.getPosition() = pos and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and // access path at the parameter
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable0( // `mid` enters a callee backwards via `call` and a parameter at `pos` is reached under the same summary context
+ DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 |
+ revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and // entry through a return position: yields the summary context
+ revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) // exit: `ap` is the access path at the parameter
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable( // reverse flow from out node `mid` through a callable back to argument `node` of the same call
+ PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, int pos |
+ revPartialPathThroughCallable0(call, mid, pos, ap, config) and
+ node.asNode().(ArgNode).argumentOf(call, pos) // argument position matches the parameter position reached
+ )
+ }
+}
+
+import FlowExploration
+
+private predicate partialFlow( // `node` is reachable from forward source `source` of `configuration` in one or more successor steps
+ PartialPathNode source, PartialPathNode node, Configuration configuration
+) {
+ source.getConfiguration() = configuration and
+ source.isFwdSource() and
+ node = source.getASuccessor+() // transitive closure, so `source` itself is only included if it lies on a cycle
+}
+
+private predicate revPartialFlow( // reverse sink `sink` of `configuration` is reachable from `node` in one or more successor steps
+ PartialPathNode node, PartialPathNode sink, Configuration configuration
+) {
+ sink.getConfiguration() = configuration and
+ sink.isRevSink() and
+ node.getASuccessor+() = sink // transitive closure, so `sink` itself is only included if it lies on a cycle
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll
new file mode 100644
index 00000000000..4ca06c93362
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll
@@ -0,0 +1,4559 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isBarrier`.
+ * // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+ bindingset[this]
+ Configuration() { any() } // places no restriction on the string; concrete subclasses pin `this` to their own value
+
+ /**
+ * Holds if `source` is a relevant data flow source.
+ */
+ abstract predicate isSource(Node source);
+
+ /**
+ * Holds if `sink` is a relevant data flow sink.
+ */
+ abstract predicate isSink(Node sink);
+
+ /**
+ * Holds if data flow through `node` is prohibited. This completely removes
+ * `node` from the data flow graph.
+ */
+ predicate isBarrier(Node node) { none() }
+
+ /** Holds if data flow into `node` is prohibited. */
+ predicate isBarrierIn(Node node) { none() }
+
+ /** Holds if data flow out of `node` is prohibited. */
+ predicate isBarrierOut(Node node) { none() }
+
+ /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+ predicate isBarrierGuard(BarrierGuard guard) { none() }
+
+ /**
+ * Holds if the additional flow step from `node1` to `node2` must be taken
+ * into account in the analysis.
+ */
+ predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
+
+ /**
+ * Holds if an arbitrary number of implicit read steps of content `c` may be
+ * taken at `node`.
+ */
+ predicate allowImplicitRead(Node node, Content c) { none() }
+
+ /**
+ * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a
+ * value of 0 disables field flow), or a larger value to get more results.
+ */
+ int fieldFlowBranchLimit() { result = 2 }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ */
+ predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ *
+ * The corresponding paths are generated from the end-points and the graph
+ * included in the module `PathGraph`.
+ */
+ predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowTo(Node sink) { hasFlow(_, sink) }
+
+ /**
+ * Holds if data may flow from some source to the node of expression `sink` for this configuration.
+ */
+ predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
+
+ /**
+ * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+ * measured in approximate number of interprocedural steps.
+ */
+ int explorationLimit() { none() } // has no result unless a subclass overrides it
+
+ /**
+ * Holds if there is a partial data flow path from `source` to `node`. The
+ * approximate distance between `node` and the closest source is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards sink definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sources is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */
+ final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+ partialFlow(source, node, this) and
+ dist = node.getSourceDistance()
+ }
+
+ /**
+ * Holds if there is a partial data flow path from `node` to `sink`. The
+ * approximate distance between `node` and the closest sink is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards source definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sinks is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ *
+ * Note that reverse flow has slightly lower precision than the corresponding
+ * forward flow, as reverse flow disregards type pruning among other features.
+ */
+ final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+ revPartialFlow(node, sink, this) and
+ dist = node.getSinkDistance()
+ }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+ bindingset[this]
+ ConfigurationRecursionPrevention() { any() } // no restriction: applies to every `Configuration`
+
+ override predicate hasFlow(Node source, Node sink) {
+ strictcount(Node n | this.isSource(n)) < 0 // a count is never negative, so this disjunct never holds; it only makes `hasFlow` refer to `isSource`
+ or
+ strictcount(Node n | this.isSink(n)) < 0 // likewise for `isSink`
+ or
+ strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 // likewise for `isAdditionalFlowStep`
+ or
+ super.hasFlow(source, sink) // the only disjunct that can produce results
+ }
+}
+
+private newtype TNodeEx = // extended node type: ordinary nodes plus synthetic implicit-read nodes
+ TNodeNormal(Node n) or // one branch value per `Node`
+ TNodeImplicitRead(Node n, boolean hasRead) { // exists only for nodes where some configuration allows implicit reads
+ any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] // two values per such node, one for each `hasRead`
+ }
+
+private class NodeEx extends TNodeEx { // wrapper over `TNodeEx` giving access to the underlying `Node`
+ string toString() {
+ result = this.asNode().toString()
+ or
+ exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") // implicit-read nodes are marked with a suffix
+ }
+
+ Node asNode() { this = TNodeNormal(result) } // has a result only for normal nodes
+
+ predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) }
+
+ Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } // underlying node for either kind
+
+ pragma[nomagic]
+ private DataFlowCallable getEnclosingCallable0() {
+ nodeEnclosingCallable(this.projectToNode(), result)
+ }
+
+ pragma[inline]
+ DataFlowCallable getEnclosingCallable() { // inlined wrapper around `getEnclosingCallable0` with binding-direction hints
+ pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result)
+ }
+
+ pragma[nomagic]
+ private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } // uses `asNode()`, so no result for implicit-read nodes
+
+ pragma[inline]
+ DataFlowType getDataFlowType() { // inlined wrapper around `getDataFlowType0` with binding-direction hints
+ pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result)
+ }
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) // location of the underlying node
+ }
+}
+
+private class ArgNodeEx extends NodeEx { // a normal `NodeEx` whose underlying node is an `ArgNode`
+ ArgNodeEx() { this.asNode() instanceof ArgNode }
+}
+
+private class ParamNodeEx extends NodeEx { // a normal `NodeEx` whose underlying node is a `ParamNode`
+ ParamNodeEx() { this.asNode() instanceof ParamNode }
+
+ predicate isParameterOf(DataFlowCallable c, int i) { // forwards to the underlying `ParamNode`
+ this.asNode().(ParamNode).isParameterOf(c, i)
+ }
+
+ int getPosition() { this.isParameterOf(_, result) } // the position `i` for which `isParameterOf(_, i)` holds
+}
+
+private class RetNodeEx extends NodeEx { // a normal `NodeEx` whose underlying node is a `ReturnNodeExt`
+ RetNodeEx() { this.asNode() instanceof ReturnNodeExt }
+
+ ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } // calls the predicate `getReturnPosition` on the underlying node
+
+ ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() }
+}
+
+private predicate inBarrier(NodeEx node, Configuration config) { // `node` is both an in-barrier and a source of `config`
+ exists(Node n |
+ node.asNode() = n and
+ config.isBarrierIn(n) and
+ config.isSource(n) // in-barriers that are not sources are handled by `fullBarrier`
+ )
+}
+
+private predicate outBarrier(NodeEx node, Configuration config) { // `node` is both an out-barrier and a sink of `config`
+ exists(Node n |
+ node.asNode() = n and
+ config.isBarrierOut(n) and
+ config.isSink(n) // out-barriers that are not sinks are handled by `fullBarrier`
+ )
+}
+
+private predicate fullBarrier(NodeEx node, Configuration config) { // `node` is treated as a barrier in both directions for `config`
+ exists(Node n | node.asNode() = n |
+ config.isBarrier(n) // explicit barrier
+ or
+ config.isBarrierIn(n) and // in-barrier on a node that is not a source
+ not config.isSource(n)
+ or
+ config.isBarrierOut(n) and // out-barrier on a node that is not a sink
+ not config.isSink(n)
+ or
+ exists(BarrierGuard g | // node guarded by one of the configuration's barrier guards
+ config.isBarrierGuard(g) and
+ n = g.getAGuardedNode()
+ )
+ )
+}
+
+pragma[nomagic]
+private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) } // `node` is a normal node that is a source of `config`
+
+pragma[nomagic]
+private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) } // `node` is a normal node that is a sink of `config`
+
+/**
+ * Holds if data can flow in one local step from `node1` to `node2`.
+ */
+private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // ordinary local step between two normal nodes, subject to the barriers of `config`
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ simpleLocalFlowStepExt(n1, n2) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // step from a normal node to its implicit-read node with `hasRead = false`
+ config.allowImplicitRead(n, _) and
+ node1.asNode() = n and
+ node2.isImplicitReadNode(n, false)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` does not jump between callables.
+ */
+private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // configuration-defined step between normal nodes, subject to the barriers of `config`
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and // both ends are in the same callable
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // step from the implicit-read node with `hasRead = true` back to the normal node
+ config.allowImplicitRead(n, _) and
+ node1.isImplicitReadNode(n, true) and
+ node2.asNode() = n
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
+ */
+private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // only normal nodes take part in jump steps
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ jumpStepCached(n1, n2) and
+ not outBarrier(node1, config) and // the step is dropped if either end is blocked by a barrier of `config`
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` jumps between callables.
+ */
+private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // only normal nodes take part in additional jump steps
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and // the two ends are in different callables
+ not outBarrier(node1, config) and // the step is dropped if either end is blocked by a barrier of `config`
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { // read of `c` from `node1` to `node2`, including implicit reads
+ read(node1.asNode(), c, node2.asNode()) // ordinary read step between normal nodes (independent of `config`)
+ or
+ exists(Node n | // implicit read: from either implicit-read node of `n` to the one with `hasRead = true`
+ node2.isImplicitReadNode(n, true) and
+ node1.isImplicitReadNode(n, _) and
+ config.allowImplicitRead(n, c)
+ )
+}
+
+private predicate store( // store of `tc` from `node1` into `node2`, kept only if the content can also be read
+ NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
+) {
+ store(node1.asNode(), tc, node2.asNode(), contentType) and // underlying store step between normal nodes
+ read(_, tc.getContent(), _, config) // some read step of the same content must exist under `config`
+}
+
+pragma[nomagic]
+private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { // `NodeEx` version of `viableReturnPosOut`
+ viableReturnPosOut(call, pos, out.asNode()) // `out` must be a normal node
+}
+
+pragma[nomagic]
+private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { // `NodeEx` version of `viableParamArg`
+ viableParamArg(call, p.asNode(), arg.asNode()) // delegates on the underlying nodes
+}
+
+/**
+ * Holds if field flow should be used for the given configuration.
+ */
+private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } // a branch limit below 1 turns field flow off
+
+private module Stage1 {
+ class ApApprox = Unit;
+
+ class Ap = Unit;
+
+ class ApOption = Unit;
+
+ class Cc = boolean;
+
+ /* Begin: Stage 1 logic. */
+ /**
+ * Holds if `node` is reachable from a source in the configuration `config`.
+ *
+ * The Boolean `cc` records whether the node is reached through an
+ * argument in a call.
+ */
+ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) {
+ not fullBarrier(node, config) and
+ (
+ sourceNode(node, config) and
+ cc = false
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ localFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ additionalLocalFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, _, config) and
+ jumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, _, config) and
+ additionalJumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // store
+ exists(NodeEx mid |
+ useFieldFlow(config) and
+ fwdFlow(mid, cc, config) and
+ store(mid, _, node, _, config) and
+ not outBarrier(mid, config)
+ )
+ or
+ // read
+ exists(Content c |
+ fwdFlowRead(c, node, cc, config) and
+ fwdFlowConsCand(c, config) and
+ not inBarrier(node, config)
+ )
+ or
+ // flow into a callable
+ exists(NodeEx arg |
+ fwdFlow(arg, _, config) and
+ viableParamArgEx(_, node, arg) and
+ cc = true
+ )
+ or
+ // flow out of a callable
+ exists(DataFlowCall call |
+ fwdFlowOut(call, node, false, config) and
+ cc = false
+ or
+ fwdFlowOutFromArg(call, node, config) and
+ fwdFlowIsEntered(call, cc, config)
+ )
+ )
+ }
+
+ private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) }
+
+ pragma[nomagic]
+ private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ read(mid, c, node, config)
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node, TypedContent tc |
+ not fullBarrier(node, config) and
+ useFieldFlow(config) and
+ fwdFlow(mid, _, config) and
+ store(mid, tc, node, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+ exists(RetNodeEx ret |
+ fwdFlow(ret, cc, config) and
+ ret.getReturnPosition() = pos
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) {
+ exists(ReturnPosition pos |
+ fwdFlowReturnPosition(pos, cc, config) and
+ viableReturnPosOutEx(call, pos, out)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) {
+ fwdFlowOut(call, out, true, config)
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+ exists(ArgNodeEx arg |
+ fwdFlow(arg, cc, config) and
+ viableParamArgEx(call, _, arg)
+ )
+ }
+
+ /**
+ * Holds if `node` is part of a path from a source to a sink in the
+ * configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from
+ * the enclosing callable in order to reach a sink.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+ revFlow0(node, toReturn, config) and
+ fwdFlow(node, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) {
+ fwdFlow(node, config) and
+ sinkNode(node, config) and
+ toReturn = false
+ or
+ exists(NodeEx mid |
+ localFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid |
+ additionalLocalFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ exists(NodeEx mid |
+ additionalJumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // store
+ exists(Content c |
+ revFlowStore(c, node, toReturn, config) and
+ revFlowConsCand(c, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Content c |
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(mid, toReturn, pragma[only_bind_into](config))
+ )
+ or
+ // flow into a callable
+ exists(DataFlowCall call |
+ revFlowIn(call, node, false, config) and
+ toReturn = false
+ or
+ revFlowInToReturn(call, node, config) and
+ revFlowIsReturned(call, toReturn, config)
+ )
+ or
+ // flow out of a callable
+ exists(ReturnPosition pos |
+ revFlowOut(pos, config) and
+ node.(RetNodeEx).getReturnPosition() = pos and
+ toReturn = true
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node |
+ fwdFlow(node, pragma[only_bind_into](config)) and
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) {
+ exists(NodeEx mid, TypedContent tc |
+ revFlow(mid, toReturn, pragma[only_bind_into](config)) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ store(node, tc, mid, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of both a read and a store in the flow covered
+ * by `revFlow`.
+ */
+ private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+ revFlowConsCand(c, conf) and
+ revFlowStore(c, _, _, conf)
+ }
+
+ pragma[nomagic]
+ predicate viableReturnPosOutNodeCandFwd1(
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+ ) {
+ fwdFlowReturnPosition(pos, _, config) and
+ viableReturnPosOutEx(call, pos, out)
+ }
+
+ pragma[nomagic]
+ private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+ exists(DataFlowCall call, NodeEx out |
+ revFlow(out, _, config) and
+ viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+ )
+ }
+
+ pragma[nomagic]
+ predicate viableParamArgNodeCandFwd1(
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+ ) {
+ viableParamArgEx(call, p, arg) and
+ fwdFlow(arg, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlowIn(
+ DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ revFlow(p, toReturn, config) and
+ viableParamArgNodeCandFwd1(call, p, arg, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) {
+ revFlowIn(call, arg, true, config)
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+ exists(NodeEx out |
+ revFlow(out, toReturn, config) and
+ fwdFlowOutFromArg(call, out, config)
+ )
+ }
+
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Content c |
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(node2, pragma[only_bind_into](config)) and
+ store(node1, tc, node2, contentType, config) and
+ c = tc.getContent() and
+ exists(ap1)
+ )
+ }
+
+ pragma[nomagic]
+ predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(n2, pragma[only_bind_into](config)) and
+ read(n1, c, n2, pragma[only_bind_into](config))
+ }
+
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) }
+
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow(node, toReturn, config) and exists(returnAp) and exists(ap)
+ }
+
+ private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
+ revFlow(node, true, config) and
+ fwdFlow(node, true, config) and
+ not inBarrier(node, config) and
+ not outBarrier(node, config)
+ }
+
+ /** Holds if `callable` encloses a return node of kind `kind` satisfying `throughFlowNodeCand`. */
+ pragma[nomagic]
+ private predicate returnFlowCallableNodeCand(
+ DataFlowCallable callable, ReturnKindExt kind, Configuration config
+ ) {
+ exists(RetNodeEx returnNode |
+ returnNode.getKind() = kind and
+ returnNode.getEnclosingCallable() = callable and
+ throughFlowNodeCand(returnNode, config)
+ )
+ }
+
+ /**
+ * Holds if `p` is a through-flow candidate parameter of `c` and `c` has a
+ * through-flow candidate return node, making `p` a possible summary origin.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(ReturnKindExt retKind |
+ c = p.getEnclosingCallable() and
+ returnFlowCallableNodeCand(c, retKind, config) and
+ throughFlowNodeCand(p, config) and
+ // a parameter is not expected to be returned stored in itself
+ not retKind.(ParamUpdateReturnKind).getPosition() = p.getPosition() and
+ exists(ap)
+ )
+ }
+
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(ArgNodeEx argument, boolean returning |
+ revFlowIsReturned(call, returning, config) and
+ revFlowInToReturn(call, argument, config) and
+ revFlow(argument, returning, config)
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = false and // reverse-flow statistics
+ nodes = count(NodeEx n | revFlow(n, _, config)) and
+ fields = count(Content cnt | revFlowConsCand(cnt, config)) and
+ conscand = -1 and
+ tuples = count(NodeEx n, boolean flag | revFlow(n, flag, config))
+ or
+ fwd = true and // forward-flow statistics
+ nodes = count(NodeEx n | fwdFlow(n, config)) and
+ fields = count(Content cnt | fwdFlowConsCand(cnt, config)) and
+ conscand = -1 and
+ tuples = count(NodeEx n, boolean flag | fwdFlow(n, flag, config))
+ }
+ /* End: Stage 1 logic. */
+}
+
+pragma[noinline]
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ localFlowStep(node1, node2, config) and
+ Stage1::revFlow(node2, config) // the target must be in `Stage1::revFlow`
+}
+
+pragma[noinline]
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ additionalLocalFlowStep(node1, node2, config) and
+ Stage1::revFlow(node2, config) // the target must be in `Stage1::revFlow`
+}
+
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1(
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+ Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) and
+ Stage1::revFlow(out, config) // restrict the forward candidate to outputs in `Stage1::revFlow`
+}
+
+/**
+ * Holds if `ret` can flow out of `call` to `out` (through a `ReturnNode` or
+ * through a mutated argument) as part of a path from a source to a sink;
+ * `ret` must not be an out-barrier and `out` must not be an in-barrier.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+ not inBarrier(out, config) and
+ not outBarrier(ret, config) and
+ Stage1::revFlow(ret, config) and
+ viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config)
+}
+
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+ Stage1::revFlow(arg, config) and // restrict the forward candidate to arguments in `Stage1::revFlow`
+ Stage1::viableParamArgNodeCandFwd1(call, p, arg, config)
+}
+
+/**
+ * Holds if `arg` can flow into `call` at parameter `p` as part of a path from
+ * a source to a sink; `arg` must not be an out-barrier, `p` not an in-barrier.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+ not inBarrier(p, config) and
+ not outBarrier(arg, config) and
+ Stage1::revFlow(p, config) and
+ viableParamArgNodeCand1(call, p, arg, config)
+}
+
+/**
+ * Gets the number of distinct successors of `n1` over cross-call edges
+ * (flow into or out of a call), i.e. the forward branching at the origin of
+ * such an edge in the source-to-sink path graph that ignores call
+ * contexts. Has no result when there is no such successor (`strictcount`).
+ */
+private int branch(NodeEx n1, Configuration conf) {
+ result =
+ strictcount(NodeEx succ |
+ flowIntoCallNodeCand1(_, n1, succ, conf) or flowOutOfCallNodeCand1(_, n1, succ, conf)
+ )
+}
+
+/**
+ * Gets the number of distinct predecessors of `n2` over cross-call edges
+ * (flow into or out of a call), i.e. the backward branching at the target of
+ * such an edge in the source-to-sink path graph that ignores call
+ * contexts. Has no result when there is no such predecessor (`strictcount`).
+ */
+private int join(NodeEx n2, Configuration conf) {
+ result =
+ strictcount(NodeEx pred |
+ flowIntoCallNodeCand1(_, pred, n2, conf) or flowOutOfCallNodeCand1(_, pred, n2, conf)
+ )
+}
+
+/**
+ * Holds if `ret` can flow out of `call` to `out`, through a `ReturnNode` or
+ * through a mutated argument, as part of a path from a source to a sink.
+ *
+ * `allowsFieldFlow` is `true` exactly when the smaller of the forward
+ * branching at `ret` and the backward branching at `out` does not exceed
+ * the configuration's `fieldFlowBranchLimit()`.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, ret, out, config) and
+ exists(int fanOut, int fanIn |
+ fanIn = join(out, config) and
+ fanOut = branch(ret, config) and
+ if fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit() then allowsFieldFlow = true
+ else allowsFieldFlow = false
+ )
+}
+
+/**
+ * Holds if `arg` can flow into `call` at parameter `p` as part of a path
+ * from a source to a sink. `allowsFieldFlow` is `true` exactly when the
+ * smaller of the two branching factors is within `fieldFlowBranchLimit()`.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+ flowIntoCallNodeCand1(call, arg, p, config) and
+ exists(int fanOut, int fanIn |
+ fanIn = join(p, config) and
+ fanOut = branch(arg, config) and
+ if fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit()
+ then allowsFieldFlow = true
+ else allowsFieldFlow = false
+ )
+}
+
+private module Stage2 {
+ module PrevStage = Stage1; // Stage 1 supplies the candidate relations (`PrevStage::...`) used below
+
+ class ApApprox = PrevStage::Ap; // the previous stage's access-path type
+
+ class Ap = boolean; // access paths are booleans here: `false` is `ApNil`, `apCons` always yields `true`
+
+ class ApNil extends Ap {
+ ApNil() { this = false }
+ }
+
+ bindingset[result, ap]
+ private ApApprox getApprox(Ap ap) { any() } // imposes no relation between `ap` and `result`
+
+ private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) } // only for nodes in `PrevStage::revFlow`
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } // `tc` and `tail` are not recorded
+
+ pragma[inline]
+ private Content getHeadContent(Ap ap) { exists(result) and ap = true } // any content, provided `ap = true`
+
+ class ApOption = BooleanOption;
+
+ ApOption apNone() { result = TBooleanNone() }
+
+ ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+ class Cc = CallContext;
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ Cc ccNone() { result instanceof CallContextAny }
+
+ private class LocalCc = Unit; // local call contexts carry no information in this stage
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSiteDispatch(call, c) // record the specific call only when this holds
+ then result = TSpecificCall(call)
+ else result = TSomeCall()
+ }
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // unconstrained: `LocalCc` is `Unit`
+
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ (
+ preservesValue = true and
+ localFlowStepNodeCand1(node1, node2, config)
+ or
+ preservesValue = false and
+ additionalLocalFlowStepNodeCand1(node1, node2, config)
+ ) and
+ exists(ap) and // `ap` and `lcc` are only required to exist
+ exists(lcc)
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; // stage-1 call edges, arity-5 variant with `allowsFieldFlow`
+
+ private predicate flowIntoCall = flowIntoCallNodeCand1/5; // stage-1 call edges, arity-5 variant with `allowsFieldFlow`
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } // stores are not type-checked in this stage
+
+ /* Begin: Stage 2 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config) // candidates are exactly the previous stage's `revFlow` nodes
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // source
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving: access path unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // not value-preserving: only from `ApNil`
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: call context and `argAp` are reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from `ApNil`
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config) // track `argAp` only for possible through-flow
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow out of a callable that was entered through an argument of `call`
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c // the access path at `node1` must have `c` as its head
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // non-nil access paths cross the call only if `allowsFieldFlow`
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // non-nil access paths cross the call only if `allowsFieldFlow`
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // non-nil access paths cross the call only if `allowsFieldFlow`
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // keep only stores whose result is read somewhere in forward flow
+ }
+
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config) // `ap2` is a tail that some store of `c` turns into `ap1`
+ }
+
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ fwdFlow(node, _, _, ap, config) // reverse flow is restricted to what forward flow reached
+ }
+
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // sink
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ exists(NodeEx mid | // value-preserving local step
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // non-value-preserving local step: `ApNil` on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: return state is reset
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: `ApNil` on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // into a callable whose result is returned back out at `call`
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) // track `returnAp` only if forward flow entered via an argument
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // non-nil access paths cross the call only if `allowsFieldFlow`
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // non-nil access paths cross the call only if `allowsFieldFlow`
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // non-nil access paths cross the call only if `allowsFieldFlow`
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } // projection onto the node
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and // forward-flow statistics
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and // reverse-flow statistics
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 2 logic. */
+}
+
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand2(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+ Stage2::revFlow(node1, pragma[only_bind_into](config)) and // both endpoints must be in `Stage2::revFlow`
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config)
+}
+
+pragma[nomagic]
+private predicate flowIntoCallNodeCand2(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+) {
+ Stage2::revFlow(node1, pragma[only_bind_into](config)) and // both endpoints must be in `Stage2::revFlow`
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config)
+}
+
+private module LocalFlowBigStep {
+ /**
+ * A node at which a check has to be performed; the big-step relation
+ * therefore always stops at such a node instead of stepping over it.
+ */
+ private class FlowCheckNode extends NodeEx {
+ FlowCheckNode() {
+ clearsContentCached(this.asNode(), _) or
+ castNode(this.asNode())
+ }
+ }
+
+ /**
+ * Holds if a maximal subsequence of local flow steps in a dataflow path
+ * may begin at `node`.
+ */
+ predicate localFlowEntry(NodeEx node, Configuration config) {
+ (
+ node instanceof FlowCheckNode or
+ read(_, _, node, config) or
+ store(_, _, node, _, config) or
+ node.asNode() instanceof OutNodeExt or
+ node instanceof ParamNodeEx or
+ additionalJumpStep(_, node, config) or
+ jumpStep(_, node, config) or
+ sourceNode(node, config)
+ ) and
+ Stage2::revFlow(node, config)
+ }
+
+ /**
+ * Holds if a maximal subsequence of local flow steps in a dataflow path
+ * may end at `node`.
+ */
+ private predicate localFlowExit(NodeEx node, Configuration config) {
+ sinkNode(node, config)
+ or
+ node instanceof FlowCheckNode
+ or
+ exists(NodeEx succ | Stage2::revFlow(succ, config) |
+ read(node, _, succ, config) or
+ store(node, _, succ, _, config) or
+ flowOutOfCallNodeCand1(_, node, succ, config) or
+ flowIntoCallNodeCand1(_, node, succ, config) or
+ additionalJumpStep(node, succ, config) or
+ jumpStep(node, succ, config)
+ )
+ }
+
+ pragma[noinline]
+ private predicate additionalLocalFlowStepNodeCand2(
+ NodeEx node1, NodeEx node2, Configuration config
+ ) {
+ Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and
+ additionalLocalFlowStepNodeCand1(node1, node2, config)
+ }
+
+ /**
+ * Holds if the local steps from `node1` to `node2` form a prefix of a
+ * maximal subsequence of local flow steps in a dataflow path.
+ *
+ * This is the transitive closure of `[additional]localFlowStep`, started
+ * at nodes satisfying `localFlowEntry`.
+ */
+ pragma[nomagic]
+ private predicate localFlowStepPlus(
+ NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+ LocalCallContext cc
+ ) {
+ not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ (
+ localFlowEntry(node1, pragma[only_bind_into](config)) and // base case: one step out of an entry node
+ (
+ additionalLocalFlowStepNodeCand2(node1, node2, config) and
+ preservesValue = false and
+ t = node2.getDataFlowType()
+ or
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true and
+ t = node1.getDataFlowType() // irrelevant dummy value
+ ) and
+ node1 != node2 and
+ not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ cc.relevantFor(node1.getEnclosingCallable()) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ or
+ exists(NodeEx prev | // extend an existing path by a value-preserving step
+ not prev instanceof FlowCheckNode and
+ localFlowStepPlus(node1, prev, preservesValue, t, pragma[only_bind_into](config), cc) and
+ localFlowStepNodeCand1(prev, node2, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ or
+ exists(NodeEx prev | // extend an existing path by an additional (non-value-preserving) step
+ not prev instanceof FlowCheckNode and
+ localFlowStepPlus(node1, prev, _, _, pragma[only_bind_into](config), cc) and
+ additionalLocalFlowStepNodeCand2(prev, node2, config) and
+ t = node2.getDataFlowType() and
+ preservesValue = false and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ )
+ }
+
+ /**
+ * Holds if one or more local steps lead from `node1` to `node2`, and that
+ * path can be a maximal subsequence of local steps in a dataflow path.
+ */
+ pragma[nomagic]
+ predicate localFlowBigStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf,
+ Configuration config, LocalCallContext callContext
+ ) {
+ localFlowExit(node2, config) and
+ localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext)
+ }
+}
+
+private import LocalFlowBigStep
+
+private module Stage3 {
+ module PrevStage = Stage2; // Stage 2 supplies the candidate relations (`PrevStage::...`) used below
+
+ class ApApprox = PrevStage::Ap; // the previous stage's access-path type
+
+ class Ap = AccessPathFront; // this stage's access-path type
+
+ class ApNil = AccessPathFrontNil;
+
+ private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } // maps `ap` to the previous stage's approximation
+
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) // nil front carrying the node's type
+ }
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } // only the head is constrained; `tail` need only exist
+
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ class ApOption = AccessPathFrontOption;
+
+ ApOption apNone() { result = TAccessPathFrontNone() }
+
+ ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) }
+
+ class Cc = boolean; // call contexts are booleans in this stage: `true` is `CcCall`, `false` is `CcNoCall`
+
+ class CcCall extends Cc {
+ CcCall() { this = true }
+
+ /** Holds if this call context may be `call`. */
+ predicate matchesCall(DataFlowCall call) { any() } // holds for every call: the context does not record which one
+ }
+
+ class CcNoCall extends Cc {
+ CcNoCall() { this = false }
+ }
+
+ Cc ccNone() { result = false }
+
+ private class LocalCc = Unit; // local call contexts carry no information in this stage
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } // any `CcCall`, regardless of the arguments
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } // any `CcNoCall`, regardless of the arguments
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // unconstrained: `LocalCc` is `Unit`
+
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ exists(lcc) and localFlowBigStep(node1, node2, preservesValue, ap, config, _) // a big step under any local call context
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; // call edges restricted to `Stage2::revFlow` nodes
+
+ private predicate flowIntoCall = flowIntoCallNodeCand2/5; // call edges restricted to `Stage2::revFlow` nodes
+
+ pragma[nomagic]
+ private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) } // `ap` is cleared at `node`
+
+ pragma[nomagic]
+ private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } // `node` wraps a `CastingNode`
+
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) {
+ (not castingNodeEx(node) or compatibleTypes(node.getDataFlowType(), ap.getType())) and // type check only at casting nodes
+ not clear(node, ap)
+ }
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) {
+ // Stores have to be type-checked at this point: reverse flow through a getter
+ // may see a different type here than it does inside the getter.
+ compatibleTypes(ap.getType(), contentType)
+ }
+
+ /* Begin: Stage 3 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config) // candidates are exactly the previous stage's `revFlow` tuples
+ }
+
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 | // `result` equals `apa`; both are related only through the intermediate `apa0`
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` can be reached from a source of `config` with access
+ * path `ap`.
+ *
+ * `cc` is the call context, recording whether `node` was reached through an
+ * argument of a call; in that case `argAp` holds the access path that the
+ * argument had.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ filter(node, ap) and
+ flowCand(node, unbindApa(getApprox(ap)), config)
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // source
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving: access path unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // not value-preserving: only from `ApNil`
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: call context and `argAp` are reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from `ApNil`
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config) // track `argAp` only for possible through-flow
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow out of a callable that was entered through an argument of `call`
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` (in context
+ * `cc`/`argAp`) and the previous stage has a store-step candidate from `node1`
+ * to `node2` that stores into `tc`, for the approximation of `ap1`, and
+ * `typecheckStore` accepts `ap1` against the content type of the store.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ *
+ * That is, `cons` is `apCons(tc, tail)` for some `tc` with content `c` that
+ * is stored by a `fwdFlowStore` step.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ // nodes and contexts of the store are irrelevant here
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap` (in context
+ * `cc`/`argAp`), the head content of `ap` is `c`, and the previous stage has
+ * a read-step candidate of `c` from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap`
+ * in context `outercc`/`argAp`, and `flowIntoCall` connects that argument to
+ * the parameter `p`. `innercc` is the call context computed by
+ * `getCallContextCall` for entering `p`'s enclosing callable.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ // a non-nil access path may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node in a no-call context
+ * (`CcNoCall`) with access path `ap` and argument access path `argAp`, and
+ * `flowOutOfCall` connects that return node to `out`. `ccOut` is the call
+ * context computed by `getCallContextReturn` for the return edge.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ // a non-nil access path may only cross the return edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * call context (`CcCall`) that matches `call` and with recorded argument
+ * access path `argAp`, and `flowThroughOutOfCall` connects that return node
+ * to `out` at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ // a non-nil access path may only cross the return edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ *
+ * `cc` and `argAp` describe the context at the call site, and `ap` is the
+ * access path of the argument.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ // the previous stage must agree that `p` may flow through with this approximation
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlowStore` has a store of `tc` from `node1` (access path
+ * `ap1`) to `node2`, `ap2` is `tc` pushed onto `ap1`, and forward flow also
+ * contains some read of `tc`'s content from access path `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ // only keep stores that are matched by a read somewhere in the forward flow
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if `fwdFlowRead` has a read of `c` from `n1` (access path `ap1`) to
+ * `n2`, and `ap2` is a tail for which `fwdFlowConsCand(ap1, c, ap2, config)`
+ * holds, i.e. `ap1` arose from storing `c` onto `ap2` in the forward flow.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if, in the forward flow, some argument of `call` is entered
+ * (`fwdFlowIsEntered`) with an access path for which flow also comes back
+ * out of `call` (`fwdFlowOutFromArg`), and the resulting out node is itself
+ * in `fwdFlow` with the call-site context.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` connects `arg` to `p` at `call`, `arg` is reached
+ * by forward flow, the previous stage found that `p` may flow through, and
+ * `callMayFlowThroughFwd` holds for `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ *
+ * This is `revFlow0` restricted to node/access-path pairs that are also in
+ * `fwdFlow`.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ // raw one-step reverse relation (mutually recursive with this predicate)
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ // keep only what the forward pass reached
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ // Unpruned reverse flow: one disjunct per kind of step, traversed from sinks
+ // towards sources. `revFlow` intersects this with `fwdFlow`.
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // base case: a sink reached by forward flow with a nil access path
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // local step taken with flag `true`: the access path is unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // local step taken with flag `false`: the access path is nil on both sides
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap`, but the return context is reset
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: nil access path on both sides, return context reset
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ // `node` is the source of a store whose target carries `ap0`; the store is
+ // only followed if reverse flow has a matching read of `c` from `ap0` to `ap`
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ // `node` (with `ap`) is the source of a read whose target `mid` carries `ap0`
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow into a callable whose parameter must be returned: the return
+ // context (`toReturn`, `returnAp`) at the call site is restored
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ // `returnAp` is recorded only if forward flow reached `node` in a call
+ // context with a recorded argument access path
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if reverse flow reaches `mid` with access path `ap0` (and return
+ * context `toReturn`/`returnAp`), and `storeStepFwd` has a store of `tc`
+ * from `node` (access path `ap`) to `mid` (access path `ap0`), where `c` is
+ * the content of `tc`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ *
+ * That is, some node `mid` is in `revFlow` with `tail`, and `readStepFwd`
+ * has a read of `c` into `mid` going from access path `cons` to `tail`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` is an alias of `tail` that is bound via `only_bind_into`
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches an out node of `call` with access path `ap`
+ * and return context `toReturn`/`returnAp`, and `flowOutOfCall` connects the
+ * return node `ret` to that out node.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ // a non-nil access path may only cross the return edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with access path `ap`, with
+ * `toReturn = false` and return access path `returnAp`, and `flowIntoCall`
+ * connects `arg` to that parameter at some call.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ // a non-nil access path may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with access path `ap`, with
+ * `toReturn = true` and recorded return access path `returnAp`, and
+ * `flowThroughIntoCall` connects `arg` to that parameter at `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ // a non-nil access path may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ *
+ * `toReturn` and `returnAp` describe the return context at the out node, and
+ * `ap` is the access path at the return node.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ // forward flow must have reached `ret` through an argument, in a call
+ // context that matches `call`
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if the store of `tc` from `node1` to `node2` (with content type
+ * `contentType`) is used by this stage's reverse flow: `node1` carries
+ * access path `ap1`, and reverse flow contains a matching read of `tc`'s
+ * content that goes from the resulting access path back to `ap1`.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if the read of `c` from `node1` to `node2` is used by this stage's
+ * flow: `node2` is in `revFlow` with some access path `ap2`, `readStepFwd`
+ * has the read going from some `ap1` to `ap2`, and reverse flow contains a
+ * store of `c` that goes from `ap2` to `ap1`.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is in `revFlow` for `config`, with any return context and any access path. */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /** Holds if `storeStepFwd` contains a store of `tc` onto access path `ap`. */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /** Holds if `storeStepCand` contains a store of `tc` onto access path `ap`. */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if the parameter `p` of callable `c` is in `revFlow` with access
+ * path `ap`, marked as having to return, with return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ p.getEnclosingCallable() = c and
+ revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ /**
+ * Holds if the parameter `p` of callable `c`, entered with access path `ap`,
+ * may flow to a return node of `c`: `parameterFlow` relates `p`/`ap` to a
+ * returned access path `ap0`, and some return node of `c` is in `revFlow`
+ * with `ap0` and is reached by `fwdFlow` in a call context with argument
+ * access path `ap`.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if, in the reverse flow, some argument of `call` is reached through
+ * a parameter that must be returned (`revFlowInToReturn`), the corresponding
+ * output of `call` is returned (`revFlowIsReturned`), and the argument is
+ * itself in `revFlow` with the call-site return context.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`.
+ *
+ * With `fwd = true` the numbers describe the forward pass (`fwdFlow` and
+ * `fwdConsCand`); with `fwd = false` they describe the reverse pass
+ * (`revFlow` and `consCand`). `nodes` counts distinct nodes, `fields`
+ * distinct typed contents, `conscand` distinct content/access-path pairs,
+ * and `tuples` distinct flow tuples.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ // reverse pass
+ fwd = false and
+ tuples =
+ count(NodeEx nd, boolean ret, ApOption retCtx, Ap path | revFlow(nd, ret, retCtx, path, config)) and
+ conscand = count(TypedContent tc, Ap tail | consCand(tc, tail, config)) and
+ fields = count(TypedContent tc | consCand(tc, _, config)) and
+ nodes = count(NodeEx nd | revFlow(nd, _, _, _, config))
+ or
+ // forward pass
+ fwd = true and
+ tuples =
+ count(NodeEx nd, Cc ctx, ApOption argCtx, Ap path | fwdFlow(nd, ctx, argCtx, path, config)) and
+ conscand = count(TypedContent tc, Ap tail | fwdConsCand(tc, tail, config)) and
+ fields = count(TypedContent tc | fwdConsCand(tc, _, config)) and
+ nodes = count(NodeEx nd | fwdFlow(nd, _, _, _, config))
+ }
+ /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and remains relevant for the following pruning stage.
+ *
+ * Concretely: stage 3 forward flow reaches `node` in a call context with
+ * argument access path front `argApf`, and stage 3 reverse flow marks `node`
+ * (with the same access path front) as having to return.
+ */
+private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
+ exists(AccessPathFront front, Stage3::CcCall callCtx |
+ Stage3::fwdFlow(node, callCtx, TAccessPathFrontSome(argApf), front, config) and
+ Stage3::revFlow(node, true, _, front, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ *
+ * This is the case when the number of stage 3 tails for `tc` exceeds the
+ * access path limit, and `(tails - 1) * nodes` exceeds the tuple limit, where
+ * `nodes` counts the nodes that carry an access path front headed by `tc`
+ * either in stage 3 reverse flow or as a summary context. Contents that force
+ * high precision are never considered expensive.
+ */
+private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) {
+ exists(int tailCount, int nodeCount, int maxAps, int maxTuples |
+ accessPathApproxCostLimits(maxAps, maxTuples) and
+ tailCount = strictcount(AccessPathFront tail | Stage3::consCand(tc, tail, config)) and
+ nodeCount =
+ strictcount(NodeEx n |
+ exists(AccessPathFrontHead front | front.getHead() = tc |
+ Stage3::revFlow(n, _, _, front, config)
+ or
+ flowCandSummaryCtx(n, front, config)
+ )
+ ) and
+ tailCount > maxAps and
+ (tailCount - 1) * nodeCount > maxTuples and
+ not tc.forceHighPrecision()
+ )
+}
+
+// Representation of access path approximations. A head `tc` is unfolded to
+// `TConsNil`/`TConsCons` unless `expensiveLen2unfolding` holds for it, in which
+// case only `TCons1` (head plus length) is available.
+private newtype TAccessPathApprox =
+ // the empty access path, with the type `t`
+ TNil(DataFlowType t) or
+ // a single content `tc` followed by the type `t`
+ TConsNil(TypedContent tc, DataFlowType t) {
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ not expensiveLen2unfolding(tc, _)
+ } or
+ // the first two contents `tc1`, `tc2` and the total length `len` (at least 2)
+ TConsCons(TypedContent tc1, TypedContent tc2, int len) {
+ Stage3::consCand(tc1, TFrontHead(tc2), _) and
+ len in [2 .. accessPathLimit()] and
+ not expensiveLen2unfolding(tc1, _)
+ } or
+ // only the first content `tc` and the total length `len` (at least 1)
+ TCons1(TypedContent tc, int len) {
+ len in [1 .. accessPathLimit()] and
+ expensiveLen2unfolding(tc, _)
+ }
+
+/**
+ * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only
+ * the first two elements of the list and its length are tracked. If data flows
+ * from a source to a given node with a given `AccessPathApprox`, this indicates
+ * the sequence of dereference operations needed to get from the value in the node
+ * to the tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPathApprox extends TAccessPathApprox {
+ /** Gets a textual representation of this access path approximation. */
+ abstract string toString();
+
+ /** Gets the first `TypedContent` of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the length of this access path. */
+ abstract int len();
+
+ /** Gets the type associated with this access path. */
+ abstract DataFlowType getType();
+
+ /** Gets the access path front corresponding to this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the access path obtained by popping `head` from this path, if any. */
+ abstract AccessPathApprox pop(TypedContent head);
+}
+
+/** The empty access path approximation, carrying only the type `t`. */
+private class AccessPathApproxNil extends AccessPathApprox, TNil {
+ private DataFlowType t;
+
+ AccessPathApproxNil() { this = TNil(t) }
+
+ // `concat` is applied to the set of results of `ppReprType(t)`.
+ override string toString() { result = concat(": " + ppReprType(t)) }
+
+ // There is no head content on an empty path.
+ override TypedContent getHead() { none() }
+
+ override int len() { result = 0 }
+
+ override DataFlowType getType() { result = t }
+
+ override AccessPathFront getFront() { result = TFrontNil(t) }
+
+ // Nothing can be popped from an empty path.
+ override AccessPathApprox pop(TypedContent head) { none() }
+}
+
+/** A non-empty access path approximation; the common supertype of the `Cons` variants. */
+abstract private class AccessPathApproxCons extends AccessPathApprox { }
+
+/** An access path approximation of length 1: the content `tc` followed by the type `t`. */
+private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil {
+ private TypedContent tc;
+ private DataFlowType t;
+
+ AccessPathApproxConsNil() { this = TConsNil(tc, t) }
+
+ override string toString() {
+ // The `concat` becomes "" if `ppReprType` has no result.
+ result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t))
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = 1 }
+
+ // The type of a non-empty path is the container type of its head.
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ // Popping the only content leaves the empty path with type `t`.
+ override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) }
+}
+
+/**
+ * An access path approximation of length at least 2, tracking its first two
+ * contents and its total length.
+ */
+private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons {
+ private TypedContent first;
+ private TypedContent second;
+ private int length;
+
+ AccessPathApproxConsCons() { this = TConsCons(first, second, length) }
+
+ override string toString() {
+ exists(string shown | shown = "[" + first.toString() + ", " + second.toString() |
+ // exactly two elements: both are shown and nothing is elided
+ length = 2 and result = shown + "]"
+ or
+ // longer paths: the remainder is elided and the total length is shown
+ length != 2 and result = shown + ", ... (" + length.toString() + ")]"
+ )
+ }
+
+ override TypedContent getHead() { result = first }
+
+ override int len() { result = length }
+
+ // The type of a non-empty path is the container type of its head.
+ override DataFlowType getType() { result = first.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(first) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = first and
+ (
+ // the tail is any approximation headed by `second` that is one shorter
+ result = TConsCons(second, _, length - 1)
+ or
+ length = 2 and
+ result = TConsNil(second, _)
+ or
+ result = TCons1(second, length - 1)
+ )
+ }
+}
+
+/**
+ * An access path approximation of length at least 1 that tracks only its
+ * first content `tc` and its total length `len`.
+ */
+private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 {
+ private TypedContent tc;
+ private int len;
+
+ AccessPathApproxCons1() { this = TCons1(tc, len) }
+
+ override string toString() {
+ if len = 1
+ then result = "[" + tc.toString() + "]"
+ else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]"
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = len }
+
+ // The type of a non-empty path is the container type of its head.
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc and
+ (
+ // The second content is not tracked, so every `tc2` that stage 3 allows
+ // directly below `tc` is a possible head of the tail.
+ exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) |
+ result = TConsCons(tc2, _, len - 1)
+ or
+ len = 2 and
+ result = TConsNil(tc2, _)
+ or
+ result = TCons1(tc2, len - 1)
+ )
+ or
+ // A path of length 1 pops to the empty path, for every type `t` that
+ // stage 3 allows directly below `tc`.
+ exists(DataFlowType t |
+ len = 1 and
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ result = TNil(t)
+ )
+ )
+ }
+}
+
+/** Gets the access path approximation obtained by popping `tc` from `apa`, if any. */
+private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { apa.pop(tc) = result }
+
+/**
+ * Gets an access path approximation obtained by pushing `tc` onto `apa`, that
+ * is, one from which popping `tc` yields `apa`.
+ */
+private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = result.pop(tc) }
+
+// An optional access path approximation: either absent or a specific `apa`.
+private newtype TAccessPathApproxOption =
+ TAccessPathApproxNone() or
+ TAccessPathApproxSome(AccessPathApprox apa)
+
+/** An optional `AccessPathApprox`. */
+private class AccessPathApproxOption extends TAccessPathApproxOption {
+ /**
+ * Gets a textual representation: that of the wrapped access path
+ * approximation, or the empty string when there is none.
+ */
+ string toString() {
+ exists(AccessPathApprox inner | this = TAccessPathApproxSome(inner) |
+ result = inner.toString()
+ )
+ or
+ this = TAccessPathApproxNone() and result = ""
+ }
+}
+
+private module Stage4 {
+ // Stage 4 refines the results of stage 3.
+ module PrevStage = Stage3;
+
+ // The previous stage's access paths serve as this stage's approximation.
+ class ApApprox = PrevStage::Ap;
+
+ // This stage tracks `AccessPathApprox` access paths.
+ class Ap = AccessPathApprox;
+
+ class ApNil = AccessPathApproxNil;
+
+ /** Gets the previous-stage approximation of `ap`, which is its front. */
+ private ApApprox getApprox(Ap ap) { result = ap.getFront() }
+
+ /**
+ * Gets the empty access path typed by the data-flow type of `node`, provided
+ * that `node` is in the previous stage's `revFlow`.
+ */
+ private ApNil getApNil(NodeEx node) {
+ exists(DataFlowType nodeType |
+ nodeType = node.getDataFlowType() and
+ PrevStage::revFlow(node, _)
+ |
+ result = TNil(nodeType)
+ )
+ }
+
+ /** Gets an access path obtained by pushing `tc` onto `tail` (see `push`). */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) }
+
+ /** Gets the content of the first `TypedContent` of `ap`, if `ap` has a head. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) {
+ exists(TypedContent head | head = ap.getHead() | result = head.getContent())
+ }
+
+ // Optional access paths, used for the `argAp` and `returnAp` columns.
+ class ApOption = AccessPathApproxOption;
+
+ /** Gets the absent access path option. */
+ ApOption apNone() { result = TAccessPathApproxNone() }
+
+ /** Gets the access path option that wraps `ap`. */
+ ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) }
+
+ // Call contexts used by this stage.
+ class Cc = CallContext;
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ /** Gets the call context used when no call context is tracked (`CallContextAny`). */
+ Cc ccNone() { result instanceof CallContextAny }
+
+ // Call context used for local flow steps.
+ private class LocalCc = LocalCallContext;
+
+ /**
+ * Gets the call context for entering callable `c` through `call` from the
+ * outer context `outercc`, provided `checkCallContextCall` accepts the
+ * combination. The result identifies the specific call when
+ * `recordDataFlowCallSite(call, c)` holds, and is `TSomeCall()` otherwise.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
+ }
+
+ /**
+ * Gets the call context for returning from callable `c` to `call` from the
+ * inner context `innercc`, provided `checkCallContextReturn` accepts the
+ * combination. The result is `TReturn(c, call)` when
+ * `reducedViableImplInReturn(c, call)` holds, and `ccNone()` otherwise.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ /**
+ * Gets the local call context for `node` under call context `cc`, computed
+ * by `getLocalCallContext` for `node`'s enclosing callable. Only defined for
+ * nodes that satisfy `localFlowEntry`.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
+ localFlowEntry(node, config) and
+ result =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ node.getEnclosingCallable())
+ }
+
+ /**
+ * Holds if `localFlowBigStep` has a step from `node1` to `node2` in local
+ * call context `lcc`, with the given `preservesValue` flag and with the
+ * front of the nil access path `ap`.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc)
+ }
+
+ /**
+ * Holds if `flowOutOfCallNodeCand2` has a return edge from `node1` to
+ * `node2` at `call`, and both nodes are in the previous stage's `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate flowOutOfCall(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `flowIntoCallNodeCand2` has a call edge from argument `node1` to
+ * parameter `node2` at `call`, and both nodes are in the previous stage's
+ * `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate flowIntoCall(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+ ) {
+ flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ // No additional filtering is applied in this stage: holds for every pair.
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { any() }
+
+ // Type checking is not necessary here as it has already been done in stage 3.
+ // The predicate therefore holds for every pair.
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 4 logic. */
+ /** Holds if `node` is in the previous stage's `revFlow` with access path `apa`. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets `apa` itself. Both `apa` and the result are equated to a fresh
+ * variable through `pragma[only_bind_into]`, so logically this is the
+ * identity; the pragmas only influence binding.
+ */
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCall` has a return edge from `ret` to `out` at `call`,
+ * the previous stage found that `call` may be flowed through in its reverse
+ * pass, and some parameter of `ret`'s enclosing callable may flow through
+ * according to the previous stage.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ *
+ * This is `fwdFlow0` restricted to tuples for which `flowCand` accepts `node`
+ * with the approximation of `ap` and for which `filter(node, ap)` holds.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // raw one-step forward relation (mutually recursive with this predicate)
+ fwdFlow0(node, cc, argAp, ap, config) and
+ // prune against the previous stage using the approximated access path
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ // additional filter on the node/access-path pair
+ filter(node, ap)
+ }
+
+ // Unpruned forward flow: one disjunct per kind of step. `fwdFlow` applies the
+ // `flowCand`/`filter` pruning on top of this predicate.
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // base case: a candidate source node, with no call context, no argument
+ // access path, and the nil access path for `node`
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local step from `mid`, in the local call context derived from `cc`
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ // value-preserving step: the access path is carried over unchanged
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ // non-value-preserving step: only allowed from a nil access path; the
+ // step itself supplies the new (nil) access path
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap`, but the call context and argument access path are reset
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from a nil access path, yielding the nil access
+ // path for `node`; the call context and argument access path are reset
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ // the new access path is `tc` pushed onto the access path before the store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ // `ap` is a tail that some forward store extended with `c` to obtain `ap0`
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ // `argAp` is recorded only when the previous stage found that the parameter
+ // may flow through with this approximated access path
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow out of a callable that was entered through an argument of `call`:
+ // the context (`cc`, `argAp`) at the call site is restored
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` (in context
+ * `cc`/`argAp`) and the previous stage has a store-step candidate from `node1`
+ * to `node2` that stores into `tc`, for the approximation of `ap1`, and
+ * `typecheckStore` accepts `ap1` against the content type of the store.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ *
+ * That is, `cons` is `apCons(tc, tail)` for some `tc` with content `c` that
+ * is stored by a `fwdFlowStore` step.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ // nodes and contexts of the store are irrelevant here
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap` (in context
+ * `cc`/`argAp`), the head content of `ap` is `c`, and the previous stage has
+ * a read-step candidate of `c` from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap`
+ * in context `outercc`/`argAp`, and `flowIntoCall` connects that argument to
+ * the parameter `p`. `innercc` is the call context computed by
+ * `getCallContextCall` for entering `p`'s enclosing callable.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ // a non-nil access path may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node in a no-call context
+ * (`CcNoCall`) with access path `ap` and argument access path `argAp`, and
+ * `flowOutOfCall` connects that return node to `out`. `ccOut` is the call
+ * context computed by `getCallContextReturn` for the return edge.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ // a non-nil access path may only cross the return edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * call context (`CcCall`) that matches `call` and with recorded argument
+ * access path `argAp`, and `flowThroughOutOfCall` connects that return node
+ * to `out` at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ // a non-nil access path may only cross the return edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ *
+ * `cc` and `argAp` describe the context at the call site, and `ap` is the
+ * access path of the argument.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ // the previous stage must agree that `p` may flow through with this approximation
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlowStore` has a store of `tc` from `node1` (access path
+ * `ap1`) to `node2`, `ap2` is `tc` pushed onto `ap1`, and forward flow also
+ * contains some read of `tc`'s content from access path `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ // only keep stores that are matched by a read somewhere in the forward flow
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if `fwdFlowRead` has a read of `c` from `n1` (access path `ap1`) to
+ * `n2`, and `ap2` is a tail for which `fwdFlowConsCand(ap1, c, ap2, config)`
+ * holds, i.e. `ap1` arose from storing `c` onto `ap2` in the forward flow.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if, in the forward flow, some argument of `call` is entered
+ * (`fwdFlowIsEntered`) with an access path for which flow also comes back
+ * out of `call` (`fwdFlowOutFromArg`), and the resulting out node is itself
+ * in `fwdFlow` with the call-site context.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` connects `arg` to `p` at `call`, `arg` is reached
+ * by forward flow, the previous stage found that `p` may flow through, and
+ * `callMayFlowThroughFwd` holds for `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ *
+ * This is `revFlow0` restricted to node/access-path pairs that are also in
+ * `fwdFlow`.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ // raw one-step reverse relation (mutually recursive with this predicate)
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ // keep only what the forward pass reached
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ // Unpruned reverse flow: one disjunct per kind of step, traversed from sinks
+ // towards sources. `revFlow` intersects this with `fwdFlow`.
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // base case: a sink reached by forward flow with a nil access path
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local step: the access path is unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // non-value-preserving local step: the access path is nil on both sides
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap`, but the return context is reset
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: nil access path on both sides, return context reset
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ // `node` is the source of a store whose target carries `ap0`; the store is
+ // only followed if reverse flow has a matching read of `c` from `ap0` to `ap`
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ // `node` (with `ap`) is the source of a read whose target `mid` carries `ap0`
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow into a callable whose parameter must be returned: the return
+ // context (`toReturn`, `returnAp`) at the call site is restored
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ // `returnAp` is recorded only if forward flow reached `node` in a call
+ // context with a recorded argument access path
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if reverse flow reaches `mid` with access path `ap0` (and return
+ * context `toReturn`/`returnAp`), and `storeStepFwd` has a store of `tc`
+ * from `node` (access path `ap`) to `mid` (access path `ap0`), where `c` is
+ * the content of `tc`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ *
+ * That is, some node `mid` is in `revFlow` with `tail`, and `readStepFwd`
+ * has a read of `c` into `mid` going from access path `cons` to `tail`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` is an alias of `tail` that is bound via `only_bind_into`
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCall` relates `call` and `ret` to some node `out` that is
+ * covered by `revFlow` with `toReturn`, `returnAp`, and `ap`. When the step
+ * has `allowsFieldFlow = false`, `ap` is required to be an `ApNil`.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` relates `arg` (for some call) to a parameter `p`
+ * that is covered by `revFlow` with `toReturn = false`, `returnAp`, and `ap`.
+ * When the step has `allowsFieldFlow = false`, `ap` is required to be an
+ * `ApNil`.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `flowThroughIntoCall` relates `call` and `arg` to a parameter `p`
+ * that is covered by `revFlow` with `toReturn = true`, return access path
+ * `returnAp`, and `ap`. When the step has `allowsFieldFlow = false`, `ap` is
+ * required to be an `ApNil`.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ // `ret` must also be reached by forward flow, inside a call and with some
+ // argument access path, using the same access path `ap` ...
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ // ... and that forward call context must match `call`
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if `store` relates `node1` to `node2` with `tc` and `contentType`,
+ * and this step is confirmed both by `revFlowStore` (where `ap1` is the access
+ * path paired with `node1`) and by `revFlowConsCand` for the content of `tc`
+ * and the same pair of access paths.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if `readStepFwd` relates `node1` to `node2` via content `c`, `node2`
+ * is covered by `revFlow` with the access path that results from the read,
+ * and `revFlowStore` holds for some store of `c` with the same pair of access
+ * paths.
+ *
+ * The `only_bind_into` pragmas are binding hints for the optimizer only.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `node` is covered by the five-column `revFlow` relation for
+ * `config`, for some values of the remaining columns.
+ */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /**
+ * Holds if `tc` together with access path `ap` occurs in some tuple of
+ * `storeStepFwd` under `config`.
+ */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if `tc` together with access path `ap` is part of some store step
+ * accepted by `storeStepCand` under `config`.
+ */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ exists(NodeEx storeSrc, NodeEx storeDst, DataFlowType contentType |
+ storeStepCand(storeSrc, ap, tc, storeDst, contentType, config)
+ )
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c` is covered by `revFlow` with
+ * `toReturn = true`, return access path `ap0`, and access path `ap`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ p.getEnclosingCallable() = c and
+ revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c` is covered by `revFlow` with
+ * `toReturn = true` and access path `ap`, and some return node `ret` in `c`
+ * is covered by both `revFlow` and `fwdFlow` with the matching return access
+ * path and with `ap` as the forward argument access path.
+ *
+ * Return kinds that update the parameter at the position of `p` itself are
+ * excluded.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if some argument `arg` of `call` is covered by `revFlow`, where flow
+ * into the callee (`revFlowInToReturn`) and flow back out of `call`
+ * (`revFlowIsReturned`) agree on the return access path `returnAp0`.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Computes size metrics for this stage. With `fwd = true` the numbers are
+ * taken from `fwdFlow` and `fwdConsCand`; with `fwd = false` they are taken
+ * from `revFlow` and `consCand`.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ // reverse-flow metrics
+ fwd = false and
+ tuples = count(NodeEx nd, boolean toRet, ApOption retAp, Ap a | revFlow(nd, toRet, retAp, a, config)) and
+ conscand = count(TypedContent tc, Ap a | consCand(tc, a, config)) and
+ fields = count(TypedContent tc | consCand(tc, _, config)) and
+ nodes = count(NodeEx nd | revFlow(nd, _, _, _, config))
+ or
+ // forward-flow metrics
+ fwd = true and
+ tuples = count(NodeEx nd, Cc ctx, ApOption argAp, Ap a | fwdFlow(nd, ctx, argAp, a, config)) and
+ conscand = count(TypedContent tc, Ap a | fwdConsCand(tc, a, config)) and
+ fields = count(TypedContent tc | fwdConsCand(tc, _, config)) and
+ nodes = count(NodeEx nd | fwdFlow(nd, _, _, _, config))
+ }
+ /* End: Stage 4 logic. */
+}
+
+/**
+ * Gets `conf`. The result and `conf` are both equated to the same value `c`;
+ * the `only_bind_into` pragmas are binding hints for the optimizer and do not
+ * change which tuples the predicate holds for.
+ */
+bindingset[conf, result]
+private Configuration unbindConf(Configuration conf) {
+ exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
+}
+
+/**
+ * Holds if `n` is in a callable `c` for which some parameter may flow through
+ * with access path approximation `apa`, and `n` is covered by
+ * `Stage4::revFlow` with `toReturn = true` and by `Stage4::fwdFlow` in a
+ * `CallContextCall` with `apa` as the argument access path.
+ *
+ * NOTE(review): the `parameterMayFlowThrough` conjunct passes `_` for the
+ * configuration rather than `config` - confirm that this is intended.
+ */
+private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) {
+ exists(DataFlowCallable c, AccessPathApprox apa0 |
+ Stage4::parameterMayFlowThrough(_, c, apa, _) and
+ Stage4::revFlow(n, true, _, apa0, config) and
+ Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
+ n.getEnclosingCallable() = c
+ )
+}
+
+/**
+ * The underlying type of `SummaryCtx`: either no summary context, or a
+ * parameter `p` with an access path `ap` whose approximation satisfies
+ * `Stage4::parameterMayFlowThrough` (for any callable and configuration).
+ */
+private newtype TSummaryCtx =
+ TSummaryCtxNone() or
+ TSummaryCtxSome(ParamNodeEx p, AccessPath ap) {
+ Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _)
+ }
+
+/**
+ * A context for generating flow summaries. This represents flow entry through
+ * a specific parameter with an access path of a specific shape.
+ *
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+ /** Gets a textual representation of this summary context. */
+ abstract string toString();
+}
+
+/** A summary context from which no flow summary can be generated. */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+ /** Gets the textual representation, which is `<none>`. */
+ override string toString() { result = "<none>" }
+}
+
+/**
+ * A summary context that records the parameter and the access path through
+ * which a callable was entered, so that a flow summary can be generated.
+ */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+ private ParamNodeEx p;
+ private AccessPath ap;
+
+ SummaryCtxSome() { TSummaryCtxSome(p, ap) = this }
+
+ /** Gets the position of the parameter of this context. */
+ int getParameterPos() {
+ exists(DataFlowCallable callable | p.isParameterOf(callable, result))
+ }
+
+ /** Gets a textual representation: the parameter followed by the access path. */
+ override string toString() { result = p + ": " + ap }
+
+ /** Holds if the parameter of this context is at the specified location. */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/**
+ * Gets the number of length 2 access path approximations that correspond to `apa`.
+ *
+ * Has no result if there are no such approximations (`strictcount`).
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+ exists(TypedContent tc, int len |
+ tc = apa.getHead() and
+ len = apa.len() and
+ result =
+ // count the distinct fronts of those tails of length `len - 1` that
+ // `Stage4::consCand` accepts below the head `tc`
+ strictcount(AccessPathFront apf |
+ Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1),
+ config)
+ )
+ )
+}
+
+/**
+ * Gets the number of nodes that either are covered by `Stage4::revFlow` with
+ * access path approximation `apa` or satisfy `nodeMayUseSummary` for `apa`.
+ * Has no result when there are no such nodes (`strictcount`).
+ */
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+ result =
+ strictcount(NodeEx candidate |
+ nodeMayUseSummary(candidate, apa, config)
+ or
+ Stage4::revFlow(candidate, _, _, apa, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation matching `apa` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = count1to2unfold(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ // expensive only if both limits are exceeded: the number of unfolded
+ // approximations, and the `(aps - 1) * nodes` estimate
+ apLimit < aps and
+ tupleLimit < (aps - 1) * nodes
+ )
+}
+
+/**
+ * Gets a tail of `apa`, that is, the result of popping some head from `apa`
+ * such that the head/tail pair is accepted by `Stage4::consCand`.
+ */
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+ exists(TypedContent poppedHead |
+ Stage4::consCand(poppedHead, result, config) and
+ result = apa.pop(poppedHead)
+ )
+}
+
+/**
+ * Holds with `unfold = false` if a precise head-tail representation of `apa` is
+ * expected to be expensive. Holds with `unfold = true` otherwise.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+ // heads that force high precision are always unfolded
+ if apa.getHead().forceHighPrecision()
+ then unfold = true
+ else
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = countPotentialAps(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ // do not unfold only when both cost limits are exceeded
+ if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
+ )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+ // not unfolded, and either `apa` is not an `AccessPathApproxCons1` or its
+ // 1-to-2 unfolding is expensive: a single access path
+ evalUnfold(apa, false, config) and
+ result = 1 and
+ (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config))
+ or
+ // not unfolded, but the 1-to-2 unfolding is affordable
+ evalUnfold(apa, false, config) and
+ result = count1to2unfold(apa, config) and
+ not expensiveLen1to2unfolding(apa, config)
+ or
+ // unfolded to a precise head-tail representation
+ evalUnfold(apa, true, config) and
+ result = countPotentialAps(apa, config)
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ */
+language[monotonicAggregates]
+private int countPotentialAps(AccessPathApprox apa, Configuration config) {
+ // the empty access path approximation counts as one access path
+ apa instanceof AccessPathApproxNil and result = 1
+ or
+ // otherwise, add up `countAps` over all tails; no result if there is no tail
+ result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config))
+}
+
+/**
+ * The underlying type of `AccessPath`. The representation used for an
+ * `AccessPathApproxCons` is chosen by `evalUnfold` and
+ * `expensiveLen1to2unfolding`:
+ * - `TAccessPathNil`: the empty access path, carrying only a type.
+ * - `TAccessPathCons`: a precise head-tail form, used when
+ *   `evalUnfold(apa, false, _)` does not hold.
+ * - `TAccessPathCons2`: two heads and a length, used when `evalUnfold` gives
+ *   `false` and the 1-to-2 unfolding is not expensive.
+ * - `TAccessPathCons1`: one head and a length, used when `evalUnfold` gives
+ *   `false` and the 1-to-2 unfolding is expensive.
+ */
+private newtype TAccessPath =
+ TAccessPathNil(DataFlowType t) or
+ TAccessPathCons(TypedContent head, AccessPath tail) {
+ exists(AccessPathApproxCons apa |
+ not evalUnfold(apa, false, _) and
+ head = apa.getHead() and
+ tail.getApprox() = getATail(apa, _)
+ )
+ } or
+ TAccessPathCons2(TypedContent head1, TypedContent head2, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ not expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head1 = apa.getHead() and
+ head2 = getATail(apa, _).getHead()
+ )
+ } or
+ TAccessPathCons1(TypedContent head, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head = apa.getHead()
+ )
+ }
+
+/**
+ * The underlying type of `PathNode`: intermediate nodes (`TPathNodeMid`) and
+ * sink nodes (`TPathNodeSink`). Both are restricted to nodes that are covered
+ * by `Stage4::revFlow`.
+ */
+private newtype TPathNode =
+ TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) {
+ // A PathNode is introduced by a source ...
+ Stage4::revFlow(node, config) and
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // ... or a step from an existing PathNode to another node.
+ exists(PathNodeMid mid |
+ pathStep(mid, node, cc, sc, ap) and
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config))
+ )
+ } or
+ TPathNodeSink(NodeEx node, Configuration config) {
+ sinkNode(node, pragma[only_bind_into](config)) and
+ Stage4::revFlow(node, pragma[only_bind_into](config)) and
+ (
+ // A sink that is also a source ...
+ sourceNode(node, config)
+ or
+ // ... or a sink that can be reached from a source
+ exists(PathNodeMid mid |
+ pathStep(mid, node, _, _, TAccessPathNil(_)) and
+ pragma[only_bind_into](config) = mid.getConfiguration()
+ )
+ )
+ }
+
+/**
+ * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a
+ * source to a given node with a given `AccessPath`, this indicates the sequence
+ * of dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPath extends TAccessPath {
+ /** Gets the head of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the tail of this access path, if any. */
+ abstract AccessPath getTail();
+
+ /** Gets the front of this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the approximation of this access path. */
+ abstract AccessPathApprox getApprox();
+
+ /** Gets the length of this access path. */
+ abstract int length();
+
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /**
+ * Gets the access path obtained by popping `tc` from this access path, if any.
+ * This is the tail of this access path, provided that `tc` is its head.
+ */
+ final AccessPath pop(TypedContent tc) {
+ result = this.getTail() and
+ tc = this.getHead()
+ }
+
+ /**
+ * Gets the access path obtained by pushing `tc` onto this access path.
+ * Defined as the inverse of `pop`: popping `tc` from the result yields this
+ * access path.
+ */
+ final AccessPath push(TypedContent tc) { this = result.pop(tc) }
+}
+
+/** The empty access path, which carries only the type `t`. */
+private class AccessPathNil extends AccessPath, TAccessPathNil {
+ private DataFlowType t;
+
+ AccessPathNil() { this = TAccessPathNil(t) }
+
+ /** Gets the type carried by this empty access path. */
+ DataFlowType getType() { result = t }
+
+ // An empty access path has neither a head nor a tail.
+ override TypedContent getHead() { none() }
+
+ override AccessPath getTail() { none() }
+
+ override AccessPathFrontNil getFront() { result = TFrontNil(t) }
+
+ override AccessPathApproxNil getApprox() { result = TNil(t) }
+
+ override int length() { result = 0 }
+
+ // NOTE(review): `concat` is presumably used so that the result is the empty
+ // string when `ppReprType(t)` has no result - confirm.
+ override string toString() { result = concat(": " + ppReprType(t)) }
+}
+
+/** A non-empty access path in precise form: a `head` followed by a `tail` access path. */
+private class AccessPathCons extends AccessPath, TAccessPathCons {
+ private TypedContent head;
+ private AccessPath tail;
+
+ AccessPathCons() { this = TAccessPathCons(head, tail) }
+
+ override TypedContent getHead() { result = head }
+
+ override AccessPath getTail() { result = tail }
+
+ override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+ /**
+ * Gets an approximation of this access path. Each disjunct yields a result
+ * only if the corresponding `AccessPathApprox` value exists.
+ */
+ override AccessPathApproxCons getApprox() {
+ result = TConsNil(head, tail.(AccessPathNil).getType())
+ or
+ result = TConsCons(head, tail.getHead(), this.length())
+ or
+ result = TCons1(head, this.length())
+ }
+
+ override int length() { result = 1 + tail.length() }
+
+ /**
+ * Gets the textual representation of this access path without the leading
+ * `[`. If `needsSuffix` is `true`, the result ends in `... (` and the caller
+ * appends the length followed by `)]`; if it is `false`, the result already
+ * ends in `]`, optionally followed by the type.
+ */
+ private string toStringImpl(boolean needsSuffix) {
+ // the tail is empty: close the bracket and append the type, if it has a representation
+ exists(DataFlowType t |
+ tail = TAccessPathNil(t) and
+ needsSuffix = false and
+ result = head.toString() + "]" + concat(" : " + ppReprType(t))
+ )
+ or
+ // the tail is itself in precise form: recurse
+ result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix)
+ or
+ // the tail only knows its first two elements and its length
+ exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) |
+ result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false
+ )
+ or
+ // the tail only knows its first element and its length
+ exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) |
+ result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false
+ )
+ }
+
+ override string toString() {
+ // truncated form: append the total length and close the brackets
+ result = "[" + this.toStringImpl(true) + length().toString() + ")]"
+ or
+ // complete form: `toStringImpl` has already closed the bracket
+ result = "[" + this.toStringImpl(false)
+ }
+}
+
+/**
+ * An access path of length `len` for which only the first two elements,
+ * `head1` and `head2`, are represented.
+ */
+private class AccessPathCons2 extends AccessPath, TAccessPathCons2 {
+ private TypedContent head1;
+ private TypedContent head2;
+ private int len;
+
+ AccessPathCons2() { TAccessPathCons2(head1, head2, len) = this }
+
+ override TypedContent getHead() { head1 = result }
+
+ /** Gets a tail of length `len - 1` that starts with `head2` and is accepted by `Stage4::consCand`. */
+ override AccessPath getTail() {
+ result.length() = len - 1 and
+ result.getHead() = head2 and
+ Stage4::consCand(head1, result.getApprox(), _)
+ }
+
+ override AccessPathFrontHead getFront() { TFrontHead(head1) = result }
+
+ override AccessPathApproxCons getApprox() {
+ result = TCons1(head1, len)
+ or
+ result = TConsCons(head1, head2, len)
+ }
+
+ override int length() { len = result }
+
+ /** Gets a textual representation; the length is shown only when elements are omitted. */
+ override string toString() {
+ len = 2 and
+ result = "[" + head1.toString() + ", " + head2.toString() + "]"
+ or
+ len != 2 and
+ result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]"
+ }
+}
+
+/**
+ * An access path of length `len` for which only the first element, `head`,
+ * is represented.
+ */
+private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
+ private TypedContent head;
+ private int len;
+
+ AccessPathCons1() { TAccessPathCons1(head, len) = this }
+
+ override TypedContent getHead() { head = result }
+
+ /** Gets a tail of length `len - 1` that is accepted by `Stage4::consCand` below `head`. */
+ override AccessPath getTail() {
+ result.length() = len - 1 and
+ Stage4::consCand(head, result.getApprox(), _)
+ }
+
+ override AccessPathFrontHead getFront() { TFrontHead(head) = result }
+
+ override AccessPathApproxCons getApprox() { TCons1(head, len) = result }
+
+ override int length() { len = result }
+
+ /** Gets a textual representation; the length is shown only when elements are omitted. */
+ override string toString() {
+ len = 1 and
+ result = "[" + head.toString() + "]"
+ or
+ len != 1 and
+ result = "[" + head.toString() + ", ... (" + len.toString() + ")]"
+ }
+}
+
+/**
+ * A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
+ * Only those `PathNode`s that are reachable from a source are generated.
+ */
+class PathNode extends TPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { none() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context.
+ */
+ string toStringWithContext() { none() }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ none()
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() }
+
+ /** Gets an unfiltered successor of this node, provided that this node is hidden. */
+ private PathNode getASuccessorIfHidden() {
+ this.(PathNodeImpl).isHidden() and
+ result = this.(PathNodeImpl).getASuccessorImpl()
+ }
+
+ /** Gets a successor of this node, if any. */
+ final PathNode getASuccessor() {
+ // take one unfiltered step and then skip over any number of hidden nodes;
+ // neither endpoint may itself be hidden
+ result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
+ not this.(PathNodeImpl).isHidden() and
+ not result.(PathNodeImpl).isHidden()
+ }
+
+ /** Holds if this node is a source. */
+ predicate isSource() { none() }
+}
+
+/**
+ * The internal base class of path nodes. It exposes the underlying `NodeEx`
+ * and the unfiltered successor relation, and implements the textual
+ * representation and location of `PathNode`.
+ */
+abstract private class PathNodeImpl extends PathNode {
+ /** Gets a successor of this node, without skipping hidden nodes. */
+ abstract PathNode getASuccessorImpl();
+
+ /** Gets the underlying `NodeEx`. */
+ abstract NodeEx getNodeEx();
+
+ /**
+ * Holds if this node is hidden: either its underlying node is a `hiddenNode`
+ * and this is neither a source nor a sink path node, or its underlying node
+ * is an implicit read (`TNodeImplicitRead`).
+ */
+ predicate isHidden() {
+ hiddenNode(this.getNodeEx().asNode()) and
+ not this.isSource() and
+ not this instanceof PathNodeSink
+ or
+ this.getNodeEx() instanceof TNodeImplicitRead
+ }
+
+ /**
+ * Gets the access path part of the textual representation: empty for sinks
+ * and for access paths whose string is empty, otherwise a space followed by
+ * the access path.
+ */
+ private string ppAp() {
+ this instanceof PathNodeSink and result = ""
+ or
+ exists(string s | s = this.(PathNodeMid).getAp().toString() |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ /**
+ * Gets the call context part of the textual representation: empty for
+ * sinks, otherwise the call context in angle brackets.
+ */
+ private string ppCtx() {
+ this instanceof PathNodeSink and result = ""
+ or
+ result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
+ }
+
+ override string toString() { result = this.getNodeEx().toString() + ppAp() }
+
+ override string toStringWithContext() { result = this.getNodeEx().toString() + ppAp() + ppCtx() }
+
+ override predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/**
+ * Holds if `n` is a sink path node, or if a sink path node can be reached
+ * from `n` by following `getASuccessor` one or more times.
+ */
+private predicate directReach(PathNode n) {
+ n instanceof PathNodeSink
+ or
+ exists(PathNode next | next = n.getASuccessor() | directReach(next))
+}
+
+/**
+ * Holds if `n` is relevant for path explanations: it can reach a sink
+ * (`directReach`) or it is used in a subpath (`Subpaths::retReach`).
+ */
+private predicate reach(PathNode n) {
+ Subpaths::retReach(n)
+ or
+ directReach(n)
+}
+
+/**
+ * Holds if `n2` is a successor of `n1` (via `getASuccessor`) and `n2` can
+ * reach a sink.
+ */
+private predicate pathSucc(PathNode n1, PathNode n2) {
+ directReach(n2) and
+ n2 = n1.getASuccessor()
+}
+
+/** Holds if `n2` is reachable from `n1` by one or more `pathSucc` steps (transitive closure via `fastTC`). */
+private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
+
+/**
+ * Supplies the `edges`, `nodes`, and `subpaths` query predicates that a
+ * path-problem query needs in order to include a path graph.
+ */
+module PathGraph {
+ /** Holds if `b` is a successor of `a` and `b` is part of the path graph. */
+ query predicate edges(PathNode a, PathNode b) {
+ reach(b) and
+ b = a.getASuccessor()
+ }
+
+ /** Holds if `n` is part of the path graph; `val` is its label under the key `semmle.label`. */
+ query predicate nodes(PathNode n, string key, string val) {
+ key = "semmle.label" and
+ val = n.toString() and
+ reach(n)
+ }
+
+ query predicate subpaths = Subpaths::subpaths/4;
+}
+
+/**
+ * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`,
+ * a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`.
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+ NodeEx node;
+ CallContext cc;
+ SummaryCtx sc;
+ AccessPath ap;
+ Configuration config;
+
+ PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ /** Gets the call context of this node. */
+ CallContext getCallContext() { result = cc }
+
+ /** Gets the summary context of this node. */
+ SummaryCtx getSummaryCtx() { result = sc }
+
+ /** Gets the access path of this node. */
+ AccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ /** Gets an intermediate node that is one `pathStep` away, in the same configuration. */
+ private PathNodeMid getSuccMid() {
+ pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(),
+ result.getAp()) and
+ result.getConfiguration() = unbindConf(this.getConfiguration())
+ }
+
+ override PathNodeImpl getASuccessorImpl() {
+ // an intermediate step to another intermediate node
+ result = getSuccMid()
+ or
+ // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+ exists(PathNodeMid mid, PathNodeSink sink |
+ mid = getSuccMid() and
+ mid.getNodeEx() = sink.getNodeEx() and
+ mid.getAp() instanceof AccessPathNil and
+ sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+ result = sink
+ )
+ }
+
+ /**
+ * Holds if this node is a source: a `sourceNode` with the initial call
+ * context, no summary context, and an empty access path.
+ */
+ override predicate isSource() {
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap instanceof AccessPathNil
+ }
+}
+
+/**
+ * A path node for a sink. Sink nodes consist only of a `NodeEx` and a
+ * `Configuration`; leaving out the `CallContext` keeps them disjoint from the
+ * intermediate nodes and gives each sink a unique path node.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+ NodeEx node;
+ Configuration config;
+
+ PathNodeSink() { TPathNodeSink(node, config) = this }
+
+ override NodeEx getNodeEx() { node = result }
+
+ override Configuration getConfiguration() { config = result }
+
+ /** A sink has no successors. */
+ override PathNode getASuccessorImpl() { none() }
+
+ /** Holds if the underlying node is also a source for the configuration. */
+ override predicate isSource() { sourceNode(node, config) }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`. The last step in or out of
+ * a callable is recorded by `cc`.
+ *
+ * `sc` and `ap` are the summary context and the access path after the step.
+ */
+private predicate pathStep(
+ PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+ // local flow: the call context and the summary context are unchanged
+ exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC |
+ midnode = mid.getNodeEx() and
+ conf = mid.getConfiguration() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ localCC =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ midnode.getEnclosingCallable()) and
+ ap0 = mid.getAp()
+ |
+ // third argument `true`: the access path is carried over unchanged
+ localFlowBigStep(midnode, node, true, _, conf, localCC) and
+ ap = ap0
+ or
+ // third argument `false`: only allowed from an empty access path
+ localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and
+ ap0 instanceof AccessPathNil
+ )
+ or
+ // jump step: the call context and the summary context are reset
+ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = mid.getAp()
+ or
+ // additional jump step: only allowed from an empty access path
+ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ mid.getAp() instanceof AccessPathNil and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // store: popping `tc` from the new access path gives the access path of `mid`
+ exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ // read: pushing `tc` onto the new access path gives the access path of `mid`
+ exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ // flow into a callable
+ pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp()
+ or
+ // flow out of a callable: the summary context is reset
+ pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone
+ or
+ // flow through a callable
+ pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx()
+}
+
+/**
+ * Holds if `Stage4::readStepCand` relates the node of `mid` to `node` via the
+ * content of `tc`, where `tc` is the head of `ap0`, the access path of `mid`.
+ * `cc` is the call context of `mid`.
+ */
+pragma[nomagic]
+private predicate pathReadStep(
+ PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+ mid.getCallContext() = cc and
+ mid.getAp() = ap0 and
+ ap0.getHead() = tc and
+ Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration())
+}
+
+/**
+ * Holds if `Stage4::storeStepCand` relates the node of `mid` to `node` via
+ * `tc`. `ap0` is the access path of `mid` and `cc` is its call context.
+ */
+pragma[nomagic]
+private predicate pathStoreStep(
+ PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+ mid.getCallContext() = cc and
+ mid.getAp() = ap0 and
+ Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration())
+}
+
+/**
+ * Holds if `mid` is at a return node with return position `pos`, its call
+ * context `innercc` is a `CallContextNoCall`, `apa` is the approximation of
+ * its access path, and `config` is its configuration.
+ */
+private predicate pathOutOfCallable0(
+ PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(RetNodeEx ret |
+ ret = mid.getNodeEx() and
+ pos = ret.getReturnPosition()
+ ) and
+ config = mid.getConfiguration() and
+ apa = mid.getAp().getApprox() and
+ innercc instanceof CallContextNoCall and
+ innercc = mid.getCallContext()
+}
+
+/**
+ * Holds if `mid` is at a return position (see `pathOutOfCallable0`) of kind
+ * `kind` in a callable `c`, and `resolveReturn` relates the call context of
+ * `mid` and `c` to `call`. `apa` and `config` are taken from `mid`.
+ *
+ * The resulting call context `cc` is `TReturn(c, call)` if
+ * `reducedViableImplInReturn(c, call)` holds and `TAnyCallContext()`
+ * otherwise.
+ */
+pragma[nomagic]
+private predicate pathOutOfCallable1(
+ PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ pathOutOfCallable0(mid, pos, innercc, apa, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ |
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+ )
+}
+
+/**
+ * Gets an out node of kind `kind` for `call` that is covered by
+ * `Stage4::revFlow` with access path approximation `apa`.
+ */
+pragma[noinline]
+private NodeEx getAnOutNodeFlow(
+ ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+ Stage4::revFlow(result, _, _, apa, config) and
+ kind.getAnOutNode(call) = result.asNode()
+}
+
+/**
+ * Holds if data may flow from `mid` to `out`. The last step of this path
+ * is a return from a callable and is recorded by `cc`, if needed.
+ *
+ * `out` is an out node (see `getAnOutNodeFlow`) for the call and return kind
+ * that `pathOutOfCallable1` determines for `mid`.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+ exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config |
+ pathOutOfCallable1(mid, call, kind, cc, apa, config) and
+ out = getAnOutNodeFlow(kind, call, apa, config)
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`.
+ *
+ * `ap` is the access path of `mid` and `apa` is its approximation.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+ PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ apa = ap.getApprox()
+ )
+}
+
+/**
+ * Holds if the parameter at position `i` of `callable` is covered by
+ * `Stage4::revFlow` with access path approximation `apa`.
+ */
+pragma[noinline]
+private predicate parameterCand(
+ DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+) {
+ exists(ParamNodeEx param |
+ param.isParameterOf(callable, i) and
+ Stage4::revFlow(param, _, _, apa, config)
+ )
+}
+
+/**
+ * Holds if `mid` is at the `i`th argument of `call` in call context `outercc`
+ * with access path `ap`, `callable` is a target of `call` according to
+ * `resolveCall`, and the parameter of `callable` at position `i` is a
+ * candidate (see `parameterCand`) for the approximation of `ap`.
+ */
+pragma[nomagic]
+private predicate pathIntoCallable0(
+ PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+ AccessPath ap
+) {
+ exists(AccessPathApprox apa |
+ pathIntoArg(mid, i, outercc, call, ap, apa) and
+ callable = resolveCall(call, outercc) and
+ // `any(int j | j <= i and j >= i)` denotes exactly `i`.
+ // NOTE(review): presumably spelled this way to steer the join order - confirm.
+ parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration())
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to `p` through `call`. The contexts
+ * before and after entering the callable are `outercc` and `innercc`,
+ * respectively.
+ *
+ * `sc` is the summary context after entering the callable.
+ */
+private predicate pathIntoCallable(
+ PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+ DataFlowCall call
+) {
+ exists(int i, DataFlowCallable callable, AccessPath ap |
+ pathIntoCallable0(mid, callable, i, outercc, call, ap) and
+ p.isParameterOf(callable, i) and
+ (
+ // use the summary context for `(p, ap)` if it exists ...
+ sc = TSummaryCtxSome(p, ap)
+ or
+ // ... and the empty summary context otherwise
+ not exists(TSummaryCtxSome(p, ap)) and
+ sc = TSummaryCtxNone()
+ )
+ |
+ // the specific call is recorded only if `recordDataFlowCallSite` holds
+ if recordDataFlowCallSite(call, callable)
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+}
+
+/**
+ * Holds if data may flow from a parameter given by `sc` to a return of kind `kind`.
+ *
+ * That is, some path node at a return node of kind `kind` has call context
+ * `cc`, summary context `sc`, configuration `config`, and access path `ap`
+ * with approximation `apa`.
+ */
+pragma[nomagic]
+private predicate paramFlowsThrough(
+ ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(PathNodeMid mid, RetNodeEx ret, int pos |
+ mid.getNodeEx() = ret and
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp() and
+ apa = ap.getApprox() and
+ pos = sc.getParameterPos() and
+ // exclude return kinds that update the parameter that `sc` was entered through
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+}
+
+/**
+ * Holds if `mid` flows into a callable via `call` (see `pathIntoCallable`,
+ * with outer call context `cc`) and, for the resulting inner call context and
+ * summary context, `paramFlowsThrough` holds for return kind `kind` with
+ * access path `ap` and approximation `apa`.
+ */
+pragma[nomagic]
+private predicate pathThroughCallable0(
+ DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+ AccessPathApprox apa
+) {
+ exists(CallContext innercc, SummaryCtx sc |
+ pathIntoCallable(mid, _, cc, innercc, sc, call) and
+ paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` through a callable to the node `out`.
+ * The context `cc` is restored to its value prior to entering the callable.
+ *
+ * `ap` is the access path at `out`.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+ exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa |
+ pathThroughCallable0(call, mid, kind, cc, ap, apa) and
+ out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+/**
+ * Provides the `subpaths` relation used by `PathGraph`, together with
+ * `retReach`, which identifies the nodes that lead to the return node of a
+ * summarized subpath.
+ */
+private module Subpaths {
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ */
+ pragma[nomagic]
+ private predicate subpaths01(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
+ pathIntoCallable(arg, par, _, innercc, sc, _) and
+ paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+ unbindConf(arg.getConfiguration()))
+ }
+
+ /**
+ * Holds if `subpaths01` holds for the same arguments and, in addition,
+ * `out` is an out node of kind `kind` for some call.
+ */
+ pragma[nomagic]
+ private predicate subpaths02(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ subpaths01(arg, par, sc, innercc, kind, out, apout) and
+ out.asNode() = kind.getAnOutNode(_)
+ }
+
+ /** Gets the configuration of `n`. Kept as a separate `nomagic` predicate. */
+ pragma[nomagic]
+ private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
+ *
+ * `ret` is a non-hidden path node at a return node of the kind, call
+ * context, summary context, and access path required by `subpaths02`, in
+ * the same configuration as `arg`.
+ */
+ pragma[nomagic]
+ private predicate subpaths03(
+ PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+ ) {
+ exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+ subpaths02(arg, par, sc, innercc, kind, out, apout) and
+ ret.getNodeEx() = retnode and
+ kind = retnode.getKind() and
+ innercc = ret.getCallContext() and
+ sc = ret.getSummaryCtx() and
+ ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+ apout = ret.getAp() and
+ not ret.isHidden()
+ )
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+ * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+ * `ret -> out` is summarized as the edge `arg -> out`.
+ */
+ predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+ exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+ // both `par` and `out` must be successors of `arg` in the path graph
+ pragma[only_bind_into](arg).getASuccessor() = par and
+ pragma[only_bind_into](arg).getASuccessor() = out and
+ subpaths03(arg, p, ret, o, apout) and
+ par.getNodeEx() = p and
+ out.getNodeEx() = o and
+ out.getAp() = apout
+ )
+ }
+
+ /**
+ * Holds if `n` can reach a return node in a summarized subpath.
+ */
+ predicate retReach(PathNode n) {
+ // `n` is itself the return node of a subpath ...
+ subpaths(_, _, n, _)
+ or
+ // ... or `n` has a successor `mid` that reaches one, where `mid` is not
+ // the entry (`par`) of a subpath
+ exists(PathNode mid |
+ retReach(mid) and
+ n.getASuccessor() = mid and
+ not subpaths(_, mid, _, _)
+ )
+ }
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink` under
+ * `configuration`, where `flowsource` is the source path node for `source`
+ * and `flowsink` is the sink path node for `sink`.
+ *
+ * `flowsink` is either `flowsource` itself or reachable from it via
+ * `pathSuccPlus`. There are only results if `configuration` has non-empty
+ * sources and sinks.
+ */
+private predicate flowsTo(
+ PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+ source = flowsource.(PathNodeImpl).getNodeEx().asNode() and
+ sink = flowsink.getNodeEx().asNode() and
+ configuration = flowsource.getConfiguration() and
+ flowsource.isSource() and
+ (
+ pathSuccPlus(flowsource, flowsink)
+ or
+ flowsink = flowsource
+ )
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+ flowsTo(_, _, source, sink, configuration) // the five-argument overload with its path nodes projected away
+}
+
+private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) {
+  fwd = false and // counts restricted to path nodes satisfying `reach`
+  tuples = count(PathNode p | reach(p)) and
+  conscand = count(AccessPath a | exists(PathNodeMid p | reach(p) and p.getAp() = a)) and
+  fields = count(TypedContent tc | exists(PathNodeMid p | reach(p) and p.getAp().getHead() = tc)) and
+  nodes = count(NodeEx nx | exists(PathNodeImpl p | reach(p) and p.getNodeEx() = nx))
+  or
+  fwd = true and // counts over all path nodes
+  tuples = count(PathNode p) and
+  conscand = count(AccessPath a | exists(PathNodeMid p | p.getAp() = a)) and
+  fields = count(TypedContent tc | exists(PathNodeMid p | p.getAp().getHead() = tc)) and
+  nodes = count(NodeEx nx | exists(PathNodeImpl p | p.getNodeEx() = nx))
+}
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Calculates per-stage metrics for data flow; `n` increases with stage and direction.
+ */
+predicate stageStats(
+ int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+ stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples) // `finalStats` takes no `config` argument
+ or
+ stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples) // likewise not restricted by `config`
+}
+
+private module FlowExploration {
+  private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) {
+    exists(NodeEx pred, NodeEx succ |
+      // argument-to-parameter edge
+      viableParamArgEx(_, succ, pred)
+      or
+      // return-to-out edge
+      viableReturnPosOutEx(_, pred.(RetNodeEx).getReturnPosition(), succ)
+      or
+      additionalJumpStep(pred, succ, config)
+      or
+      jumpStep(pred, succ, config)
+    |
+      c1 != c2 and
+      pred.getEnclosingCallable() = c1 and
+      succ.getEnclosingCallable() = c2
+    )
+  }
+
+  private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+    exists(DataFlowCallable prev |
+      callableStep(prev, c, config) and interestingCallableSrc(prev, config)
+    )
+    or
+    exists(Node src | c = getNodeEnclosingCallable(src) and config.isSource(src))
+  }
+
+  private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+    exists(DataFlowCallable next |
+      callableStep(c, next, config) and interestingCallableSink(next, config)
+    )
+    or
+    exists(Node snk | c = getNodeEnclosingCallable(snk) and config.isSink(snk))
+  }
+
+ private newtype TCallableExt =
+ TCallable(DataFlowCallable c, Configuration config) { // a callable paired with a configuration
+ interestingCallableSrc(c, config) or
+ interestingCallableSink(c, config)
+ } or
+ TCallableSrc() or // synthetic root matched by `callableExtSrc`
+ TCallableSink() // synthetic root matched by `callableExtSink`
+
+ private predicate callableExtSrc(TCallableSrc src) { any() } // holds for the `TCallableSrc()` value
+
+ private predicate callableExtSink(TCallableSink sink) { any() } // holds for the `TCallableSink()` value
+
+ private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) {
+ exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | // `callableStep` lifted to `TCallable`
+ callableStep(c1, c2, config) and
+ ce1 = TCallable(c1, pragma[only_bind_into](config)) and
+ ce2 = TCallable(c2, pragma[only_bind_into](config))
+ )
+ or
+ exists(Node n, Configuration config | // synthetic source root -> callable enclosing a source
+ ce1 = TCallableSrc() and
+ config.isSource(n) and
+ ce2 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ or
+ exists(Node n, Configuration config | // callable enclosing a sink -> synthetic sink root
+ ce2 = TCallableSink() and
+ config.isSink(n) and
+ ce1 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ }
+
+ private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+ callableExtStepFwd(ce2, ce1) // the forward step relation with its arguments swapped
+ }
+
+ private int distSrcExt(TCallableExt c) = // shortest number of forward steps from `TCallableSrc()` to `c`
+ shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result)
+
+ private int distSinkExt(TCallableExt c) = // shortest number of reverse steps from `TCallableSink()` to `c`
+ shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result)
+
+ private int distSrc(DataFlowCallable c, Configuration config) {
+ result = distSrcExt(TCallable(c, config)) - 1 // discount the first edge, which leaves the synthetic `TCallableSrc()` root
+ }
+
+ private int distSink(DataFlowCallable c, Configuration config) {
+ result = distSinkExt(TCallable(c, config)) - 1 // discount the first edge, which leaves the synthetic `TCallableSink()` root
+ }
+
+ private newtype TPartialAccessPath =
+ TPartialNil(DataFlowType t) or // empty path; carries only a type
+ TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } // head content plus length, bounded by `accessPathLimit()`
+
+  /**
+   * An abstraction of an access path: conceptually a list of `TypedContent`s
+   * followed by a `Type`, of which only the head element and the length are
+   * kept. When data reaches a node with a given `AccessPath`, the path gives
+   * the dereference operations leading from the value in the node to the
+   * tracked object, and the final type is the type of that tracked object.
+   */
+  private class PartialAccessPath extends TPartialAccessPath {
+    abstract string toString();
+
+    TypedContent getHead() { TPartialCons(result, _) = this }
+
+    int len() {
+      TPartialCons(_, result) = this
+      or
+      result = 0 and TPartialNil(_) = this
+    }
+
+    DataFlowType getType() {
+      result = this.getHead().getContainerType()
+      or
+      TPartialNil(result) = this
+    }
+  }
+
+ private class PartialAccessPathNil extends PartialAccessPath, TPartialNil {
+ override string toString() { // ": " followed by the pretty-printed type of this empty path
+ exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t)))
+ }
+ }
+
+  private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+    override string toString() {
+      exists(TypedContent head, int n, string prefix |
+        this = TPartialCons(head, n) and prefix = "[" + head.toString()
+      |
+        if n != 1 then result = prefix + ", ... (" + n.toString() + ")]" else result = prefix + "]"
+      )
+    }
+  }
+
+ private newtype TRevPartialAccessPath =
+ TRevPartialNil() or // empty path
+ TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } // head content plus length, bounded by `accessPathLimit()`
+
+  /**
+   * Conceptually a list of `Content`s, of which only the head
+   * element and the overall length are recorded.
+   */
+  private class RevPartialAccessPath extends TRevPartialAccessPath {
+    abstract string toString();
+
+    Content getHead() { TRevPartialCons(result, _) = this }
+
+    int len() {
+      TRevPartialCons(_, result) = this
+      or
+      result = 0 and TRevPartialNil() = this
+    }
+  }
+
+ private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil {
+ override string toString() { result = "" } // the empty path renders as the empty string
+ }
+
+  private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+    override string toString() {
+      exists(Content head, int n, string prefix |
+        this = TRevPartialCons(head, n) and prefix = "[" + head.toString()
+      |
+        if n != 1 then result = prefix + ", ... (" + n.toString() + ")]" else result = prefix + "]"
+      )
+    }
+  }
+
+ private newtype TSummaryCtx1 =
+ TSummaryCtx1None() or // no summary context
+ TSummaryCtx1Param(ParamNodeEx p) // the parameter node recorded when flow enters a callable
+
+ private newtype TSummaryCtx2 =
+ TSummaryCtx2None() or // no summary context
+ TSummaryCtx2Some(PartialAccessPath ap) // the access path recorded when flow enters a callable
+
+ private newtype TRevSummaryCtx1 =
+ TRevSummaryCtx1None() or // no reverse summary context
+ TRevSummaryCtx1Some(ReturnPosition pos) // the return position recorded when reverse flow enters a callable
+
+ private newtype TRevSummaryCtx2 =
+ TRevSummaryCtx2None() or // no reverse summary context
+ TRevSummaryCtx2Some(RevPartialAccessPath ap) // the access path recorded when reverse flow enters a callable
+
+ private newtype TPartialPathNode =
+ TPartialPathNodeFwd( // forward exploration, starting at sources
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ sourceNode(node, config) and // base case: a source with no context and an empty access path
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = TPartialNil(node.getDataFlowType()) and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit()) // only when the configuration defines an exploration limit
+ or
+ partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and // recursive case: one step from an existing forward node
+ distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ } or
+ TPartialPathNodeRev( // reverse exploration, starting at sinks
+ NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ sinkNode(node, config) and // base case: a sink with no context and an empty access path
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil() and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit()) // only when the configuration defines an exploration limit
+ or
+ exists(PartialPathNodeRev mid | // recursive case: one reverse step from an existing reverse node
+ revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+ not clearsContentCached(node.asNode(), ap.getHead()) and
+ not fullBarrier(node, config) and
+ distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathNodeMk0(
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStep(mid, node, cc, sc1, sc2, ap, config) and // one forward step from `mid`
+ not fullBarrier(node, config) and
+ not clearsContentCached(node.asNode(), ap.getHead().getContent()) and // head content must not be cleared at `node`
+ if node.asNode() instanceof CastingNode // the type check is applied at casting nodes only
+ then compatibleTypes(node.getDataFlowType(), ap.getType())
+ else any()
+ )
+ }
+
+ /**
+ * A `Node` augmented with a call context, an access path, and a configuration.
+ */
+ class PartialPathNode extends TPartialPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context (only forward nodes have one, see `ppCtx`).
+ */
+ string toStringWithContext() {
+ result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.getNodeEx().projectToNode() = result }
+
+ private NodeEx getNodeEx() { // the `NodeEx` of whichever of the two subclasses this is
+ result = this.(PartialPathNodeFwd).getNodeEx() or
+ result = this.(PartialPathNodeRev).getNodeEx()
+ }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() } // overridden by `PartialPathNodeFwd` and `PartialPathNodeRev`
+
+ /** Gets a successor of this node, if any. */
+ PartialPathNode getASuccessor() { none() } // overridden by `PartialPathNodeFwd` and `PartialPathNodeRev`
+
+ /**
+ * Gets the approximate distance to the nearest source measured in number
+ * of interprocedural steps.
+ */
+ int getSourceDistance() {
+ result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ /**
+ * Gets the approximate distance to the nearest sink measured in number
+ * of interprocedural steps.
+ */
+ int getSinkDistance() {
+ result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ private string ppAp() { // access path text, prefixed by a space unless it is empty
+ exists(string s |
+ s = this.(PartialPathNodeFwd).getAp().toString() or
+ s = this.(PartialPathNodeRev).getAp().toString()
+ |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ private string ppCtx() { // call context text; has a result for forward nodes only
+ result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+ }
+
+ /** Holds if this is a source in a forward-flow path. */
+ predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+ /** Holds if this is a sink in a reverse-flow path. */
+ predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+ }
+
+  /**
+   * Supplies the `edges` query predicate that a path-problem query needs in order to include a graph.
+   */
+  module PartialPathGraph {
+    /** Holds if `a -> b` is an edge in the graph of data flow path explanations. */
+    query predicate edges(PartialPathNode a, PartialPathNode b) { b = a.getASuccessor() }
+  }
+
+ private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd {
+ NodeEx node;
+ CallContext cc;
+ TSummaryCtx1 sc1;
+ TSummaryCtx2 sc2;
+ PartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) } // fields mirror the constructor arguments
+
+ NodeEx getNodeEx() { result = node }
+
+ CallContext getCallContext() { result = cc }
+
+ TSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ TSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ PartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PartialPathNodeFwd getASuccessor() { // `result` is reached from this node by one `partialPathStep`
+ partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+ result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+ }
+
+ predicate isSource() { // same conditions as the base case of `TPartialPathNodeFwd`, minus the barrier and limit checks
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap instanceof TPartialNil
+ }
+ }
+
+ private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev {
+ NodeEx node;
+ TRevSummaryCtx1 sc1;
+ TRevSummaryCtx2 sc2;
+ RevPartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) } // fields mirror the constructor arguments
+
+ NodeEx getNodeEx() { result = node }
+
+ TRevSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ TRevSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ RevPartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PartialPathNodeRev getASuccessor() { // `result` is the node from which this one was derived by a reverse step
+ revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+ this.getAp(), this.getConfiguration())
+ }
+
+ predicate isSink() { // same conditions as the base case of `TPartialPathNodeRev`, minus the barrier and limit checks
+ sinkNode(node, config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil()
+ }
+ }
+
+ private predicate partialPathStep(
+ PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and // guards the two local-step cases only
+ (
+ localFlowStep(mid.getNodeEx(), node, config) and // local step: contexts and access path unchanged
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(mid.getNodeEx(), node, config) and // additional local step: needs an empty access path
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ )
+ or
+ jumpStep(mid.getNodeEx(), node, config) and // jump step: call and summary contexts are reset
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(mid.getNodeEx(), node, config) and // additional jump step: contexts reset, empty access path required
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ or
+ partialPathStoreStep(mid, _, _, node, ap) and // store step: `ap` is the extended access path
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(PartialAccessPath ap0, TypedContent tc | // read step: `ap` is a path that some store extended to `ap0`
+ partialPathReadStep(mid, ap0, tc, node, cc, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsFwd(ap, tc, ap0, config)
+ )
+ or
+ partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) // argument to parameter
+ or
+ partialPathOutOfCallable(mid, node, cc, ap, config) and // return node to out-node: summary context is reset
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None()
+ or
+ partialPathThroughCallable(mid, node, cc, ap, config) and // argument to out-node through a callee: summary context kept
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ bindingset[result, i] // both `i` and `result` must be bound by the caller
+ private int unbindInt(int i) { i <= result and i >= result } // holds exactly when `i = result`, written as two inequalities
+
+ pragma[inline]
+ private predicate partialPathStoreStep(
+ PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node,
+ PartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode, DataFlowType contentType |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and // `ap1` is the access path before the store
+ store(midNode, tc, node, contentType, mid.getConfiguration()) and
+ ap2.getHead() = tc and // the stored content becomes the head of `ap2`
+ ap2.len() = unbindInt(ap1.len() + 1) and // `ap2` is one element longer than `ap1`
+ compatibleTypes(ap1.getType(), contentType)
+ )
+ }
+
+  pragma[nomagic]
+  private predicate apConsFwd(
+    PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config
+  ) {
+    exists(PartialPathNodeFwd storeSrc |
+      storeSrc.getConfiguration() = config and
+      partialPathStoreStep(storeSrc, ap1, tc, _, ap2)
+    )
+  }
+
+ pragma[nomagic]
+ private predicate partialPathReadStep(
+ PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc,
+ Configuration config
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and // `ap` is the access path before the read
+ read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and
+ ap.getHead() = tc and // the content read must be the current head
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ cc = mid.getCallContext()
+ )
+ }
+
+  private predicate partialPathOutOfCallable0(
+    PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap,
+    Configuration config
+  ) {
+    innercc instanceof CallContextNoCall and
+    mid.getCallContext() = innercc and
+    mid.getNodeEx().(RetNodeEx).getReturnPosition() = pos and
+    mid.getAp() = ap and
+    mid.getConfiguration() = config
+  }
+
+ pragma[nomagic]
+ private predicate partialPathOutOfCallable1(
+ PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ partialPathOutOfCallable0(mid, pos, innercc, ap, config) and // `mid` is a return node at `pos`
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call) // `call` is a call that `c` may return to under `innercc`
+ |
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+ )
+ }
+
+  private predicate partialPathOutOfCallable(
+    PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+  ) {
+    // step from the return node `mid` to an out-node of a call it returns to
+    exists(DataFlowCall site, ReturnKindExt rk |
+      rk.getAnOutNode(site) = out.asNode() and
+      partialPathOutOfCallable1(mid, site, rk, cc, ap, config)
+    )
+  }
+
+  pragma[noinline]
+  private predicate partialPathIntoArg(
+    PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(ArgNode argument |
+      argument.argumentOf(call, i) and
+      mid.getNodeEx().asNode() = argument and
+      mid.getCallContext() = cc and
+      mid.getAp() = ap and
+      mid.getConfiguration() = config
+    )
+  }
+
+ pragma[nomagic]
+ private predicate partialPathIntoCallable0(
+ PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc,
+ DataFlowCall call, PartialAccessPath ap, Configuration config
+ ) {
+ partialPathIntoArg(mid, i, outercc, call, ap, config) and // `mid` is argument `i` of `call`
+ callable = resolveCall(call, outercc) // `callable` is a target of `call` under `outercc`
+ }
+
+ private predicate partialPathIntoCallable(
+ PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc,
+ TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(int i, DataFlowCallable callable |
+ partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and
+ p.isParameterOf(callable, i) and // `p` is the parameter matching argument position `i`
+ sc1 = TSummaryCtx1Param(p) and // the summary context records the parameter entered ...
+ sc2 = TSummaryCtx2Some(ap) // ... and the access path on entry
+ |
+ if recordDataFlowCallSite(call, callable)
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+ }
+
+  pragma[nomagic]
+  private predicate paramFlowsThroughInPartialPath(
+    ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+    PartialAccessPath ap, Configuration config
+  ) {
+    exists(PartialPathNodeFwd retPath, RetNodeEx retEx |
+      retEx = retPath.getNodeEx() and
+      retEx.getKind() = kind and
+      retPath.getCallContext() = cc and
+      retPath.getSummaryCtx1() = sc1 and
+      retPath.getSummaryCtx2() = sc2 and
+      retPath.getAp() = ap and
+      retPath.getConfiguration() = config
+    )
+  }
+
+ pragma[noinline]
+ private predicate partialPathThroughCallable0(
+ DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 |
+ partialPathIntoCallable(mid, _, cc, innercc, sc1, sc2, call, _, config) and // `mid` flows into a callee at `call`
+ paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) // a return node is reached with the same contexts
+ )
+ }
+
+  private predicate partialPathThroughCallable(
+    PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+  ) {
+    exists(ReturnKindExt rk, DataFlowCall site |
+      rk.getAnOutNode(site) = out.asNode() and
+      partialPathThroughCallable0(site, mid, rk, cc, ap, config)
+    )
+  }
+
+ private predicate revPartialPathStep(
+ PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
+ RevPartialAccessPath ap, Configuration config
+ ) {
+ localFlowStep(node, mid.getNodeEx(), config) and // local step into `mid`: contexts and access path unchanged
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(node, mid.getNodeEx(), config) and // additional local step: needs an empty access path
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ jumpStep(node, mid.getNodeEx(), config) and // jump step: summary context is reset
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(node, mid.getNodeEx(), config) and // additional jump step: context reset, empty access path required
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ revPartialPathReadStep(mid, _, _, node, ap) and // read step taken in reverse: `ap` is the extended access path
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(RevPartialAccessPath ap0, Content c | // store step taken in reverse: `ap` is a path that some read extended to `ap0`
+ revPartialPathStoreStep(mid, ap0, c, node, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsRev(ap, c, ap0, config)
+ )
+ or
+ exists(ParamNodeEx p | // parameter back to argument, only when `mid` has no summary context
+ mid.getNodeEx() = p and
+ viableParamArgEx(_, p, node) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ or
+ exists(ReturnPosition pos | // out-node back to a return node at `pos`
+ revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and
+ pos = getReturnPosition(node.asNode())
+ )
+ or
+ revPartialPathThroughCallable(mid, node, ap, config) and // out-node back to an argument through a callee: summary context kept
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ pragma[inline]
+ private predicate revPartialPathReadStep(
+ PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node,
+ RevPartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and // `ap1` is the access path at `mid`
+ read(node, c, midNode, mid.getConfiguration()) and // `node` is read from, yielding `midNode`
+ ap2.getHead() = c and // the content read becomes the head of `ap2`
+ ap2.len() = unbindInt(ap1.len() + 1) // `ap2` is one element longer than `ap1`
+ )
+ }
+
+  pragma[nomagic]
+  private predicate apConsRev(
+    RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config
+  ) {
+    exists(PartialPathNodeRev readTarget |
+      readTarget.getConfiguration() = config and
+      revPartialPathReadStep(readTarget, ap1, c, _, ap2)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate revPartialPathStoreStep(
+    PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config
+  ) {
+    exists(TypedContent typed, NodeEx target |
+      typed.getContent() = c and
+      mid.getNodeEx() = target and
+      mid.getAp() = ap and
+      mid.getConfiguration() = config and
+      ap.getHead() = c and
+      store(node, typed, target, _, config)
+    )
+  }
+
+ pragma[nomagic]
+ private predicate revPartialPathIntoReturn(
+ PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2,
+ DataFlowCall call, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(NodeEx out |
+ mid.getNodeEx() = out and
+ viableReturnPosOutEx(call, pos, out) and // `out` receives the value returned at `pos` for `call`
+ sc1 = TRevSummaryCtx1Some(pos) and // the summary context records the return position entered ...
+ sc2 = TRevSummaryCtx2Some(ap) and // ... and the access path on entry
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+  pragma[nomagic]
+  private predicate revPartialPathFlowsThrough(
+    int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(ParamNodeEx param, PartialPathNodeRev paramPath |
+      param.getPosition() = pos and
+      paramPath.getNodeEx() = param and
+      paramPath.getSummaryCtx1() = sc1 and
+      paramPath.getSummaryCtx2() = sc2 and
+      paramPath.getAp() = ap and
+      paramPath.getConfiguration() = config
+    )
+  }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable0(
+ DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 |
+ revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and // reverse flow enters a callee from an out-node of `call`
+ revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) // a parameter at `pos` is reached with the same contexts
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable(
+ PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, int pos |
+ revPartialPathThroughCallable0(call, mid, pos, ap, config) and
+ node.asNode().(ArgNode).argumentOf(call, pos) // `node` is the argument of `call` at position `pos`
+ )
+ }
+}
+
+import FlowExploration
+
+private predicate partialFlow(
+  PartialPathNode source, PartialPathNode node, Configuration configuration
+) {
+  source.isFwdSource() and
+  configuration = source.getConfiguration() and
+  source.getASuccessor+() = node
+}
+
+private predicate revPartialFlow(
+  PartialPathNode node, PartialPathNode sink, Configuration configuration
+) {
+  sink.isRevSink() and
+  configuration = sink.getConfiguration() and
+  sink = node.getASuccessor+()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll
new file mode 100644
index 00000000000..4ca06c93362
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll
@@ -0,0 +1,4559 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isBarrier`.
+ * // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+ bindingset[this] // `this` must already be bound, e.g. by a subclass's characteristic predicate
+ Configuration() { any() } // imposes no restriction of its own on the string value
+
+ /**
+ * Holds if `source` is a relevant data flow source.
+ */
+ abstract predicate isSource(Node source);
+
+ /**
+ * Holds if `sink` is a relevant data flow sink.
+ */
+ abstract predicate isSink(Node sink);
+
+ /**
+ * Holds if data flow through `node` is prohibited. This completely removes
+ * `node` from the data flow graph.
+ */
+ predicate isBarrier(Node node) { none() }
+
+ /** Holds if data flow into `node` is prohibited. */
+ predicate isBarrierIn(Node node) { none() }
+
+ /** Holds if data flow out of `node` is prohibited. */
+ predicate isBarrierOut(Node node) { none() }
+
+ /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+ predicate isBarrierGuard(BarrierGuard guard) { none() }
+
+ /**
+ * Holds if the additional flow step from `node1` to `node2` must be taken
+ * into account in the analysis.
+ */
+ predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
+
+ /**
+ * Holds if an arbitrary number of implicit read steps of content `c` may be
+ * taken at `node`.
+ */
+ predicate allowImplicitRead(Node node, Content c) { none() }
+
+ /**
+ * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a
+ * value of 0 disables field flow), or a larger value to get more results.
+ */
+ int fieldFlowBranchLimit() { result = 2 } // default limit is 2
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ */
+ predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ *
+ * The corresponding paths are generated from the end-points and the graph
+ * included in the module `PathGraph`.
+ */
+ predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowTo(Node sink) { hasFlow(_, sink) }
+
+ /**
+ * Holds if data may flow from some source to the node `exprNode(sink)` of expression `sink` for this configuration.
+ */
+ predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
+
+ /**
+ * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+ * measured in approximate number of interprocedural steps.
+ */
+ int explorationLimit() { none() } // no result by default, which leaves partial-flow exploration disabled
+
+ /**
+ * Holds if there is a partial data flow path from `source` to `node`. The
+ * approximate distance between `node` and the closest source is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards sink definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sources is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */
+ final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+ partialFlow(source, node, this) and
+ dist = node.getSourceDistance()
+ }
+
+ /**
+ * Holds if there is a partial data flow path from `node` to `sink`. The
+ * approximate distance between `node` and the closest sink is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards source definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sinks is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ *
+ * Note that reverse flow has slightly lower precision than the corresponding
+ * forward flow, as reverse flow disregards type pruning among other features.
+ */
+ final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+ revPartialFlow(node, sink, this) and
+ dist = node.getSinkDistance()
+ }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+ bindingset[this]
+ ConfigurationRecursionPrevention() { any() }
+
+ override predicate hasFlow(Node source, Node sink) {
+ strictcount(Node n | this.isSource(n)) < 0 // never holds: a strict count is at least 1; it only references `isSource` inside an aggregate
+ or
+ strictcount(Node n | this.isSink(n)) < 0 // likewise for `isSink`
+ or
+ strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 // likewise for `isAdditionalFlowStep`
+ or
+ super.hasFlow(source, sink) // the only disjunct that can produce results
+ }
+}
+
+private newtype TNodeEx = // extended node: a plain `Node` or a synthetic implicit-read node
+ TNodeNormal(Node n) or
+ TNodeImplicitRead(Node n, boolean hasRead) {
+ any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] // two extra nodes per node where some configuration allows implicit reads
+ }
+
+private class NodeEx extends TNodeEx { // wrapper over `TNodeEx`: either a normal node or an implicit-read node
+ string toString() {
+ result = this.asNode().toString()
+ or
+ exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") // implicit-read nodes get an " [Ext]" suffix
+ }
+
+ Node asNode() { this = TNodeNormal(result) } // has a result only for normal nodes
+
+ predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) }
+
+ Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } // underlying `Node` for either branch
+
+ pragma[nomagic]
+ private DataFlowCallable getEnclosingCallable0() {
+ nodeEnclosingCallable(this.projectToNode(), result) // uses the projection, so implicit-read nodes share their node's callable
+ }
+
+ pragma[inline]
+ DataFlowCallable getEnclosingCallable() { // inlined wrapper around `getEnclosingCallable0` with binding-direction hints
+ pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result)
+ }
+
+ pragma[nomagic]
+ private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } // uses `asNode`, so no result for implicit-read nodes
+
+ pragma[inline]
+ DataFlowType getDataFlowType() { // inlined wrapper around `getDataFlowType0` with binding-direction hints
+ pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result)
+ }
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) // location of the underlying node
+ }
+}
+
+private class ArgNodeEx extends NodeEx { // a normal extended node whose underlying node is an `ArgNode`
+ ArgNodeEx() { this.asNode() instanceof ArgNode }
+}
+
+private class ParamNodeEx extends NodeEx { // a normal extended node whose underlying node is a `ParamNode`
+ ParamNodeEx() { this.asNode() instanceof ParamNode }
+
+ predicate isParameterOf(DataFlowCallable c, int i) { // forwards to `ParamNode.isParameterOf`
+ this.asNode().(ParamNode).isParameterOf(c, i)
+ }
+
+ int getPosition() { this.isParameterOf(_, result) } // parameter index, for any callable
+}
+
+private class RetNodeEx extends NodeEx { // a normal extended node whose underlying node is a `ReturnNodeExt`
+ RetNodeEx() { this.asNode() instanceof ReturnNodeExt }
+
+ ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } // forwards to the library-level `getReturnPosition`
+
+ ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() }
+}
+
+private predicate inBarrier(NodeEx node, Configuration config) { // Holds if `node` is both an in-barrier and a source of `config`.
+ exists(Node n |
+ node.asNode() = n and
+ config.isBarrierIn(n) and
+ config.isSource(n) // in-barriers that are not sources are handled by `fullBarrier`
+ )
+}
+
+private predicate outBarrier(NodeEx node, Configuration config) { // Holds if `node` is both an out-barrier and a sink of `config`.
+ exists(Node n |
+ node.asNode() = n and
+ config.isBarrierOut(n) and
+ config.isSink(n) // out-barriers that are not sinks are handled by `fullBarrier`
+ )
+}
+
+private predicate fullBarrier(NodeEx node, Configuration config) { // Holds if `node` blocks all flow for `config`.
+ exists(Node n | node.asNode() = n |
+ config.isBarrier(n) // explicit barrier
+ or
+ config.isBarrierIn(n) and
+ not config.isSource(n) // in-barrier that is not a source
+ or
+ config.isBarrierOut(n) and
+ not config.isSink(n) // out-barrier that is not a sink
+ or
+ exists(BarrierGuard g |
+ config.isBarrierGuard(g) and
+ n = g.getAGuardedNode() // node guarded by a configured barrier guard
+ )
+ )
+}
+
+pragma[nomagic] // Holds if `node` is a normal node that `config` declares as a source.
+private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) }
+
+pragma[nomagic] // Holds if `node` is a normal node that `config` declares as a sink.
+private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) }
+
+/**
+ * Holds if data can flow in one local step from `node1` to `node2`, respecting the barriers of `config`.
+ */
+private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 |
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ simpleLocalFlowStepExt(n1, n2) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // step from a normal node to its implicit-read node with `hasRead = false`
+ config.allowImplicitRead(n, _) and
+ node1.asNode() = n and
+ node2.isImplicitReadNode(n, false)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` does not jump between callables (barriers of `config` are respected).
+ */
+private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 |
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and // same callable: a local step
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // step from the implicit-read node with `hasRead = true` back to the normal node
+ config.allowImplicitRead(n, _) and
+ node1.isImplicitReadNode(n, true) and
+ node2.asNode() = n
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` in a way that discards call contexts (barriers of `config` are respected).
+ */
+private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 |
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ jumpStepCached(n1, n2) and // the underlying jump relation on plain nodes
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` jumps between callables (barriers of `config` are respected).
+ */
+private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 |
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and // different callables: a jump step
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { // Holds if content `c` is read from `node1` into `node2`.
+ read(node1.asNode(), c, node2.asNode()) // ordinary read step between normal nodes
+ or
+ exists(Node n | // implicit read: from either implicit-read node of `n` to the one with `hasRead = true`
+ node2.isImplicitReadNode(n, true) and
+ node1.isImplicitReadNode(n, _) and
+ config.allowImplicitRead(n, c)
+ )
+}
+
+private predicate store( // Holds if `node1` is stored into `node2` under typed content `tc`, and that content is read somewhere.
+ NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
+) {
+ store(node1.asNode(), tc, node2.asNode(), contentType) and
+ read(_, tc.getContent(), _, config) // drop stores whose content has no read step under `config`
+}
+
+pragma[nomagic] // `viableReturnPosOut` lifted to `NodeEx`; `out` must be a normal node.
+private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) {
+ viableReturnPosOut(call, pos, out.asNode())
+}
+
+pragma[nomagic] // `viableParamArg` lifted to `ParamNodeEx`/`ArgNodeEx`.
+private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) {
+ viableParamArg(call, p.asNode(), arg.asNode())
+}
+
+/**
+ * Holds if field flow should be used for the given configuration, i.e. its `fieldFlowBranchLimit()` is at least 1.
+ */
+private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 }
+
+private module Stage1 { // first pruning stage: reachability with a Boolean call context and no access paths
+ class ApApprox = Unit; // all access-path types are `Unit` in this stage
+
+ class Ap = Unit;
+
+ class ApOption = Unit;
+
+ class Cc = boolean; // call context: whether the node was reached through an argument
+
+ /* Begin: Stage 1 logic. */
+ /**
+ * Holds if `node` is reachable from a source in the configuration `config`.
+ *
+ * The Boolean `cc` records whether the node is reached through an
+ * argument in a call.
+ */
+ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) {
+ not fullBarrier(node, config) and
+ (
+ sourceNode(node, config) and
+ cc = false
+ or
+ exists(NodeEx mid | // local step: `cc` is carried over
+ fwdFlow(mid, cc, config) and
+ localFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step: `cc` is carried over
+ fwdFlow(mid, cc, config) and
+ additionalLocalFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: `cc` is reset to false
+ fwdFlow(mid, _, config) and
+ jumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: `cc` is reset to false
+ fwdFlow(mid, _, config) and
+ additionalJumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // store
+ exists(NodeEx mid |
+ useFieldFlow(config) and
+ fwdFlow(mid, cc, config) and
+ store(mid, _, node, _, config) and
+ not outBarrier(mid, config)
+ )
+ or
+ // read
+ exists(Content c |
+ fwdFlowRead(c, node, cc, config) and
+ fwdFlowConsCand(c, config) and // only contents that were stored in the forward flow
+ not inBarrier(node, config)
+ )
+ or
+ // flow into a callable
+ exists(NodeEx arg |
+ fwdFlow(arg, _, config) and
+ viableParamArgEx(_, node, arg) and
+ cc = true
+ )
+ or
+ // flow out of a callable
+ exists(DataFlowCall call |
+ fwdFlowOut(call, node, false, config) and
+ cc = false
+ or
+ fwdFlowOutFromArg(call, node, config) and
+ fwdFlowIsEntered(call, cc, config) // restore the call context of the argument that entered `call`
+ )
+ )
+ }
+
+ private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } // `fwdFlow/3` with the call context ignored
+
+ pragma[nomagic] // Holds if `node` is the target of a read of `c` from a node reached forward with context `cc`.
+ private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ read(mid, c, node, config)
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node, TypedContent tc |
+ not fullBarrier(node, config) and
+ useFieldFlow(config) and
+ fwdFlow(mid, _, config) and
+ store(mid, tc, node, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ pragma[nomagic] // Holds if some return node at `pos` is reached forward with context `cc`.
+ private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+ exists(RetNodeEx ret |
+ fwdFlow(ret, cc, config) and
+ ret.getReturnPosition() = pos
+ )
+ }
+
+ pragma[nomagic] // Holds if `out` is a viable out-node of `call` for a return position reached with context `cc`.
+ private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) {
+ exists(ReturnPosition pos |
+ fwdFlowReturnPosition(pos, cc, config) and
+ viableReturnPosOutEx(call, pos, out)
+ )
+ }
+
+ pragma[nomagic] // `fwdFlowOut` restricted to `cc = true`, i.e. flow that entered through an argument.
+ private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) {
+ fwdFlowOut(call, out, true, config)
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+ exists(ArgNodeEx arg |
+ fwdFlow(arg, cc, config) and
+ viableParamArgEx(call, _, arg)
+ )
+ }
+
+ /**
+ * Holds if `node` is part of a path from a source to a sink in the
+ * configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from
+ * the enclosing callable in order to reach a sink.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+ revFlow0(node, toReturn, config) and
+ fwdFlow(node, config) // reverse flow is restricted to nodes reached by forward flow
+ }
+
+ pragma[nomagic] // Reverse-flow step relation; `revFlow/3` intersects it with `fwdFlow`.
+ private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) {
+ fwdFlow(node, config) and
+ sinkNode(node, config) and
+ toReturn = false
+ or
+ exists(NodeEx mid | // local step: `toReturn` is carried over
+ localFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step: `toReturn` is carried over
+ additionalLocalFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: `toReturn` is reset to false
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: `toReturn` is reset to false
+ additionalJumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // store
+ exists(Content c |
+ revFlowStore(c, node, toReturn, config) and
+ revFlowConsCand(c, config) // only contents that are read in the reverse flow
+ )
+ or
+ // read
+ exists(NodeEx mid, Content c |
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(mid, toReturn, pragma[only_bind_into](config))
+ )
+ or
+ // flow into a callable
+ exists(DataFlowCall call |
+ revFlowIn(call, node, false, config) and
+ toReturn = false
+ or
+ revFlowInToReturn(call, node, config) and
+ revFlowIsReturned(call, toReturn, config) // restore the `toReturn` flag of the out-node of `call`
+ )
+ or
+ // flow out of a callable
+ exists(ReturnPosition pos |
+ revFlowOut(pos, config) and
+ node.(RetNodeEx).getReturnPosition() = pos and
+ toReturn = true
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node |
+ fwdFlow(node, pragma[only_bind_into](config)) and
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic] // Holds if `node` is stored with content `c` into a node that is in `revFlow` with flag `toReturn`.
+ private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) {
+ exists(NodeEx mid, TypedContent tc |
+ revFlow(mid, toReturn, pragma[only_bind_into](config)) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ store(node, tc, mid, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of both a read and a store in the flow covered
+ * by `revFlow`.
+ */
+ private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+ revFlowConsCand(c, conf) and
+ revFlowStore(c, _, _, conf)
+ }
+
+ pragma[nomagic] // Holds if `out` is a viable out-node of `call` for a return position reached by forward flow.
+ predicate viableReturnPosOutNodeCandFwd1(
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+ ) {
+ fwdFlowReturnPosition(pos, _, config) and
+ viableReturnPosOutEx(call, pos, out)
+ }
+
+ pragma[nomagic] // Holds if some out-node for return position `pos` is in `revFlow`.
+ private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+ exists(DataFlowCall call, NodeEx out |
+ revFlow(out, _, config) and
+ viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+ )
+ }
+
+ pragma[nomagic] // Holds if `arg` is a viable argument for parameter `p` at `call` and `arg` is reached by forward flow.
+ predicate viableParamArgNodeCandFwd1(
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+ ) {
+ viableParamArgEx(call, p, arg) and
+ fwdFlow(arg, config)
+ }
+
+ pragma[nomagic] // Holds if `arg` feeds, at `call`, a parameter that is in `revFlow` with flag `toReturn`.
+ private predicate revFlowIn(
+ DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ revFlow(p, toReturn, config) and
+ viableParamArgNodeCandFwd1(call, p, arg, config)
+ )
+ }
+
+ pragma[nomagic] // `revFlowIn` restricted to `toReturn = true`.
+ private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) {
+ revFlowIn(call, arg, true, config)
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+ exists(NodeEx out |
+ revFlow(out, toReturn, config) and
+ fwdFlowOutFromArg(call, out, config)
+ )
+ }
+
+ pragma[nomagic] // Store steps into a `revFlow` node whose content is both read and stored in `revFlow`.
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Content c |
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(node2, pragma[only_bind_into](config)) and
+ store(node1, tc, node2, contentType, config) and
+ c = tc.getContent() and
+ exists(ap1) // `ap1` is a `Unit` placeholder in this stage
+ )
+ }
+
+ pragma[nomagic] // Read steps into a `revFlow` node whose content is both read and stored in `revFlow`.
+ predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(n2, pragma[only_bind_into](config)) and
+ read(n1, c, n2, pragma[only_bind_into](config))
+ }
+
+ pragma[nomagic] // `revFlow/3` with the `toReturn` flag ignored.
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) }
+
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { // `revFlow/3` plus `Unit` placeholders for the access-path columns
+ revFlow(node, toReturn, config) and exists(returnAp) and exists(ap)
+ }
+
+ private predicate throughFlowNodeCand(NodeEx node, Configuration config) { // node reached via an argument that must also return to reach a sink
+ revFlow(node, true, config) and
+ fwdFlow(node, true, config) and
+ not inBarrier(node, config) and
+ not outBarrier(node, config)
+ }
+
+ /** Holds if flow may return from `callable`. */
+ pragma[nomagic]
+ private predicate returnFlowCallableNodeCand(
+ DataFlowCallable callable, ReturnKindExt kind, Configuration config
+ ) {
+ exists(RetNodeEx ret |
+ throughFlowNodeCand(ret, config) and
+ callable = ret.getEnclosingCallable() and
+ kind = ret.getKind()
+ )
+ }
+
+ /**
+ * Holds if flow may enter through `p` and reach a return node making `p` a
+ * candidate for the origin of a summary.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(ReturnKindExt kind |
+ throughFlowNodeCand(p, config) and
+ returnFlowCallableNodeCand(c, kind, config) and
+ p.getEnclosingCallable() = c and
+ exists(ap) and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition()
+ )
+ }
+
+ pragma[nomagic] // Holds if some argument of `call` is in `revFlow` via flow into `call` that returns again.
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(ArgNodeEx arg, boolean toReturn |
+ revFlow(arg, toReturn, config) and
+ revFlowInToReturn(call, arg, config) and
+ revFlowIsReturned(call, toReturn, config)
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { // size counters for the forward (`fwd = true`) and reverse relations
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, config)) and
+ fields = count(Content f0 | fwdFlowConsCand(f0, config)) and
+ conscand = -1 and // always -1 in this stage
+ tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, config)) and
+ fields = count(Content f0 | revFlowConsCand(f0, config)) and
+ conscand = -1 and // always -1 in this stage
+ tuples = count(NodeEx n, boolean b | revFlow(n, b, config))
+ }
+ /* End: Stage 1 logic. */
+}
+
+pragma[noinline] // `localFlowStep` restricted to targets that survive stage 1 reverse flow.
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ Stage1::revFlow(node2, config) and
+ localFlowStep(node1, node2, config)
+}
+
+pragma[noinline] // `additionalLocalFlowStep` restricted to targets that survive stage 1 reverse flow.
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ Stage1::revFlow(node2, config) and
+ additionalLocalFlowStep(node1, node2, config)
+}
+
+pragma[nomagic] // `Stage1::viableReturnPosOutNodeCandFwd1` restricted to out-nodes in stage 1 reverse flow.
+private predicate viableReturnPosOutNodeCand1(
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+ Stage1::revFlow(out, config) and
+ Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink (per stage 1).
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+ viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and
+ Stage1::revFlow(ret, config) and
+ not outBarrier(ret, config) and // the edge itself must not be cut by an out/in barrier
+ not inBarrier(out, config)
+}
+
+pragma[nomagic] // `Stage1::viableParamArgNodeCandFwd1` restricted to arguments in stage 1 reverse flow.
+private predicate viableParamArgNodeCand1(
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+ Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and
+ Stage1::revFlow(arg, config)
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink (per stage 1).
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+ viableParamArgNodeCand1(call, p, arg, config) and
+ Stage1::revFlow(p, config) and
+ not outBarrier(arg, config) and // the edge itself must not be cut by an out/in barrier
+ not inBarrier(p, config)
+}
+
+/**
+ * Gets the amount of forward branching on the origin of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts. Has no result when `n1` has no such outgoing edge (`strictcount`).
+ */
+private int branch(NodeEx n1, Configuration conf) {
+ result =
+ strictcount(NodeEx n |
+ flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf)
+ )
+}
+
+/**
+ * Gets the amount of backward branching on the target of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts. Has no result when `n2` has no such incoming edge (`strictcount`).
+ */
+private int join(NodeEx n2, Configuration conf) {
+ result =
+ strictcount(NodeEx n |
+ flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf)
+ )
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink. The
+ * `allowsFieldFlow` flag indicates whether the branching is within the limit
+ * specified by the configuration (`fieldFlowBranchLimit()`).
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, ret, out, config) and
+ exists(int b, int j |
+ b = branch(ret, config) and
+ j = join(out, config) and
+ if b.minimum(j) <= config.fieldFlowBranchLimit() // the smaller of fan-out and fan-in is compared with the limit
+ then allowsFieldFlow = true
+ else allowsFieldFlow = false
+ )
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink. The `allowsFieldFlow` flag indicates whether
+ * the branching is within the limit specified by the configuration (`fieldFlowBranchLimit()`).
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+ flowIntoCallNodeCand1(call, arg, p, config) and
+ exists(int b, int j |
+ b = branch(arg, config) and
+ j = join(p, config) and
+ if b.minimum(j) <= config.fieldFlowBranchLimit() // the smaller of fan-out and fan-in is compared with the limit
+ then allowsFieldFlow = true
+ else allowsFieldFlow = false
+ )
+}
+
+private module Stage2 {
+ module PrevStage = Stage1;
+
+ class ApApprox = PrevStage::Ap;
+
+ class Ap = boolean;
+
+ class ApNil extends Ap {
+ ApNil() { this = false }
+ }
+
+ bindingset[result, ap]
+ private ApApprox getApprox(Ap ap) { any() }
+
+ private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) }
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) }
+
+ pragma[inline]
+ private Content getHeadContent(Ap ap) { exists(result) and ap = true }
+
+ class ApOption = BooleanOption;
+
+ ApOption apNone() { result = TBooleanNone() }
+
+ ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+ class Cc = CallContext;
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ Cc ccNone() { result instanceof CallContextAny }
+
+ private class LocalCc = Unit;
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSiteDispatch(call, c)
+ then result = TSpecificCall(call)
+ else result = TSomeCall()
+ }
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
+
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ (
+ preservesValue = true and
+ localFlowStepNodeCand1(node1, node2, config)
+ or
+ preservesValue = false and
+ additionalLocalFlowStepNodeCand1(node1, node2, config)
+ ) and
+ exists(ap) and
+ exists(lcc)
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand1/5;
+
+ private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 2 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source: starts with no call context and a nil access path
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local step from an already reached node `mid`
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ // the access path is carried over unchanged
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ // only taken from a nil access path; `localStep` supplies the new `ap`
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap` but resets the call context
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from a nil access path, and resets the call context
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ // record the argument access path only if the previous stage found
+ // that this parameter may flow through with the approximation `apa`
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow through a callable: out of `call`, resuming the context in which
+ // the matching argument was entered
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches `node1` with access path `ap1` (in context `cc`
+ * and `argAp`), the previous stage has a store-step candidate of `tc` from
+ * `node1` to `node2` for the approximation of `ap1`, and the content type of
+ * that store passes `typecheckStore` against `ap1`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ *
+ * The store is one found by `fwdFlowStore` for some typed content whose
+ * content is `c`, and `cons` is `apCons` of that typed content and `tail`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches `node1` with access path `ap` (in context `cc`
+ * and `argAp`), the head content of `ap` is `c`, and the previous stage has
+ * a read-step candidate of `c` from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches some argument of `call` with call context
+ * `outercc`, argument access path option `argAp` and access path `ap`, and
+ * `flowIntoCall` relates that argument to parameter `p`.
+ *
+ * `innercc` is the call context given by `getCallContextCall` for the
+ * callable enclosing `p`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ // a non-nil access path may only enter when field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches some return node in a `CcNoCall` context with
+ * `argAp` and `ap`, and `flowOutOfCall` relates that return node to `out`.
+ *
+ * `ccOut` is the call context given by `getCallContextReturn` for the
+ * callable enclosing the return node and the call being returned from.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ // a non-nil access path may only leave when field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches some return node with access path `ap` in a
+ * `CcCall` context that matches `call` and with recorded argument access
+ * path `argAp`, and `flowThroughOutOfCall` relates that return node to `out`
+ * at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ // a non-nil access path may only leave when field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ *
+ * `ap` is the access path at the argument, while `cc` and `argAp` are the
+ * call context and argument access path option with which the argument
+ * itself was reached.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlowStore` has a store of `tc` from `node1` (access path
+ * `ap1`) to `node2`, `ap2` is `apCons(tc, ap1)`, and `fwdFlowRead` has some
+ * read of the content of `tc` from access path `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if `fwdFlowRead` has a read of `c` from `n1` (access path `ap1`) to
+ * `n2`, and `ap2` is a tail access path for which
+ * `fwdFlowConsCand(ap1, c, ap2, config)` holds.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if forward flow reaches some node by flowing through `call`: the
+ * node is an output of `call` per `fwdFlowOutFromArg`, the matching entry
+ * into `call` is given by `fwdFlowIsEntered`, and `fwdFlow` holds for the
+ * node with the resulting context.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` relates `arg` to `p` at `call`, `arg` is reached
+ * by `fwdFlow`, the previous stage found that `p` may flow through, and
+ * `callMayFlowThroughFwd` holds for `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ // keep only nodes and access paths that forward flow also reaches
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ /**
+ * Helper for `revFlow`: the individual reverse-flow cases, before the
+ * result is intersected with `fwdFlow` in `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink: reached by forward flow with a nil access path
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // local step that carries the access path over unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // local step that is only taken between nil access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap` but resets the return state
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: only between nil access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow through a callable: into `call` at an argument, resuming the
+ // return state of the matching output of `call`
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ // record the returned access path only if forward flow reaches `node`
+ // in a call context with a recorded argument access path
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if `revFlow` reaches `mid` with access path `ap0` (and return state
+ * `toReturn`/`returnAp`), and `storeStepFwd` has a store of `tc`, whose
+ * content is `c`, from `node` with access path `ap` to `mid` with `ap0`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` equals `tail`; the alias is routed through
+ // `pragma[only_bind_into]`, presumably to steer join ordering
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if `revFlow` reaches some node with access path `ap` and return
+ * state `toReturn`/`returnAp`, and `flowOutOfCall` relates return node `ret`
+ * to that node at `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ // a non-nil access path requires that field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `revFlow` reaches some parameter with `toReturn = false`,
+ * `returnAp` and access path `ap`, and `flowIntoCall` relates `arg` to that
+ * parameter.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ // a non-nil access path requires that field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `revFlow` reaches some parameter with `toReturn = true`, recorded
+ * return access path `returnAp` and access path `ap`, and
+ * `flowThroughIntoCall` relates `arg` to that parameter at `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ // a non-nil access path requires that field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ *
+ * `ap` is the access path at the return node, which must also be reached by
+ * `fwdFlow` in a `CcCall` context matching `call` with a recorded argument
+ * access path. `toReturn` and `returnAp` are the return state of the output.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if `store(node1, tc, node2, contentType, config)` holds and this
+ * store, applied to access path `ap1` at `node1`, is covered by both
+ * `revFlowStore` and `revFlowConsCand` for the same resulting access path
+ * and content.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if `readStepFwd` has a read of `c` from `node1` to `node2`, `node2`
+ * is reached by `revFlow` with the access path resulting from the read, and
+ * `revFlowStore` has a matching store of `c` for the same pair of access
+ * paths.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `node` is covered by the five-argument `revFlow` for some return
+ * state and some access path.
+ */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /** Holds if `storeStepFwd` has a store of `tc` onto access path `ap`. */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /** Holds if `storeStepCand` has a store of `tc` onto access path `ap`. */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if `revFlow` reaches parameter `p` of callable `c` with access path
+ * `ap`, `toReturn = true`, and recorded return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, with access path `ap`, may flow to
+ * a return node of `c`.
+ *
+ * This requires `parameterFlow` for `p` with some return access path, and a
+ * return node in `c` that `revFlow` reaches with that access path
+ * (`toReturn = true`) and that `fwdFlow` reaches in a `CcCall` context with
+ * recorded argument access path `ap`.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches some argument of `call` by flowing through
+ * the target callable: into the callable per `revFlowInToReturn`, with the
+ * matching output of `call` given by `revFlowIsReturned`, and `revFlow`
+ * holding for the argument with the resulting return state.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`.
+ *
+ * With `fwd = true` the counts are taken over `fwdFlow` and `fwdConsCand`;
+ * with `fwd = false` they are taken over `revFlow` and `consCand`.
+ * `nodes` counts distinct nodes, `fields` counts distinct typed contents,
+ * `conscand` counts (typed content, access path) pairs, and `tuples` counts
+ * full flow tuples.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 2 logic. */
+}
+
+/**
+ * Holds if `flowOutOfCallNodeCand1` relates return node `node1` to `node2`
+ * at `call`, and both endpoints are part of the Stage 2 reverse flow.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand2(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+ // both endpoints must survive Stage 2
+ Stage2::revFlow(node1, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ // the underlying Stage 1 candidate step
+ flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config)
+}
+
+/**
+ * Holds if `flowIntoCallNodeCand1` relates argument `node1` to parameter
+ * `node2` at `call`, and both endpoints are part of the Stage 2 reverse flow.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand2(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+) {
+ // both endpoints must survive Stage 2
+ Stage2::revFlow(node1, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ // the underlying Stage 1 candidate step
+ flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config)
+}
+
+/**
+ * Provides `localFlowBigStep`, which collapses sequences of local flow steps
+ * between `localFlowEntry` nodes and exit nodes into single steps, restricted
+ * to nodes that are part of the Stage 2 reverse flow.
+ */
+private module LocalFlowBigStep {
+ /**
+ * A node where some checking is required, and hence the big-step relation
+ * is not allowed to step over.
+ */
+ private class FlowCheckNode extends NodeEx {
+ FlowCheckNode() {
+ castNode(this.asNode()) or
+ clearsContentCached(this.asNode(), _)
+ }
+ }
+
+ /**
+ * Holds if `node` can be the first node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ predicate localFlowEntry(NodeEx node, Configuration config) {
+ Stage2::revFlow(node, config) and
+ (
+ sourceNode(node, config) or
+ jumpStep(_, node, config) or
+ additionalJumpStep(_, node, config) or
+ node instanceof ParamNodeEx or
+ node.asNode() instanceof OutNodeExt or
+ store(_, _, node, _, config) or
+ read(_, _, node, config) or
+ node instanceof FlowCheckNode
+ )
+ }
+
+ /**
+ * Holds if `node` can be the last node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ private predicate localFlowExit(NodeEx node, Configuration config) {
+ // `node` has a non-local step to a successor that is in the Stage 2 reverse flow
+ exists(NodeEx next | Stage2::revFlow(next, config) |
+ jumpStep(node, next, config) or
+ additionalJumpStep(node, next, config) or
+ flowIntoCallNodeCand1(_, node, next, config) or
+ flowOutOfCallNodeCand1(_, node, next, config) or
+ store(node, _, next, _, config) or
+ read(node, _, next, config)
+ )
+ or
+ node instanceof FlowCheckNode
+ or
+ sinkNode(node, config)
+ }
+
+ /**
+ * Holds if `additionalLocalFlowStepNodeCand1` relates `node1` to `node2` and
+ * both nodes are in the Stage 2 reverse flow with access path `false`
+ * (presumably the empty access path in Stage 2's approximation).
+ */
+ pragma[noinline]
+ private predicate additionalLocalFlowStepNodeCand2(
+ NodeEx node1, NodeEx node2, Configuration config
+ ) {
+ additionalLocalFlowStepNodeCand1(node1, node2, config) and
+ Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+ * subsequence of local flow steps in a dataflow path.
+ *
+ * This is the transitive closure of `[additional]localFlowStep` beginning
+ * at `localFlowEntry`.
+ */
+ pragma[nomagic]
+ private predicate localFlowStepPlus(
+ NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+ LocalCallContext cc
+ ) {
+ not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ (
+ // base case: a single step starting at a `localFlowEntry` node
+ localFlowEntry(node1, pragma[only_bind_into](config)) and
+ (
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true and
+ t = node1.getDataFlowType() // irrelevant dummy value
+ or
+ additionalLocalFlowStepNodeCand2(node1, node2, config) and
+ preservesValue = false and
+ t = node2.getDataFlowType()
+ ) and
+ node1 != node2 and
+ cc.relevantFor(node1.getEnclosingCallable()) and
+ not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ or
+ // extend with an ordinary local step, unless `mid` is a `FlowCheckNode`;
+ // `preservesValue` and `t` are carried over from the prefix
+ exists(NodeEx mid |
+ localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and
+ localFlowStepNodeCand1(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ or
+ // extend with an additional local step, unless `mid` is a `FlowCheckNode`;
+ // this sets `preservesValue = false` and takes `t` from `node2`
+ exists(NodeEx mid |
+ localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and
+ additionalLocalFlowStepNodeCand2(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ preservesValue = false and
+ t = node2.getDataFlowType() and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ )
+ }
+
+ /**
+ * Holds if `node1` can step to `node2` in one or more local steps and this
+ * path can occur as a maximal subsequence of local steps in a dataflow path.
+ */
+ pragma[nomagic]
+ predicate localFlowBigStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf,
+ Configuration config, LocalCallContext callContext
+ ) {
+ // the type computed by `localFlowStepPlus` is exposed as the type of `apf`
+ localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and
+ localFlowExit(node2, config)
+ }
+}
+
+private import LocalFlowBigStep
+
+private module Stage3 {
+ // The preceding stage; its `revFlow`, `storeStepCand`, `readStepCand`,
+ // `parameterMayFlowThrough` and `callMayFlowThroughRev` restrict this stage.
+ module PrevStage = Stage2;
+
+ // The previous stage's access-path type, used as an approximation here.
+ class ApApprox = PrevStage::Ap;
+
+ // This stage tracks access paths as `AccessPathFront` values.
+ class Ap = AccessPathFront;
+
+ // The nil access paths of this stage.
+ class ApNil = AccessPathFrontNil;
+
+ /** Gets the previous-stage approximation of `ap`, namely `ap.toBoolNonEmpty()`. */
+ private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() }
+
+ /**
+ * Gets the nil access path front for the data-flow type of `node`, provided
+ * that `node` is part of the previous stage's reverse flow.
+ */
+ private ApNil getApNil(NodeEx node) {
+ result = TFrontNil(node.getDataFlowType()) and
+ PrevStage::revFlow(node, _)
+ }
+
+ /**
+ * Gets an access path whose head is `tc`. The tail is not represented at
+ * this stage, so `tail` only needs to exist.
+ */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) }
+
+ /** Gets the content of the head of `ap`. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ // An optional access path of this stage.
+ class ApOption = AccessPathFrontOption;
+
+ /** Gets the `ApOption` value that represents the absence of an access path. */
+ ApOption apNone() { result = TAccessPathFrontNone() }
+
+ /** Gets the `ApOption` value that wraps `ap`. */
+ ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) }
+
+ // Call contexts in this stage are plain Booleans.
+ class Cc = boolean;
+
+ /** The call context `true`. */
+ class CcCall extends Cc {
+ CcCall() { this = true }
+
+ /** Holds if this call context may be `call`. Always holds in this stage. */
+ predicate matchesCall(DataFlowCall call) { any() }
+ }
+
+ /** The call context `false`. */
+ class CcNoCall extends Cc {
+ CcNoCall() { this = false }
+ }
+
+ /** Gets the empty call context, which is `false`. */
+ Cc ccNone() { result = false }
+
+ // Local call contexts carry no information in this stage.
+ private class LocalCc = Unit;
+
+ /**
+ * Gets the call context for entering callable `c` via `call` from `outercc`.
+ * In this stage the result is any `CcCall`, regardless of the arguments.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
+
+ /**
+ * Gets the call context for returning from callable `c` via `call` with
+ * `innercc`. In this stage the result is any `CcNoCall`, regardless of the
+ * arguments.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
+
+ /**
+ * Gets the local call context for `node` in call context `cc`. In this stage
+ * the result is any `LocalCc`, regardless of the arguments.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
+
+ /**
+ * Holds if `localFlowBigStep` relates `node1` to `node2` with the given
+ * `preservesValue` and nil access path `ap`, in any local call context of
+ * the big-step relation.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ // `lcc` is not used to restrict the step; it only needs to exist
+ exists(lcc) and
+ localFlowBigStep(node1, node2, preservesValue, ap, config, _)
+ }
+
+ // Flow out of calls in this stage is `flowOutOfCallNodeCand2`.
+ private predicate flowOutOfCall = flowOutOfCallNodeCand2/5;
+
+ // Flow into calls in this stage is `flowIntoCallNodeCand2`.
+ private predicate flowIntoCall = flowIntoCallNodeCand2/5;
+
+ /** Holds if `ap` is cleared at `node`, as given by `ap.isClearedAt`. */
+ pragma[nomagic]
+ private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
+
+ /** Holds if the underlying node of `node` is a `CastingNode`. */
+ pragma[nomagic]
+ private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
+
+ /**
+ * Holds if access path `ap` is allowed at `node`: `ap` is not cleared at
+ * `node`, and when `node` is a casting node the type of `node` is compatible
+ * with the type of `ap`.
+ */
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) {
+ (castingNodeEx(node) implies compatibleTypes(node.getDataFlowType(), ap.getType())) and
+ not clear(node, ap)
+ }
+
+ /** Holds if the type of `ap` is compatible with `contentType`. */
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) {
+ // We need to typecheck stores here, since reverse flow through a getter
+ // might have a different type here compared to inside the getter.
+ compatibleTypes(ap.getType(), contentType)
+ }
+
+ /* Begin: Stage 3 logic. */
+ /**
+ * Holds if `node` is part of the previous stage's reverse flow with
+ * approximate access path `apa`.
+ */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets `apa` itself. The equality is routed through an intermediate variable
+ * wrapped in `pragma[only_bind_into]`, presumably to steer join ordering.
+ */
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCall` relates return node `ret` to `out` at `call`, and
+ * the previous stage found both that `call` may have flow through it
+ * (`callMayFlowThroughRev`) and that some parameter of the callable
+ * enclosing `ret` may flow through (`parameterMayFlowThrough`).
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ // the previous stage must have kept `node` with the approximation of `ap`
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ // drop access paths that are cleared at `node` or type-incompatible at a casting node
+ filter(node, ap)
+ }
+
+ /**
+ * Helper for `fwdFlow`: the individual forward-flow cases, before the
+ * `flowCand` and `filter` restrictions are applied in `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source: starts with no call context and a nil access path
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local step from an already reached node `mid`
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ // value-preserving step: the access path is carried over unchanged
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ // non-value-preserving step: only from a nil access path; `localStep` supplies `ap`
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap` but resets the call context
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from a nil access path, and resets the call context
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ // record the argument access path only if the previous stage found
+ // that this parameter may flow through with the approximation `apa`
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow through a callable: out of `call`, resuming the context in which
+ // the matching argument was entered
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches `node1` with access path `ap1` (in context `cc`
+ * and `argAp`), the previous stage has a store-step candidate of `tc` from
+ * `node1` to `node2` for the approximation of `ap1`, and the content type of
+ * that store passes `typecheckStore` against `ap1`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ *
+ * The store is one found by `fwdFlowStore` for some typed content whose
+ * content is `c`, and `cons` is `apCons` of that typed content and `tail`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches `node1` with access path `ap` (in context `cc`
+ * and `argAp`), the head content of `ap` is `c`, and the previous stage has
+ * a read-step candidate of `c` from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches some argument of `call` with call context
+ * `outercc`, argument access path option `argAp` and access path `ap`, and
+ * `flowIntoCall` relates that argument to parameter `p`.
+ *
+ * `innercc` is the call context given by `getCallContextCall` for the
+ * callable enclosing `p`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ // a non-nil access path may only enter when field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches some return node in a `CcNoCall` context with
+ * `argAp` and `ap`, and `flowOutOfCall` relates that return node to `out`.
+ *
+ * `ccOut` is the call context given by `getCallContextReturn` for the
+ * callable enclosing the return node and the call being returned from.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ // a non-nil access path may only leave when field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `fwdFlow` reaches some return node with access path `ap` in a
+ * `CcCall` context that matches `call` and with recorded argument access
+ * path `argAp`, and `flowThroughOutOfCall` relates that return node to `out`
+ * at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ // a non-nil access path may only leave when field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ *
+ * `ap` is the access path at the argument, while `cc` and `argAp` are the
+ * call context and argument access path option with which the argument
+ * itself was reached.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ /**
+ * Holds if `fwdFlowStore` has a store of `tc` from `node1` (access path
+ * `ap1`) to `node2`, `ap2` is `apCons(tc, ap1)`, and `fwdFlowRead` has some
+ * read of the content of `tc` from access path `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if `fwdFlowRead` has a read of `c` from `n1` (access path `ap1`) to
+ * `n2`, and `ap2` is a tail access path for which
+ * `fwdFlowConsCand(ap1, c, ap2, config)` holds.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if forward flow reaches some node by flowing through `call`: the
+ * node is an output of `call` per `fwdFlowOutFromArg`, the matching entry
+ * into `call` is given by `fwdFlowIsEntered`, and `fwdFlow` holds for the
+ * node with the resulting context.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` relates `arg` to `p` at `call`, `arg` is reached
+ * by `fwdFlow`, the previous stage found that `p` may flow through, and
+ * `callMayFlowThroughFwd` holds for `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ // keep only nodes and access paths that forward flow also reaches
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ /**
+ * Helper for `revFlow`: the individual reverse-flow cases, before the
+ * result is intersected with `fwdFlow` in `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink: reached by forward flow with a nil access path
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local step: the access path is carried over unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // non-value-preserving local step: only between nil access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: keeps `ap` but resets the return state
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: only between nil access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow through a callable: into `call` at an argument, resuming the
+ // return state of the matching output of `call`
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ // record the returned access path only if forward flow reaches `node`
+ // in a call context with a recorded argument access path
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if `revFlow` reaches `mid` with access path `ap0` (and return state
+ * `toReturn`/`returnAp`), and `storeStepFwd` has a store of `tc`, whose
+ * content is `c`, from `node` with access path `ap` to `mid` with `ap0`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` equals `tail`; the alias is routed through
+ // `pragma[only_bind_into]`, presumably to steer join ordering
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if `revFlow` reaches some node with access path `ap` and return
+ * state `toReturn`/`returnAp`, and `flowOutOfCall` relates return node `ret`
+ * to that node at `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ // a non-nil access path requires that field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `revFlow` reaches some parameter with `toReturn = false`,
+ * `returnAp` and access path `ap`, and `flowIntoCall` relates `arg` to that
+ * parameter.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ // a non-nil access path requires that field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `revFlow` reaches some parameter with `toReturn = true`, recorded
+ * return access path `returnAp` and access path `ap`, and
+ * `flowThroughIntoCall` relates `arg` to that parameter at `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ // a non-nil access path requires that field flow is allowed
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ *
+ * `ap` is the access path at the return node, which must also be reached by
+ * `fwdFlow` in a `CcCall` context matching `call` with a recorded argument
+ * access path. `toReturn` and `returnAp` are the return state of the output.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if `store(node1, tc, node2, contentType, config)` holds and this
+ * store, applied to access path `ap1` at `node1`, is covered by both
+ * `revFlowStore` and `revFlowConsCand` for the same resulting access path
+ * and content.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if the read of content `c` from `node1` to `node2` is retained by this
+ * stage: `node2` is in `revFlow` with some access path `ap2`, forward flow
+ * performs the read yielding `ap2` (`readStepFwd`), and reverse flow contains a
+ * store of `c` producing the access path that is read (`revFlowStore`).
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is in `revFlow` for `config` with some return state and access path. */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /**
+ * Holds if `storeStepFwd` contains a store of `tc` whose access path before
+ * the store is `ap`. Used by `stats` for the forward direction.
+ */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if `storeStepCand` contains a store of `tc` whose access path before
+ * the store is `ap`, that is, `tc` may be pushed onto `ap`.
+ */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c` is in `revFlow` with `toReturn = true`,
+ * access path `ap`, and recorded return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, entered with access path `ap`, may
+ * flow to a return node of `c` according to both `revFlow` and `fwdFlow`.
+ *
+ * Return nodes that are parameter updates for the position of `p` itself are
+ * excluded.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ // `ret` carries the access path `ap0` that reverse flow recorded at `p` ...
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ // ... and forward flow reached `ret` in a call context with argument path `ap`.
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if, in reverse flow, some argument of `call` is reached by entering
+ * the callee through a parameter in a to-return state (`revFlowInToReturn`)
+ * whose recorded return access path is matched at an output of `call`
+ * (`revFlowIsReturned`).
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`, for forward flow
+ * (`fwd = true`) or reverse flow (`fwd = false`):
+ * - `nodes`: number of distinct nodes in the flow relation,
+ * - `fields`: number of distinct `TypedContent`s among the cons candidates,
+ * - `conscand`: number of distinct (content, access path) cons candidates,
+ * - `tuples`: number of tuples in the flow relation.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and remains relevant for the following pruning stage.
+ */
+private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
+ exists(AccessPathFront apf |
+ // `node` is in Stage 3 reverse flow in a to-return state with front `apf` ...
+ Stage3::revFlow(node, true, _, apf, config) and
+ // ... and forward flow reached it in a call context whose recorded argument
+ // front is `argApf`.
+ Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) {
+ exists(int tails, int nodes, int apLimit, int tupleLimit |
+ // Number of distinct fronts that `tc` may be stored onto in Stage 3.
+ tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and
+ // Number of nodes that use a front headed by `tc`, either as their access
+ // path in reverse flow or as their summary context.
+ nodes =
+ strictcount(NodeEx n |
+ Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ or
+ flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ ) and
+ accessPathApproxCostLimits(apLimit, tupleLimit) and
+ // Expensive only if both limits are exceeded ...
+ apLimit < tails and
+ tupleLimit < (tails - 1) * nodes and
+ // ... and the content does not demand high precision.
+ not tc.forceHighPrecision()
+ )
+}
+
+/**
+ * The representations of an access path approximation:
+ * - `TNil(t)`: the empty path with type `t`.
+ * - `TConsNil(tc, t)`: the single content `tc` followed by type `t`; only for
+ * Stage 3 cons candidates whose length 2 unfolding is not expensive.
+ * - `TConsCons(tc1, tc2, len)`: a path of length `len` (from 2 up to
+ * `accessPathLimit()`) starting with `tc1, tc2`; only when unfolding `tc1` is
+ * not expensive.
+ * - `TCons1(tc, len)`: a path of length `len` (from 1 up to
+ * `accessPathLimit()`) of which only the head `tc` is tracked; used when
+ * unfolding `tc` is expensive.
+ */
+private newtype TAccessPathApprox =
+ TNil(DataFlowType t) or
+ TConsNil(TypedContent tc, DataFlowType t) {
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ not expensiveLen2unfolding(tc, _)
+ } or
+ TConsCons(TypedContent tc1, TypedContent tc2, int len) {
+ Stage3::consCand(tc1, TFrontHead(tc2), _) and
+ len in [2 .. accessPathLimit()] and
+ not expensiveLen2unfolding(tc1, _)
+ } or
+ TCons1(TypedContent tc, int len) {
+ len in [1 .. accessPathLimit()] and
+ expensiveLen2unfolding(tc, _)
+ }
+
+/**
+ * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only
+ * the first two elements of the list and its length are tracked. If data flows
+ * from a source to a given node with a given `AccessPathApprox`, this indicates
+ * the sequence of dereference operations needed to get from the value in the node
+ * to the tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPathApprox extends TAccessPathApprox {
+ /** Gets a textual representation of this access path approximation. */
+ abstract string toString();
+
+ /** Gets the first `TypedContent` of this path, if it is non-empty. */
+ abstract TypedContent getHead();
+
+ /** Gets the number of contents in this path. */
+ abstract int len();
+
+ /**
+ * Gets the type associated with this path: the tracked type for the empty
+ * path, and the container type of the head otherwise.
+ */
+ abstract DataFlowType getType();
+
+ /** Gets the `AccessPathFront` (head only, or the type if empty) of this path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the access path obtained by popping `head` from this path, if any. */
+ abstract AccessPathApprox pop(TypedContent head);
+}
+
+/** The empty access path approximation, carrying only the type `t`. */
+private class AccessPathApproxNil extends AccessPathApprox, TNil {
+ private DataFlowType t;
+
+ AccessPathApproxNil() { this = TNil(t) }
+
+ // `concat` is used so that the result is "" when `ppReprType(t)` has no result.
+ override string toString() { result = concat(": " + ppReprType(t)) }
+
+ // The empty path has no head ...
+ override TypedContent getHead() { none() }
+
+ override int len() { result = 0 }
+
+ override DataFlowType getType() { result = t }
+
+ override AccessPathFront getFront() { result = TFrontNil(t) }
+
+ // ... and therefore nothing can be popped from it.
+ override AccessPathApprox pop(TypedContent head) { none() }
+}
+
+/** A non-empty access path approximation; the common supertype of the cons variants. */
+abstract private class AccessPathApproxCons extends AccessPathApprox { }
+
+/** An access path approximation of length 1: content `tc` followed by type `t`. */
+private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil {
+ private TypedContent tc;
+ private DataFlowType t;
+
+ AccessPathApproxConsNil() { this = TConsNil(tc, t) }
+
+ override string toString() {
+ // The `concat` becomes "" if `ppReprType` has no result.
+ result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t))
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = 1 }
+
+ // The type of the path as a whole is the container type of its head, not `t`.
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ // Popping the only content leaves the empty path of type `t`.
+ override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) }
+}
+
+/**
+ * An access path approximation of length at least 2 that tracks its first two
+ * contents and its total length.
+ */
+private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons {
+ private TypedContent head1;
+ private TypedContent head2;
+ private int length;
+
+ AccessPathApproxConsCons() { this = TConsCons(head1, head2, length) }
+
+ /** Gets the two tracked contents, plus the total length when further contents are elided. */
+ override string toString() {
+ exists(string shown | shown = "[" + head1.toString() + ", " + head2.toString() |
+ length = 2 and result = shown + "]"
+ or
+ not length = 2 and result = shown + ", ... (" + length.toString() + ")]"
+ )
+ }
+
+ override TypedContent getHead() { head1 = result }
+
+ override int len() { length = result }
+
+ override DataFlowType getType() { head1.getContainerType() = result }
+
+ override AccessPathFront getFront() { TFrontHead(head1) = result }
+
+ /**
+ * Gets a path headed by the second content and one element shorter, in any of
+ * the representations that exist for it.
+ */
+ override AccessPathApprox pop(TypedContent head) {
+ exists(int rest | rest = length - 1 and head = head1 |
+ result = TConsCons(head2, _, rest)
+ or
+ rest = 1 and result = TConsNil(head2, _)
+ or
+ result = TCons1(head2, rest)
+ )
+ }
+}
+
+/**
+ * An access path approximation of length at least 1 of which only the head
+ * content `tc` and the length `len` are tracked.
+ */
+private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 {
+ private TypedContent tc;
+ private int len;
+
+ AccessPathApproxCons1() { this = TCons1(tc, len) }
+
+ override string toString() {
+ if len = 1
+ then result = "[" + tc.toString() + "]"
+ else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]"
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = len }
+
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc and
+ (
+ // The second content is not tracked, so consider every `tc2` that `tc` may
+ // be stored onto according to Stage 3, in each available representation.
+ exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) |
+ result = TConsCons(tc2, _, len - 1)
+ or
+ len = 2 and
+ result = TConsNil(tc2, _)
+ or
+ result = TCons1(tc2, len - 1)
+ )
+ or
+ // A path of length 1 pops to an empty path of any type `t` that `tc` may be
+ // stored onto.
+ exists(DataFlowType t |
+ len = 1 and
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ result = TNil(t)
+ )
+ )
+ }
+}
+
+/** Gets the access path obtained by popping `tc` from `apa`, if any. */
+private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) }
+
+/** Gets an access path that yields `apa` when `tc` is popped from it, that is, `tc` pushed onto `apa`. */
+private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { result.pop(tc) = apa }
+
+/** An optional `AccessPathApprox`: either no value or some approximation `apa`. */
+private newtype TAccessPathApproxOption =
+ TAccessPathApproxNone() or
+ TAccessPathApproxSome(AccessPathApprox apa)
+
+/** An optional access path approximation. */
+private class AccessPathApproxOption extends TAccessPathApproxOption {
+ /**
+ * Gets the textual representation of the wrapped approximation, or the empty
+ * string when no approximation is present.
+ */
+ // NOTE(review): the absent case renders as the empty string; confirm this is intended.
+ string toString() {
+ exists(AccessPathApprox inner | this = TAccessPathApproxSome(inner) |
+ result = inner.toString()
+ )
+ or
+ this = TAccessPathApproxNone() and result = ""
+ }
+}
+
+private module Stage4 {
+ // Stage 4 prunes the results of Stage 3.
+ module PrevStage = Stage3;
+
+ // The coarser access path representation used by the previous stage.
+ class ApApprox = PrevStage::Ap;
+
+ // The access path representation used by this stage.
+ class Ap = AccessPathApprox;
+
+ // The empty access path of this stage.
+ class ApNil = AccessPathApproxNil;
+
+ /** Gets the previous stage's approximation of `ap`, namely its front. */
+ private ApApprox getApprox(Ap ap) { result = ap.getFront() }
+
+ /**
+ * Gets the empty access path carrying the data-flow type of `node`, provided
+ * `node` is in the previous stage's reverse flow.
+ */
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType())
+ }
+
+ /** Gets an access path with head `tc` and tail `tail`, as computed by `push`. */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) }
+
+ /** Gets the `Content` of the head of `ap`, if `ap` has a head. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ // An optional access path of this stage.
+ class ApOption = AccessPathApproxOption;
+
+ /** Gets the `ApOption` that holds no access path. */
+ ApOption apNone() { result = TAccessPathApproxNone() }
+
+ /** Gets the `ApOption` that wraps `ap`. */
+ ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) }
+
+ // Call contexts used by this stage.
+ class Cc = CallContext;
+
+ // A call context that arises from flow into a call.
+ class CcCall = CallContextCall;
+
+ // A call context that does not arise from flow into a call.
+ class CcNoCall = CallContextNoCall;
+
+ /** Gets the call context that places no restriction on the call site. */
+ Cc ccNone() { result instanceof CallContextAny }
+
+ // The call context abstraction used for local flow steps.
+ private class LocalCc = LocalCallContext;
+
+ /**
+ * Gets the call context to use inside callable `c` when it is entered through
+ * `call` from the outer context `outercc`, provided `checkCallContextCall`
+ * accepts the combination.
+ *
+ * The result is `TSpecificCall(call)` when `recordDataFlowCallSite(call, c)`
+ * holds, and `TSomeCall()` otherwise.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
+ }
+
+ /**
+ * Gets the call context to use after returning from callable `c` through
+ * `call` with inner context `innercc`, provided `checkCallContextReturn`
+ * accepts the combination.
+ *
+ * The result is `TReturn(c, call)` when `reducedViableImplInReturn(c, call)`
+ * holds, and `ccNone()` otherwise.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ /**
+ * Gets the local call context derived from `cc` and the enclosing callable of
+ * `node`, restricted to nodes for which `localFlowEntry(node, config)` holds.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
+ localFlowEntry(node, config) and
+ result =
+ // NOTE(review): the binding pragmas on `cc` presumably steer join ordering
+ // only; they do not change which tuples hold.
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ node.getEnclosingCallable())
+ }
+
+ /**
+ * Holds if `localFlowBigStep` relates `node1` to `node2` in local call context
+ * `lcc`, where `preservesValue` tells whether the step preserves the value.
+ * `ap` is an empty access path whose front is the one given to
+ * `localFlowBigStep`.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc)
+ }
+
+ /**
+ * Holds if `flowOutOfCallNodeCand2` relates return node `node1` to `node2` at
+ * `call`, and both nodes are in the previous stage's reverse flow.
+ */
+ pragma[nomagic]
+ private predicate flowOutOfCall(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `flowIntoCallNodeCand2` relates argument `node1` to parameter
+ * `node2` at `call`, and both nodes are in the previous stage's reverse flow.
+ */
+ pragma[nomagic]
+ private predicate flowIntoCall(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+ ) {
+ flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /** Holds always: this stage applies no extra filter on the pair (`node`, `ap`). */
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { any() }
+
+ // Type checking is not necessary here as it has already been done in stage 3,
+ // so this predicate holds for every `ap` and `contentType`.
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 4 logic. */
+ /**
+ * Holds if `node` is in the previous stage's reverse flow with approximate
+ * access path `apa`.
+ */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets `apa` itself. The equality is routed through an intermediate variable
+ * wrapped in `pragma[only_bind_into]`.
+ */
+ // NOTE(review): presumably this keeps the optimizer from using the equality to
+ // bind either side from the other — confirm against the pragma documentation.
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCall` relates `ret` to `out` at `call` and, according to
+ * the previous stage, `call` may have flow through it and some parameter of the
+ * callable enclosing `ret` may flow through.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ // Keep only tuples whose approximated access path survived the previous stage.
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ filter(node, ap)
+ }
+
+ /**
+ * Holds if `node` is a source, or is reached by one forward step from a node
+ * in `fwdFlow`, before the pruning conditions of `fwdFlow` are applied.
+ */
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source: starts with the empty access path and no call context
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local flow: a value-preserving step keeps the access path, any other local
+ // step requires the incoming access path to be empty
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: keeps the access path but resets the call context
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from an empty access path; resets the call context
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ // (the argument access path is recorded only if the previous stage found
+ // that the parameter may flow through)
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow out of a callable that was entered through an argument of the same
+ // call; the context of the call site is restored
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` in context
+ * (`cc`, `argAp`) and the previous stage retained a store of `tc` from `node1`
+ * to `node2` for the approximation of `ap1`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ // `c` is the untyped content of the stored `tc`.
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap` in context
+ * (`cc`, `argAp`), the head content of `ap` is `c`, and the previous stage
+ * retained a read of `c` from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap` in
+ * context (`outercc`, `argAp`) and that argument flows into parameter `p`.
+ * `innercc` is the call context to use inside the callee.
+ *
+ * A non-empty `ap` is only accepted when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * context that is not a call context, and flows out of some call to `out`.
+ * `ccOut` is the resulting call context as given by `getCallContextReturn`.
+ *
+ * A non-empty `ap` is only accepted when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` and
+ * recorded argument access path `argAp`, in a call context matching `call`,
+ * and flows out through `call` to `out` (per `flowThroughOutOfCall`).
+ *
+ * A non-empty `ap` is only accepted when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ // (`cc`, `argAp`) is the context at the call site, outside the callee.
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ // The previous stage found that `p` may flow through with this access path.
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ /**
+ * Holds if forward flow stores `tc` from `node1` (access path `ap1`) to
+ * `node2`, giving access path `ap2`, and forward flow also reads the content
+ * of `tc` from `ap2` somewhere.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if forward flow reads content `c` from `n1` (access path `ap1`) to
+ * `n2`, where `ap1` results from a forward-flow store of `c` onto `ap2`. Thus
+ * `ap2` is the access path after the read.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if, in forward flow, data enters the target of `call` through an
+ * argument (`fwdFlowIsEntered`) and flows back out at `call` to a node that is
+ * in `fwdFlow` with the call-site context (`fwdFlowOutFromArg`).
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` relates `arg` to `p` at `call`, `arg` is in
+ * `fwdFlow`, the previous stage found that `p` may flow through, and forward
+ * flow may pass through `call` (`callMayFlowThroughFwd`).
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ // Reverse flow is restricted to what forward flow already reached.
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ /**
+ * Holds if `node` is a sink reached by forward flow, or reaches a node in
+ * `revFlow` by one step, before the `fwdFlow` restriction of `revFlow` is
+ * applied.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink: reached with the empty access path and no pending return
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local step: the access path is unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // non-value-preserving local step: both access paths must be empty
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: keeps the access path but resets the return state
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: both access paths must be empty; resets the return state
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow into a callable whose parameter must be returned; the return state
+ // of the matching output of the same call is restored
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ // (the returned access path is recorded only if forward flow reached this
+ // node in a call context with a recorded argument access path)
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if reverse flow reaches `mid` with access path `ap0` in return state
+ * (`toReturn`, `returnAp`), and forward flow stores `tc`, whose content is `c`,
+ * from `node` (access path `ap`) to `mid`, yielding `ap0`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` equals `tail`; it is a separately bound copy passed to `readStepFwd`.
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if `ret` is a return node from which flow leaves `call` to some node
+ * `out` (per `flowOutOfCall`), and `out` is in `revFlow` with the given
+ * `toReturn`, `returnAp`, and `ap`.
+ *
+ * A non-empty `ap` is only accepted when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `arg` flows into some parameter (per `flowIntoCall`) that is in
+ * `revFlow` with `toReturn = false`, return access path `returnAp`, and access
+ * path `ap`.
+ *
+ * A non-empty `ap` is only accepted when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if `arg` is an argument of `call` that flows into some parameter (per
+ * `flowThroughIntoCall`) that is in `revFlow` with `toReturn = true`, recorded
+ * return access path `returnAp`, and access path `ap`.
+ *
+ * A non-empty `ap` is only accepted when the call edge allows field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ // Reverse flow reaches the return node `ret` from an output of `call` ...
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ // ... and forward flow reached `ret` with a recorded argument access path
+ // in a call context that matches `call`.
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if the store step `store(node1, tc, node2, contentType, config)` is
+ * retained by this stage with `ap1` as the access path before the store:
+ * reverse flow passes through the store (`revFlowStore`) and a matching read of
+ * the same content occurs in reverse flow (`revFlowConsCand`).
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if the read of content `c` from `node1` to `node2` is retained by this
+ * stage: `node2` is in `revFlow` with some access path `ap2`, forward flow
+ * performs the read yielding `ap2` (`readStepFwd`), and reverse flow contains a
+ * store of `c` producing the access path that is read (`revFlowStore`).
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is in `revFlow` for `config` with some return state and access path. */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /**
+ * Holds if `storeStepFwd` contains a store of `tc` whose access path before
+ * the store is `ap`. Used by `stats` for the forward direction.
+ */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if `storeStepCand` contains a store of `tc` whose access path before
+ * the store is `ap`, that is, `tc` may be pushed onto `ap`.
+ */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c` is in `revFlow` with `toReturn = true`,
+ * access path `ap`, and recorded return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, entered with access path `ap`, may
+ * flow to a return node of `c` according to both `revFlow` and `fwdFlow`.
+ *
+ * Return nodes that are parameter updates for the position of `p` itself are
+ * excluded.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ // `ret` carries the access path `ap0` that reverse flow recorded at `p` ...
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ // ... and forward flow reached `ret` in a call context with argument path `ap`.
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if, in reverse flow, some argument of `call` is reached by entering
+ * the callee through a parameter in a to-return state (`revFlowInToReturn`)
+ * whose recorded return access path is matched at an output of `call`
+ * (`revFlowIsReturned`).
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`, for forward flow
+ * (`fwd = true`) or reverse flow (`fwd = false`):
+ * - `nodes`: number of distinct nodes in the flow relation,
+ * - `fields`: number of distinct `TypedContent`s among the cons candidates,
+ * - `conscand`: number of distinct (content, access path) cons candidates,
+ * - `tuples`: number of tuples in the flow relation.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 4 logic. */
+}
+
+/**
+ * Gets `conf` itself. The equality is routed through an intermediate variable
+ * wrapped in `pragma[only_bind_into]`.
+ */
+// NOTE(review): presumably this keeps the optimizer from using the equality to
+// bind either side from the other — confirm against the pragma documentation.
+bindingset[conf, result]
+private Configuration unbindConf(Configuration conf) {
+ exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
+}
+
+/**
+ * Holds if `n` lies in a callable that has some parameter which may flow
+ * through with access path `apa` (in any configuration), and `n` is in Stage 4
+ * reverse flow in a to-return state and in Stage 4 forward flow in a call
+ * context whose recorded argument access path is `apa`.
+ */
+private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) {
+ exists(DataFlowCallable c, AccessPathApprox apa0 |
+ Stage4::parameterMayFlowThrough(_, c, apa, _) and
+ Stage4::revFlow(n, true, _, apa0, config) and
+ Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
+ n.getEnclosingCallable() = c
+ )
+}
+
+/**
+ * The representations of a summary context: either none, or a parameter `p`
+ * together with an access path `ap` whose approximation may flow through `p`
+ * according to Stage 4.
+ */
+private newtype TSummaryCtx =
+ TSummaryCtxNone() or
+ TSummaryCtxSome(ParamNodeEx p, AccessPath ap) {
+ Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _)
+ }
+
+/**
+ * A context for generating flow summaries. This represents flow entry through
+ * a specific parameter with an access path of a specific shape.
+ *
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+ /** Gets a textual representation of this summary context. */
+ abstract string toString();
+}
+
+/** A summary context from which no flow summary can be generated. */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+ // NOTE(review): renders as the empty string; confirm this is intended.
+ override string toString() { result = "" }
+}
+
+/** A summary context from which a flow summary can be generated. */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+ private ParamNodeEx p;
+ private AccessPath ap;
+
+ SummaryCtxSome() { this = TSummaryCtxSome(p, ap) }
+
+ /** Gets the position of the parameter `p` of this summary context. */
+ int getParameterPos() { p.isParameterOf(_, result) }
+
+ /** Gets the parameter and the access path, separated by `": "`. */
+ override string toString() { result = p + ": " + ap }
+
+ /** Holds if the parameter `p` is at the given location. */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/**
+ * Gets the number of length 2 access path approximations that correspond to `apa`.
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+ exists(TypedContent tc, int len |
+ tc = apa.getHead() and
+ len = apa.len() and
+ result =
+ // Count the distinct fronts of tails, one element shorter than `apa`, that
+ // the head `tc` may be stored onto according to Stage 4.
+ strictcount(AccessPathFront apf |
+ Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1),
+ config)
+ )
+ )
+}
+
+/**
+ * Gets the number of nodes that use `apa`, either as their access path in
+ * Stage 4 reverse flow or as their summary context (`nodeMayUseSummary`).
+ * Has no result if there is no such node.
+ */
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+ result =
+ strictcount(NodeEx n |
+ Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation matching `apa` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = count1to2unfold(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ // Expensive only if both the access path limit and the tuple limit are exceeded.
+ apLimit < aps and
+ tupleLimit < (aps - 1) * nodes
+ )
+}
+
+/**
+ * Gets a tail of `apa`: an approximation obtained by popping some head from
+ * `apa` such that Stage 4 retained a store of that head onto the tail.
+ */
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+ exists(TypedContent head |
+ apa.pop(head) = result and
+ Stage4::consCand(head, result, config)
+ )
+}
+
+/**
+ * Holds with `unfold = false` if a precise head-tail representation of `apa` is
+ * expected to be expensive. Holds with `unfold = true` otherwise.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+ // Content that demands high precision is always unfolded.
+ if apa.getHead().forceHighPrecision()
+ then unfold = true
+ else
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = countPotentialAps(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ // Do not unfold only if both limits are exceeded.
+ if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
+ )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+ // Not unfolded and kept as a single access path: either `apa` is not a
+ // head-only approximation, or unfolding it to length 2 is expensive.
+ evalUnfold(apa, false, config) and
+ result = 1 and
+ (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config))
+ or
+ // Not fully unfolded, but cheap enough to unfold to length 2.
+ evalUnfold(apa, false, config) and
+ result = count1to2unfold(apa, config) and
+ not expensiveLen1to2unfolding(apa, config)
+ or
+ // Fully unfolded into a precise head-tail representation.
+ evalUnfold(apa, true, config) and
+ result = countPotentialAps(apa, config)
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ */
+language[monotonicAggregates]
+private int countPotentialAps(AccessPathApprox apa, Configuration config) {
+ // The empty path corresponds to exactly one access path.
+ apa instanceof AccessPathApproxNil and result = 1
+ or
+ // Otherwise, sum the counts over all tails; there is no result without a tail.
+ result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config))
+}
+
+/**
+ * The representations of an access path:
+ * - `TAccessPathNil(t)`: the empty path with type `t`.
+ * - `TAccessPathCons(head, tail)`: a precise head-tail representation, used
+ * for approximations for which `evalUnfold(apa, false, _)` does not hold.
+ * - `TAccessPathCons2(head1, head2, len)`: the first two contents and the
+ * length, used when the approximation is not unfolded but unfolding it to
+ * length 2 is not expensive.
+ * - `TAccessPathCons1(head, len)`: only the head and the length, used when the
+ * approximation is not unfolded and unfolding it to length 2 is expensive.
+ */
+private newtype TAccessPath =
+ TAccessPathNil(DataFlowType t) or
+ TAccessPathCons(TypedContent head, AccessPath tail) {
+ exists(AccessPathApproxCons apa |
+ not evalUnfold(apa, false, _) and
+ head = apa.getHead() and
+ tail.getApprox() = getATail(apa, _)
+ )
+ } or
+ TAccessPathCons2(TypedContent head1, TypedContent head2, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ not expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head1 = apa.getHead() and
+ head2 = getATail(apa, _).getHead()
+ )
+ } or
+ TAccessPathCons1(TypedContent head, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head = apa.getHead()
+ )
+ }
+
+ /**
+ * The underlying representation of `PathNode`.
+ *
+ * `TPathNodeMid` is a node together with a call context, summary context,
+ * access path and configuration; `TPathNodeSink` is a sink node together with
+ * its configuration only.
+ */
+ private newtype TPathNode =
+ TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) {
+ // A PathNode is introduced by a source ...
+ Stage4::revFlow(node, config) and
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // ... or a step from an existing PathNode to another node.
+ // The target must also be in `Stage4::revFlow` with the approximation of `ap`.
+ exists(PathNodeMid mid |
+ pathStep(mid, node, cc, sc, ap) and
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config))
+ )
+ } or
+ TPathNodeSink(NodeEx node, Configuration config) {
+ sinkNode(node, pragma[only_bind_into](config)) and
+ Stage4::revFlow(node, pragma[only_bind_into](config)) and
+ (
+ // A sink that is also a source ...
+ sourceNode(node, config)
+ or
+ // ... or a sink that can be reached from a source
+ // (only steps arriving with a nil access path qualify).
+ exists(PathNodeMid mid |
+ pathStep(mid, node, _, _, TAccessPathNil(_)) and
+ pragma[only_bind_into](config) = mid.getConfiguration()
+ )
+ )
+ }
+
+/**
+ * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a
+ * source to a given node with a given `AccessPath`, this indicates the sequence
+ * of dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPath extends TAccessPath {
+ /** Gets the head of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the tail of this access path, if any. */
+ abstract AccessPath getTail();
+
+ /** Gets the front of this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the approximation of this access path. */
+ abstract AccessPathApprox getApprox();
+
+ /** Gets the length of this access path. */
+ abstract int length();
+
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /** Gets the access path obtained by popping `tc` from this access path, if any. */
+ final AccessPath pop(TypedContent tc) {
+ result = this.getTail() and
+ tc = this.getHead()
+ }
+
+ /** Gets the access path obtained by pushing `tc` onto this access path. */
+ final AccessPath push(TypedContent tc) { this = result.pop(tc) }
+}
+
+ /** The empty access path, which carries only the type of the tracked object. */
+ private class AccessPathNil extends AccessPath, TAccessPathNil {
+   /** Gets the type stored in this nil access path. */
+   DataFlowType getType() { this = TAccessPathNil(result) }
+
+   override TypedContent getHead() { none() }
+
+   override AccessPath getTail() { none() }
+
+   override int length() { result = 0 }
+
+   override AccessPathFrontNil getFront() { result = TFrontNil(this.getType()) }
+
+   override AccessPathApproxNil getApprox() { result = TNil(this.getType()) }
+
+   override string toString() { result = concat(": " + ppReprType(this.getType())) }
+ }
+
+ /** A non-empty access path represented precisely as a head and a full tail. */
+ private class AccessPathCons extends AccessPath, TAccessPathCons {
+ private TypedContent head;
+ private AccessPath tail;
+
+ AccessPathCons() { this = TAccessPathCons(head, tail) }
+
+ override TypedContent getHead() { result = head }
+
+ override AccessPath getTail() { result = tail }
+
+ override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+ // The result is whichever of the three approximation values exists for this
+ // head/tail/length combination.
+ override AccessPathApproxCons getApprox() {
+ result = TConsNil(head, tail.(AccessPathNil).getType())
+ or
+ result = TConsCons(head, tail.getHead(), this.length())
+ or
+ result = TCons1(head, this.length())
+ }
+
+ override int length() { result = 1 + tail.length() }
+
+ /**
+ * Gets the rendering of this access path without the leading `[`.
+ *
+ * `needsSuffix = false` means the result is complete (it already ends in `]`,
+ * possibly followed by a type). `needsSuffix = true` means the result ends in
+ * `, ... (` and the caller must append the length and `)]`.
+ */
+ private string toStringImpl(boolean needsSuffix) {
+ // Tail is nil: close the bracket and append the pretty-printed type, if any.
+ exists(DataFlowType t |
+ tail = TAccessPathNil(t) and
+ needsSuffix = false and
+ result = head.toString() + "]" + concat(" : " + ppReprType(t))
+ )
+ or
+ // Tail is another precise cons: recurse and propagate its `needsSuffix`.
+ result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix)
+ or
+ // Tail is a two-head approximation: elide the rest unless it has length 2.
+ exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) |
+ result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false
+ )
+ or
+ // Tail is a one-head approximation: elide the rest unless it has length 1.
+ exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) |
+ result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false
+ )
+ }
+
+ override string toString() {
+ // Elided rendering: append the total length of this path and close it.
+ result = "[" + this.toStringImpl(true) + length().toString() + ")]"
+ or
+ // Complete rendering: only the opening bracket is missing.
+ result = "[" + this.toStringImpl(false)
+ }
+ }
+
+ /** A non-empty access path represented by its first two heads and its length. */
+ private class AccessPathCons2 extends AccessPath, TAccessPathCons2 {
+   private TypedContent head1;
+   private TypedContent head2;
+   private int len;
+
+   AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) }
+
+   override TypedContent getHead() { result = head1 }
+
+   override int length() { result = len }
+
+   override AccessPathFrontHead getFront() { result = TFrontHead(head1) }
+
+   /** Gets any access path one shorter than this one that starts with `head2` and may follow `head1`. */
+   override AccessPath getTail() {
+     result.length() = len - 1 and
+     result.getHead() = head2 and
+     Stage4::consCand(head1, result.getApprox(), _)
+   }
+
+   override AccessPathApproxCons getApprox() {
+     result = TCons1(head1, len)
+     or
+     result = TConsCons(head1, head2, len)
+   }
+
+   override string toString() {
+     exists(string prefix | prefix = "[" + head1.toString() + ", " + head2.toString() |
+       len = 2 and result = prefix + "]"
+       or
+       len != 2 and result = prefix + ", ... (" + len.toString() + ")]"
+     )
+   }
+ }
+
+ /** A non-empty access path represented by its head and its length only. */
+ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
+   private TypedContent head;
+   private int len;
+
+   AccessPathCons1() { this = TAccessPathCons1(head, len) }
+
+   override TypedContent getHead() { result = head }
+
+   override int length() { result = len }
+
+   override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+   override AccessPathApproxCons getApprox() { result = TCons1(head, len) }
+
+   /** Gets any access path one shorter than this one that may follow `head`. */
+   override AccessPath getTail() {
+     result.length() = len - 1 and
+     Stage4::consCand(head, result.getApprox(), _)
+   }
+
+   override string toString() {
+     len = 1 and result = "[" + head.toString() + "]"
+     or
+     len != 1 and result = "[" + head.toString() + ", ... (" + len.toString() + ")]"
+   }
+ }
+
+/**
+ * A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
+ * Only those `PathNode`s that are reachable from a source are generated.
+ */
+class PathNode extends TPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { none() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context.
+ */
+ string toStringWithContext() { none() }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ none()
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() }
+
+ private PathNode getASuccessorIfHidden() {
+ this.(PathNodeImpl).isHidden() and
+ result = this.(PathNodeImpl).getASuccessorImpl()
+ }
+
+ /** Gets a successor of this node, if any. */
+ final PathNode getASuccessor() {
+ result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
+ not this.(PathNodeImpl).isHidden() and
+ not result.(PathNodeImpl).isHidden()
+ }
+
+ /** Holds if this node is a source. */
+ predicate isSource() { none() }
+}
+
+ /**
+ * The internal base class of all path nodes. It supplies the raw successor
+ * relation, the underlying `NodeEx`, and the shared printing and location logic.
+ */
+ abstract private class PathNodeImpl extends PathNode {
+   /** Gets a raw successor of this node, before hidden nodes are skipped. */
+   abstract PathNode getASuccessorImpl();
+
+   /** Gets the underlying `NodeEx`. */
+   abstract NodeEx getNodeEx();
+
+   /** Holds if this node should be left out of reported paths. */
+   predicate isHidden() {
+     this.getNodeEx() instanceof TNodeImplicitRead
+     or
+     not this instanceof PathNodeSink and
+     not this.isSource() and
+     hiddenNode(this.getNodeEx().asNode())
+   }
+
+   /** Gets the access path suffix for printing; empty for sinks and for empty renderings. */
+   private string ppAp() {
+     this instanceof PathNodeSink and result = ""
+     or
+     exists(string rendered | rendered = this.(PathNodeMid).getAp().toString() |
+       rendered = "" and result = ""
+       or
+       rendered != "" and result = " " + rendered
+     )
+   }
+
+   /** Gets the call context suffix for printing; empty for sinks. */
+   private string ppCtx() {
+     this instanceof PathNodeSink and result = ""
+     or
+     result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
+   }
+
+   override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+   override string toStringWithContext() {
+     result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+   }
+
+   override predicate hasLocationInfo(
+     string filepath, int startline, int startcolumn, int endline, int endcolumn
+   ) {
+     this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+   }
+ }
+
+ /** Holds if `n` is a sink or has a `getASuccessor` chain leading to one. */
+ private predicate directReach(PathNode n) {
+   n instanceof PathNodeSink
+   or
+   exists(PathNode next | next = n.getASuccessor() | directReach(next))
+ }
+
+ /** Holds if `n` is used in a subpath or can reach a sink. */
+ private predicate reach(PathNode n) {
+   Subpaths::retReach(n)
+   or
+   directReach(n)
+ }
+
+ /** Holds if `n2` can reach a sink and is a successor of `n1`. */
+ private predicate pathSucc(PathNode n1, PathNode n2) {
+   directReach(n2) and
+   n2 = n1.getASuccessor()
+ }
+
+ /** Holds if `n2` is reachable from `n1` via one or more `pathSucc` steps. */
+ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
+
+/**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ */
+module PathGraph {
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
+
+ /** Holds if `n` is a node in the graph of data flow path explanations. */
+ query predicate nodes(PathNode n, string key, string val) {
+ reach(n) and key = "semmle.label" and val = n.toString()
+ }
+
+ query predicate subpaths = Subpaths::subpaths/4;
+}
+
+/**
+ * An intermediate flow graph node. This is a triple consisting of a `Node`,
+ * a `CallContext`, and a `Configuration`.
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+ NodeEx node;
+ CallContext cc;
+ SummaryCtx sc;
+ AccessPath ap;
+ Configuration config;
+
+ PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ CallContext getCallContext() { result = cc }
+
+ SummaryCtx getSummaryCtx() { result = sc }
+
+ AccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ private PathNodeMid getSuccMid() {
+ pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(),
+ result.getAp()) and
+ result.getConfiguration() = unbindConf(this.getConfiguration())
+ }
+
+ override PathNodeImpl getASuccessorImpl() {
+ // an intermediate step to another intermediate node
+ result = getSuccMid()
+ or
+ // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+ exists(PathNodeMid mid, PathNodeSink sink |
+ mid = getSuccMid() and
+ mid.getNodeEx() = sink.getNodeEx() and
+ mid.getAp() instanceof AccessPathNil and
+ sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+ result = sink
+ )
+ }
+
+ override predicate isSource() {
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap instanceof AccessPathNil
+ }
+}
+
+/**
+ * A flow graph node corresponding to a sink. This is disjoint from the
+ * intermediate nodes in order to uniquely correspond to a given sink by
+ * excluding the `CallContext`.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+ NodeEx node;
+ Configuration config;
+
+ PathNodeSink() { this = TPathNodeSink(node, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PathNode getASuccessorImpl() { none() }
+
+ override predicate isSource() { sourceNode(node, config) }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`. The last step in or out of
+ * a callable is recorded by `cc`.
+ */
+private predicate pathStep(
+ PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+ exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC |
+ midnode = mid.getNodeEx() and
+ conf = mid.getConfiguration() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ localCC =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ midnode.getEnclosingCallable()) and
+ ap0 = mid.getAp()
+ |
+ localFlowBigStep(midnode, node, true, _, conf, localCC) and
+ ap = ap0
+ or
+ localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and
+ ap0 instanceof AccessPathNil
+ )
+ or
+ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = mid.getAp()
+ or
+ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ mid.getAp() instanceof AccessPathNil and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp()
+ or
+ pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone
+ or
+ pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx()
+}
+
+ /**
+ * Holds if `mid` has access path `ap0` with head `tc` and call context `cc`,
+ * and there is a `Stage4` read-step candidate for the content of `tc` from the
+ * node of `mid` to `node`.
+ */
+ pragma[nomagic]
+ private predicate pathReadStep(
+   PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+ ) {
+   mid.getAp() = ap0 and
+   ap0.getHead() = tc and
+   mid.getCallContext() = cc and
+   Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration())
+ }
+
+ /**
+ * Holds if `mid` has access path `ap0` and call context `cc`, and there is a
+ * `Stage4` store-step candidate for `tc` from the node of `mid` to `node`.
+ */
+ pragma[nomagic]
+ private predicate pathStoreStep(
+   PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+ ) {
+   mid.getAp() = ap0 and
+   mid.getCallContext() = cc and
+   Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration())
+ }
+
+ /**
+ * Holds if `mid` is at a return node with return position `pos`, its call
+ * context `innercc` is a `CallContextNoCall`, and `apa` and `config` are the
+ * approximation of its access path and its configuration.
+ */
+ private predicate pathOutOfCallable0(
+   PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+   Configuration config
+ ) {
+   config = mid.getConfiguration() and
+   apa = mid.getAp().getApprox() and
+   innercc = mid.getCallContext().(CallContextNoCall) and
+   pos = mid.getNodeEx().(RetNodeEx).getReturnPosition()
+ }
+
+ /**
+ * Holds if `mid` is at a return of kind `kind` that may return to `call`, as
+ * determined by `resolveReturn` for the inner call context. `cc` is the call
+ * context after the return.
+ */
+ pragma[nomagic]
+ private predicate pathOutOfCallable1(
+ PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+ Configuration config
+ ) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ pathOutOfCallable0(mid, pos, innercc, apa, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ |
+ // Record the returned-from callable and call only when
+ // `reducedViableImplInReturn` holds; otherwise use the unconstrained context.
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+ )
+ }
+
+ /**
+ * Gets an out node of `call` for return kind `kind` that is in
+ * `Stage4::revFlow` with access path approximation `apa` under `config`.
+ */
+ pragma[noinline]
+ private NodeEx getAnOutNodeFlow(
+   ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+ ) {
+   Stage4::revFlow(result, _, _, apa, config) and
+   kind.getAnOutNode(call) = result.asNode()
+ }
+
+/**
+ * Holds if data may flow from `mid` to `out`. The last step of this path
+ * is a return from a callable and is recorded by `cc`, if needed.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+ exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config |
+ pathOutOfCallable1(mid, call, kind, cc, apa, config) and
+ out = getAnOutNodeFlow(kind, call, apa, config)
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+ PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ apa = ap.getApprox()
+ )
+}
+
+ /**
+ * Holds if the `i`th parameter of `callable` is in `Stage4::revFlow` with
+ * access path approximation `apa` under `config`.
+ */
+ pragma[noinline]
+ private predicate parameterCand(
+   DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+ ) {
+   exists(ParamNodeEx param | param.isParameterOf(callable, i) |
+     Stage4::revFlow(param, _, _, apa, config)
+   )
+ }
+
+ /**
+ * Holds if `mid` is at the `i`th argument of `call` in context `outercc` with
+ * access path `ap`, `callable` is a target of `call` in that context, and the
+ * `i`th parameter of `callable` is a candidate for the approximation of `ap`.
+ */
+ pragma[nomagic]
+ private predicate pathIntoCallable0(
+ PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+ AccessPath ap
+ ) {
+ exists(AccessPathApprox apa |
+ pathIntoArg(mid, i, outercc, call, ap, apa) and
+ callable = resolveCall(call, outercc) and
+ // `any(int j | j <= i and j >= i)` denotes exactly `i`.
+ // NOTE(review): presumably written this way to influence join ordering - confirm.
+ parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration())
+ )
+ }
+
+/**
+ * Holds if data may flow from `mid` to `p` through `call`. The contexts
+ * before and after entering the callable are `outercc` and `innercc`,
+ * respectively.
+ */
+private predicate pathIntoCallable(
+ PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+ DataFlowCall call
+) {
+ exists(int i, DataFlowCallable callable, AccessPath ap |
+ pathIntoCallable0(mid, callable, i, outercc, call, ap) and
+ p.isParameterOf(callable, i) and
+ (
+ sc = TSummaryCtxSome(p, ap)
+ or
+ not exists(TSummaryCtxSome(p, ap)) and
+ sc = TSummaryCtxNone()
+ )
+ |
+ if recordDataFlowCallSite(call, callable)
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+}
+
+ /**
+ * Holds if data may flow from a parameter given by `sc` to a return of kind `kind`.
+ *
+ * `cc` and `config` are the call context and configuration of the path node at
+ * the return, `ap` is its access path, and `apa` is the approximation of `ap`.
+ */
+ pragma[nomagic]
+ private predicate paramFlowsThrough(
+ ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+ Configuration config
+ ) {
+ exists(PathNodeMid mid, RetNodeEx ret, int pos |
+ mid.getNodeEx() = ret and
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp() and
+ apa = ap.getApprox() and
+ pos = sc.getParameterPos() and
+ // Exclude a parameter-update return for the same position as the parameter
+ // that the flow started from.
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if data may flow from `mid` into the target of `call` and on to a
+ * return of kind `kind`, arriving there with access path `ap` (approximation
+ * `apa`). `cc` is the call context before entering the callable.
+ */
+ pragma[nomagic]
+ private predicate pathThroughCallable0(
+   DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+   AccessPathApprox apa
+ ) {
+   exists(SummaryCtx summary, CallContext inner |
+     paramFlowsThrough(kind, inner, summary, ap, apa, unbindConf(mid.getConfiguration())) and
+     pathIntoCallable(mid, _, cc, inner, summary, call)
+   )
+ }
+
+/**
+ * Holds if data may flow from `mid` through a callable to the node `out`.
+ * The context `cc` is restored to its value prior to entering the callable.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+ exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa |
+ pathThroughCallable0(call, mid, kind, cc, ap, apa) and
+ out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+ /**
+ * Provides the `subpaths` relation, which identifies flow through a callable
+ * that is summarized as a single edge, and `retReach`, which identifies nodes
+ * that can reach the return node of such a subpath.
+ */
+ private module Subpaths {
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ *
+ * This stage combines flow through the callable, flow into the callable, and
+ * flow from the parameter to a return.
+ */
+ pragma[nomagic]
+ private predicate subpaths01(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
+ pathIntoCallable(arg, par, _, innercc, sc, _) and
+ paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+ unbindConf(arg.getConfiguration()))
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ *
+ * This stage additionally requires `out` to be an out node of `kind` for some call.
+ */
+ pragma[nomagic]
+ private predicate subpaths02(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ subpaths01(arg, par, sc, innercc, kind, out, apout) and
+ out.asNode() = kind.getAnOutNode(_)
+ }
+
+ /** Gets the configuration of `n`; a `nomagic` wrapper around `getConfiguration`. */
+ pragma[nomagic]
+ private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
+ *
+ * `ret` is a non-hidden path node at a return node whose kind, call context,
+ * summary context, configuration and access path match the tuple.
+ */
+ pragma[nomagic]
+ private predicate subpaths03(
+ PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+ ) {
+ exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+ subpaths02(arg, par, sc, innercc, kind, out, apout) and
+ ret.getNodeEx() = retnode and
+ kind = retnode.getKind() and
+ innercc = ret.getCallContext() and
+ sc = ret.getSummaryCtx() and
+ ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+ apout = ret.getAp() and
+ not ret.isHidden()
+ )
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+ * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+ * `ret -> out` is summarized as the edge `arg -> out`.
+ */
+ predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+ exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+ // Both `par` and `out` must be successors of `arg` in the path graph.
+ pragma[only_bind_into](arg).getASuccessor() = par and
+ pragma[only_bind_into](arg).getASuccessor() = out and
+ subpaths03(arg, p, ret, o, apout) and
+ par.getNodeEx() = p and
+ out.getNodeEx() = o and
+ out.getAp() = apout
+ )
+ }
+
+ /**
+ * Holds if `n` can reach a return node in a summarized subpath.
+ */
+ predicate retReach(PathNode n) {
+ subpaths(_, _, n, _)
+ or
+ // Walk backwards along successor edges, but do not continue past a node
+ // that is the parameter node of some subpath-tuple.
+ exists(PathNode mid |
+ retReach(mid) and
+ n.getASuccessor() = mid and
+ not subpaths(_, mid, _, _)
+ )
+ }
+ }
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ */
+private predicate flowsTo(
+ PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+ flowsource.isSource() and
+ flowsource.getConfiguration() = configuration and
+ flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
+ (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
+ flowsink.getNodeEx().asNode() = sink
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+ flowsTo(_, _, source, sink, configuration)
+}
+
+ /**
+ * Computes statistics over the final path graph. With `fwd = true` all path
+ * nodes are counted; with `fwd = false` only those satisfying `reach` are.
+ * `nodes`, `fields` and `conscand` count the distinct underlying nodes, access
+ * path heads and access paths, and `tuples` counts the path nodes themselves.
+ */
+ private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) {
+   fwd = true and
+   tuples = count(PathNode p) and
+   nodes = count(NodeEx n | n = any(PathNodeImpl p).getNodeEx()) and
+   fields = count(TypedContent tc | tc = any(PathNodeMid p).getAp().getHead()) and
+   conscand = count(AccessPath a | a = any(PathNodeMid p).getAp())
+   or
+   fwd = false and
+   tuples = count(PathNode p | reach(p)) and
+   nodes = count(NodeEx n | n = any(PathNodeImpl p | reach(p)).getNodeEx()) and
+   fields = count(TypedContent tc | tc = any(PathNodeMid p | reach(p)).getAp().getHead()) and
+   conscand = count(AccessPath a | a = any(PathNodeMid p | reach(p)).getAp())
+ }
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Calculates per-stage metrics for data flow.
+ */
+predicate stageStats(
+ int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+ stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples)
+ or
+ stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples)
+}
+
+private module FlowExploration {
+ /**
+ * Holds if there is a step between two nodes whose enclosing callables are the
+ * distinct callables `c1` and `c2`. The step is a jump step or additional jump
+ * step under `config`, or flow into or out of a callable.
+ */
+ private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) {
+   exists(NodeEx n1, NodeEx n2 |
+     c1 = n1.getEnclosingCallable() and
+     c2 = n2.getEnclosingCallable() and
+     c1 != c2
+   |
+     jumpStep(n1, n2, config)
+     or
+     additionalJumpStep(n1, n2, config)
+     or
+     // flow into callable
+     viableParamArgEx(_, n2, n1)
+     or
+     // flow out of a callable
+     viableReturnPosOutEx(_, n1.(RetNodeEx).getReturnPosition(), n2)
+   )
+ }
+
+ /**
+ * Holds if `c` contains a source of `config`, or is reachable by
+ * `callableStep`s from a callable that does.
+ */
+ private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+   exists(Node n | c = getNodeEnclosingCallable(n) | config.isSource(n))
+   or
+   exists(DataFlowCallable pred | callableStep(pred, c, config) |
+     interestingCallableSrc(pred, config)
+   )
+ }
+
+ /**
+ * Holds if `c` contains a sink of `config`, or can reach a callable that does
+ * by `callableStep`s.
+ */
+ private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+   exists(Node n | c = getNodeEnclosingCallable(n) | config.isSink(n))
+   or
+   exists(DataFlowCallable succ | callableStep(c, succ, config) |
+     interestingCallableSink(succ, config)
+   )
+ }
+
+ /**
+ * The nodes of the extended callable graph: every interesting
+ * callable/configuration pair, plus one synthetic source node and one
+ * synthetic sink node.
+ */
+ private newtype TCallableExt =
+ TCallable(DataFlowCallable c, Configuration config) {
+ interestingCallableSrc(c, config) or
+ interestingCallableSink(c, config)
+ } or
+ TCallableSrc() or
+ TCallableSink()
+
+ /** Holds for the synthetic source node of the extended callable graph. */
+ private predicate callableExtSrc(TCallableSrc src) { any() }
+
+ /** Holds for the synthetic sink node of the extended callable graph. */
+ private predicate callableExtSink(TCallableSink sink) { any() }
+
+ /**
+ * Holds if there is a forward edge from `ce1` to `ce2` in the extended
+ * callable graph.
+ */
+ private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) {
+ // A `callableStep` between two callables in the same configuration.
+ exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config |
+ callableStep(c1, c2, config) and
+ ce1 = TCallable(c1, pragma[only_bind_into](config)) and
+ ce2 = TCallable(c2, pragma[only_bind_into](config))
+ )
+ or
+ // From the synthetic source node to every callable that contains a source.
+ exists(Node n, Configuration config |
+ ce1 = TCallableSrc() and
+ config.isSource(n) and
+ ce2 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ or
+ // From every callable that contains a sink to the synthetic sink node.
+ exists(Node n, Configuration config |
+ ce2 = TCallableSink() and
+ config.isSink(n) and
+ ce1 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ }
+
+ /** Holds if there is a forward edge from `ce2` to `ce1`, that is, the reverse of `callableExtStepFwd`. */
+ private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+ callableExtStepFwd(ce2, ce1)
+ }
+
+ /**
+ * Gets the shortest distance from the synthetic source node to `c`, measured
+ * in `callableExtStepFwd` edges.
+ */
+ private int distSrcExt(TCallableExt c) =
+ shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result)
+
+ /**
+ * Gets the shortest distance from the synthetic sink node to `c`, measured
+ * in `callableExtStepRev` edges.
+ */
+ private int distSinkExt(TCallableExt c) =
+ shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result)
+
+ /**
+ * Gets the distance from `c` to the nearest callable containing a source of
+ * `config`. The extended distance is reduced by one to discount the edge from
+ * the synthetic source node, so a callable containing a source has distance 0.
+ */
+ private int distSrc(DataFlowCallable c, Configuration config) {
+ result = distSrcExt(TCallable(c, config)) - 1
+ }
+
+ /**
+ * Gets the distance from `c` to the nearest callable containing a sink of
+ * `config`. The extended distance is reduced by one to discount the edge to
+ * the synthetic sink node, so a callable containing a sink has distance 0.
+ */
+ private int distSink(DataFlowCallable c, Configuration config) {
+ result = distSinkExt(TCallable(c, config)) - 1
+ }
+
+ /**
+ * The underlying representation of `PartialAccessPath`: either a type only,
+ * or a head `TypedContent` with a length between 1 and `accessPathLimit()`.
+ */
+ private newtype TPartialAccessPath =
+ TPartialNil(DataFlowType t) or
+ TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] }
+
+ /**
+ * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first
+ * element of the list and its length are tracked. If data flows from a source to
+ * a given node with a given `AccessPath`, this indicates the sequence of
+ * dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+ private class PartialAccessPath extends TPartialAccessPath {
+   /** Gets a textual representation of this access path. */
+   abstract string toString();
+
+   /** Gets the first `TypedContent` of this access path, if it is non-empty. */
+   TypedContent getHead() { this = TPartialCons(result, _) }
+
+   /** Gets the length of this access path; 0 for the nil path. */
+   int len() {
+     this = TPartialCons(_, result)
+     or
+     this = TPartialNil(_) and result = 0
+   }
+
+   /** Gets the stored type for a nil path, or the container type of the head otherwise. */
+   DataFlowType getType() {
+     this = TPartialNil(result)
+     or
+     result = this.getHead().getContainerType()
+   }
+ }
+
+ /** The empty partial access path, which carries only a type. */
+ private class PartialAccessPathNil extends PartialAccessPath, TPartialNil {
+ // Renders as `: ` followed by the pretty-printed type. The `concat` aggregate
+ // is applied to the values of `ppReprType(t)`.
+ // NOTE(review): presumably this yields the empty string when `ppReprType(t)`
+ // has no result - confirm against the QL aggregate semantics.
+ override string toString() {
+ exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t)))
+ }
+ }
+
+ /** A non-empty partial access path, given by its head and its length. */
+ private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+   /** Renders the head in brackets, with an elision marker and the length when longer than 1. */
+   override string toString() {
+     exists(TypedContent tc, int len | this = TPartialCons(tc, len) |
+       len = 1 and result = "[" + tc.toString() + "]"
+       or
+       len != 1 and result = "[" + tc.toString() + ", ... (" + len.toString() + ")]"
+     )
+   }
+ }
+
+ /**
+ * The underlying representation of `RevPartialAccessPath`: either empty, or a
+ * head `Content` with a length between 1 and `accessPathLimit()`.
+ */
+ private newtype TRevPartialAccessPath =
+ TRevPartialNil() or
+ TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] }
+
+ /**
+ * Conceptually a list of `Content`s, but only the first
+ * element of the list and its length are tracked.
+ */
+ private class RevPartialAccessPath extends TRevPartialAccessPath {
+   /** Gets a textual representation of this access path. */
+   abstract string toString();
+
+   /** Gets the first `Content` of this access path, if it is non-empty. */
+   Content getHead() { this = TRevPartialCons(result, _) }
+
+   /** Gets the length of this access path; 0 for the nil path. */
+   int len() {
+     this = TRevPartialCons(_, result)
+     or
+     this = TRevPartialNil() and result = 0
+   }
+ }
+
+ /** The empty reverse partial access path; it renders as the empty string. */
+ private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil {
+ override string toString() { result = "" }
+ }
+
+ /** A non-empty reverse partial access path, given by its head and its length. */
+ private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+   /** Renders the head in brackets, with an elision marker and the length when longer than 1. */
+   override string toString() {
+     exists(Content c, int len | this = TRevPartialCons(c, len) |
+       len = 1 and result = "[" + c.toString() + "]"
+       or
+       len != 1 and result = "[" + c.toString() + ", ... (" + len.toString() + ")]"
+     )
+   }
+ }
+
+ /** First component of a forward summary context: none, or a parameter node. */
+ private newtype TSummaryCtx1 =
+ TSummaryCtx1None() or
+ TSummaryCtx1Param(ParamNodeEx p)
+
+ /** Second component of a forward summary context: none, or a partial access path. */
+ private newtype TSummaryCtx2 =
+ TSummaryCtx2None() or
+ TSummaryCtx2Some(PartialAccessPath ap)
+
+ /** First component of a reverse summary context: none, or a return position. */
+ private newtype TRevSummaryCtx1 =
+ TRevSummaryCtx1None() or
+ TRevSummaryCtx1Some(ReturnPosition pos)
+
+ /** Second component of a reverse summary context: none, or a reverse partial access path. */
+ private newtype TRevSummaryCtx2 =
+ TRevSummaryCtx2None() or
+ TRevSummaryCtx2Some(RevPartialAccessPath ap)
+
+ /**
+ * The underlying representation of `PartialPathNode`: forward nodes explored
+ * from sources and reverse nodes explored from sinks. Both kinds only exist
+ * for configurations that have an `explorationLimit()`.
+ */
+ private newtype TPartialPathNode =
+ TPartialPathNodeFwd(
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ // A source that is not a full barrier, with empty contexts and a nil access path ...
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = TPartialNil(node.getDataFlowType()) and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit())
+ or
+ // ... or a node reached by a partial step, as long as its enclosing callable
+ // is within the exploration limit of a source.
+ partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and
+ distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ } or
+ TPartialPathNodeRev(
+ NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ // A sink that is not a full barrier, with empty contexts and a nil access path ...
+ sinkNode(node, config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil() and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit())
+ or
+ // ... or a node reached by a reverse partial step, as long as the head content
+ // is not cleared at the node, the node is not a full barrier, and its
+ // enclosing callable is within the exploration limit of a sink.
+ exists(PartialPathNodeRev mid |
+ revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+ not clearsContentCached(node.asNode(), ap.getHead()) and
+ not fullBarrier(node, config) and
+ distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ )
+ }
+
+ /**
+ * Holds if `node` is reached by a forward partial step from some existing
+ * forward partial path node, `node` is not a full barrier, the content of the
+ * head of `ap` is not cleared at `node`, and, when `node` is a casting node,
+ * its type is compatible with the type of `ap`.
+ */
+ pragma[nomagic]
+ private predicate partialPathNodeMk0(
+   NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+   Configuration config
+ ) {
+   exists(PartialPathNodeFwd pred | partialPathStep(pred, node, cc, sc1, sc2, ap, config)) and
+   not fullBarrier(node, config) and
+   not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
+   (
+     node.asNode() instanceof CastingNode
+     implies
+     compatibleTypes(node.getDataFlowType(), ap.getType())
+   )
+ }
+
+ /**
+ * A `Node` augmented with a call context, an access path, and a configuration.
+ */
+ class PartialPathNode extends TPartialPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context.
+ */
+ string toStringWithContext() { // relies on `ppCtx`, which is only defined for forward nodes
+ result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.getNodeEx().projectToNode() = result }
+
+ private NodeEx getNodeEx() { // delegates to the forward or reverse subclass, whichever this node is
+ result = this.(PartialPathNodeFwd).getNodeEx() or
+ result = this.(PartialPathNodeRev).getNodeEx()
+ }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() } // overridden by the forward and reverse subclasses
+
+ /** Gets a successor of this node, if any. */
+ PartialPathNode getASuccessor() { none() } // overridden by the forward and reverse subclasses
+
+ /**
+ * Gets the approximate distance to the nearest source measured in number
+ * of interprocedural steps.
+ */
+ int getSourceDistance() {
+ result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ /**
+ * Gets the approximate distance to the nearest sink measured in number
+ * of interprocedural steps.
+ */
+ int getSinkDistance() {
+ result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ private string ppAp() { // the access path as text, preceded by a space unless it is empty
+ exists(string s |
+ s = this.(PartialPathNodeFwd).getAp().toString() or
+ s = this.(PartialPathNodeRev).getAp().toString()
+ |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ private string ppCtx() { // the call context in angle brackets; has a result for forward nodes only
+ result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+ }
+
+ /** Holds if this is a source in a forward-flow path. */
+ predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+ /** Holds if this is a sink in a reverse-flow path. */
+ predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+ }
+
+ /**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ */
+ module PartialPathGraph {
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b }
+ }
+
+ private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { // A forward exploration node; its fields are the components of `TPartialPathNodeFwd`.
+ NodeEx node;
+ CallContext cc;
+ TSummaryCtx1 sc1;
+ TSummaryCtx2 sc2;
+ PartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) }
+
+ NodeEx getNodeEx() { result = node }
+
+ CallContext getCallContext() { result = cc }
+
+ TSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ TSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ PartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PartialPathNodeFwd getASuccessor() { // a forward node whose components are reached from this node by `partialPathStep`
+ partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+ result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+ }
+
+ predicate isSource() { // a source node with an unrestricted call context, no summary context, and a nil access path
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap instanceof TPartialNil
+ }
+ }
+
+ private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { // A reverse exploration node; its fields are the components of `TPartialPathNodeRev`.
+ NodeEx node;
+ TRevSummaryCtx1 sc1;
+ TRevSummaryCtx2 sc2;
+ RevPartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) }
+
+ NodeEx getNodeEx() { result = node }
+
+ TRevSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ TRevSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ RevPartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PartialPathNodeRev getASuccessor() { // `this` is reached from `result` by a reverse step, so the edge points in the direction of data flow
+ revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+ this.getAp(), this.getConfiguration())
+ }
+
+ predicate isSink() { // a sink node with no summary context and a nil access path
+ sinkNode(node, config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil()
+ }
+ }
+
+ private predicate partialPathStep( // Holds if forward exploration can step from `mid` to the tuple `(node, cc, sc1, sc2, ap, config)`.
+ PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and // guards only the parenthesised local steps below
+ (
+ localFlowStep(mid.getNodeEx(), node, config) and // local step: contexts and access path are unchanged
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(mid.getNodeEx(), node, config) and // additional local step: requires and yields a nil access path
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ )
+ or
+ jumpStep(mid.getNodeEx(), node, config) and // jump step: contexts are reset, access path is kept
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(mid.getNodeEx(), node, config) and // additional jump step: contexts are reset; requires and yields a nil access path
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ or
+ partialPathStoreStep(mid, _, _, node, ap) and // store step: `ap` is the extended access path
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(PartialAccessPath ap0, TypedContent tc | // read step: `ap0` is the access path of `mid`, with head `tc`
+ partialPathReadStep(mid, ap0, tc, node, cc, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsFwd(ap, tc, ap0, config) // `ap` is a path that some store of `tc` extended to `ap0`
+ )
+ or
+ partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) // flow from an argument into a parameter
+ or
+ partialPathOutOfCallable(mid, node, cc, ap, config) and // flow out of a callable via a return; the summary context is reset
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None()
+ or
+ partialPathThroughCallable(mid, node, cc, ap, config) and // flow through a call; the summary context of `mid` is kept
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ bindingset[result, i]
+ private int unbindInt(int i) { i <= result and i >= result } // holds exactly when `result = i`, written as two inequalities; presumably so the equality is not used to bind either side
+
+ pragma[inline]
+ private predicate partialPathStoreStep( // Holds if `mid` (access path `ap1`) stores `tc` into `node`, giving an access path `ap2` with head `tc`.
+ PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node,
+ PartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode, DataFlowType contentType |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and
+ store(midNode, tc, node, contentType, mid.getConfiguration()) and
+ ap2.getHead() = tc and
+ ap2.len() = unbindInt(ap1.len() + 1) and // `ap2` is one element longer than `ap1`
+ compatibleTypes(ap1.getType(), contentType) // the type of `ap1` must be compatible with the content type of the store
+ )
+ }
+
+ pragma[nomagic]
+ private predicate apConsFwd( // Holds if some forward node in `config` has a store step of `tc` that takes `ap1` to `ap2`.
+ PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStoreStep(mid, ap1, tc, _, ap2) and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathReadStep( // Holds if `mid` has access path `ap` with head `tc` and the content of `tc` is read from its node into `node`.
+ PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc,
+ Configuration config
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and
+ read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and
+ ap.getHead() = tc and // only the head of the access path can be read
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ cc = mid.getCallContext() // the call context is unchanged by a read
+ )
+ }
+
+ private predicate partialPathOutOfCallable0( // Holds if `mid` is a return node at `pos` whose call context `innercc` is a `CallContextNoCall`.
+ PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap,
+ Configuration config
+ ) {
+ pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+ innercc = mid.getCallContext() and
+ innercc instanceof CallContextNoCall and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ }
+
+ pragma[nomagic]
+ private predicate partialPathOutOfCallable1( // Holds if flow at return node `mid` may return to `call` with return kind `kind`; `cc` is the resulting call context.
+ PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ partialPathOutOfCallable0(mid, pos, innercc, ap, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ |
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() // record the returned-from callable only when `reducedViableImplInReturn` holds
+ )
+ }
+
+ private predicate partialPathOutOfCallable( // Holds if flow at return node `mid` continues at `out`, an out-node of a call it may return to.
+ PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnKindExt kind, DataFlowCall call |
+ partialPathOutOfCallable1(mid, call, kind, cc, ap, config)
+ |
+ out.asNode() = kind.getAnOutNode(call)
+ )
+ }
+
+ pragma[noinline]
+ private predicate partialPathIntoArg( // Holds if `mid` is argument `i` of `call`, with call context `cc` and access path `ap`.
+ PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathIntoCallable0( // Holds if `mid` is argument `i` of `call`, and `callable` is a target of `call` as resolved in context `outercc`.
+ PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc,
+ DataFlowCall call, PartialAccessPath ap, Configuration config
+ ) {
+ partialPathIntoArg(mid, i, outercc, call, ap, config) and
+ callable = resolveCall(call, outercc)
+ }
+
+ private predicate partialPathIntoCallable( // Holds if flow at argument `mid` enters parameter `p` of a target of `call`; `sc1`/`sc2` record `p` and `ap`.
+ PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc,
+ TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(int i, DataFlowCallable callable |
+ partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and
+ p.isParameterOf(callable, i) and
+ sc1 = TSummaryCtx1Param(p) and
+ sc2 = TSummaryCtx2Some(ap)
+ |
+ if recordDataFlowCallSite(call, callable) // keep the specific call site only when `recordDataFlowCallSite` holds
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate paramFlowsThroughInPartialPath( // Holds if some forward node is a return node of kind `kind` with the given call context, summary context, and access path.
+ ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid, RetNodeEx ret |
+ mid.getNodeEx() = ret and
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp()
+ )
+ }
+
+ pragma[noinline]
+ private predicate partialPathThroughCallable0( // Holds if flow entering `call` at argument `mid` reaches a return of kind `kind` with access path `ap` under the summary context created on entry.
+ DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 |
+ partialPathIntoCallable(mid, _, cc, innercc, sc1, sc2, call, _, config) and
+ paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config)
+ )
+ }
+
+ private predicate partialPathThroughCallable( // Holds if flow at argument `mid` passes through a call and continues at its out-node `out` with access path `ap`.
+ PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, ReturnKindExt kind |
+ partialPathThroughCallable0(call, mid, kind, cc, ap, config) and
+ out.asNode() = kind.getAnOutNode(call)
+ )
+ }
+
+ private predicate revPartialPathStep( // Holds if reverse exploration can step from `mid` back to the tuple `(node, sc1, sc2, ap, config)`; data flows from `node` to `mid`.
+ PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
+ RevPartialAccessPath ap, Configuration config
+ ) {
+ localFlowStep(node, mid.getNodeEx(), config) and // local step: context and access path are unchanged
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(node, mid.getNodeEx(), config) and // additional local step: requires and yields a nil access path
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ jumpStep(node, mid.getNodeEx(), config) and // jump step: summary context is reset, access path is kept
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(node, mid.getNodeEx(), config) and // additional jump step: summary context is reset; requires and yields a nil access path
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ revPartialPathReadStep(mid, _, _, node, ap) and // read step taken in reverse: `ap` is the extended access path
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(RevPartialAccessPath ap0, Content c | // store step taken in reverse: `ap0` is the access path of `mid`, with head `c`
+ revPartialPathStoreStep(mid, ap0, c, node, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsRev(ap, c, ap0, config) // `ap` is a path that some reverse read of `c` extended to `ap0`
+ )
+ or
+ exists(ParamNodeEx p | // from a parameter back to an argument; only when `mid` has no summary context
+ mid.getNodeEx() = p and
+ viableParamArgEx(_, p, node) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ or
+ exists(ReturnPosition pos | // from an out-node of a call back into a return node of the callee
+ revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and
+ pos = getReturnPosition(node.asNode())
+ )
+ or
+ revPartialPathThroughCallable(mid, node, ap, config) and // back through a call; the summary context of `mid` is kept
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ pragma[inline]
+ private predicate revPartialPathReadStep( // Holds if `node` reads `c` into the node of `mid` (access path `ap1`), giving a reverse access path `ap2` with head `c`.
+ PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node,
+ RevPartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and
+ read(node, c, midNode, mid.getConfiguration()) and
+ ap2.getHead() = c and
+ ap2.len() = unbindInt(ap1.len() + 1) // `ap2` is one element longer than `ap1`
+ )
+ }
+
+ pragma[nomagic]
+ private predicate apConsRev( // Holds if some reverse node in `config` has a reverse read step of `c` that takes `ap1` to `ap2`.
+ RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config
+ ) {
+ exists(PartialPathNodeRev mid |
+ revPartialPathReadStep(mid, ap1, c, _, ap2) and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathStoreStep( // Holds if `mid` has access path `ap` with head `c` and `node` stores content `c` into the node of `mid`.
+ PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config
+ ) {
+ exists(NodeEx midNode, TypedContent tc |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and
+ store(node, tc, midNode, _, config) and
+ ap.getHead() = c and
+ config = mid.getConfiguration() and
+ tc.getContent() = c // only the content of `tc` is compared; its type is not used here
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathIntoReturn( // Holds if `mid` is an out-node of `call` for return position `pos`; `sc1`/`sc2` record `pos` and `ap`.
+ PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2,
+ DataFlowCall call, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(NodeEx out |
+ mid.getNodeEx() = out and
+ viableReturnPosOutEx(call, pos, out) and
+ sc1 = TRevSummaryCtx1Some(pos) and
+ sc2 = TRevSummaryCtx2Some(ap) and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathFlowsThrough( // Holds if some reverse node is a parameter at position `pos` with the given summary context and access path.
+ int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeRev mid, ParamNodeEx p |
+ mid.getNodeEx() = p and
+ p.getPosition() = pos and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable0( // Holds if reverse flow entering `call` at out-node `mid` reaches a parameter at position `pos` with access path `ap` under the summary context created on entry.
+ DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 |
+ revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and
+ revPartialPathFlowsThrough(pos, sc1, sc2, ap, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable( // Holds if reverse flow at out-node `mid` passes back through a call and continues at its argument `node` with access path `ap`.
+ PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, int pos |
+ revPartialPathThroughCallable0(call, mid, pos, ap, config) and
+ node.asNode().(ArgNode).argumentOf(call, pos)
+ )
+ }
+}
+
+import FlowExploration
+
+private predicate partialFlow( // Holds if `node` is reachable in one or more successor steps from `source`, a forward source of `configuration`.
+ PartialPathNode source, PartialPathNode node, Configuration configuration
+) {
+ source.getConfiguration() = configuration and
+ source.isFwdSource() and
+ node = source.getASuccessor+()
+}
+
+private predicate revPartialFlow( // Holds if `sink`, a reverse sink of `configuration`, is reachable in one or more successor steps from `node`.
+ PartialPathNode node, PartialPathNode sink, Configuration configuration
+) {
+ sink.getConfiguration() = configuration and
+ sink.isRevSink() and
+ node.getASuccessor+() = sink
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll
new file mode 100644
index 00000000000..4ca06c93362
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll
@@ -0,0 +1,4559 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isBarrier`.
+ * // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+ bindingset[this]
+ Configuration() { any() }
+
+ /**
+ * Holds if `source` is a relevant data flow source.
+ */
+ abstract predicate isSource(Node source);
+
+ /**
+ * Holds if `sink` is a relevant data flow sink.
+ */
+ abstract predicate isSink(Node sink);
+
+ /**
+ * Holds if data flow through `node` is prohibited. This completely removes
+ * `node` from the data flow graph.
+ */
+ predicate isBarrier(Node node) { none() }
+
+ /** Holds if data flow into `node` is prohibited. */
+ predicate isBarrierIn(Node node) { none() }
+
+ /** Holds if data flow out of `node` is prohibited. */
+ predicate isBarrierOut(Node node) { none() }
+
+ /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+ predicate isBarrierGuard(BarrierGuard guard) { none() }
+
+ /**
+ * Holds if the additional flow step from `node1` to `node2` must be taken
+ * into account in the analysis.
+ */
+ predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
+
+ /**
+ * Holds if an arbitrary number of implicit read steps of content `c` may be
+ * taken at `node`.
+ */
+ predicate allowImplicitRead(Node node, Content c) { none() }
+
+ /**
+ * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a
+ * value of 0 disables field flow), or a larger value to get more results.
+ */
+ int fieldFlowBranchLimit() { result = 2 }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ */
+ predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ *
+ * The corresponding paths are generated from the end-points and the graph
+ * included in the module `PathGraph`.
+ */
+ predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowTo(Node sink) { hasFlow(_, sink) }
+
+ /**
+ * Holds if data may flow from some source to the expression `sink` for this configuration.
+ */
+ predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
+
+ /**
+ * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+ * measured in approximate number of interprocedural steps.
+ */
+ int explorationLimit() { none() }
+
+ /**
+ * Holds if there is a partial data flow path from `source` to `node`. The
+ * approximate distance between `node` and the closest source is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards sink definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sources is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */
+ final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+ partialFlow(source, node, this) and
+ dist = node.getSourceDistance()
+ }
+
+ /**
+ * Holds if there is a partial data flow path from `node` to `sink`. The
+ * approximate distance between `node` and the closest sink is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards source definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sinks is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ *
+ * Note that reverse flow has slightly lower precision than the corresponding
+ * forward flow, as reverse flow disregards type pruning among other features.
+ */
+ final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+ revPartialFlow(node, sink, this) and
+ dist = node.getSinkDistance()
+ }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+ bindingset[this]
+ ConfigurationRecursionPrevention() { any() }
+
+ override predicate hasFlow(Node source, Node sink) {
+ strictcount(Node n | this.isSource(n)) < 0 // never holds (a `strictcount` result is at least 1); it only makes `hasFlow` reference `isSource`
+ or
+ strictcount(Node n | this.isSink(n)) < 0 // likewise, only a reference to `isSink`
+ or
+ strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 // likewise, only a reference to `isAdditionalFlowStep`
+ or
+ super.hasFlow(source, sink) // the only disjunct that can produce results
+ }
+}
+
+private newtype TNodeEx = // Data-flow nodes extended with synthetic implicit-read nodes.
+ TNodeNormal(Node n) or // An ordinary data-flow node.
+ TNodeImplicitRead(Node n, boolean hasRead) { // Two extra nodes (`hasRead` false and true) for each node where some configuration allows implicit reads.
+ any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true]
+ }
+
+private class NodeEx extends TNodeEx { // Wrapper giving ordinary and implicit-read nodes a common interface.
+ string toString() {
+ result = this.asNode().toString()
+ or
+ exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") // implicit-read nodes are marked with an " [Ext]" suffix
+ }
+
+ Node asNode() { this = TNodeNormal(result) } // has a result for ordinary nodes only
+
+ predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) }
+
+ Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } // the underlying `Node` for either kind
+
+ pragma[nomagic]
+ private DataFlowCallable getEnclosingCallable0() {
+ nodeEnclosingCallable(this.projectToNode(), result)
+ }
+
+ pragma[inline]
+ DataFlowCallable getEnclosingCallable() { // defined for both kinds of node, via `projectToNode`
+ pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result)
+ }
+
+ pragma[nomagic]
+ private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } // uses `asNode`, so has no result for implicit-read nodes
+
+ pragma[inline]
+ DataFlowType getDataFlowType() {
+ pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result)
+ }
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+private class ArgNodeEx extends NodeEx { // A `NodeEx` whose underlying ordinary node is an `ArgNode`.
+ ArgNodeEx() { this.asNode() instanceof ArgNode }
+}
+
+private class ParamNodeEx extends NodeEx { // A `NodeEx` whose underlying ordinary node is a `ParamNode`.
+ ParamNodeEx() { this.asNode() instanceof ParamNode }
+
+ predicate isParameterOf(DataFlowCallable c, int i) { // forwards to `ParamNode.isParameterOf`
+ this.asNode().(ParamNode).isParameterOf(c, i)
+ }
+
+ int getPosition() { this.isParameterOf(_, result) } // the parameter index, for any callable
+}
+
+private class RetNodeEx extends NodeEx { // A `NodeEx` whose underlying ordinary node is a `ReturnNodeExt`.
+ RetNodeEx() { this.asNode() instanceof ReturnNodeExt }
+
+ ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) }
+
+ ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() }
+}
+
+private predicate inBarrier(NodeEx node, Configuration config) { // Holds if `node` is a source that `config` also marks with `isBarrierIn`.
+ exists(Node n |
+ node.asNode() = n and
+ config.isBarrierIn(n) and
+ config.isSource(n)
+ )
+}
+
+private predicate outBarrier(NodeEx node, Configuration config) { // Holds if `node` is a sink that `config` also marks with `isBarrierOut`.
+ exists(Node n |
+ node.asNode() = n and
+ config.isBarrierOut(n) and
+ config.isSink(n)
+ )
+}
+
+private predicate fullBarrier(NodeEx node, Configuration config) { // Holds if `node` is blocked entirely in `config`.
+ exists(Node n | node.asNode() = n |
+ config.isBarrier(n) // an explicit barrier
+ or
+ config.isBarrierIn(n) and // an in-barrier that is not a source
+ not config.isSource(n)
+ or
+ config.isBarrierOut(n) and // an out-barrier that is not a sink
+ not config.isSink(n)
+ or
+ exists(BarrierGuard g | // a node guarded by one of the configuration's barrier guards
+ config.isBarrierGuard(g) and
+ n = g.getAGuardedNode()
+ )
+ )
+}
+
+pragma[nomagic]
+private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) } // `NodeEx` view of `isSource`; ordinary nodes only
+
+pragma[nomagic]
+private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) } // `NodeEx` view of `isSink`; ordinary nodes only
+
+/**
+ * Holds if data can flow in one local step from `node1` to `node2`.
+ */
+private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // an ordinary local step that is not blocked by a barrier at either end
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ simpleLocalFlowStepExt(n1, n2) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // from a node to its implicit-read node with `hasRead = false`
+ config.allowImplicitRead(n, _) and
+ node1.asNode() = n and
+ node2.isImplicitReadNode(n, false)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` does not jump between callables.
+ */
+private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // a configuration-defined step within one callable, not blocked by a barrier at either end
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // from the implicit-read node with `hasRead = true` back to the node itself
+ config.allowImplicitRead(n, _) and
+ node1.isImplicitReadNode(n, true) and
+ node2.asNode() = n
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
+ */
+private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 |
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ jumpStepCached(n1, n2) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` jumps between callables.
+ */
+private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 |
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { // Holds if `c` is read from `node1` into `node2`, explicitly or as an allowed implicit read.
+ read(node1.asNode(), c, node2.asNode()) // an ordinary read step between ordinary nodes
+ or
+ exists(Node n | // an implicit read at `n`: from either implicit-read node to the one with `hasRead = true`
+ node2.isImplicitReadNode(n, true) and
+ node1.isImplicitReadNode(n, _) and
+ config.allowImplicitRead(n, c)
+ )
+}
+
+private predicate store( // Holds if `node1` is stored into `node2` as `tc`, restricted to content that is read somewhere in `config`.
+ NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
+) {
+ store(node1.asNode(), tc, node2.asNode(), contentType) and
+ read(_, tc.getContent(), _, config) // skip stores of content that is never read
+}
+
+pragma[nomagic]
+private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { // `NodeEx` wrapper around `viableReturnPosOut`
+ viableReturnPosOut(call, pos, out.asNode())
+}
+
+pragma[nomagic]
+private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { // `NodeEx` wrapper around `viableParamArg`
+ viableParamArg(call, p.asNode(), arg.asNode())
+}
+
+/**
+ * Holds if field flow should be used for the given configuration.
+ */
+private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 }
+
+private module Stage1 {
+ class ApApprox = Unit; // aliased to `Unit` in this stage
+
+ class Ap = Unit; // aliased to `Unit` in this stage
+
+ class ApOption = Unit; // aliased to `Unit` in this stage
+
+ class Cc = boolean; // in `fwdFlow`: whether the node is reached through an argument in a call
+
+ /* Begin: Stage 1 logic. */
+ /**
+ * Holds if `node` is reachable from a source in the configuration `config`.
+ *
+ * The Boolean `cc` records whether the node is reached through an
+ * argument in a call.
+ */
+ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) {
+ not fullBarrier(node, config) and
+ (
+ sourceNode(node, config) and // a source starts with `cc = false`
+ cc = false
+ or
+ exists(NodeEx mid | // local step: `cc` is kept
+ fwdFlow(mid, cc, config) and
+ localFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step: `cc` is kept
+ fwdFlow(mid, cc, config) and
+ additionalLocalFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: `cc` is reset to false
+ fwdFlow(mid, _, config) and
+ jumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: `cc` is reset to false
+ fwdFlow(mid, _, config) and
+ additionalJumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // store
+ exists(NodeEx mid |
+ useFieldFlow(config) and
+ fwdFlow(mid, cc, config) and
+ store(mid, _, node, _, config) and
+ not outBarrier(mid, config)
+ )
+ or
+ // read
+ exists(Content c |
+ fwdFlowRead(c, node, cc, config) and
+ fwdFlowConsCand(c, config) and // only content that some reachable store targets
+ not inBarrier(node, config)
+ )
+ or
+ // flow into a callable
+ exists(NodeEx arg |
+ fwdFlow(arg, _, config) and
+ viableParamArgEx(_, node, arg) and
+ cc = true
+ )
+ or
+ // flow out of a callable
+ exists(DataFlowCall call |
+ fwdFlowOut(call, node, false, config) and // the return was reached with `cc = false`
+ cc = false
+ or
+ fwdFlowOutFromArg(call, node, config) and // the return was reached with `cc = true`...
+ fwdFlowIsEntered(call, cc, config) // ...so take the `cc` of a reached argument of the same call
+ )
+ )
+ }
+
+ /** Holds if `fwdFlow(node, cc, config)` holds for some call context `cc`. */
+ private predicate fwdFlow(NodeEx node, Configuration config) {
+   exists(Cc cc | fwdFlow(node, cc, config))
+ }
+
+ /**
+  * Holds if `node` is the target of a read of `c` in `config` whose origin
+  * is covered by `fwdFlow` with call context `cc`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
+   exists(NodeEx pred | read(pred, c, node, config) | fwdFlow(pred, cc, config))
+ }
+
+ /**
+  * Holds if `c` is the content of a store step in `config` whose origin is
+  * covered by `fwdFlow` and whose target is not a full barrier. Requires
+  * that field flow is enabled for `config`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Content c, Configuration config) {
+   useFieldFlow(config) and
+   exists(NodeEx src, NodeEx tgt, TypedContent tc |
+     fwdFlow(src, _, config) and
+     store(src, tc, tgt, _, config) and
+     not fullBarrier(tgt, config)
+   |
+     tc.getContent() = c
+   )
+ }
+
+ /**
+  * Holds if some return node with return position `pos` is covered by
+  * `fwdFlow` with call context `cc`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+   exists(RetNodeEx ret | pos = ret.getReturnPosition() | fwdFlow(ret, cc, config))
+ }
+
+ /**
+  * Holds if `out` is a viable output of `call` for a return position that
+  * is covered by `fwdFlow` with call context `cc`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) {
+   exists(ReturnPosition retPos |
+     viableReturnPosOutEx(call, retPos, out) and
+     fwdFlowReturnPosition(retPos, cc, config)
+   )
+ }
+
+ /** Holds if `fwdFlowOut(call, out, true, config)` holds, i.e. the return was reached with `cc = true`. */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) {
+   exists(Cc viaArg | viaArg = true | fwdFlowOut(call, out, viaArg, config))
+ }
+
+ /**
+  * Holds if some argument of `call` (per `viableParamArgEx`) is covered by
+  * `fwdFlow` with call context `cc`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+   exists(ArgNodeEx arg | viableParamArgEx(call, _, arg) | fwdFlow(arg, cc, config))
+ }
+
+ /**
+  * Holds if `node` lies on a path from a source to a sink in `config`.
+  *
+  * `toReturn` is a Boolean that records whether the node must be returned
+  * from the enclosing callable in order to reach a sink.
+  *
+  * The result is `revFlow0` restricted to nodes covered by `fwdFlow`.
+  */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+   fwdFlow(node, config) and
+   revFlow0(node, toReturn, config)
+ }
+
+ pragma[nomagic] // worker for `revFlow/3`, which additionally intersects the result with `fwdFlow`
+ private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) {
+ fwdFlow(node, config) and // base case: a forward-reachable sink, `toReturn = false`
+ sinkNode(node, config) and
+ toReturn = false
+ or
+ exists(NodeEx mid | // value-preserving local step, `toReturn` unchanged
+ localFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step, `toReturn` unchanged
+ additionalLocalFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: `toReturn` of `mid` is discarded
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: `toReturn` of `mid` is discarded
+ additionalJumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // store: only for contents that are read somewhere in the reverse flow
+ exists(Content c |
+ revFlowStore(c, node, toReturn, config) and
+ revFlowConsCand(c, config)
+ )
+ or
+ // read: only for contents that are stored somewhere in the forward flow
+ exists(NodeEx mid, Content c |
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(mid, toReturn, pragma[only_bind_into](config))
+ )
+ or
+ // flow into a callable
+ exists(DataFlowCall call |
+ revFlowIn(call, node, false, config) and // parameter reaches a sink without returning
+ toReturn = false
+ or
+ revFlowInToReturn(call, node, config) and // parameter must be returned ...
+ revFlowIsReturned(call, toReturn, config) // ... so take `toReturn` from an output of `call`
+ )
+ or
+ // flow out of a callable: `node` is a return node for a relevant return position
+ exists(ReturnPosition pos |
+ revFlowOut(pos, config) and
+ node.(RetNodeEx).getReturnPosition() = pos and
+ toReturn = true
+ )
+ }
+
+ /**
+  * Holds if `c` is the content of a read step in `config` that goes from a
+  * node covered by `fwdFlow` to a node covered by `revFlow`, and `c` is also
+  * a `fwdFlowConsCand`.
+  */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Content c, Configuration config) {
+   exists(NodeEx src, NodeEx tgt |
+     fwdFlow(src, pragma[only_bind_into](config)) and
+     read(src, c, tgt, config) and
+     fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+     revFlow(pragma[only_bind_into](tgt), _, pragma[only_bind_into](config))
+   )
+ }
+
+ /**
+  * Holds if `node` is the origin of a store of content `c` in `config`
+  * whose target is covered by `revFlow` with the given `toReturn`, where `c`
+  * is a `fwdFlowConsCand`.
+  */
+ pragma[nomagic]
+ private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) {
+   exists(NodeEx tgt, TypedContent tc |
+     revFlow(tgt, toReturn, pragma[only_bind_into](config)) and
+     fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+     store(node, tc, tgt, _, config) and
+     tc.getContent() = c
+   )
+ }
+
+ /**
+  * Holds if `c` is both stored (`revFlowStore`) and read (`revFlowConsCand`)
+  * in the flow covered by `revFlow`.
+  */
+ private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+   revFlowStore(c, _, _, conf) and
+   revFlowConsCand(c, conf)
+ }
+
+ /**
+  * Holds if `out` is a viable output of `call` for return position `pos`,
+  * and `pos` is covered by the forward flow (under any call context).
+  */
+ pragma[nomagic]
+ predicate viableReturnPosOutNodeCandFwd1(
+   DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+ ) {
+   viableReturnPosOutEx(call, pos, out) and
+   fwdFlowReturnPosition(pos, _, config)
+ }
+
+ /**
+  * Holds if return position `pos` has a viable, forward-reachable output
+  * at some call that is covered by `revFlow`.
+  */
+ pragma[nomagic]
+ private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+   exists(NodeEx out |
+     viableReturnPosOutNodeCandFwd1(_, pos, out, config) and
+     revFlow(out, _, config)
+   )
+ }
+
+ /**
+  * Holds if `arg` is a viable argument for parameter `p` at `call`, and
+  * `arg` is covered by the forward flow.
+  */
+ pragma[nomagic]
+ predicate viableParamArgNodeCandFwd1(
+   DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+ ) {
+   fwdFlow(arg, config) and
+   viableParamArgEx(call, p, arg)
+ }
+
+ /**
+  * Holds if `arg` is a forward-reachable argument of `call` whose matching
+  * parameter is covered by `revFlow` with the given `toReturn`.
+  */
+ pragma[nomagic]
+ private predicate revFlowIn(
+   DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+ ) {
+   exists(ParamNodeEx param |
+     viableParamArgNodeCandFwd1(call, param, arg, config)
+   |
+     revFlow(param, toReturn, config)
+   )
+ }
+
+ /** Holds if `revFlowIn(call, arg, true, config)` holds, i.e. the parameter must be returned to reach a sink. */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) {
+   exists(boolean toReturn | toReturn = true | revFlowIn(call, arg, toReturn, config))
+ }
+
+ /**
+  * Holds if some output of `call` is covered by `revFlow` with the given
+  * `toReturn`, and that output is reached in the forward flow from a return
+  * whose call context is `true` (`fwdFlowOutFromArg`), so that reverse flow
+  * may continue through the callee to an argument of `call`.
+  */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+   exists(NodeEx out | fwdFlowOutFromArg(call, out, config) | revFlow(out, toReturn, config))
+ }
+
+ /**
+  * Holds if the store step from `node1` to `node2` with typed content `tc`
+  * is relevant after stage 1: `node2` is covered by `revFlow` and the content
+  * of `tc` is both read and stored in the reverse flow. The access path `ap1`
+  * is unconstrained in this stage.
+  */
+ pragma[nomagic]
+ predicate storeStepCand(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+   Configuration config
+ ) {
+   exists(Content content |
+     revFlowIsReadAndStored(content, pragma[only_bind_into](config)) and
+     revFlow(node2, pragma[only_bind_into](config)) and
+     store(node1, tc, node2, contentType, config) and
+     content = tc.getContent() and
+     exists(ap1)
+   )
+ }
+
+ pragma[nomagic] // read steps that remain relevant after stage 1
+ predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and // `c` is both read and stored in the reverse flow
+ revFlow(n2, pragma[only_bind_into](config)) and // the read target is on a source-to-sink path
+ read(n1, c, n2, pragma[only_bind_into](config))
+ }
+
+ /** Holds if `revFlow(node, toReturn, config)` holds for some `toReturn`. */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, Configuration config) {
+   exists(boolean toReturn | revFlow(node, toReturn, config))
+ }
+
+ /**
+  * Holds if `revFlow(node, toReturn, config)` holds; `returnAp` and `ap` are
+  * unconstrained in this stage and only need to exist.
+  */
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+   exists(returnAp) and
+   exists(ap) and
+   revFlow(node, toReturn, config)
+ }
+
+ /**
+  * Holds if `node` is covered by `fwdFlow` with call context `true` and by
+  * `revFlow` with `toReturn = true`, and is neither an in-barrier nor an
+  * out-barrier in `config`.
+  */
+ private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
+   fwdFlow(node, true, config) and
+   revFlow(node, true, config) and
+   not (inBarrier(node, config) or outBarrier(node, config))
+ }
+
+ /**
+  * Holds if flow may return from `callable` through a return node of kind
+  * `kind` that is a `throughFlowNodeCand`.
+  */
+ pragma[nomagic]
+ private predicate returnFlowCallableNodeCand(
+   DataFlowCallable callable, ReturnKindExt kind, Configuration config
+ ) {
+   exists(RetNodeEx ret |
+     throughFlowNodeCand(ret, config)
+   |
+     ret.getEnclosingCallable() = callable and
+     ret.getKind() = kind
+   )
+ }
+
+ /**
+  * Holds if flow may enter callable `c` through parameter `p` and reach a
+  * return node of `c`, making `p` a candidate for the origin of a summary.
+  * The access path `ap` is unconstrained in this stage.
+  */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+   exists(ap) and
+   throughFlowNodeCand(p, config) and
+   c = p.getEnclosingCallable() and
+   exists(ReturnKindExt kind |
+     returnFlowCallableNodeCand(c, kind, config) and
+     // we don't expect a parameter to return stored in itself
+     not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition()
+   )
+ }
+
+ /**
+  * Holds if the reverse flow may pass through `call`: some argument of
+  * `call` is covered by `revFlow` with a `toReturn` value that is also the
+  * one of an output of `call` (`revFlowIsReturned`), and the matching
+  * parameter must be returned (`revFlowInToReturn`).
+  */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+   exists(ArgNodeEx arg, boolean toReturn |
+     revFlowInToReturn(call, arg, config) and
+     revFlowIsReturned(call, toReturn, config) and
+     revFlow(arg, toReturn, config)
+   )
+ }
+
+ /**
+  * Holds if the given counts are the stage 1 statistics for `config`. With
+  * `fwd = true` they are taken over `fwdFlow` and `fwdFlowConsCand`, with
+  * `fwd = false` over `revFlow` and `revFlowConsCand`. `conscand` is always
+  * `-1` in this stage.
+  */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+   conscand = -1 and
+   (
+     fwd = true and
+     nodes = count(NodeEx nd | fwdFlow(nd, config)) and
+     fields = count(Content c | fwdFlowConsCand(c, config)) and
+     tuples = count(NodeEx nd, boolean flag | fwdFlow(nd, flag, config))
+     or
+     fwd = false and
+     nodes = count(NodeEx nd | revFlow(nd, _, config)) and
+     fields = count(Content c | revFlowConsCand(c, config)) and
+     tuples = count(NodeEx nd, boolean flag | revFlow(nd, flag, config))
+   )
+ }
+ /* End: Stage 1 logic. */
+}
+
+/**
+ * Holds if there is a local flow step from `node1` to `node2` in `config`
+ * and `node2` is covered by `Stage1::revFlow`.
+ */
+pragma[noinline]
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+  localFlowStep(node1, node2, config) and
+  Stage1::revFlow(node2, config)
+}
+
+/**
+ * Holds if there is an additional local flow step from `node1` to `node2`
+ * in `config` and `node2` is covered by `Stage1::revFlow`.
+ */
+pragma[noinline]
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+  additionalLocalFlowStep(node1, node2, config) and
+  Stage1::revFlow(node2, config)
+}
+
+/**
+ * Holds if `out` is a viable output of `call` for return position `pos`
+ * according to the stage 1 forward flow, and `out` is covered by
+ * `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1(
+  DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+  Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) and
+  Stage1::revFlow(out, config)
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either through
+ * a `ReturnNode` or through an argument that has been mutated, and this step
+ * is part of a path from a source to a sink. `ret` must not be an
+ * out-barrier and `out` must not be an in-barrier.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+  Stage1::revFlow(ret, config) and
+  viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and
+  not (outBarrier(ret, config) or inBarrier(out, config))
+}
+
+/**
+ * Holds if `arg` is a viable argument for parameter `p` at `call` according
+ * to the stage 1 forward flow, and `arg` is covered by `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+  DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+  Stage1::revFlow(arg, config) and
+  Stage1::viableParamArgNodeCandFwd1(call, p, arg, config)
+}
+
+/**
+ * Holds if data can flow into `call` from argument `arg` to parameter `p`,
+ * and this step is part of a path from a source to a sink. `arg` must not be
+ * an out-barrier and `p` must not be an in-barrier.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+  Stage1::revFlow(p, config) and
+  viableParamArgNodeCand1(call, p, arg, config) and
+  not (outBarrier(arg, config) or inBarrier(p, config))
+}
+
+/**
+ * Gets the amount of forward branching on the origin `n1` of a cross-call
+ * path edge in the graph of paths between sources and sinks that ignores
+ * call contexts, i.e. the number of nodes reachable from `n1` by a single
+ * flow-into-call or flow-out-of-call step. Has no result if there are none.
+ */
+private int branch(NodeEx n1, Configuration conf) {
+  result =
+    strictcount(NodeEx succ |
+      flowIntoCallNodeCand1(_, n1, succ, conf) or flowOutOfCallNodeCand1(_, n1, succ, conf)
+    )
+}
+
+/**
+ * Gets the amount of backward branching on the target `n2` of a cross-call
+ * path edge in the graph of paths between sources and sinks that ignores
+ * call contexts, i.e. the number of nodes that reach `n2` by a single
+ * flow-into-call or flow-out-of-call step. Has no result if there are none.
+ */
+private int join(NodeEx n2, Configuration conf) {
+  result =
+    strictcount(NodeEx pred |
+      flowIntoCallNodeCand1(_, pred, n2, conf) or flowOutOfCallNodeCand1(_, pred, n2, conf)
+    )
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either through
+ * a `ReturnNode` or through an argument that has been mutated, and this step
+ * is part of a path from a source to a sink.
+ *
+ * `allowsFieldFlow` is `true` if the smaller of the forward branching at
+ * `ret` and the backward branching at `out` is within the field-flow branch
+ * limit of `config`, and `false` otherwise.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, ret, out, config) and
+  exists(int fanOut, int fanIn |
+    fanOut = branch(ret, config) and
+    fanIn = join(out, config)
+  |
+    if fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+/**
+ * Holds if data can flow into `call` from argument `arg` to parameter `p`,
+ * and this step is part of a path from a source to a sink.
+ *
+ * `allowsFieldFlow` is `true` if the smaller of the forward branching at
+ * `arg` and the backward branching at `p` is within the field-flow branch
+ * limit of `config`, and `false` otherwise.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+  flowIntoCallNodeCand1(call, arg, p, config) and
+  exists(int fanOut, int fanIn |
+    fanOut = branch(arg, config) and
+    fanIn = join(p, config)
+  |
+    if fanOut.minimum(fanIn) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+private module Stage2 {
+ module PrevStage = Stage1; // stage 2 prunes the result of stage 1
+
+ class ApApprox = PrevStage::Ap; // the approximation of an access path is the previous stage's access path type
+
+ class Ap = boolean; // access paths are Booleans in this stage
+
+ class ApNil extends Ap {
+ ApNil() { this = false } // `false` is the nil access path; `apCons` below always yields `true`
+ }
+
+ bindingset[result, ap]
+ private ApApprox getApprox(Ap ap) { any() } // no constraint: any approximation matches any access path
+
+ private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) } // nil access path, for nodes in the previous stage's `revFlow`
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } // any cons is `true`, regardless of `tc` and `tail`
+
+ pragma[inline]
+ private Content getHeadContent(Ap ap) { exists(result) and ap = true } // a non-nil access path may have any head content
+
+ class ApOption = BooleanOption;
+
+ ApOption apNone() { result = TBooleanNone() }
+
+ ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+ class Cc = CallContext; // call contexts are `CallContext` values in this stage
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ Cc ccNone() { result instanceof CallContextAny } // the "no call context" value
+
+ private class LocalCc = Unit; // local call contexts are aliased to `Unit` in this stage
+
+ /**
+  * Gets the call context to use inside callable `c` when it is entered
+  * through `call` from the outer call context `outercc`. This is
+  * `TSpecificCall(call)` if `recordDataFlowCallSiteDispatch(call, c)` holds
+  * and `TSomeCall()` otherwise. Requires `checkCallContextCall(outercc, call, c)`.
+  */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+   checkCallContextCall(outercc, call, c) and
+   (
+     if recordDataFlowCallSiteDispatch(call, c)
+     then result = TSpecificCall(call)
+     else result = TSomeCall()
+   )
+ }
+
+ /**
+  * Gets the call context to use after returning from callable `c` to
+  * `call`, given the call context `innercc` inside `c`. This is
+  * `TReturn(c, call)` if `reducedViableImplInReturn(c, call)` holds and
+  * `ccNone()` otherwise. Requires `checkCallContextReturn(innercc, c, call)`.
+  */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+   checkCallContextReturn(innercc, c, call) and
+   (
+     if reducedViableImplInReturn(c, call)
+     then result = TReturn(c, call)
+     else result = ccNone()
+   )
+ }
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // no constraint: every `LocalCc` value is a result
+
+ /**
+  * Holds if there is a local step from `node1` to `node2` that is relevant
+  * after stage 1. `preservesValue` is `true` for an ordinary local flow step
+  * and `false` for an additional one. `ap` and `lcc` are unconstrained.
+  */
+ private predicate localStep(
+   NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+   exists(ap) and
+   exists(lcc) and
+   (
+     preservesValue = false and
+     additionalLocalFlowStepNodeCand1(node1, node2, config)
+     or
+     preservesValue = true and
+     localFlowStepNodeCand1(node1, node2, config)
+   )
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; // call edges come from the stage 1 candidates, including `allowsFieldFlow`
+
+ private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } // always holds: no type check on stores in this stage
+
+ /* Begin: Stage 2 logic. */
+ /**
+  * Holds if `node` with approximate access path `apa` is covered by the
+  * previous stage's `revFlow`.
+  */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+   PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ pragma[nomagic] // `flowOutOfCall` restricted to calls and callables that may have flow-through per the previous stage
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and // the call may have flow through it
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, // some parameter of `ret`'s callable may flow through
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ *
+ * Sources and targets of jump steps are additionally required to be
+ * `flowCand` nodes; jump steps reset `cc` to `ccNone()` and `argAp` to
+ * `apNone()`.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: a source, with no call context and a nil access path
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local steps keep `cc` and `argAp`
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving: access path unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // additional step: only with a nil access path
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, call context dropped
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from and to a nil access path
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store: the new access path is `tc` consed onto the old one
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read: `ap` is a tail for which a matching store of `c` was seen
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable: `argAp` is recorded only if the parameter may flow through
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config) // plain return, not matched against an argument
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow-through: restore `cc` and `argAp` from the matching entry at `call`
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+  * Holds if `node1` is covered by `fwdFlow` with access path `ap1`, call
+  * context `cc` and argument access path `argAp`, and there is a store step
+  * of `tc` from `node1` to `node2` that is a candidate in the previous stage
+  * and passes `typecheckStore`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+   exists(DataFlowType t |
+     fwdFlow(node1, cc, argAp, ap1, config) and
+     PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, t, config) and
+     typecheckStore(ap1, t)
+   )
+ }
+
+ /**
+  * Holds if forward flow with access path `tail` reaches a store of a typed
+  * content whose content is `c`, and consing it onto `tail` yields `cons`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+   exists(TypedContent tc |
+     fwdFlowStore(_, tail, tc, _, _, _, config)
+   |
+     c = tc.getContent() and
+     apCons(tc, tail) = cons
+   )
+ }
+
+ /**
+  * Holds if `node1` is covered by `fwdFlow` with access path `ap`, call
+  * context `cc` and argument access path `argAp`, `c` is a head content of
+  * `ap`, and there is a candidate read step of `c` from `node1` to `node2`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+   Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+   fwdFlow(node1, cc, argAp, ap, config) and
+   PrevStage::readStepCand(node1, c, node2, config) and
+   c = getHeadContent(ap)
+ }
+
+ /**
+  * Holds if forward flow with access path `ap` reaches an argument of
+  * `call` in call context `outercc` and steps to parameter `p`, where
+  * `innercc` is the call context inside the callee. A non-nil access path is
+  * only carried across the call when the edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+   DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+   Configuration config
+ ) {
+   exists(ArgNodeEx arg, boolean fieldFlowOk |
+     fwdFlow(arg, outercc, argAp, ap, config) and
+     flowIntoCall(call, arg, p, fieldFlowOk, config) and
+     innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if forward flow with access path `ap` reaches a return node in a
+  * `CcNoCall` call context and steps out of some call to `out`, where
+  * `ccOut` is the call context after returning. A non-nil access path is
+  * only carried across the return when the edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+   NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(
+     DataFlowCall call, RetNodeEx ret, boolean fieldFlowOk, CcNoCall innercc,
+     DataFlowCallable callee
+   |
+     fwdFlow(ret, innercc, argAp, ap, config) and
+     flowOutOfCall(call, ret, out, fieldFlowOk, config) and
+     callee = ret.getEnclosingCallable() and
+     ccOut = getCallContextReturn(callee, call, innercc) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if forward flow with access path `ap` reaches a return node in a
+  * `CcCall` call context that matches `call`, with recorded argument access
+  * path `argAp`, and steps out of `call` to `out` along a flow-through edge.
+  * A non-nil access path is only carried across the return when the edge
+  * allows field flow.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+   DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx ret, boolean fieldFlowOk, CcCall innercc |
+     fwdFlow(ret, innercc, apSome(argAp), ap, config) and
+     flowThroughOutOfCall(call, ret, out, fieldFlowOk, config) and
+     innercc.matchesCall(call) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if an argument to `call` is reached in the flow covered by
+  * `fwdFlow` with call context `cc`, argument access path `argAp` and access
+  * path `ap`, and the matching parameter may flow through its callable
+  * according to the previous stage.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+   DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param |
+     fwdFlowIn(call, param, cc, _, argAp, ap, config)
+   |
+     PrevStage::parameterMayFlowThrough(param, _, getApprox(ap), config)
+   )
+ }
+
+ /**
+  * Holds if the forward flow takes a store step of `tc` from `node1` with
+  * access path `ap1` to `node2` with access path `ap2 = apCons(tc, ap1)`, and
+  * the forward flow also has a read of the content of `tc` from access path
+  * `ap2`.
+  */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+   fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+   apCons(tc, ap1) = ap2 and
+   fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+  * Holds if the forward flow takes a read step of `c` from `n1` with access
+  * path `ap1` to `n2`, and `ap2` is a tail for which a store of `c` yielding
+  * `ap1` was seen in the forward flow.
+  */
+ private predicate readStepFwd(
+   NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+   fwdFlowConsCand(ap1, c, ap2, config) and
+   fwdFlowRead(ap1, c, n1, n2, _, _, config)
+ }
+
+ pragma[nomagic] // holds if the forward flow passes through `call`: in via an argument and back out at `call`
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, // `out` is reached in the forward flow ...
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and // ... as a flow-through output of `call` ...
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), // ... whose entry at `call` has the same `cc`, `argAp` and argument access path
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic] // `flowIntoCall` restricted to edges that may be part of flow through `call`
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and // the argument is reached in this stage's forward flow
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and // the parameter may flow through per the previous stage
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config)) // and the forward flow passes through `call`
+ }
+
+ /**
+  * Holds if `node` with access path `ap` lies on a path from a source to a
+  * sink in `config`.
+  *
+  * `toReturn` is a Boolean that records whether the node must be returned
+  * from the enclosing callable in order to reach a sink; if so, `returnAp`
+  * records the access path of the returned value.
+  *
+  * The result is `revFlow0` restricted to node/access-path pairs covered by
+  * `fwdFlow`.
+  */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+   fwdFlow(node, _, _, ap, config) and
+   revFlow0(node, toReturn, returnAp, ap, config)
+ }
+
+ pragma[nomagic] // worker for `revFlow/5`, which additionally intersects the result with `fwdFlow`
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // base case: a forward-reachable sink with a nil access path
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ exists(NodeEx mid | // value-preserving local step: everything unchanged
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional local step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, return information dropped
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store: only if a matching read of `c` was seen in the reverse flow
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read: step backwards over a read that the forward flow took
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and // parameter reaches a sink without returning
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow-through: restore `toReturn` and `returnAp` from the matching output of `call`
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable: `returnAp` is recorded only if `node` was reached from an argument in the forward flow
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+  * Holds if `mid` is covered by `revFlow` with access path `ap0`, and the
+  * forward flow takes a store step of `tc`, whose content is `c`, from `node`
+  * with access path `ap` to `mid` with access path `ap0`.
+  */
+ pragma[nomagic]
+ private predicate revFlowStore(
+   Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+   ApOption returnAp, Configuration config
+ ) {
+   revFlow(mid, toReturn, returnAp, ap0, config) and
+   storeStepFwd(node, ap, tc, mid, ap0, config) and
+   c = tc.getContent()
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ *
+ * The read must also have been taken by the forward flow (`readStepFwd`).
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and // the read target is reached in the reverse flow with `tail`
+ tail = pragma[only_bind_into](tail0) and // `tail0` is a copy of `tail` used only for binding control
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+  * Holds if some output of `call` that `ret` flows out to is covered by
+  * `revFlow` with the given `toReturn`, `returnAp` and access path `ap`. A
+  * non-nil access path is only carried across the return when the edge
+  * allows field flow.
+  */
+ pragma[nomagic]
+ private predicate revFlowOut(
+   DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+   Configuration config
+ ) {
+   exists(NodeEx out, boolean fieldFlowOk |
+     revFlow(out, toReturn, returnAp, ap, config) and
+     flowOutOfCall(call, ret, out, fieldFlowOk, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if `arg` flows into a parameter that is covered by `revFlow` with
+  * `toReturn = false`, the given `returnAp` and access path `ap`. A non-nil
+  * access path is only carried across the call when the edge allows field
+  * flow.
+  */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+   ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     revFlow(param, false, returnAp, ap, config) and
+     flowIntoCall(_, arg, param, fieldFlowOk, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if `arg` flows into a parameter at `call`, along a flow-through
+  * edge, that is covered by `revFlow` with `toReturn = true`, returned access
+  * path `returnAp` and access path `ap`. A non-nil access path is only
+  * carried across the call when the edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+   DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     revFlow(param, true, apSome(returnAp), ap, config) and
+     flowThroughIntoCall(call, arg, param, fieldFlowOk, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if an output from `call` is reached in the flow covered by
+  * `revFlow` with access path `ap`, and the corresponding return node was
+  * reached in the forward flow from an argument in a call context matching
+  * `call`, so that reverse flow may continue through the callee to an
+  * argument of `call`.
+  */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+   DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx ret, CcCall innercc |
+     revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+     fwdFlow(ret, innercc, apSome(_), ap, config) and
+     innercc.matchesCall(call)
+   )
+ }
+
+ /**
+  * Holds if the store step of `tc` from `node1` with access path `ap1` to
+  * `node2` is relevant after this stage: it is stepped over backwards by the
+  * reverse flow, and a matching read of its content was seen in the reverse
+  * flow.
+  */
+ pragma[nomagic]
+ predicate storeStepCand(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+   Configuration config
+ ) {
+   exists(Ap cons, Content content |
+     store(node1, tc, node2, contentType, config) and
+     revFlowStore(cons, content, ap1, node1, tc, node2, _, _, config) and
+     revFlowConsCand(cons, content, ap1, config)
+   )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { // read steps that remain relevant after this stage
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and // the read target is in the reverse flow with `ap2`
+ readStepFwd(node1, ap1, c, node2, ap2, config) and // the forward flow took this read, going from `ap1` to `ap2`
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, // and the reverse flow has a matching store of `c`
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is covered by `revFlow/5` for some `toReturn`, `returnAp` and access path. */
+ predicate revFlow(NodeEx node, Configuration config) {
+   revFlow(node, _, _, _, config)
+ }
+
+ /** Holds if `storeStepFwd` has a store of `tc` onto access path `ap`. */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+   exists(NodeEx src | storeStepFwd(src, ap, tc, _, _, config))
+ }
+
+ /** Holds if `storeStepCand` has a store of `tc` onto access path `ap`. */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+   exists(NodeEx src | storeStepCand(src, ap, tc, _, _, config))
+ }
+
+ /**
+  * Holds if parameter `p` of callable `c` is covered by `revFlow` with
+  * access path `ap`, `toReturn = true` and returned access path `ap0`.
+  */
+ pragma[noinline]
+ private predicate parameterFlow(
+   ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+   p.getEnclosingCallable() = c and
+   revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { // `p` with access path `ap` may flow through callable `c`
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and // `p` must be returned, with returned access path `ap0`
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), // a return node of `c` is in the reverse flow with `ap0`
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and // and was reached in the forward flow from an argument with access path `ap`
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+  * Holds if the reverse flow may pass through `call`: some argument of
+  * `call` is covered by `revFlow`, its parameter must be returned with some
+  * access path (`revFlowInToReturn`), and an output of `call` with that same
+  * access path is in the reverse flow (`revFlowIsReturned`) with the
+  * argument's `toReturn` and `returnAp`.
+  */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+   exists(Ap retAp, ArgNodeEx arg, boolean toReturn, ApOption outerRetAp, Ap ap |
+     revFlow(arg, toReturn, outerRetAp, ap, config) and
+     revFlowInToReturn(call, arg, retAp, ap, config) and
+     revFlowIsReturned(call, toReturn, outerRetAp, retAp, config)
+   )
+ }
+
+ /**
+  * Holds if the given counts are the stage 2 statistics for `config`. With
+  * `fwd = true` they are taken over `fwdFlow` and `fwdConsCand`, with
+  * `fwd = false` over `revFlow` and `consCand`.
+  */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+   fwd = true and
+   nodes = count(NodeEx nd | fwdFlow(nd, _, _, _, config)) and
+   fields = count(TypedContent tc | fwdConsCand(tc, _, config)) and
+   conscand = count(TypedContent tc, Ap a | fwdConsCand(tc, a, config)) and
+   tuples = count(NodeEx nd, Cc cc, ApOption argAp, Ap a | fwdFlow(nd, cc, argAp, a, config))
+   or
+   fwd = false and
+   nodes = count(NodeEx nd | revFlow(nd, _, _, _, config)) and
+   fields = count(TypedContent tc | consCand(tc, _, config)) and
+   conscand = count(TypedContent tc, Ap a | consCand(tc, a, config)) and
+   tuples = count(NodeEx nd, boolean flag, ApOption retAp, Ap a | revFlow(nd, flag, retAp, a, config))
+ }
+ /* End: Stage 2 logic. */
+}
+
+pragma[nomagic] // stage 1 flow-out-of-call edges whose endpoints both survive stage 2
+private predicate flowOutOfCallNodeCand2(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+pragma[nomagic] // stage 1 flow-into-call edges whose endpoints both survive stage 2
+private predicate flowIntoCallNodeCand2(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+) {
+ flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+private module LocalFlowBigStep {
+ /**
+  * A node where some checking is required, and which the big-step relation
+  * is therefore not allowed to step over: a node whose underlying node
+  * satisfies `castNode` or `clearsContentCached`.
+  */
+ private class FlowCheckNode extends NodeEx {
+   FlowCheckNode() {
+     clearsContentCached(this.asNode(), _)
+     or
+     castNode(this.asNode())
+   }
+ }
+
+ /**
+  * Holds if `node` can be the first node in a maximal subsequence of local
+  * flow steps in a dataflow path. Such a node is covered by `Stage2::revFlow`
+  * and is a source, the target of a jump, store or read step, a parameter,
+  * an out node, or a `FlowCheckNode`.
+  */
+ predicate localFlowEntry(NodeEx node, Configuration config) {
+   Stage2::revFlow(node, config) and
+   (
+     node instanceof FlowCheckNode or
+     node instanceof ParamNodeEx or
+     node.asNode() instanceof OutNodeExt or
+     sourceNode(node, config) or
+     jumpStep(_, node, config) or
+     additionalJumpStep(_, node, config) or
+     read(_, _, node, config) or
+     store(_, _, node, _, config)
+   )
+ }
+
+ /**
+  * Holds if `node` can be the last node in a maximal subsequence of local
+  * flow steps in a dataflow path. Such a node is a sink, a `FlowCheckNode`,
+  * or the origin of a jump, call, return, store or read step into a node
+  * covered by `Stage2::revFlow`.
+  */
+ private predicate localFlowExit(NodeEx node, Configuration config) {
+   sinkNode(node, config)
+   or
+   node instanceof FlowCheckNode
+   or
+   exists(NodeEx succ | Stage2::revFlow(succ, config) |
+     read(node, _, succ, config) or
+     store(node, _, succ, _, config) or
+     flowOutOfCallNodeCand1(_, node, succ, config) or
+     flowIntoCallNodeCand1(_, node, succ, config) or
+     additionalJumpStep(node, succ, config) or
+     jumpStep(node, succ, config)
+   )
+ }
+
+ pragma[noinline] // additional local steps whose endpoints both survive stage 2 with access path `false` (the nil access path)
+ private predicate additionalLocalFlowStepNodeCand2(
+ NodeEx node1, NodeEx node2, Configuration config
+ ) {
+ additionalLocalFlowStepNodeCand1(node1, node2, config) and
+ Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+ * subsequence of local flow steps in a dataflow path.
+ *
+ * This is the transitive closure of `[additional]localFlowStep` beginning
+ * at `localFlowEntry`.
+ *
+ * `preservesValue` is `true` only if every step on the path is a
+ * `localFlowStepNodeCand1` step; in that case `t` is a dummy value.
+ * Otherwise `t` is the type of the target of the last additional step.
+ * No intermediate node on the path is a `FlowCheckNode`, and every node
+ * reached must be in `Stage2::revFlow`.
+ */
+ pragma[nomagic]
+ private predicate localFlowStepPlus(
+ NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+ LocalCallContext cc
+ ) {
+ not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ (
+ localFlowEntry(node1, pragma[only_bind_into](config)) and // base case: a single step out of an entry node
+ (
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true and
+ t = node1.getDataFlowType() // irrelevant dummy value
+ or
+ additionalLocalFlowStepNodeCand2(node1, node2, config) and
+ preservesValue = false and
+ t = node2.getDataFlowType()
+ ) and
+ node1 != node2 and
+ cc.relevantFor(node1.getEnclosingCallable()) and
+ not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ or
+ exists(NodeEx mid | // extend by a value-preserving step; `preservesValue` and `t` carry over
+ localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and
+ localFlowStepNodeCand1(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ or
+ exists(NodeEx mid | // extend by an additional step; the path no longer preserves the value
+ localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and
+ additionalLocalFlowStepNodeCand2(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ preservesValue = false and
+ t = node2.getDataFlowType() and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ )
+ }
+
+ /**
+ * Holds if `node1` can step to `node2` in one or more local steps and this
+ * path can occur as a maximal subsequence of local steps in a dataflow path.
+ *
+ * The path is one found by `localFlowStepPlus`, so it starts at a
+ * `localFlowEntry` node, and `node2` must additionally satisfy
+ * `localFlowExit`. The type of `apf` is the type reported by
+ * `localFlowStepPlus`.
+ */
+ pragma[nomagic]
+ predicate localFlowBigStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf,
+ Configuration config, LocalCallContext callContext
+ ) {
+ localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and
+ localFlowExit(node2, config)
+ }
+}
+
+private import LocalFlowBigStep
+
+private module Stage3 {
+ module PrevStage = Stage2; // the stage whose results this stage refines
+
+ class ApApprox = PrevStage::Ap; // access paths as represented by the previous stage
+
+ class Ap = AccessPathFront; // this stage's access path representation
+
+ class ApNil = AccessPathFrontNil; // the nil case of this stage's access paths
+
+ private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } // the previous stage's (Boolean) view of `ap`
+
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) // nil front typed by `node`, only for nodes kept by the previous stage
+ }
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } // any front whose head is `tc`; `tail` only needs to exist
+
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } // the content of the head of `ap`, if it has a head
+
+ class ApOption = AccessPathFrontOption; // optional access path, used for summary contexts
+
+ ApOption apNone() { result = TAccessPathFrontNone() } // the absent option
+
+ ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } // the option wrapping `ap`
+
+ class Cc = boolean; // call contexts in this stage are plain Booleans
+
+ class CcCall extends Cc { // the call context `true`
+ CcCall() { this = true }
+
+ /** Holds if this call context may be `call`. */
+ predicate matchesCall(DataFlowCall call) { any() } // no call is excluded in this stage
+ }
+
+ class CcNoCall extends Cc { // the call context `false`
+ CcNoCall() { this = false }
+ }
+
+ Cc ccNone() { result = false } // the context used where the flow predicates reset the call context
+
+ private class LocalCc = Unit; // local call contexts are of type `Unit` in this stage
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } // unconstrained: any `CcCall`
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } // unconstrained: any `CcNoCall`
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // unconstrained: any `LocalCc`
+
+ private predicate localStep( // a local big-step, taken under any local call context
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) // `lcc` is not used to restrict the step
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; // call-exit steps as restricted by stage 2
+
+ private predicate flowIntoCall = flowIntoCallNodeCand2/5; // call-entry steps as restricted by stage 2
+
+ pragma[nomagic]
+ private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) } // `ap` is cleared at the node wrapped by `node`
+
+ pragma[nomagic]
+ private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } // `node` wraps a `CastingNode`
+
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { // accepts `ap` at `node` unless it is cleared there or fails the type check at a casting node
+ not clear(node, ap) and
+ if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
+ }
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { // holds if the type of `ap` is compatible with `contentType`
+ // We need to typecheck stores here, since reverse flow through a getter
+ // might have a different type here compared to inside the getter.
+ compatibleTypes(ap.getType(), contentType)
+ }
+
+ /* Begin: Stage 3 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { // `node` is in the previous stage's `revFlow` with access path `apa`
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) { // logically the identity on `apa`; both sides go through `pragma[only_bind_into]`
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ pragma[nomagic] // A call-exit step that the previous stage considers part of flow through a callable.
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and // the previous stage saw flow through `call`
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, // some parameter of `ret`'s callable may flow through
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ *
+ * A tuple derived by `fwdFlow0` is kept only if the previous stage has
+ * `node` in its `revFlow` with the approximation of `ap`, and `filter`
+ * accepts `ap` at `node`.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ filter(node, ap)
+ }
+
+ pragma[nomagic] // The unfiltered step relation behind `fwdFlow`, with one disjunct per kind of step.
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: `node` is a source kept by the previous stage
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step from `mid`, keeping `cc` and `argAp`
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving: the access path is unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // not value-preserving: only from a nil access path
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, call context and summary context reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from a nil access path, to a fresh nil access path
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store: `ap` is `ap0` with `tc` added as its head
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read: `ap` is a tail that was seen being stored under `c` to form `ap0`
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable; the summary context is recorded only if the parameter may flow through
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow through: restore the context in which `call` was entered
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ pragma[nomagic] // Forward flow reaches `node1` with `ap1`, and a store of `tc` leads from `node1` to `node2`.
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and // store step kept by the previous stage
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ *
+ * `cons` is `apCons(tc, tail)` for a typed content `tc` whose content is `c`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ pragma[nomagic] // Forward flow reaches `node1` with `ap`, whose head content is `c`, and `c` is read from `node1` into `node2`.
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and // read step kept by the previous stage
+ getHeadContent(ap) = c
+ }
+
+ pragma[nomagic] // Forward flow enters a callable at `call`, from an argument reached in `outercc` to parameter `p` in `innercc`.
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs a step that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Forward flow leaves a callable through a return node that was reached in a `CcNoCall` context.
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs a step that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Forward flow leaves the callee of `call` through a return node reached from an argument with access path `argAp`.
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and // the summary context records the argument's access path
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs a step that allows field flow
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ *
+ * `cc` and `argAp` are the contexts in which the argument is reached, and
+ * `ap` is the access path with which the callable is entered.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ pragma[nomagic] // A store reached by forward flow whose resulting access path `ap2` is also read somewhere in forward flow.
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // a matching read of the stored content exists
+ }
+
+ private predicate readStepFwd( // a read of `c` from `n1` to `n2` in forward flow, taking `ap1` to a tail `ap2` that was stored under `c`
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ pragma[nomagic] // Holds if forward flow enters the callee of `call` from an argument and comes back out at `call`.
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and // flow out of the callee, entered with `argAp0`
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), // the matching entry, in the contexts of `out`
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic] // A call-entry step that may be part of flow through the callee of `call`.
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and // the argument is reached by forward flow
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ *
+ * A tuple derived by `revFlow0` is kept only if `fwdFlow` also reaches
+ * `node` with `ap`.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ pragma[nomagic] // The unfiltered step relation behind `revFlow`, with one disjunct per kind of step, followed backwards.
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // base case: `node` is a sink reached with a nil access path
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ exists(NodeEx mid | // value-preserving local step: the access path is unchanged
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // non-value-preserving local step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, return information reset
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store: going backwards, the stored content is removed, and a matching read must exist
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read: going backwards, `ap` is the access path before the read found by `readStepFwd`
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow through: restore the return information from the output of `call`
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable; `returnAp` is recorded only if forward flow reached `node` from an argument
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic] // Reverse flow reaches `mid` with `ap0`, and `storeStepFwd` stores `tc` (with content `c`) from `node` with `ap` into `mid`.
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ *
+ * The read is one found by `readStepFwd`, so it is also part of forward flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches an output of `call`, and `ret` is a return node flowing out to it.
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs a step that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches a parameter with `toReturn = false`, and `arg` is an argument flowing into it.
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs a step that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches a parameter with `toReturn = true` and return access path `returnAp`, and `arg` flows into it at `call`.
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs a step that allows field flow
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ *
+ * `toReturn` and `returnAp` are those of the output, and `ap` is its access
+ * path. The return node must also be reached by forward flow from an
+ * argument, in a call context matching `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ pragma[nomagic] // A store step of `tc` from `node1` (with access path `ap1`) to `node2` that survives both passes of this stage.
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config) // a matching read is reached by reverse flow
+ )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { // a read of `c` from `node1` to `node2` that survives both passes of this stage
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, // a matching store is reached by reverse flow
+ pragma[only_bind_into](config))
+ )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } // `node` occurs in `revFlow` with some access path and return information
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto `ap` by some `storeStepFwd` step
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto `ap` by some `storeStepCand` step
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ pragma[noinline] // Reverse flow reaches parameter `p` of `c` with `ap`, to be returned with access path `ap0`.
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { // parameter `p` of `c`, entered with `ap`, may flow to a return node of `c`
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and // `ret` is reached from an argument that entered with `ap`
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ pragma[nomagic] // Holds if reverse flow goes from an output of `call` through the callee to an argument of `call`.
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { // result sizes of the forward (`fwd = true`) and reverse (`fwd = false`) passes
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and // distinct nodes
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and // distinct stored typed contents
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and // (typed content, access path) pairs
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) // full `fwdFlow` tuples
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and remains relevant for the following pruning stage.
+ *
+ * Concretely, `node` is in `Stage3::revFlow` with `toReturn = true` and in
+ * `Stage3::fwdFlow` under a `CcCall` context with summary context `argApf`,
+ * both with the same access path front.
+ */
+private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
+ exists(AccessPathFront apf |
+ Stage3::revFlow(node, true, _, apf, config) and
+ Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ *
+ * `tails` counts the fronts that `Stage3::consCand` pairs with `tc`, and
+ * `nodes` counts the nodes that have a front headed by `tc`, either in
+ * `Stage3::revFlow` or as a summary context. The unfolding is considered
+ * expensive when `tails` exceeds `apLimit` and `(tails - 1) * nodes` exceeds
+ * `tupleLimit`, unless `tc.forceHighPrecision()` holds. Both limits come
+ * from `accessPathApproxCostLimits`.
+ */
+private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) {
+ exists(int tails, int nodes, int apLimit, int tupleLimit |
+ tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and
+ nodes =
+ strictcount(NodeEx n |
+ Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ or
+ flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ ) and
+ accessPathApproxCostLimits(apLimit, tupleLimit) and
+ apLimit < tails and
+ tupleLimit < (tails - 1) * nodes and
+ not tc.forceHighPrecision()
+ )
+}
+
+private newtype TAccessPathApprox = // the representations of approximate access paths
+ TNil(DataFlowType t) or // no contents, only a type
+ TConsNil(TypedContent tc, DataFlowType t) { // one content followed by a type; only for stage 3 candidates
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ not expensiveLen2unfolding(tc, _)
+ } or
+ TConsCons(TypedContent tc1, TypedContent tc2, int len) { // first two contents plus the length; only for stage 3 candidates
+ Stage3::consCand(tc1, TFrontHead(tc2), _) and
+ len in [2 .. accessPathLimit()] and
+ not expensiveLen2unfolding(tc1, _)
+ } or
+ TCons1(TypedContent tc, int len) { // first content plus the length, used when tracking two contents is expected to be expensive
+ len in [1 .. accessPathLimit()] and
+ expensiveLen2unfolding(tc, _)
+ }
+
+/**
+ * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only
+ * the first two elements of the list and its length are tracked. If data flows
+ * from a source to a given node with a given `AccessPathApprox`, this indicates
+ * the sequence of dereference operations needed to get from the value in the node
+ * to the tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPathApprox extends TAccessPathApprox {
+ abstract string toString(); // textual form of this approximation
+
+ abstract TypedContent getHead(); // the first content of the list, if any
+
+ abstract int len(); // the number of contents in the list
+
+ abstract DataFlowType getType(); // the type associated with this approximation
+
+ abstract AccessPathFront getFront(); // the corresponding stage 3 access path front
+
+ /** Gets the access path obtained by popping `head` from this path, if any. */
+ abstract AccessPathApprox pop(TypedContent head);
+}
+
+private class AccessPathApproxNil extends AccessPathApprox, TNil { // the `TNil` case: no contents, only the type `t`
+ private DataFlowType t;
+
+ AccessPathApproxNil() { this = TNil(t) }
+
+ override string toString() { result = concat(": " + ppReprType(t)) } // the `concat` yields "" when `ppReprType(t)` has no result
+
+ override TypedContent getHead() { none() } // there is no head
+
+ override int len() { result = 0 }
+
+ override DataFlowType getType() { result = t }
+
+ override AccessPathFront getFront() { result = TFrontNil(t) }
+
+ override AccessPathApprox pop(TypedContent head) { none() } // nothing can be popped
+}
+
+abstract private class AccessPathApproxCons extends AccessPathApprox { } // common supertype of the `TConsNil`, `TConsCons`, and `TCons1` cases
+
+private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { // the `TConsNil` case: a single content `tc` followed by the type `t`
+ private TypedContent tc;
+ private DataFlowType t;
+
+ AccessPathApproxConsNil() { this = TConsNil(tc, t) }
+
+ override string toString() {
+ // The `concat` becomes "" if `ppReprType` has no result.
+ result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t))
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = 1 }
+
+ override DataFlowType getType() { result = tc.getContainerType() } // the container type of the head, not `t`
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } // popping the only content leaves the nil path of type `t`
+}
+
+/**
+ * An access path approximation of length at least 2 that records its first
+ * two `TypedContent`s together with its length.
+ */
+private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons {
+ private TypedContent tc1;
+ private TypedContent tc2;
+ private int len;
+
+ AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) }
+
+ /** Gets the textual form; the length is shown only when it is not 2. */
+ override string toString() {
+ exists(string firstTwo | firstTwo = "[" + tc1.toString() + ", " + tc2.toString() |
+ len = 2 and
+ result = firstTwo + "]"
+ or
+ len != 2 and
+ result = firstTwo + ", ... (" + len.toString() + ")]"
+ )
+ }
+
+ /** Gets the first tracked content. */
+ override TypedContent getHead() { result = tc1 }
+
+ /** Gets the recorded length. */
+ override int len() { result = len }
+
+ /** Gets the container type of the first tracked content. */
+ override DataFlowType getType() { result = tc1.getContainerType() }
+
+ /** Gets the front headed by the first tracked content. */
+ override AccessPathFront getFront() { result = TFrontHead(tc1) }
+
+ /**
+ * Gets an approximation headed by the second tracked content and one
+ * shorter than this one, provided `head` is the first tracked content.
+ */
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc1 and
+ exists(int tailLen | tailLen = len - 1 |
+ result = TCons1(tc2, tailLen)
+ or
+ result = TConsCons(tc2, _, tailLen)
+ or
+ tailLen = 1 and
+ result = TConsNil(tc2, _)
+ )
+ }
+}
+
+/**
+ * An access path approximation of length at least 1 that records only its
+ * first `TypedContent` together with its length.
+ */
+private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 {
+ private TypedContent tc;
+ private int len;
+
+ AccessPathApproxCons1() { this = TCons1(tc, len) }
+
+ /** Gets the textual form; the length is shown only when it is not 1. */
+ override string toString() {
+ exists(string headText | headText = "[" + tc.toString() |
+ len = 1 and
+ result = headText + "]"
+ or
+ len != 1 and
+ result = headText + ", ... (" + len.toString() + ")]"
+ )
+ }
+
+ /** Gets the tracked content. */
+ override TypedContent getHead() { result = tc }
+
+ /** Gets the recorded length. */
+ override int len() { result = len }
+
+ /** Gets the container type of the tracked content. */
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ /** Gets the front headed by the tracked content. */
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ /**
+ * Gets an approximation one shorter than this one, provided `head` is the
+ * tracked content. The candidates for what follows `head` are taken from
+ * `Stage3::consCand`.
+ */
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc and
+ (
+ exists(DataFlowType t |
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ len = 1 and
+ result = TNil(t)
+ )
+ or
+ exists(TypedContent next, int tailLen |
+ Stage3::consCand(tc, TFrontHead(next), _) and
+ tailLen = len - 1
+ |
+ result = TCons1(next, tailLen)
+ or
+ result = TConsCons(next, _, tailLen)
+ or
+ tailLen = 1 and
+ result = TConsNil(next, _)
+ )
+ )
+ }
+}
+
+/** Gets the access path obtained by popping `tc` from `apa`, if any. */
+private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) }
+
+/** Gets the access path obtained by pushing `tc` onto `apa`, that is, a path from which popping `tc` yields `apa`. */
+private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) }
+
+private newtype TAccessPathApproxOption = // an optional `AccessPathApprox`
+ TAccessPathApproxNone() or // no access path
+ TAccessPathApproxSome(AccessPathApprox apa) // the access path `apa`
+
+/** An optional `AccessPathApprox`: either absent or wrapping a concrete approximation. */
+private class AccessPathApproxOption extends TAccessPathApproxOption {
+ /** Gets the textual form: that of the wrapped approximation, or the empty string when absent. */
+ string toString() {
+ exists(AccessPathApprox wrapped | this = TAccessPathApproxSome(wrapped) |
+ result = wrapped.toString()
+ )
+ or
+ this = TAccessPathApproxNone() and
+ result = ""
+ }
+}
+
+private module Stage4 {
+ module PrevStage = Stage3; // the stage whose results this stage refines
+
+ class ApApprox = PrevStage::Ap; // access paths as represented by the previous stage
+
+ class Ap = AccessPathApprox; // this stage's access path representation
+
+ class ApNil = AccessPathApproxNil; // the nil case of this stage's access paths
+
+ private ApApprox getApprox(Ap ap) { result = ap.getFront() } // the previous stage's view of `ap` is its front
+
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) // nil path typed by `node`, only for nodes kept by the previous stage
+ }
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } // a path from which popping `tc` yields `tail`
+
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } // the content of the head of `ap`, if it has a head
+
+ class ApOption = AccessPathApproxOption; // optional access path, used for summary contexts
+
+ ApOption apNone() { result = TAccessPathApproxNone() } // the absent option
+
+ ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } // the option wrapping `ap`
+
+ class Cc = CallContext; // this stage uses `CallContext` values rather than Booleans
+
+ class CcCall = CallContextCall; // contexts used when a callable is entered through a call
+
+ class CcNoCall = CallContextNoCall; // contexts used otherwise
+
+ Cc ccNone() { result instanceof CallContextAny } // the context used where the flow predicates reset the call context
+
+ private class LocalCc = LocalCallContext; // local steps are restricted by a `LocalCallContext`
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { // the context for entering `c` at `call` from `outercc`
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() // the specific call is kept only if `recordDataFlowCallSite` holds
+ }
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { // the context after returning from `c` to `call`
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() // the return is recorded only if `reducedViableImplInReturn` holds
+ }
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { // the local call context for `cc` in the callable enclosing `node`
+ localFlowEntry(node, config) and // only defined for nodes that can start a local big-step
+ result =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ node.getEnclosingCallable())
+ }
+
+ private predicate localStep( // a local big-step taken under the local call context `lcc`
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) // the big-step relation is expressed in terms of fronts
+ }
+
+ pragma[nomagic] // A stage 2 call-exit candidate whose two ends are both in the previous stage's `revFlow`.
+ private predicate flowOutOfCall(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ pragma[nomagic] // A stage 2 call-entry candidate whose two ends are both in the previous stage's `revFlow`.
+ private predicate flowIntoCall(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+ ) {
+ flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { any() } // no filtering is done in this stage
+
+ // Type checking is not necessary here as it has already been done in stage 3.
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } // always holds
+
+ /* Begin: Stage 4 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { // `node` is in the previous stage's `revFlow` with access path `apa`
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) { // logically the identity on `apa`; both sides go through `pragma[only_bind_into]`
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ pragma[nomagic] // A call-exit step that the previous stage considers part of flow through a callable.
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and // the previous stage saw flow through `call`
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, // some parameter of `ret`'s callable may flow through
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ *
+ * A tuple derived by `fwdFlow0` is kept only if the previous stage has
+ * `node` in its `revFlow` with the approximation of `ap`, and `filter`
+ * accepts `ap` at `node`.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ filter(node, ap)
+ }
+
+ pragma[nomagic] // The unfiltered step relation behind `fwdFlow`, with one disjunct per kind of step.
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: `node` is a source kept by the previous stage
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step from `mid`, keeping `cc` and `argAp`
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving: the access path is unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // not value-preserving: only from a nil access path
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, call context and summary context reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from a nil access path, to a fresh nil access path
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store: `ap` is `ap0` with `tc` added as its head
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read: `ap` is a tail that was seen being stored under `c` to form `ap0`
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable; the summary context is recorded only if the parameter may flow through
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow through: restore the context in which `call` was entered
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+  * Holds if forward flow reaches `node1` with access path `ap1` in the context
+  * (`cc`, `argAp`), the previous stage has a candidate store step of `tc` from
+  * `node1` to `node2`, and the content type of that step passes
+  * `typecheckStore` for `ap1`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+   exists(DataFlowType storedType |
+     typecheckStore(ap1, storedType) and
+     PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, storedType, config) and
+     fwdFlow(node1, cc, argAp, ap1, config)
+   )
+ }
+
+ /**
+  * Holds if `fwdFlowStore` covers a store, on top of access path `tail`, of a
+  * typed content whose content is `c`, and `cons` is the access path that
+  * `apCons` builds from that typed content and `tail`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+   exists(TypedContent stored |
+     c = stored.getContent() and
+     cons = apCons(stored, tail) and
+     fwdFlowStore(_, tail, stored, _, _, _, config)
+   )
+ }
+
+ /**
+  * Holds if forward flow reaches `node1` with access path `ap` in the context
+  * (`cc`, `argAp`), the head content of `ap` is `c`, and the previous stage
+  * has a candidate read step of `c` from `node1` to `node2`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+   Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+   c = getHeadContent(ap) and
+   PrevStage::readStepCand(node1, c, node2, config) and
+   fwdFlow(node1, cc, argAp, ap, config)
+ }
+
+ /**
+  * Holds if forward flow reaches an argument of `call` with access path `ap`
+  * in the context (`outercc`, `argAp`), and `flowIntoCall` relates that
+  * argument to parameter `p`. `innercc` is the call context obtained from
+  * `getCallContextCall` for entering the callable of `p` from `outercc`.
+  *
+  * A non-nil access path is only accepted when the call edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+   DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+   Configuration config
+ ) {
+   exists(ArgNodeEx argument, boolean fieldFlowOk |
+     flowIntoCall(call, argument, p, fieldFlowOk, config) and
+     fwdFlow(argument, outercc, argAp, ap, config) and
+     innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if forward flow reaches a return node with access path `ap` and
+  * argument access path `argAp` in a `CcNoCall` call context, and
+  * `flowOutOfCall` relates that return node to `out` at some call. `ccOut` is
+  * the call context obtained from `getCallContextReturn` for that return.
+  *
+  * A non-nil access path is only accepted when the return edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+   NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(
+     DataFlowCall site, RetNodeEx retNode, boolean fieldFlowOk, CcNoCall calleeCc,
+     DataFlowCallable callee
+   |
+     callee = retNode.getEnclosingCallable() and
+     flowOutOfCall(site, retNode, out, fieldFlowOk, config) and
+     fwdFlow(retNode, calleeCc, argAp, ap, config) and
+     ccOut = getCallContextReturn(callee, site, calleeCc) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if forward flow reaches a return node with access path `ap` in a
+  * `CcCall` context that matches `call`, with recorded argument access path
+  * `argAp`, and `flowThroughOutOfCall` relates that return node to `out` at
+  * `call`.
+  *
+  * A non-nil access path is only accepted when the return edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+   DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx retNode, boolean fieldFlowOk, CcCall calleeCc |
+     calleeCc.matchesCall(call) and
+     flowThroughOutOfCall(call, retNode, out, fieldFlowOk, config) and
+     fwdFlow(retNode, calleeCc, apSome(argAp), ap, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if the flow covered by `fwdFlow` reaches an argument of `call` with
+  * access path `ap` in the context (`cc`, `argAp`), and the previous stage
+  * found that the corresponding parameter may flow through its callable, so
+  * that data might come back out at `call`.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+   DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param |
+     PrevStage::parameterMayFlowThrough(param, _, unbindApa(getApprox(ap)), config) and
+     fwdFlowIn(call, param, cc, _, argAp, ap, config)
+   )
+ }
+
+ /**
+  * Holds if `fwdFlowStore` has a store of `tc` from `node1` (access path `ap1`)
+  * to `node2`, `ap2` is `ap1` with `tc` pushed onto it, and `fwdFlowRead` has
+  * some read of the content of `tc` from access path `ap2`.
+  */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+   ap2 = apCons(tc, ap1) and
+   fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+   exists(Content stored |
+     stored = tc.getContent() and
+     fwdFlowRead(ap2, stored, _, _, _, _, config)
+   )
+ }
+
+ /**
+  * Holds if `fwdFlowRead` has a read of `c` from `n1` (access path `ap1`) to
+  * `n2`, and `fwdFlowConsCand` relates `ap1` to `ap2` as the access path that
+  * remains once `c` is removed.
+  */
+ private predicate readStepFwd(
+   NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+   fwdFlowConsCand(ap1, c, ap2, config) and
+   fwdFlowRead(ap1, c, n1, n2, _, _, config)
+ }
+
+ /**
+ * Holds if forward flow reaches some node `out` via `fwdFlowOutFromArg` for
+ * `call`, where the argument access path of that flow is one with which
+ * `call` was entered according to `fwdFlowIsEntered`, and `fwdFlow` holds at
+ * `out` with the entering call context and argument access path.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ // the `pragma[only_bind_into]` wrappers are binding hints; they do not change the result
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` relates `arg` to `p` at `call` (with the given
+ * `allowsFieldFlow`), forward flow reaches `arg`, the previous stage found
+ * that `p` may flow through, and `callMayFlowThroughFwd` holds for `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+  * Holds if `node`, with access path `ap`, lies on a path from a source to a
+  * sink in configuration `config`.
+  *
+  * `toReturn` is a Boolean recording whether the value at `node` has to be
+  * returned from the enclosing callable in order to reach a sink. When it
+  * does, `returnAp` holds the access path of the returned value.
+  */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+   // Only keep reverse-flow tuples that forward flow also reaches.
+   fwdFlow(node, _, _, ap, config) and
+   revFlow0(node, toReturn, returnAp, ap, config)
+ }
+
+ /**
+ * The recursive core of `revFlow`: holds if `node` with access path `ap` can
+ * reach a sink by one of the step kinds listed below, with `toReturn` and
+ * `returnAp` as described on `revFlow`. `revFlow` intersects this relation
+ * with `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink: reached by forward flow with a nil access path; nothing needs to be returned
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // local step flagged `true`: the access path is the same on both sides
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // local step flagged `false`: both sides must have a nil access path
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: the access path is kept and the return state is reset
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: both sides must have a nil access path; the return state is reset
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow into a callable whose parameter must be returned: the return state
+ // is the one with which the output of `call` was reached
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ // `returnAp` is only recorded when forward flow reached `node` in a call
+ // context with a recorded argument access path
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+  * Holds if reverse flow reaches `mid` with access path `ap0` and return state
+  * (`toReturn`, `returnAp`), and `storeStepFwd` has a store of `tc` from `node`
+  * (access path `ap`) to `mid` (access path `ap0`). `c` is the content of `tc`.
+  */
+ pragma[nomagic]
+ private predicate revFlowStore(
+   Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+   ApOption returnAp, Configuration config
+ ) {
+   c = tc.getContent() and
+   storeStepFwd(node, ap, tc, mid, ap0, config) and
+   revFlow(mid, toReturn, returnAp, ap0, config)
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ *
+ * That is, `readStepFwd` has a read of `c` that goes from access path `cons`
+ * to access path `tail` at some node that reverse flow reaches with `tail`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` is equal to `tail`; the extra variable exists only to carry the binding hint
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+  * Holds if reverse flow reaches some out node of `call` with access path `ap`
+  * and return state (`toReturn`, `returnAp`), and `flowOutOfCall` relates
+  * return node `ret` to that out node.
+  *
+  * A non-nil access path is only accepted when the return edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate revFlowOut(
+   DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+   Configuration config
+ ) {
+   exists(NodeEx outNode, boolean fieldFlowOk |
+     flowOutOfCall(call, ret, outNode, fieldFlowOk, config) and
+     revFlow(outNode, toReturn, returnAp, ap, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if reverse flow reaches some parameter with access path `ap`,
+  * `toReturn = false` and return access path `returnAp`, and `flowIntoCall`
+  * relates `arg` to that parameter at some call.
+  *
+  * A non-nil access path is only accepted when the call edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+   ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     flowIntoCall(_, arg, param, fieldFlowOk, config) and
+     revFlow(param, false, returnAp, ap, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if reverse flow reaches some parameter with access path `ap`,
+  * `toReturn = true` and recorded return access path `returnAp`, and
+  * `flowThroughIntoCall` relates `arg` to that parameter at `call`.
+  *
+  * A non-nil access path is only accepted when the call edge allows field flow.
+  */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+   DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     flowThroughIntoCall(call, arg, param, fieldFlowOk, config) and
+     revFlow(param, true, apSome(returnAp), ap, config) and
+     (fieldFlowOk = true or ap instanceof ApNil)
+   )
+ }
+
+ /**
+  * Holds if the flow covered by `revFlow` reaches an output of `call`, so that
+  * `revFlowOut` holds for some return node with access path `ap` and return
+  * state (`toReturn`, `returnAp`), and forward flow reaches that return node
+  * with `ap` in a `CcCall` context matching `call` with a recorded argument
+  * access path. In that case data might flow through the target callable, and
+  * reverse flow may reach an argument of `call`.
+  */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+   DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx retNode, CcCall calleeCc |
+     calleeCc.matchesCall(call) and
+     fwdFlow(retNode, calleeCc, apSome(_), ap, config) and
+     revFlowOut(call, retNode, toReturn, returnAp, ap, config)
+   )
+ }
+
+ /**
+  * Holds if `store` has a step of `tc` from `node1` to `node2` with content
+  * type `contentType`, and that step is part of the reverse flow: `revFlowStore`
+  * holds for it with `ap1` as the access path at `node1`, and
+  * `revFlowConsCand` has a matching read for the same pair of access paths.
+  */
+ pragma[nomagic]
+ predicate storeStepCand(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+   Configuration config
+ ) {
+   exists(Ap consAp, Content content |
+     revFlowConsCand(consAp, content, ap1, config) and
+     revFlowStore(consAp, content, ap1, node1, tc, node2, _, _, config) and
+     store(node1, tc, node2, contentType, config)
+   )
+ }
+
+ /**
+ * Holds if `readStepFwd` has a read of `c` from `node1` to `node2`, reverse
+ * flow reaches `node2` with the access path that results from the read, and
+ * `revFlowStore` relates the same pair of access paths through content `c`.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ // `ap1` is the access path before the read, `ap2` the one after it
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `node` is in the five-column `revFlow` relation for `config` with
+ * any return state and any access path.
+ */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /**
+ * Holds if `storeStepFwd` has some store of `tc` onto access path `ap` in
+ * `config`. Used by `stats` for the forward counts.
+ */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if `storeStepCand` has some store of `tc` onto access path `ap` in
+ * `config`, that is, `tc` followed by `ap` is a cons candidate of this stage.
+ */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+  * Holds if parameter `p` of callable `c` is reached by reverse flow with
+  * access path `ap`, `toReturn = true` and recorded return access path `ap0`.
+  */
+ pragma[noinline]
+ private predicate parameterFlow(
+   ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+   p.getEnclosingCallable() = c and
+   revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c` may flow through `c` when entered
+ * with access path `ap`: `parameterFlow` holds for `p` with `ap` and some
+ * returned access path `ap0`, and some return node `ret` in `c` is reached
+ * with `ap0` both by reverse flow (with `toReturn = true`) and by forward
+ * flow in a `CcCall` context whose recorded argument access path is `ap`.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+  * Holds if reverse flow reaches some argument of `call` via
+  * `revFlowInToReturn`, where the returned access path of that flow is one
+  * with which an output of `call` was reached according to
+  * `revFlowIsReturned`, and `revFlow` holds at the argument with the
+  * corresponding return state.
+  */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+   exists(
+     Ap innerReturnAp, ArgNodeEx argument, boolean outerToReturn, ApOption outerReturnAp,
+     Ap argAp
+   |
+     revFlowIsReturned(call, outerToReturn, outerReturnAp, innerReturnAp, config) and
+     revFlowInToReturn(call, argument, innerReturnAp, argAp, config) and
+     revFlow(argument, outerToReturn, outerReturnAp, argAp, config)
+   )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`. With `fwd = true`
+ * the counts are taken over `fwdFlow` and `fwdConsCand`; with `fwd = false`
+ * they are taken over `revFlow` and `consCand`.
+ *
+ * `nodes` counts distinct nodes, `fields` distinct typed contents,
+ * `conscand` distinct (typed content, access path) pairs, and `tuples` the
+ * distinct tuples of the full five-column flow relation.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ // forward direction
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ // reverse direction
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 4 logic. */
+}
+
+/**
+ * Gets `conf` itself. The equality between `conf` and `result` is routed
+ * through an intermediate variable wrapped in `pragma[only_bind_into]` on
+ * both sides, which acts as a binding hint. As the `bindingset` states, both
+ * `conf` and `result` must be bound by the caller.
+ */
+bindingset[conf, result]
+private Configuration unbindConf(Configuration conf) {
+ exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
+}
+
+/**
+ * Holds if `n` lies in a callable that has a parameter which, according to
+ * Stage 4, may flow through with access path approximation `apa` (under any
+ * configuration), and under `config` node `n` is reached by Stage 4 reverse
+ * flow with `toReturn = true` and by Stage 4 forward flow in a
+ * `CallContextCall` context whose recorded argument approximation is `apa`.
+ */
+private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) {
+  exists(DataFlowCallable callable, AccessPathApprox nodeApa |
+    callable = n.getEnclosingCallable() and
+    Stage4::parameterMayFlowThrough(_, callable, apa, _) and
+    Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), nodeApa, config) and
+    Stage4::revFlow(n, true, _, nodeApa, config)
+  )
+}
+
+/**
+ * The underlying representation of summary contexts: either no context, or a
+ * parameter `p` together with an access path `ap` such that Stage 4 found
+ * that `p` may flow through with the approximation of `ap` (under some
+ * configuration).
+ */
+private newtype TSummaryCtx =
+ TSummaryCtxNone() or
+ TSummaryCtxSome(ParamNodeEx p, AccessPath ap) {
+ Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _)
+ }
+
+/**
+ * A context for generating flow summaries. This represents flow entry through
+ * a specific parameter with an access path of a specific shape.
+ *
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+ /** Gets a textual representation of this summary context. */
+ abstract string toString();
+}
+
+/**
+ * A summary context from which no flow summary can be generated.
+ *
+ * It renders as `<none>` rather than as the empty string, so that it stays
+ * visible wherever a summary context is printed.
+ */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+  override string toString() { result = "<none>" }
+}
+
+/**
+ * A summary context from which a flow summary can be generated. It pairs the
+ * parameter through which the callable was entered with the access path at
+ * that parameter.
+ */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+  private ParamNodeEx param;
+  private AccessPath path;
+
+  SummaryCtxSome() { this = TSummaryCtxSome(param, path) }
+
+  /** Gets the position of the parameter of this context, as given by `isParameterOf`. */
+  int getParameterPos() { param.isParameterOf(_, result) }
+
+  /** Gets a textual representation: the parameter, a colon, and the access path. */
+  override string toString() { result = param + ": " + path }
+
+  /** Holds if the parameter of this context is at the specified location. */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    param.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/**
+ * Gets the number of length 2 access path approximations that correspond to `apa`.
+ *
+ * This is the number of distinct fronts among the approximations of length
+ * `apa.len() - 1` that `Stage4::consCand` pairs with the head of `apa`.
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+  exists(TypedContent head, int tailLen |
+    head = apa.getHead() and
+    tailLen = apa.len() - 1 and
+    result =
+      strictcount(AccessPathFront front |
+        exists(AccessPathApprox tail |
+          tail.getFront() = front and
+          tail.len() = tailLen and
+          Stage4::consCand(head, tail, config)
+        )
+      )
+  )
+}
+
+/**
+ * Gets the number of nodes that use `apa` under `config`: nodes that Stage 4
+ * reverse flow reaches with `apa`, together with nodes for which
+ * `nodeMayUseSummary` holds with `apa`. There is no result if there are no
+ * such nodes.
+ */
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+  result =
+    strictcount(NodeEx user |
+      nodeMayUseSummary(user, apa, config)
+      or
+      Stage4::revFlow(user, _, _, apa, config)
+    )
+}
+
+/**
+ * Holds if a length 2 access path approximation matching `apa` is expected
+ * to be expensive.
+ *
+ * That is the case when the number of length 2 unfoldings exceeds the access
+ * path limit and, in addition, `(unfoldings - 1) * nodes` exceeds the tuple
+ * limit, with both limits taken from `accessPathCostLimits`.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+  exists(int apCount, int nodeCount, int apLimit, int tupleLimit |
+    accessPathCostLimits(apLimit, tupleLimit) and
+    apCount = count1to2unfold(apa, config) and
+    nodeCount = countNodesUsingAccessPath(apa, config) and
+    apCount > apLimit and
+    (apCount - 1) * nodeCount > tupleLimit
+  )
+}
+
+/**
+ * Gets an approximation obtained by popping the head of `apa`, provided that
+ * `Stage4::consCand` pairs that head with the result under `config`.
+ */
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+  exists(TypedContent tc |
+    result = apa.pop(tc) and
+    Stage4::consCand(tc, result, config)
+  )
+}
+
+/**
+ * Holds with `unfold = false` if a precise head-tail representation of `apa` is
+ * expected to be expensive. Holds with `unfold = true` otherwise.
+ *
+ * Approximations whose head reports `forceHighPrecision()` are always
+ * unfolded. For the rest, the representation counts as expensive when the
+ * number of potential access paths exceeds the access path limit and
+ * `(aps - 1) * nodes` exceeds the tuple limit, with both limits taken from
+ * `accessPathCostLimits`.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+ if apa.getHead().forceHighPrecision()
+ then unfold = true
+ else
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = countPotentialAps(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
+ )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`.
+ *
+ * The three cases below mirror the three non-nil `TAccessPath` branches.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+ // not unfolded, and either not an `AccessPathApproxCons1` or too expensive
+ // to unfold to length 2: a single access path
+ evalUnfold(apa, false, config) and
+ result = 1 and
+ (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config))
+ or
+ // not unfolded, but cheap enough to unfold to length 2
+ evalUnfold(apa, false, config) and
+ result = count1to2unfold(apa, config) and
+ not expensiveLen1to2unfolding(apa, config)
+ or
+ // unfolded into a precise head-tail representation
+ evalUnfold(apa, true, config) and
+ result = countPotentialAps(apa, config)
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ *
+ * A nil approximation counts as one access path. Otherwise the result is the
+ * sum of `countAps` over the tails given by `getATail`; there is no result
+ * if `apa` has no such tail.
+ */
+language[monotonicAggregates]
+private int countPotentialAps(AccessPathApprox apa, Configuration config) {
+ apa instanceof AccessPathApproxNil and result = 1
+ or
+ result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config))
+}
+
+/**
+ * The underlying representation of access paths. Which of the non-nil
+ * branches exists for a given approximation is decided by `evalUnfold` and
+ * `expensiveLen1to2unfolding`.
+ */
+private newtype TAccessPath =
+ // the empty access path, carrying only a type
+ TAccessPathNil(DataFlowType t) or
+ // precise head-tail representation: used when `evalUnfold` does not hold
+ // with `false` for the approximation
+ TAccessPathCons(TypedContent head, AccessPath tail) {
+ exists(AccessPathApproxCons apa |
+ not evalUnfold(apa, false, _) and
+ head = apa.getHead() and
+ tail.getApprox() = getATail(apa, _)
+ )
+ } or
+ // first two heads plus the length: used when the approximation is not
+ // unfolded but a length 2 unfolding is not expensive
+ TAccessPathCons2(TypedContent head1, TypedContent head2, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ not expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head1 = apa.getHead() and
+ head2 = getATail(apa, _).getHead()
+ )
+ } or
+ // first head plus the length: used when the approximation is not unfolded
+ // and a length 2 unfolding is expensive as well
+ TAccessPathCons1(TypedContent head, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head = apa.getHead()
+ )
+ }
+
+/**
+ * The underlying representation of path nodes. Intermediate nodes carry a
+ * call context, a summary context and an access path in addition to the node
+ * and configuration; sink nodes carry only the node and configuration.
+ */
+private newtype TPathNode =
+ TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) {
+ // A PathNode is introduced by a source ...
+ Stage4::revFlow(node, config) and
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // ... or a step from an existing PathNode to another node.
+ exists(PathNodeMid mid |
+ pathStep(mid, node, cc, sc, ap) and
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ // the target must be in the Stage 4 reverse flow with the approximation of `ap`
+ Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config))
+ )
+ } or
+ TPathNodeSink(NodeEx node, Configuration config) {
+ sinkNode(node, pragma[only_bind_into](config)) and
+ Stage4::revFlow(node, pragma[only_bind_into](config)) and
+ (
+ // A sink that is also a source ...
+ sourceNode(node, config)
+ or
+ // ... or a sink that can be reached from a source
+ // (by a step that arrives with a nil access path)
+ exists(PathNodeMid mid |
+ pathStep(mid, node, _, _, TAccessPathNil(_)) and
+ pragma[only_bind_into](config) = mid.getConfiguration()
+ )
+ )
+ }
+
+/**
+ * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a
+ * source to a given node with a given `AccessPath`, this indicates the sequence
+ * of dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPath extends TAccessPath {
+ /** Gets the head of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the tail of this access path, if any. */
+ abstract AccessPath getTail();
+
+ /** Gets the front of this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the approximation of this access path. */
+ abstract AccessPathApprox getApprox();
+
+ /** Gets the length of this access path. */
+ abstract int length();
+
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /**
+ * Gets the access path obtained by popping `tc` from this access path, if any.
+ * This has a result only when `tc` is the head of this access path, and the
+ * result is then a tail as given by `getTail()`.
+ */
+ final AccessPath pop(TypedContent tc) {
+ result = this.getTail() and
+ tc = this.getHead()
+ }
+
+ /**
+ * Gets the access path obtained by pushing `tc` onto this access path.
+ * This is defined as the inverse of `pop`: the result is any access path
+ * whose `pop(tc)` yields this access path.
+ */
+ final AccessPath push(TypedContent tc) { this = result.pop(tc) }
+}
+
+/**
+ * The empty access path. It has no head and no tail, has length 0, and
+ * carries only the type it was created with.
+ */
+private class AccessPathNil extends AccessPath, TAccessPathNil {
+  private DataFlowType tp;
+
+  AccessPathNil() { this = TAccessPathNil(tp) }
+
+  /** Gets the type carried by this empty access path. */
+  DataFlowType getType() { result = tp }
+
+  /** An empty access path has no head. */
+  override TypedContent getHead() { none() }
+
+  /** An empty access path has no tail. */
+  override AccessPath getTail() { none() }
+
+  override AccessPathFrontNil getFront() { result = TFrontNil(tp) }
+
+  override AccessPathApproxNil getApprox() { result = TNil(tp) }
+
+  override int length() { result = 0 }
+
+  /**
+   * Gets the concatenation of `": "` followed by each result of
+   * `ppReprType` for the carried type.
+   */
+  override string toString() { result = concat(": " + ppReprType(tp)) }
+}
+
+/**
+ * An access path in precise head-tail representation: a `TypedContent` head
+ * followed by another `AccessPath` as tail.
+ */
+private class AccessPathCons extends AccessPath, TAccessPathCons {
+ private TypedContent head;
+ private AccessPath tail;
+
+ AccessPathCons() { this = TAccessPathCons(head, tail) }
+
+ override TypedContent getHead() { result = head }
+
+ override AccessPath getTail() { result = tail }
+
+ override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+ override AccessPathApproxCons getApprox() {
+ // tail is nil: head plus the type of the tail
+ result = TConsNil(head, tail.(AccessPathNil).getType())
+ or
+ // head, the head of the tail, and the total length
+ result = TConsCons(head, tail.getHead(), this.length())
+ or
+ // head and the total length only
+ result = TCons1(head, this.length())
+ }
+
+ override int length() { result = 1 + tail.length() }
+
+ /**
+ * Gets the textual representation of this access path without the leading
+ * `[`. With `needsSuffix = false` the result is already closed with `]`.
+ * With `needsSuffix = true` the result ends in `", ... ("`, and `toString`
+ * appends the length and the closing `")]"`.
+ */
+ private string toStringImpl(boolean needsSuffix) {
+ // tail is nil: close the list and append the pretty-printed type, if any
+ exists(DataFlowType t |
+ tail = TAccessPathNil(t) and
+ needsSuffix = false and
+ result = head.toString() + "]" + concat(" : " + ppReprType(t))
+ )
+ or
+ // tail is another precise cons: recurse
+ result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix)
+ or
+ // tail is abbreviated to two heads plus a length
+ exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) |
+ result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false
+ )
+ or
+ // tail is abbreviated to one head plus a length
+ exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) |
+ result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false
+ )
+ }
+
+ override string toString() {
+ // abbreviated form: append the total length to the open parenthesis
+ result = "[" + this.toStringImpl(true) + length().toString() + ")]"
+ or
+ // complete form: `toStringImpl` has already closed the list
+ result = "[" + this.toStringImpl(false)
+ }
+}
+
+/**
+ * An access path that is represented only by its first two `TypedContent`s
+ * and its total length.
+ */
+private class AccessPathCons2 extends AccessPath, TAccessPathCons2 {
+  private TypedContent first;
+  private TypedContent second;
+  private int size;
+
+  AccessPathCons2() { this = TAccessPathCons2(first, second, size) }
+
+  override TypedContent getHead() { result = first }
+
+  /**
+   * Gets an access path of length `size - 1` whose head is the second
+   * content and whose approximation `Stage4::consCand` pairs with the first
+   * content under some configuration.
+   */
+  override AccessPath getTail() {
+    result.length() = size - 1 and
+    result.getHead() = second and
+    Stage4::consCand(first, result.getApprox(), _)
+  }
+
+  override AccessPathFrontHead getFront() { result = TFrontHead(first) }
+
+  override AccessPathApproxCons getApprox() {
+    result = TCons1(first, size)
+    or
+    result = TConsCons(first, second, size)
+  }
+
+  override int length() { result = size }
+
+  /**
+   * Gets both heads in brackets; when the length is not 2, the bracket
+   * additionally contains an ellipsis and the length.
+   */
+  override string toString() {
+    exists(string prefix | prefix = "[" + first.toString() + ", " + second.toString() |
+      size = 2 and result = prefix + "]"
+      or
+      size != 2 and result = prefix + ", ... (" + size.toString() + ")]"
+    )
+  }
+}
+
+/**
+ * An access path that is represented only by its first `TypedContent` and
+ * its total length.
+ */
+private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
+  private TypedContent hd;
+  private int size;
+
+  AccessPathCons1() { this = TAccessPathCons1(hd, size) }
+
+  override TypedContent getHead() { result = hd }
+
+  /**
+   * Gets an access path of length `size - 1` whose approximation
+   * `Stage4::consCand` pairs with the head under some configuration.
+   */
+  override AccessPath getTail() {
+    result.length() = size - 1 and
+    Stage4::consCand(hd, result.getApprox(), _)
+  }
+
+  override AccessPathFrontHead getFront() { result = TFrontHead(hd) }
+
+  override AccessPathApproxCons getApprox() { result = TCons1(hd, size) }
+
+  override int length() { result = size }
+
+  /**
+   * Gets the head in brackets; when the length is not 1, the bracket
+   * additionally contains an ellipsis and the length.
+   */
+  override string toString() {
+    size = 1 and result = "[" + hd.toString() + "]"
+    or
+    size != 1 and result = "[" + hd.toString() + ", ... (" + size.toString() + ")]"
+  }
+}
+
+/**
+ * A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
+ * Only those `PathNode`s that are reachable from a source are generated.
+ *
+ * Several member predicates are declared here with `none()` as body; the
+ * actual results come from the overrides in `PathNodeImpl` and its subclasses.
+ */
+class PathNode extends TPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { none() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context.
+ */
+ string toStringWithContext() { none() }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ none()
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() }
+
+ /** Gets an implementation-level successor of this node, provided that this node is hidden. */
+ private PathNode getASuccessorIfHidden() {
+ this.(PathNodeImpl).isHidden() and
+ result = this.(PathNodeImpl).getASuccessorImpl()
+ }
+
+ /**
+ * Gets a successor of this node, if any.
+ *
+ * Hidden nodes are skipped: neither this node nor the result is hidden, and
+ * the result is reached from an implementation-level successor of this node
+ * by zero or more steps that each start at a hidden node.
+ */
+ final PathNode getASuccessor() {
+ result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
+ not this.(PathNodeImpl).isHidden() and
+ not result.(PathNodeImpl).isHidden()
+ }
+
+ /** Holds if this node is a source. */
+ predicate isSource() { none() }
+}
+
+/**
+ * The internal base class of path nodes. It supplies the string and location
+ * overrides for `PathNode` in terms of the underlying `NodeEx`, and decides
+ * which nodes are hidden.
+ */
+abstract private class PathNodeImpl extends PathNode {
+ /** Gets a direct successor of this node, without skipping hidden nodes. */
+ abstract PathNode getASuccessorImpl();
+
+ /** Gets the underlying `NodeEx`. */
+ abstract NodeEx getNodeEx();
+
+ /**
+ * Holds if this node is hidden: either `hiddenNode` holds for the underlying
+ * node and this is neither a source nor a sink path node, or the underlying
+ * node is a `TNodeImplicitRead`.
+ */
+ predicate isHidden() {
+ hiddenNode(this.getNodeEx().asNode()) and
+ not this.isSource() and
+ not this instanceof PathNodeSink
+ or
+ this.getNodeEx() instanceof TNodeImplicitRead
+ }
+
+ /**
+ * Gets the access path part of the textual representation: empty for sinks,
+ * otherwise the access path string preceded by a space unless that string
+ * is empty.
+ */
+ private string ppAp() {
+ this instanceof PathNodeSink and result = ""
+ or
+ exists(string s | s = this.(PathNodeMid).getAp().toString() |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ /**
+ * Gets the call context part of the textual representation: empty for
+ * sinks, otherwise the call context in angle brackets preceded by a space.
+ */
+ private string ppCtx() {
+ this instanceof PathNodeSink and result = ""
+ or
+ result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
+ }
+
+ override string toString() { result = this.getNodeEx().toString() + ppAp() }
+
+ override string toStringWithContext() { result = this.getNodeEx().toString() + ppAp() + ppCtx() }
+
+ override predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/**
+ * Holds if `n` can reach a sink: `n` is itself a sink path node, or one of
+ * its successors can reach a sink.
+ */
+private predicate directReach(PathNode n) {
+  directReach(n.getASuccessor())
+  or
+  n instanceof PathNodeSink
+}
+
+/**
+ * Holds if `n` is used in a subpath (`Subpaths::retReach`) or can reach a
+ * sink (`directReach`).
+ */
+private predicate reach(PathNode n) {
+  Subpaths::retReach(n)
+  or
+  directReach(n)
+}
+
+/** Holds if `n2` is a successor of `n1` and `n2` can reach a sink. */
+private predicate pathSucc(PathNode n1, PathNode n2) {
+  directReach(n2) and
+  n2 = n1.getASuccessor()
+}
+
+/** Holds if `n2` is reachable from `n1` by one or more `pathSucc` steps (computed with `fastTC`). */
+private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
+
+/**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ */
+module PathGraph {
+  /**
+   * Holds if `(a,b)` is an edge in the graph of data flow path explanations:
+   * `b` is a successor of `a`, and `b` satisfies `reach`.
+   */
+  query predicate edges(PathNode a, PathNode b) {
+    reach(b) and
+    b = a.getASuccessor()
+  }
+
+  /**
+   * Holds if `n` is a node in the graph of data flow path explanations.
+   * The only key reported is `semmle.label`, whose value is `n.toString()`.
+   */
+  query predicate nodes(PathNode n, string key, string val) {
+    key = "semmle.label" and
+    val = n.toString() and
+    reach(n)
+  }
+
+  /** The subpath relation, taken unchanged from `Subpaths::subpaths`. */
+  query predicate subpaths = Subpaths::subpaths/4;
+}
+
+/**
+ * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`,
+ * a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`.
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+ NodeEx node;
+ CallContext cc;
+ SummaryCtx sc;
+ AccessPath ap;
+ Configuration config;
+
+ PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ /** Gets the call context of this node. */
+ CallContext getCallContext() { result = cc }
+
+ /** Gets the summary context of this node. */
+ SummaryCtx getSummaryCtx() { result = sc }
+
+ /** Gets the access path of this node. */
+ AccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ /**
+ * Gets an intermediate node with the same configuration whose components
+ * are related to this node by `pathStep`.
+ */
+ private PathNodeMid getSuccMid() {
+ pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(),
+ result.getAp()) and
+ result.getConfiguration() = unbindConf(this.getConfiguration())
+ }
+
+ override PathNodeImpl getASuccessorImpl() {
+ // an intermediate step to another intermediate node
+ result = getSuccMid()
+ or
+ // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+ exists(PathNodeMid mid, PathNodeSink sink |
+ mid = getSuccMid() and
+ mid.getNodeEx() = sink.getNodeEx() and
+ mid.getAp() instanceof AccessPathNil and
+ sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+ result = sink
+ )
+ }
+
+ /**
+ * Holds if this node has the shape in which `TPathNodeMid` introduces
+ * sources: a source node with `CallContextAny`, no summary context and a
+ * nil access path.
+ */
+ override predicate isSource() {
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap instanceof AccessPathNil
+ }
+}
+
+/**
+ * A flow graph node corresponding to a sink. This is disjoint from the
+ * intermediate nodes in order to uniquely correspond to a given sink by
+ * excluding the `CallContext`.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+ NodeEx node;
+ Configuration config;
+
+ PathNodeSink() { this = TPathNodeSink(node, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ override Configuration getConfiguration() { result = config }
+
+ /** A sink path node has no successors. */
+ override PathNode getASuccessorImpl() { none() }
+
+ /** Holds if the underlying node is also a source in the same configuration. */
+ override predicate isSource() { sourceNode(node, config) }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`. The last step in or out of
+ * a callable is recorded by `cc`.
+ *
+ * `sc` and `ap` are the summary context and access path at `node`. Each
+ * disjunct below handles one kind of step.
+ */
+private predicate pathStep(
+ PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+ // local big-step: the call context and summary context are carried over
+ exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC |
+ midnode = mid.getNodeEx() and
+ conf = mid.getConfiguration() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ localCC =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ midnode.getEnclosingCallable()) and
+ ap0 = mid.getAp()
+ |
+ // steps flagged `true` keep the access path unchanged
+ localFlowBigStep(midnode, node, true, _, conf, localCC) and
+ ap = ap0
+ or
+ // steps flagged `false` need a nil access path before the step and constrain the front of `ap`
+ localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and
+ ap0 instanceof AccessPathNil
+ )
+ or
+ // jump step: the access path is kept, the call and summary contexts are reset
+ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = mid.getAp()
+ or
+ // additional jump step: only from a nil access path, to a nil access path of the type of `node`
+ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ mid.getAp() instanceof AccessPathNil and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ // store: popping `tc` from `ap` gives the access path at `mid`
+ exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ // read: pushing `tc` onto `ap` gives the access path at `mid`
+ exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and
+ sc = mid.getSummaryCtx()
+ or
+ // flow into a callable
+ pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp()
+ or
+ // flow out of a callable: the summary context is reset
+ pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone
+ or
+ // flow through a callable: the summary context is carried over
+ pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx()
+}
+
+/**
+ * Holds if `mid` has access path `ap0` with head `tc` and call context `cc`,
+ * and Stage 4 has a candidate read step of the content of `tc` from the node
+ * of `mid` to `node` under the configuration of `mid`.
+ */
+pragma[nomagic]
+private predicate pathReadStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  cc = mid.getCallContext() and
+  ap0 = mid.getAp() and
+  ap0.getHead() = tc and
+  Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration())
+}
+
+/**
+ * Holds if `mid` has access path `ap0` and call context `cc`, and Stage 4 has
+ * a candidate store step of `tc` from the node of `mid` to `node` under the
+ * configuration of `mid`. The access path and content type columns of the
+ * Stage 4 store candidate are left unconstrained here.
+ */
+pragma[nomagic]
+private predicate pathStoreStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  cc = mid.getCallContext() and
+  ap0 = mid.getAp() and
+  Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration())
+}
+
+private predicate pathOutOfCallable0( // projects the fields of a `mid` located at a return node
+ PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+ Configuration config
+) {
+ pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+ innercc = mid.getCallContext() and
+ innercc instanceof CallContextNoCall and // only `CallContextNoCall` contexts qualify
+ apa = mid.getAp().getApprox() and // only the approximation of the access path is exposed
+ config = mid.getConfiguration()
+}
+
+pragma[nomagic]
+private predicate pathOutOfCallable1( // pairs a returning `mid` with each `call` from `resolveReturn`
+ PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ pathOutOfCallable0(mid, pos, innercc, apa, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ | // `cc` is `TReturn(c, call)` only if `reducedViableImplInReturn(c, call)` holds
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+ )
+}
+
+pragma[noinline]
+private NodeEx getAnOutNodeFlow( // gets an out node of `call` for `kind` that is in `Stage4::revFlow`
+ ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+ result.asNode() = kind.getAnOutNode(call) and
+ Stage4::revFlow(result, _, _, apa, config) // with access path approximation `apa` under `config`
+}
+
+/**
+ * Holds if data may flow from `mid` to `out`. The last step of this path
+ * is a return from a callable and is recorded by `cc`, if needed.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+ exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config |
+ pathOutOfCallable1(mid, call, kind, cc, apa, config) and
+ out = getAnOutNodeFlow(kind, call, apa, config) // keeps only out nodes in `Stage4::revFlow`
+ )
+}
+
+/**
+ * Holds if `mid` is the `i`th argument of `call` in `cc`; `apa` approximates `mid`'s path `ap`.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+ PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ apa = ap.getApprox()
+ )
+}
+
+pragma[noinline]
+private predicate parameterCand( // `callable` has an `i`th parameter in `Stage4::revFlow` with `apa`
+ DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+) {
+ exists(ParamNodeEx p |
+ Stage4::revFlow(p, _, _, apa, config) and
+ p.isParameterOf(callable, i)
+ )
+}
+
+pragma[nomagic]
+private predicate pathIntoCallable0( // `mid` is argument `i` of `call`, which may target `callable`
+ PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+ AccessPath ap
+) {
+ exists(AccessPathApprox apa | // NOTE(review): `any(int j | ...)` below is just `i`; confirm intent
+ pathIntoArg(mid, i, outercc, call, ap, apa) and
+ callable = resolveCall(call, outercc) and
+ parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration())
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to `p` through `call`. The contexts
+ * before and after entering the callable are `outercc` and `innercc`,
+ * respectively.
+ */
+private predicate pathIntoCallable(
+ PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+ DataFlowCall call
+) {
+ exists(int i, DataFlowCallable callable, AccessPath ap |
+ pathIntoCallable0(mid, callable, i, outercc, call, ap) and
+ p.isParameterOf(callable, i) and
+ ( // `sc` is the summary context for `(p, ap)` when that value exists, else `TSummaryCtxNone()`
+ sc = TSummaryCtxSome(p, ap)
+ or
+ not exists(TSummaryCtxSome(p, ap)) and
+ sc = TSummaryCtxNone()
+ )
+ | // `innercc` names `call` only if `recordDataFlowCallSite(call, callable)` holds
+ if recordDataFlowCallSite(call, callable)
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+}
+
+/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */
+pragma[nomagic]
+private predicate paramFlowsThrough(
+ ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+ Configuration config
+) {
+ exists(PathNodeMid mid, RetNodeEx ret, int pos |
+ mid.getNodeEx() = ret and // `mid` is a path node located at a return node
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp() and
+ apa = ap.getApprox() and
+ pos = sc.getParameterPos() and
+ not kind.(ParamUpdateReturnKind).getPosition() = pos // not an update of the entered parameter
+ )
+}
+
+pragma[nomagic]
+private predicate pathThroughCallable0( // enter `call` from `mid`, then reach a return of `kind`
+ DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+ AccessPathApprox apa
+) {
+ exists(CallContext innercc, SummaryCtx sc | // entry and return share `innercc` and `sc`
+ pathIntoCallable(mid, _, cc, innercc, sc, call) and
+ paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` through a callable to the node `out`.
+ * The context `cc` is restored to its value prior to entering the callable.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+ exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa |
+ pathThroughCallable0(call, mid, kind, cc, ap, apa) and // `ap` is the path at the return node
+ out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration()))
+ )
+}
+
+private module Subpaths {
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ */
+ pragma[nomagic]
+ private predicate subpaths01(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
+ pathIntoCallable(arg, par, _, innercc, sc, _) and
+ paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+ unbindConf(arg.getConfiguration()))
+ }
+
+ /**
+ * Holds if `subpaths01` holds for these arguments and, in addition, `out` is an
+ * out node of some call for the return kind `kind`.
+ */
+ pragma[nomagic]
+ private predicate subpaths02(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ subpaths01(arg, par, sc, innercc, kind, out, apout) and
+ out.asNode() = kind.getAnOutNode(_)
+ }
+
+ pragma[nomagic] // NOTE(review): presumably a non-inlined wrapper to control join order; confirm
+ private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
+ */
+ pragma[nomagic]
+ private predicate subpaths03(
+ PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+ ) {
+ exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+ subpaths02(arg, par, sc, innercc, kind, out, apout) and
+ ret.getNodeEx() = retnode and // `ret` is the path node matching the return described above
+ kind = retnode.getKind() and
+ innercc = ret.getCallContext() and
+ sc = ret.getSummaryCtx() and
+ ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+ apout = ret.getAp() and
+ not ret.isHidden() // hidden path nodes are never reported as `ret`
+ )
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+ * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+ * `ret -> out` is summarized as the edge `arg -> out`.
+ */
+ predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+ exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+ pragma[only_bind_into](arg).getASuccessor() = par and // both `par` and `out` are successors
+ pragma[only_bind_into](arg).getASuccessor() = out and // of `arg` in the path graph
+ subpaths03(arg, p, ret, o, apout) and
+ par.getNodeEx() = p and
+ out.getNodeEx() = o and
+ out.getAp() = apout
+ )
+ }
+
+ /**
+ * Holds if `n` can reach a return node in a summarized subpath.
+ */
+ predicate retReach(PathNode n) {
+ subpaths(_, _, n, _) // base case: `n` is itself the `ret` of a subpath-tuple
+ or
+ exists(PathNode mid |
+ retReach(mid) and
+ n.getASuccessor() = mid and
+ not subpaths(_, mid, _, _) // do not step backwards past the `par` node of a subpath-tuple
+ )
+ }
+}
+
+/**
+ * Holds if `flowsource` is a source path node of `configuration` located at
+ * `source`, `flowsink` is a sink path node located at `sink`, and `flowsink`
+ * is `flowsource` itself or is reachable from it through path successors.
+ *
+ * No results are produced unless `configuration` has both sources and sinks.
+ */
+private predicate flowsTo(
+ PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+ configuration = flowsource.getConfiguration() and
+ flowsource.isSource() and
+ source = flowsource.(PathNodeImpl).getNodeEx().asNode() and
+ sink = flowsink.getNodeEx().asNode() and
+ (pathSuccPlus(flowsource, flowsink) or flowsink = flowsource)
+}
+
+/**
+ * Holds if `configuration` admits inter-procedural data flow from the node
+ * `source` to the node `sink`.
+ *
+ * No results are produced unless `configuration` has both sources and sinks.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+ exists(PathNode first, PathNodeSink last | flowsTo(first, last, source, sink, configuration))
+}
+
+private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) {
+ fwd = true and
+ nodes = count(NodeEx n | exists(PathNodeImpl p | n = p.getNodeEx())) and
+ fields = count(TypedContent tc | exists(PathNodeMid p | tc = p.getAp().getHead())) and
+ conscand = count(AccessPath path | exists(PathNodeMid p | path = p.getAp())) and
+ tuples = count(PathNode p)
+ or
+ fwd = false and
+ nodes = count(NodeEx n | exists(PathNodeImpl p | reach(p) and n = p.getNodeEx())) and
+ fields = count(TypedContent tc | exists(PathNodeMid p | reach(p) and tc = p.getAp().getHead())) and
+ conscand = count(AccessPath path | exists(PathNodeMid p | reach(p) and path = p.getAp())) and
+ tuples = count(PathNode p | reach(p))
+}
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Holds if row `n`, labelled `stage`, has the given data-flow metrics; stage 5 rows ignore `config`.
+ */
+predicate stageStats(
+ int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+ n = 10 and stage = "1 Fwd" and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ n = 15 and stage = "1 Rev" and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ n = 20 and stage = "2 Fwd" and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ n = 25 and stage = "2 Rev" and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ n = 30 and stage = "3 Fwd" and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ n = 35 and stage = "3 Rev" and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ n = 40 and stage = "4 Fwd" and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+ or
+ n = 45 and stage = "4 Rev" and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+ or
+ n = 50 and stage = "5 Fwd" and finalStats(true, nodes, fields, conscand, tuples)
+ or
+ n = 55 and stage = "5 Rev" and finalStats(false, nodes, fields, conscand, tuples)
+}
+
+private module FlowExploration {
+ private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) {
+ exists(NodeEx node1, NodeEx node2 | // some edge below leads from `c1` into a different `c2`
+ jumpStep(node1, node2, config)
+ or
+ additionalJumpStep(node1, node2, config)
+ or
+ // flow into callable
+ viableParamArgEx(_, node2, node1)
+ or
+ // flow out of a callable
+ viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2)
+ |
+ c1 = node1.getEnclosingCallable() and
+ c2 = node2.getEnclosingCallable() and
+ c1 != c2 // steps that stay within one callable are ignored
+ )
+ }
+
+ private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+ exists(Node n | config.isSource(n) and c = getNodeEnclosingCallable(n)) // `c` holds a source
+ or
+ exists(DataFlowCallable mid | // or `c` is one `callableStep` after such a callable
+ interestingCallableSrc(mid, config) and callableStep(mid, c, config)
+ )
+ }
+
+ private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+ exists(Node n | config.isSink(n) and c = getNodeEnclosingCallable(n)) // `c` holds a sink
+ or
+ exists(DataFlowCallable mid | // or `c` is one `callableStep` before such a callable
+ interestingCallableSink(mid, config) and callableStep(c, mid, config)
+ )
+ }
+
+ private newtype TCallableExt =
+ TCallable(DataFlowCallable c, Configuration config) { // a callable paired with a configuration
+ interestingCallableSrc(c, config) or
+ interestingCallableSink(c, config)
+ } or
+ TCallableSrc() or // synthetic start node; `callableExtStepFwd` links it to source callables
+ TCallableSink() // synthetic end node; `callableExtStepFwd` links sink callables to it
+
+ private predicate callableExtSrc(TCallableSrc src) { any() } // start set for `distSrcExt`
+
+ private predicate callableExtSink(TCallableSink sink) { any() } // start set for `distSinkExt`
+
+ private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) {
+ exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | // callable to callable
+ callableStep(c1, c2, config) and
+ ce1 = TCallable(c1, pragma[only_bind_into](config)) and
+ ce2 = TCallable(c2, pragma[only_bind_into](config))
+ )
+ or
+ exists(Node n, Configuration config | // synthetic source node to each callable with a source
+ ce1 = TCallableSrc() and
+ config.isSource(n) and
+ ce2 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ or
+ exists(Node n, Configuration config | // each callable with a sink to the synthetic sink node
+ ce2 = TCallableSink() and
+ config.isSink(n) and
+ ce1 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ }
+
+ private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+ callableExtStepFwd(ce2, ce1) // the forward step relation with its arguments swapped
+ }
+
+ private int distSrcExt(TCallableExt c) = // distance from `TCallableSrc` along forward steps
+ shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result)
+
+ private int distSinkExt(TCallableExt c) = // distance from `TCallableSink` along reversed steps
+ shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result)
+
+ private int distSrc(DataFlowCallable c, Configuration config) {
+ exists(int extDist | extDist = distSrcExt(TCallable(c, config)) | result = extDist - 1)
+ }
+
+ private int distSink(DataFlowCallable c, Configuration config) {
+ exists(int extDist | extDist = distSinkExt(TCallable(c, config)) | result = extDist - 1)
+ }
+
+ private newtype TPartialAccessPath =
+ TPartialNil(DataFlowType t) or // the empty path, tagged with a type
+ TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } // head and length
+
+ /**
+ * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first
+ * element of the list and its length are tracked. If data flows from a source to
+ * a given node with a given `AccessPath`, this indicates the sequence of
+ * dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+ private class PartialAccessPath extends TPartialAccessPath {
+ abstract string toString();
+
+ TypedContent getHead() { this = TPartialCons(result, _) } // no result for the empty path
+
+ int len() {
+ this = TPartialNil(_) and result = 0
+ or
+ this = TPartialCons(_, result)
+ }
+
+ DataFlowType getType() {
+ this = TPartialNil(result)
+ or // for a non-empty path, the container type of the head content
+ exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType())
+ }
+ }
+
+ private class PartialAccessPathNil extends PartialAccessPath, TPartialNil {
+ override string toString() { // ": " followed by `ppReprType` of the tracked type
+ exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t)))
+ }
+ }
+
+ private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+ override string toString() {
+ exists(TypedContent head, int n | this = TPartialCons(head, n) |
+ n = 1 and
+ result = "[" + head.toString() + "]"
+ or
+ n != 1 and result = "[" + head.toString() + ", ... (" + n.toString() + ")]"
+ )
+ }
+ }
+
+ private newtype TRevPartialAccessPath =
+ TRevPartialNil() or // the empty path; unlike `TPartialNil` it carries no type
+ TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } // head and length
+
+ /**
+ * Conceptually a list of `Content`s, but only the first
+ * element of the list and its length are tracked.
+ */
+ private class RevPartialAccessPath extends TRevPartialAccessPath {
+ abstract string toString();
+
+ Content getHead() { this = TRevPartialCons(result, _) } // no result for the empty path
+
+ int len() {
+ this = TRevPartialNil() and result = 0
+ or
+ this = TRevPartialCons(_, result)
+ }
+ }
+
+ private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil {
+ override string toString() { result = "" } // `PartialPathNode.ppAp` special-cases this value
+ }
+
+ private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+ override string toString() {
+ exists(Content head, int n | this = TRevPartialCons(head, n) |
+ n = 1 and
+ result = "[" + head.toString() + "]"
+ or
+ n != 1 and result = "[" + head.toString() + ", ... (" + n.toString() + ")]"
+ )
+ }
+ }
+
+ private newtype TSummaryCtx1 = // first half of a forward summary context
+ TSummaryCtx1None() or
+ TSummaryCtx1Param(ParamNodeEx p) // the parameter through which the callable was entered
+
+ private newtype TSummaryCtx2 = // second half of a forward summary context
+ TSummaryCtx2None() or
+ TSummaryCtx2Some(PartialAccessPath ap) // the access path held when the callable was entered
+
+ private newtype TRevSummaryCtx1 = // first half of a reverse summary context
+ TRevSummaryCtx1None() or
+ TRevSummaryCtx1Some(ReturnPosition pos) // the return position entered in reverse
+
+ private newtype TRevSummaryCtx2 = // second half of a reverse summary context
+ TRevSummaryCtx2None() or
+ TRevSummaryCtx2Some(RevPartialAccessPath ap) // the access path held at that return position
+
+ private newtype TPartialPathNode =
+ TPartialPathNodeFwd(
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ sourceNode(node, config) and // base case: a source with empty contexts and an empty path
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = TPartialNil(node.getDataFlowType()) and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit()) // requires `config` to define an exploration limit
+ or
+ partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and // step case, bounded by the limit
+ distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ } or
+ TPartialPathNodeRev(
+ NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ sinkNode(node, config) and // base case: a sink with empty contexts and an empty path
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil() and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit()) // requires `config` to define an exploration limit
+ or
+ exists(PartialPathNodeRev mid | // step case: one reverse step from an existing node
+ revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+ not clearsContentCached(node.asNode(), ap.getHead()) and
+ not fullBarrier(node, config) and
+ distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathNodeMk0( // one forward step from an existing node, plus filters
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStep(mid, node, cc, sc1, sc2, ap, config) and
+ not fullBarrier(node, config) and
+ not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
+ if node.asNode() instanceof CastingNode // type compatibility is checked at casting nodes only
+ then compatibleTypes(node.getDataFlowType(), ap.getType())
+ else any()
+ )
+ }
+
+ /**
+ * A `Node` augmented with a call context, an access path, and a configuration.
+ */
+ class PartialPathNode extends TPartialPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context.
+ */
+ string toStringWithContext() { // NOTE(review): `ppCtx` only has a result for forward nodes
+ result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.getNodeEx().projectToNode() = result }
+
+ private NodeEx getNodeEx() { // dispatches to whichever of the two subclasses this node is
+ result = this.(PartialPathNodeFwd).getNodeEx() or
+ result = this.(PartialPathNodeRev).getNodeEx()
+ }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() } // overridden by the Fwd and Rev subclasses
+
+ /** Gets a successor of this node, if any. */
+ PartialPathNode getASuccessor() { none() } // overridden by the Fwd and Rev subclasses
+
+ /**
+ * Gets the approximate distance to the nearest source measured in number
+ * of interprocedural steps.
+ */
+ int getSourceDistance() {
+ result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ /**
+ * Gets the approximate distance to the nearest sink measured in number
+ * of interprocedural steps.
+ */
+ int getSinkDistance() {
+ result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+ }
+
+ private string ppAp() { // the access path text, with a leading space unless it is empty
+ exists(string s |
+ s = this.(PartialPathNodeFwd).getAp().toString() or
+ s = this.(PartialPathNodeRev).getAp().toString()
+ |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ private string ppCtx() { // the call context in angle brackets; defined for forward nodes only
+ result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+ }
+
+ /** Holds if this is a source in a forward-flow path. */
+ predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+ /** Holds if this is a sink in a reverse-flow path. */
+ predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+ }
+
+ /**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ */
+ module PartialPathGraph {
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } // Fwd+Rev
+ }
+
+ private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd {
+ NodeEx node; // the fields mirror the components of `TPartialPathNodeFwd`
+ CallContext cc;
+ TSummaryCtx1 sc1;
+ TSummaryCtx2 sc2;
+ PartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) }
+
+ NodeEx getNodeEx() { result = node }
+
+ CallContext getCallContext() { result = cc }
+
+ TSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ TSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ PartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PartialPathNodeFwd getASuccessor() { // a node whose components are one step from this
+ partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+ result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+ }
+
+ predicate isSource() { // same conditions as the base case of `TPartialPathNodeFwd`
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap instanceof TPartialNil
+ }
+ }
+
+ private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev {
+ NodeEx node; // the fields mirror the components of `TPartialPathNodeRev`
+ TRevSummaryCtx1 sc1;
+ TRevSummaryCtx2 sc2;
+ RevPartialAccessPath ap;
+ Configuration config;
+
+ PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) }
+
+ NodeEx getNodeEx() { result = node }
+
+ TRevSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+ TRevSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+ RevPartialAccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PartialPathNodeRev getASuccessor() { // `result` is the `mid` this node was stepped from
+ revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+ this.getAp(), this.getConfiguration())
+ }
+
+ predicate isSink() { // same conditions as the base case of `TPartialPathNodeRev`
+ sinkNode(node, config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil()
+ }
+ }
+
+ private predicate partialPathStep( // one forward exploration step from `mid` to `node`
+ PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and
+ ( // the guard above applies to the two local-step cases in this parenthesis only
+ localFlowStep(mid.getNodeEx(), node, config) and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(mid.getNodeEx(), node, config) and
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof PartialAccessPathNil and // additional steps need an empty path
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ )
+ or
+ jumpStep(mid.getNodeEx(), node, config) and // jump steps reset both contexts
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(mid.getNodeEx(), node, config) and
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ mid.getAp() instanceof PartialAccessPathNil and
+ ap = TPartialNil(node.getDataFlowType()) and
+ config = mid.getConfiguration()
+ or
+ partialPathStoreStep(mid, _, _, node, ap) and // store: `ap` is one longer than `mid`'s path
+ cc = mid.getCallContext() and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(PartialAccessPath ap0, TypedContent tc | // read: `ap` is a tail seen by `apConsFwd`
+ partialPathReadStep(mid, ap0, tc, node, cc, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsFwd(ap, tc, ap0, config)
+ )
+ or
+ partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config)
+ or
+ partialPathOutOfCallable(mid, node, cc, ap, config) and // returning clears the summary context
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None()
+ or
+ partialPathThroughCallable(mid, node, cc, ap, config) and // flow-through keeps `mid`'s context
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ bindingset[result, i] // both values must be bound by the caller
+ private int unbindInt(int i) { i <= result and i >= result } // holds exactly when `result = i`
+
+ pragma[inline]
+ private predicate partialPathStoreStep( // store of `tc` taking path `ap1` at `mid` to `ap2`
+ PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node,
+ PartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode, DataFlowType contentType |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and
+ store(midNode, tc, node, contentType, mid.getConfiguration()) and
+ ap2.getHead() = tc and // the new path has `tc` as head and is one element longer
+ ap2.len() = unbindInt(ap1.len() + 1) and
+ compatibleTypes(ap1.getType(), contentType)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate apConsFwd( // some store step under `config` turns `ap1` into `ap2` via `tc`
+ PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStoreStep(mid, ap1, tc, _, ap2) and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathReadStep( // read of `tc` from `mid` to `node`; `ap` is `mid`'s path
+ PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc,
+ Configuration config
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and
+ read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and
+ ap.getHead() = tc and // the content that is read must be the head of the current path
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ cc = mid.getCallContext()
+ )
+ }
+
+ private predicate partialPathOutOfCallable0( // projects the fields of a `mid` at a return node
+ PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap,
+ Configuration config
+ ) {
+ pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+ innercc = mid.getCallContext() and
+ innercc instanceof CallContextNoCall and // only `CallContextNoCall` contexts qualify
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ }
+
+ pragma[nomagic]
+ private predicate partialPathOutOfCallable1( // pairs a returning `mid` with a resolved `call`
+ PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+ partialPathOutOfCallable0(mid, pos, innercc, ap, config) and
+ c = pos.getCallable() and
+ kind = pos.getKind() and
+ resolveReturn(innercc, c, call)
+ | // `cc` is `TReturn(c, call)` only if `reducedViableImplInReturn(c, call)` holds
+ if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+ )
+ }
+
+ private predicate partialPathOutOfCallable( // return step from `mid` to an out node `out`
+ PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+ exists(ReturnKindExt kind, DataFlowCall call |
+ partialPathOutOfCallable1(mid, call, kind, cc, ap, config)
+ |
+ out.asNode() = kind.getAnOutNode(call) // no pruning-stage filter is applied to `out` here
+ )
+ }
+
+ pragma[noinline]
+ private predicate partialPathIntoArg( // `mid` is the `i`th argument of `call`
+ PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(ArgNode arg |
+ arg = mid.getNodeEx().asNode() and
+ cc = mid.getCallContext() and // `cc`, `ap` and `config` are read off `mid`
+ arg.argumentOf(call, i) and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathIntoCallable0( // adds the `callable` that `call` resolves to
+ PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc,
+ DataFlowCall call, PartialAccessPath ap, Configuration config
+ ) {
+ partialPathIntoArg(mid, i, outercc, call, ap, config) and
+ callable = resolveCall(call, outercc)
+ }
+
+ private predicate partialPathIntoCallable( // call step from argument `mid` to parameter `p`
+ PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc,
+ TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(int i, DataFlowCallable callable |
+ partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and
+ p.isParameterOf(callable, i) and
+ sc1 = TSummaryCtx1Param(p) and // a fresh summary context records the entry point `(p, ap)`
+ sc2 = TSummaryCtx2Some(ap)
+ | // `innercc` names `call` only if `recordDataFlowCallSite(call, callable)` holds
+ if recordDataFlowCallSite(call, callable)
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate paramFlowsThroughInPartialPath( // a forward node sits at a return of `kind`
+ ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid, RetNodeEx ret |
+ mid.getNodeEx() = ret and
+ kind = ret.getKind() and
+ cc = mid.getCallContext() and // the remaining columns are read off that node
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration() and
+ ap = mid.getAp()
+ )
+ }
+
+ pragma[noinline]
+ private predicate partialPathThroughCallable0( // enter `call` from `mid`, reach a `kind` return
+ DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc,
+ PartialAccessPath ap, Configuration config
+ ) {
+ exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | // shared by entry and return
+ partialPathIntoCallable(mid, _, cc, innercc, sc1, sc2, call, _, config) and
+ paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config)
+ )
+ }
+
+ private predicate partialPathThroughCallable( // flow from `mid` through a call to out node `out`
+ PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, ReturnKindExt kind |
+ partialPathThroughCallable0(call, mid, kind, cc, ap, config) and
+ out.asNode() = kind.getAnOutNode(call)
+ )
+ }
+
+ private predicate revPartialPathStep( // one reverse exploration step from `mid` back to `node`
+ PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
+ RevPartialAccessPath ap, Configuration config
+ ) {
+ localFlowStep(node, mid.getNodeEx(), config) and // steps run backwards: `node` flows to `mid`
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalLocalFlowStep(node, mid.getNodeEx(), config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ mid.getAp() instanceof RevPartialAccessPathNil and // additional steps need an empty path
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ jumpStep(node, mid.getNodeEx(), config) and // jump steps reset the summary context
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ or
+ additionalJumpStep(node, mid.getNodeEx(), config) and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ mid.getAp() instanceof RevPartialAccessPathNil and
+ ap = TRevPartialNil() and
+ config = mid.getConfiguration()
+ or
+ revPartialPathReadStep(mid, _, _, node, ap) and // read: `ap` is one longer than `mid`'s path
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ config = mid.getConfiguration()
+ or
+ exists(RevPartialAccessPath ap0, Content c | // store: `ap` is a tail seen by `apConsRev`
+ revPartialPathStoreStep(mid, ap0, c, node, config) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ apConsRev(ap, c, ap0, config)
+ )
+ or
+ exists(ParamNodeEx p | // parameter back to argument, only when `mid` has no summary context
+ mid.getNodeEx() = p and
+ viableParamArgEx(_, p, node) and
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2() and
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ or
+ exists(ReturnPosition pos | // out node back to a return node, starting a summary context
+ revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and
+ pos = getReturnPosition(node.asNode())
+ )
+ or
+ revPartialPathThroughCallable(mid, node, ap, config) and // flow-through keeps `mid`'s context
+ sc1 = mid.getSummaryCtx1() and
+ sc2 = mid.getSummaryCtx2()
+ }
+
+ pragma[inline]
+ private predicate revPartialPathReadStep( // read of `c` from `node` to `mid`, `ap1` to `ap2`
+ PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node,
+ RevPartialAccessPath ap2
+ ) {
+ exists(NodeEx midNode |
+ midNode = mid.getNodeEx() and
+ ap1 = mid.getAp() and
+ read(node, c, midNode, mid.getConfiguration()) and
+ ap2.getHead() = c and // the new path has `c` as head and is one element longer
+ ap2.len() = unbindInt(ap1.len() + 1)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate apConsRev( // some reverse read step under `config` turns `ap1` into `ap2`
+ RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config
+ ) {
+ exists(PartialPathNodeRev mid |
+ revPartialPathReadStep(mid, ap1, c, _, ap2) and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathStoreStep( // store of `c` from `node` into `mid`'s node
+ PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config
+ ) {
+ exists(NodeEx midNode, TypedContent tc |
+ midNode = mid.getNodeEx() and
+ ap = mid.getAp() and
+ store(node, tc, midNode, _, config) and
+ ap.getHead() = c and // the stored content must be the head of `mid`'s path
+ config = mid.getConfiguration() and
+ tc.getContent() = c
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathIntoReturn( // `mid` is an out node of `call` for position `pos`
+ PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2,
+ DataFlowCall call, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(NodeEx out |
+ mid.getNodeEx() = out and
+ viableReturnPosOutEx(call, pos, out) and
+ sc1 = TRevSummaryCtx1Some(pos) and // the new summary context records `(pos, ap)`
+ sc2 = TRevSummaryCtx2Some(ap) and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathFlowsThrough( // a reverse node sits at the parameter at `pos`
+ int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeRev mid, ParamNodeEx p |
+ mid.getNodeEx() = p and
+ p.getPosition() = pos and
+ sc1 = mid.getSummaryCtx1() and // the typed columns force a non-`None` summary context
+ sc2 = mid.getSummaryCtx2() and
+ ap = mid.getAp() and
+ config = mid.getConfiguration()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable0( // return entry and parameter exit agree
+ DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | // on one summary context
+ revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and
+ revPartialPathFlowsThrough(pos, sc1, sc2, ap, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable( // reverse flow-through from `mid` to `node`
+ PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config
+ ) {
+ exists(DataFlowCall call, int pos |
+ revPartialPathThroughCallable0(call, mid, pos, ap, config) and
+ node.asNode().(ArgNode).argumentOf(call, pos) // `node` is the argument of `call` at `pos`
+ )
+ }
+}
+
+import FlowExploration
+
+private predicate partialFlow(
+ PartialPathNode source, PartialPathNode node, Configuration configuration
+) {
+ source.isFwdSource() and
+ configuration = source.getConfiguration() and
+ source.getASuccessor+() = node
+}
+
+private predicate revPartialFlow(
+ PartialPathNode node, PartialPathNode sink, Configuration configuration
+) {
+ sink.isRevSink() and
+ configuration = sink.getConfiguration() and
+ sink = node.getASuccessor+()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll
new file mode 100644
index 00000000000..f43a550af57
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll
@@ -0,0 +1,1294 @@
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+import Cached
+
+/**
+ * Cost limits governing the expansion from `AccessPathFront` to `AccessPathApprox`.
+ *
+ * `apLimit` is the bound on acceptable fan-out; `tupleLimit` is the bound on the
+ * estimated tuple cost per `AccessPathFront`. An access path that exceeds both
+ * limits is represented with lower precision during pruning.
+ */
+predicate accessPathApproxCostLimits(int apLimit, int tupleLimit) {
+ tupleLimit = 10000 and
+ apLimit = 10
+}
+
+/**
+ * Cost limits governing the expansion from `AccessPathApprox` to `AccessPath`.
+ *
+ * `apLimit` is the bound on acceptable fan-out; `tupleLimit` is the bound on the
+ * estimated tuple cost per `AccessPathApprox`. An access path that exceeds both
+ * limits is represented with lower precision.
+ */
+predicate accessPathCostLimits(int apLimit, int tupleLimit) {
+ tupleLimit = 1000 and
+ apLimit = 5
+}
+
+/**
+ * Provides a simple data-flow analysis for resolving lambda calls. The analysis
+ * currently excludes read-steps, store-steps, and flow-through.
+ *
+ * The analysis uses non-linear recursion: When computing a flow path in or out
+ * of a call, we use the results of the analysis recursively to resolve lambda
+ * calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
+ */
+private module LambdaFlow {
+ private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
+ exists(DataFlowCallable tgt | tgt = viableCallable(call) and p.isParameterOf(tgt, i))
+ }
+
+ private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
+ exists(DataFlowCallable tgt | tgt = viableCallableLambda(call, _) and p.isParameterOf(tgt, i))
+ }
+
+ private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
+ exists(int pos |
+ arg.argumentOf(call, pos) and
+ viableParamNonLambda(call, pos, p)
+ )
+ }
+
+ private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
+ exists(int pos |
+ arg.argumentOf(call, pos) and
+ viableParamLambda(call, pos, p)
+ )
+ }
+
+ private newtype TReturnPositionSimple =
+ TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) {
+ exists(ReturnNode returnNode |
+ returnNode.getKind() = kind and
+ getNodeEnclosingCallable(returnNode) = c
+ )
+ }
+
+ pragma[noinline]
+ private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) {
+ exists(DataFlowCallable c | c = getNodeEnclosingCallable(ret) | result = TReturnPositionSimple0(c, kind))
+ }
+
+ pragma[nomagic]
+ private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) {
+ exists(DataFlowCallable tgt | tgt = viableCallable(call) | result = TReturnPositionSimple0(tgt, kind))
+ }
+
+ pragma[nomagic]
+ private TReturnPositionSimple viableReturnPosLambda(
+ DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind
+ ) {
+ exists(DataFlowCallable tgt | tgt = viableCallableLambda(call, lastCall) | result = TReturnPositionSimple0(tgt, kind))
+ }
+
+ private predicate viableReturnPosOutNonLambda(
+ DataFlowCall call, TReturnPositionSimple pos, OutNode out
+ ) {
+ exists(ReturnKind rk |
+ out = getAnOutNode(call, rk) and
+ viableReturnPosNonLambda(call, rk) = pos
+ )
+ }
+
+ private predicate viableReturnPosOutLambda(
+ DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out
+ ) {
+ exists(ReturnKind rk |
+ out = getAnOutNode(call, rk) and
+ viableReturnPosLambda(call, lastCall, rk) = pos
+ )
+ }
+
+ /**
+ * Holds if data can flow (inter-procedurally) from `node` (of type `t`) to
+ * the lambda call `lambdaCall`.
+ *
+ * The parameter `toReturn` indicates whether the path from `node` to
+ * `lambdaCall` goes through a return, and `toJump` whether the path goes
+ * through a jump step.
+ *
+ * The call context `lastCall` records the last call on the path from `node`
+ * to `lambdaCall`, if any. That is, `lastCall` is able to target the enclosing
+ * callable of `lambdaCall`.
+ */
+ pragma[nomagic]
+ predicate revLambdaFlow(
+ DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
+ boolean toJump, DataFlowCallOption lastCall
+ ) {
+ revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and // candidate tuple, before type pruning
+ if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode
+ then compatibleTypes(t, getNodeDataFlowType(node)) // prune on type at cast, argument and return nodes
+ else any() // all other nodes are accepted without a type check
+ }
+
+ pragma[nomagic]
+ predicate revLambdaFlow0(
+ DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
+ boolean toJump, DataFlowCallOption lastCall
+ ) {
+ lambdaCall(lambdaCall, kind, node) and // base case: no step taken yet
+ t = getNodeDataFlowType(node) and
+ toReturn = false and
+ toJump = false and
+ lastCall = TDataFlowCallNone()
+ or
+ // local flow: `node` steps to an already-reached `mid`, keeping all flags
+ exists(Node mid, DataFlowType t0 |
+ revLambdaFlow(lambdaCall, kind, mid, t0, toReturn, toJump, lastCall)
+ |
+ simpleLocalFlowStep(node, mid) and
+ t = t0
+ or
+ exists(boolean preservesValue |
+ additionalLambdaFlowStep(node, mid, preservesValue) and
+ getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
+ |
+ preservesValue = false and
+ t = getNodeDataFlowType(node)
+ or
+ preservesValue = true and
+ t = t0
+ )
+ )
+ or
+ // jump step: discards the flags of `mid`; sets `toJump` and clears `lastCall`
+ exists(Node mid, DataFlowType t0 |
+ revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and
+ toReturn = false and
+ toJump = true and
+ lastCall = TDataFlowCallNone()
+ |
+ jumpStepCached(node, mid) and
+ t = t0
+ or
+ exists(boolean preservesValue |
+ additionalLambdaFlowStep(node, mid, preservesValue) and
+ getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
+ |
+ preservesValue = false and
+ t = getNodeDataFlowType(node)
+ or
+ preservesValue = true and
+ t = t0
+ )
+ )
+ or
+ // flow into a callable: `node` is an argument for the reached parameter `p`
+ exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call |
+ revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
+ (
+ if lastCall0 = TDataFlowCallNone() and toJump = false
+ then lastCall = TDataFlowCallSome(call) // record `call` only if none was recorded and no jump was taken
+ else lastCall = lastCall0
+ ) and
+ toReturn = false
+ |
+ viableParamArgNonLambda(call, p, node)
+ or
+ viableParamArgLambda(call, p, node) // non-linear recursion
+ )
+ or
+ // flow out of a callable: `node` is a return node at a reached return position
+ exists(TReturnPositionSimple pos |
+ revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and
+ getReturnPositionSimple(node, node.(ReturnNode).getKind()) = pos and
+ toReturn = true
+ )
+ }
+
+ pragma[nomagic]
+ predicate revLambdaFlowOutLambdaCall(
+ DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump,
+ DataFlowCall call, DataFlowCallOption lastCall
+ ) {
+ exists(ReturnKindExt retKind |
+ lambdaCall(call, _, _) and
+ out = retKind.getAnOutNode(call)
+ ) and
+ revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall)
+ }
+
+ pragma[nomagic]
+ predicate revLambdaFlowOut(
+ DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t,
+ boolean toJump, DataFlowCallOption lastCall
+ ) {
+ exists(DataFlowCall call, OutNode out |
+ revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and
+ viableReturnPosOutNonLambda(call, pos, out) // `pos` belongs to a non-lambda target of `call`
+ or
+ // non-linear recursion: `call` is itself a lambda call whose targets come from this analysis
+ revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and
+ viableReturnPosOutLambda(call, _, pos, out)
+ )
+ }
+
+ pragma[nomagic]
+ predicate revLambdaFlowIn(
+ DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
+ DataFlowCallOption lastCall
+ ) {
+ revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall) // `toReturn = false`: path does not go through a return
+ }
+}
+
+private DataFlowCallable viableCallableExt(DataFlowCall call) {
+ result = viableCallableLambda(call, _)
+ or
+ result = viableCallable(call)
+}
+
+cached
+private module Cached {
+ /**
+ * If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
+ * force a stage-dependency on the `DataFlowImplCommon.qll` stage, thereby
+ * collapsing the two stages.
+ */
+ cached
+ predicate forceCachingInSameStage() { any() }
+
+ cached
+ predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
+
+ cached
+ predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
+ c = call.getEnclosingCallable()
+ }
+
+ cached
+ predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) }
+
+ cached
+ predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
+
+ cached
+ predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
+
+ cached
+ predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
+
+ cached
+ predicate outNodeExt(Node n) {
+ n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode
+ or
+ n instanceof OutNode
+ }
+
+ cached
+ predicate hiddenNode(Node n) { nodeIsHidden(n) }
+
+ cached
+ OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
+ exists(ArgNode updated, int pos | pos = k.(ParamUpdateReturnKind).getPosition() |
+ updated.argumentOf(call, pos) and
+ updated = result.(PostUpdateNode).getPreUpdateNode()
+ )
+ or
+ exists(ReturnKind rk | rk = k.(ValueReturnKind).getKind() | result = getAnOutNode(call, rk))
+ }
+
+ cached
+ predicate returnNodeExt(Node n, ReturnKindExt k) {
+ exists(ParamNode param, int idx |
+ k = TParamUpdate(idx) and
+ param.isParameterOf(_, idx) and
+ parameterValueFlowsToPreUpdate(param, n)
+ )
+ or
+ exists(ReturnKind rk | rk = n.(ReturnNode).getKind() | k = TValueReturn(rk))
+ }
+
+ cached
+ predicate castNode(Node n) { n instanceof CastNode }
+
+ cached
+ predicate castingNode(Node n) {
+ // A read `x.f` pops the head of the tracked access path stack; the type that
+ // remains after the pop must be checked for compatibility with the type of
+ // `x.f`, so targets of read steps count as casting nodes.
+ read(_, _, n) or
+ n instanceof OutNodeExt or
+ n instanceof ParamNode or
+ castNode(n)
+ }
+
+ cached
+ predicate parameterNode(Node n, DataFlowCallable c, int i) {
+ n.(ParameterNode).isParameterOf(c, i)
+ }
+
+ cached
+ predicate argumentNode(Node n, DataFlowCall call, int pos) {
+ n.(ArgumentNode).argumentOf(call, pos)
+ }
+
+ /**
+ * Gets a callable that the lambda call `call` may resolve to.
+ *
+ * If reaching `call` through a particular call is required for the result to
+ * be a viable target, that call is recorded in `lastCall`.
+ */
+ cached
+ DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) {
+ exists(LambdaCallKind lambdaKind, Node origin |
+ lambdaCreation(origin, lambdaKind, result) and
+ LambdaFlow::revLambdaFlow(call, lambdaKind, origin, _, _, _, lastCall)
+ )
+ }
+
+ /**
+ * Holds if `p` is the parameter at index `i` of some viable dispatch target
+ * of `call`. Index `-1` denotes the instance parameter.
+ */
+ pragma[nomagic]
+ private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
+ exists(DataFlowCallable tgt | tgt = viableCallableExt(call) | p.isParameterOf(tgt, i))
+ }
+
+ /**
+ * Holds if `arg` may be passed to parameter `p` by `call`, with virtual
+ * dispatch taken into account.
+ */
+ cached
+ predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
+ exists(int pos |
+ arg.argumentOf(call, pos) and
+ viableParam(call, pos, p) and
+ compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
+ )
+ }
+
+ pragma[nomagic]
+ private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) {
+ result.getKind() = kind and
+ result.getCallable() = viableCallableExt(call)
+ }
+
+ /**
+ * Holds if `call` may return a value at return position `pos` to `out`, with
+ * virtual dispatch taken into account.
+ */
+ cached
+ predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) {
+ exists(ReturnKindExt rk |
+ out = rk.getAnOutNode(call) and
+ viableReturnPos(call, rk) = pos
+ )
+ }
+
+ /** Provides predicates for calculating flow-through summaries. */
+ private module FlowThrough {
+ /**
+ * The first flow-through approximation:
+ *
+ * - Input access paths are abstracted with a Boolean parameter
+ * that indicates (non-)emptiness.
+ */
+ private module Cand {
+ /**
+ * Holds if `p` can flow to `node` in the same callable using only
+ * value-preserving steps.
+ *
+ * `read` is `true` when a single read step was taken, so contents of `p` reach `node`.
+ */
+ pragma[nomagic]
+ private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
+ p = node and // base case: the parameter itself, nothing read yet
+ read = false
+ or
+ // local flow: one more local step, `read` is carried over unchanged
+ exists(Node mid |
+ parameterValueFlowCand(p, mid, read) and
+ simpleLocalFlowStep(mid, node)
+ )
+ or
+ // read: a read step is only taken when no read happened before
+ exists(Node mid |
+ parameterValueFlowCand(p, mid, false) and
+ read(mid, _, node) and
+ read = true
+ )
+ or
+ // flow through a call: no read before the call; `read` comes from the call
+ exists(ArgNode arg |
+ parameterValueFlowArgCand(p, arg, false) and
+ argumentValueFlowsThroughCand(arg, node, read)
+ )
+ or
+ // flow through a call: no read inside the call; `read` is from before it
+ exists(ArgNode arg |
+ parameterValueFlowArgCand(p, arg, read) and
+ argumentValueFlowsThroughCand(arg, node, false)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
+ parameterValueFlowCand(p, arg, read) // `parameterValueFlowCand` restricted to argument nodes
+ }
+
+ pragma[nomagic]
+ predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
+ exists(Node pre | pre = n.getPreUpdateNode() | parameterValueFlowCand(p, pre, false))
+ }
+
+ /**
+ * Holds if some return node of kind `kind`, in the same callable as `p`, is
+ * reachable from `p` by value-preserving steps alone, ignoring call
+ * contexts.
+ *
+ * `read` indicates whether it is contents of `p` that can flow to the return
+ * node.
+ */
+ predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
+ exists(ReturnNode returnNode |
+ returnNode.getKind() = kind and
+ parameterValueFlowCand(p, returnNode, read)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate argumentValueFlowsThroughCand0(
+ DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
+ ) {
+ exists(ParamNode callee |
+ parameterValueFlowReturnCand(callee, kind, read) and viableParamArg(call, callee, arg)
+ )
+ }
+
+ /**
+ * Holds if `arg` can flow through a call to `out` by value-preserving steps
+ * alone, ignoring call contexts.
+ *
+ * `read` indicates whether it is contents of `arg` that can flow to `out`.
+ */
+ predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
+ exists(ReturnKind rk, DataFlowCall through |
+ out = getAnOutNode(through, rk) and
+ argumentValueFlowsThroughCand0(through, arg, rk, read)
+ )
+ }
+
+ predicate cand(ParamNode p, Node n) {
+ (
+ parameterValueFlowsToPreUpdateCand(p, _)
+ or
+ parameterValueFlowReturnCand(p, _, _)
+ ) and
+ parameterValueFlowCand(p, n, _)
+ }
+ }
+
+ /**
+ * The final flow-through calculation:
+ *
+ * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`)
+ * or summarized as a single read step with before and after types recorded
+ * in the `ReadStepTypesOption` parameter.
+ * - Types are checked using the `compatibleTypes()` relation.
+ */
+ private module Final {
+ /**
+ * Holds if `p` can flow to `node` in the same callable using only
+ * value-preserving steps and possibly a single read step, not taking
+ * call contexts into account.
+ *
+ * If a read step was taken, then `read` captures the `Content`, the
+ * container type, and the content type.
+ */
+ predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
+ parameterValueFlow0(p, node, read) and
+ if node instanceof CastingNode
+ then
+ // normal flow through: no read taken, so `p` and `node` must have compatible types
+ read = TReadStepTypesNone() and
+ compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
+ or
+ // getter: a read was taken, so the content type must be compatible with `node`
+ compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
+ else any() // nodes that are not casting nodes are not type-checked
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
+ p = node and
+ Cand::cand(p, _) and // only parameters accepted by the candidate approximation
+ read = TReadStepTypesNone()
+ or
+ // local flow: one more local step, `read` is carried over unchanged
+ exists(Node mid |
+ parameterValueFlow(p, mid, read) and
+ simpleLocalFlowStep(mid, node)
+ )
+ or
+ // read: a single read step, taken only when no read happened before
+ exists(Node mid |
+ parameterValueFlow(p, mid, TReadStepTypesNone()) and
+ readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
+ read.getContentType()) and
+ Cand::parameterValueFlowReturnCand(p, _, true) and
+ compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
+ )
+ or
+ parameterValueFlow0_0(TReadStepTypesNone(), p, node, read) // flow through a call
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlow0_0(
+ ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
+ ) {
+ // flow through: no read before the call (`mustBeNone`); `read` comes from the call
+ exists(ArgNode arg |
+ parameterValueFlowArg(p, arg, mustBeNone) and
+ argumentValueFlowsThrough(arg, read, node)
+ )
+ or
+ // flow through: no read inside the call (`mustBeNone`); `read` is from before it
+ exists(ArgNode arg |
+ parameterValueFlowArg(p, arg, read) and
+ argumentValueFlowsThrough(arg, mustBeNone, node)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
+ Cand::argumentValueFlowsThroughCand(arg, _, _) and
+ parameterValueFlow(p, arg, read)
+ }
+
+ pragma[nomagic]
+ private predicate argumentValueFlowsThrough0(
+ DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
+ ) {
+ exists(ParamNode callee |
+ parameterValueFlowReturn(callee, kind, read) and viableParamArg(call, callee, arg)
+ )
+ }
+
+ /**
+ * Holds if `arg` flows to `out` through a call using only
+ * value-preserving steps and possibly a single read step, not taking
+ * call contexts into account.
+ *
+ * If a read step was taken, then `read` captures the `Content`, the
+ * container type, and the content type.
+ */
+ pragma[nomagic]
+ predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
+ exists(DataFlowCall call, ReturnKind kind |
+ argumentValueFlowsThrough0(call, arg, kind, read) and
+ out = getAnOutNode(call, kind)
+ |
+ // normal flow through: no read, so `arg` and `out` must have compatible types
+ read = TReadStepTypesNone() and
+ compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
+ or
+ // getter: `arg` must fit the container type and the content type must fit `out`
+ compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
+ compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
+ )
+ }
+
+ /**
+ * Holds if `arg` reaches `out` through a call by value-preserving steps
+ * together with exactly one read step of content `c`, ignoring call
+ * contexts. Such flow represents a getter-step.
+ */
+ predicate getterStep(ArgNode arg, Content c, Node out) {
+ exists(ReadStepTypesOption taken | taken.getContent() = c | argumentValueFlowsThrough(arg, taken, out))
+ }
+
+ /**
+ * Holds if some return node of kind `kind`, in the same callable as `p`, is
+ * reachable from `p` by value-preserving steps, optionally combined with a
+ * single read step.
+ *
+ * If a read step was taken, then `read` captures the `Content`, the
+ * container type, and the content type.
+ */
+ private predicate parameterValueFlowReturn(
+ ParamNode p, ReturnKind kind, ReadStepTypesOption read
+ ) {
+ exists(ReturnNode returnNode |
+ returnNode.getKind() = kind and
+ parameterValueFlow(p, returnNode, read)
+ )
+ }
+ }
+
+ import Final
+ }
+
+ import FlowThrough
+
+ cached
+ private module DispatchWithCallContext {
+ /**
+ * Holds if knowing the call context might narrow down the set of viable
+ * implementations that `call` can invoke.
+ */
+ pragma[nomagic]
+ private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
+ exists(viableCallableLambda(call, TDataFlowCallSome(_))) and
+ callEnclosingCallable(call, callable)
+ or
+ mayBenefitFromCallContext(call, callable)
+ }
+
+ /**
+ * Gets a viable dispatch target of `call` in the context `ctx`. This is
+ * restricted to those `call`s for which a context might make a difference.
+ */
+ pragma[nomagic]
+ private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) {
+ result = viableImplInCallContext(call, ctx)
+ or
+ result = viableCallableLambda(call, TDataFlowCallSome(ctx)) // lambda targets recorded with `ctx` as last call
+ or
+ exists(DataFlowCallable enclosing | // lambda targets recorded without a last call, for any `ctx` targeting `enclosing`
+ mayBenefitFromCallContextExt(call, enclosing) and
+ enclosing = viableCallableExt(ctx) and
+ result = viableCallableLambda(call, TDataFlowCallNone())
+ )
+ }
+
+ /**
+ * Holds if the call context `ctx` reduces the set of viable run-time
+ * dispatch targets of call `call` in `c`.
+ */
+ cached
+ predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
+ exists(int tgts, int ctxtgts |
+ mayBenefitFromCallContextExt(call, c) and
+ c = viableCallableExt(ctx) and
+ ctxtgts = count(viableImplInCallContextExt(call, ctx)) and // targets given `ctx`; may be zero
+ tgts = strictcount(viableCallableExt(call)) and // targets without a context; at least one
+ ctxtgts < tgts // the context leaves strictly fewer targets
+ )
+ }
+
+ /**
+ * Gets a run-time dispatch target that remains viable for `call` when the
+ * call context is `ctx`. Only calls whose target set is actually reduced by
+ * a context are covered.
+ */
+ cached
+ DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
+ reducedViableImplInCallContext(call, _, ctx) and
+ result = viableImplInCallContextExt(call, ctx)
+ }
+
+ /**
+ * Holds if flow returning from callable `c` to call `call` might return
+ * further and if this path restricts the set of call sites that can be
+ * returned to.
+ */
+ cached
+ predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
+ exists(int tgts, int ctxtgts |
+ mayBenefitFromCallContextExt(call, _) and
+ c = viableCallableExt(call) and
+ ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and // contexts in which `c` is a target of `call`
+ tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and // calls that may target the callable enclosing `call`
+ ctxtgts < tgts
+ )
+ }
+
+ /**
+ * Gets a run-time dispatch target that is viable for `call` in the context
+ * `ctx`. Only those calls and results are covered for which return flow
+ * from the result to `call` restricts the possible context
+ * `ctx`.
+ */
+ cached
+ DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
+ reducedViableImplInReturn(result, call) and
+ result = viableImplInCallContextExt(call, ctx)
+ }
+ }
+
+ import DispatchWithCallContext
+
+ /**
+ * Holds if the pre-update node of post-update node `n` is reachable from `p`
+ * within the same callable via value-preserving steps only.
+ */
+ private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
+ exists(Node pre | pre = n.getPreUpdateNode() | parameterValueFlow(p, pre, TReadStepTypesNone()))
+ }
+
+ private predicate store(
+ Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
+ ) {
+ storeStep(node1, c, node2) and // direct store step; types are taken from the two nodes
+ contentType = getNodeDataFlowType(node1) and
+ containerType = getNodeDataFlowType(node2)
+ or
+ exists(Node n1, Node n2 | // reverse of a read, expressed between the post-update nodes
+ n1 = node1.(PostUpdateNode).getPreUpdateNode() and
+ n2 = node2.(PostUpdateNode).getPreUpdateNode()
+ |
+ argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) // read via flow through a call
+ or
+ read(n2, c, n1) and // direct read step
+ contentType = getNodeDataFlowType(n1) and
+ containerType = getNodeDataFlowType(n2)
+ )
+ }
+
+ cached
+ predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
+
+ /**
+ * Holds if data can flow from `node1` to `node2` via a direct assignment to
+ * the content of `tc`.
+ *
+ * This includes reverse steps through reads when the result of the read has
+ * been stored into, in order to handle cases like `x.f1.f2 = y`.
+ */
+ cached
+ predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
+ store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
+ }
+
+ /**
+ * Holds if data can flow from `fromNode` to `toNode` because they are the post-update
+ * nodes of some function output and input respectively, where the output and input
+ * are aliases. A typical example is a function returning `this`, implementing a fluent
+ * interface.
+ */
+ private predicate reverseStepThroughInputOutputAlias(
+ PostUpdateNode fromNode, PostUpdateNode toNode
+ ) {
+ exists(Node fromPre, Node toPre |
+ fromPre = fromNode.getPreUpdateNode() and
+ toPre = toNode.getPreUpdateNode()
+ |
+ exists(DataFlowCall c |
+ // Case where the language-specific `simpleLocalFlowStep` already models
+ // flow from an argument of call `c` to an output of the same call.
+ fromPre = getAnOutNode(c, _) and
+ toPre.(ArgNode).argumentOf(c, _) and
+ simpleLocalFlowStep(toPre.(ArgNode), fromPre)
+ )
+ or
+ argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre) // or: value-preserving flow through a call, no read
+ )
+ }
+
+ cached
+ predicate simpleLocalFlowStepExt(Node node1, Node node2) {
+ reverseStepThroughInputOutputAlias(node1, node2) or
+ simpleLocalFlowStep(node1, node2)
+ }
+
+ /**
+ * Holds if the call context `call` improves virtual dispatch in `callable`.
+ */
+ cached
+ predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
+ reducedViableImplInCallContext(_, callable, call) // some call in `callable` has fewer targets given `call`
+ }
+
+ /**
+ * Holds if the call context `call` lets us prune unreachable nodes in `callable`.
+ */
+ cached
+ predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
+ exists(Node n | isUnreachableInCallCached(n, call) | callable = getNodeEnclosingCallable(n))
+ }
+
+ cached
+ newtype TCallContext =
+ TAnyCallContext() or
+ TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or
+ TSomeCall() or
+ TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) }
+
+ cached
+ newtype TReturnPosition =
+ TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) {
+ exists(ReturnNodeExt ret |
+ c = returnNodeGetEnclosingCallable(ret) and
+ kind = ret.getKind()
+ )
+ }
+
+ cached
+ newtype TLocalFlowCallContext =
+ TAnyLocalCall() or
+ TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
+
+ cached
+ newtype TReturnKindExt =
+ TValueReturn(ReturnKind kind) or
+ TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
+
+ cached
+ newtype TBooleanOption =
+ TBooleanNone() or
+ TBooleanSome(boolean b) { b = true or b = false }
+
+ cached
+ newtype TDataFlowCallOption =
+ TDataFlowCallNone() or
+ TDataFlowCallSome(DataFlowCall call)
+
+ cached
+ newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
+
+ cached
+ newtype TAccessPathFront =
+ TFrontNil(DataFlowType t) or
+ TFrontHead(TypedContent tc)
+
+ cached
+ newtype TAccessPathFrontOption =
+ TAccessPathFrontNone() or
+ TAccessPathFrontSome(AccessPathFront apf)
+}
+
+/**
+ * Holds if `call`, used as a call context, either enables pruning of
+ * unreachable nodes in `callable` or improves virtual dispatch in `callable`.
+ */
+predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
+ recordDataFlowCallSiteUnreachable(call, callable) or
+ recordDataFlowCallSiteDispatch(call, callable)
+}
+
+/**
+ * A `Node` at which a cast can occur such that the type should be checked.
+ */
+class CastingNode extends Node {
+ CastingNode() { castingNode(this) }
+}
+
+private predicate readStepWithTypes(
+ Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
+) {
+ content = getNodeDataFlowType(n2) and
+ container = getNodeDataFlowType(n1) and
+ read(n1, c, n2)
+}
+
+private newtype TReadStepTypesOption =
+ TReadStepTypesNone() or
+ TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
+ readStepWithTypes(_, container, c, _, content)
+ }
+
+private class ReadStepTypesOption extends TReadStepTypesOption {
+ predicate isSome() { this = TReadStepTypesSome(_, _, _) }
+
+ DataFlowType getContainerType() { TReadStepTypesSome(result, _, _) = this }
+
+ Content getContent() { TReadStepTypesSome(_, result, _) = this }
+
+ DataFlowType getContentType() { TReadStepTypesSome(_, _, result) = this }
+
+ string toString() { this.isSome() and result = "Some(..)" or not this.isSome() and result = "None()" }
+}
+
+/**
+ * A call context to restrict the targets of virtual dispatch, prune local flow,
+ * and match the call sites of flow into a method with flow out of a method.
+ *
+ * There are four cases:
+ * - `TAnyCallContext()` : No restrictions on method flow.
+ * - `TSpecificCall(DataFlowCall call)` : Flow entered through the
+ * given `call`. This call improves the set of viable
+ * dispatch targets for at least one method call in the current callable
+ * or helps prune unreachable nodes in the current callable.
+ * - `TSomeCall()` : Flow entered through a parameter. The
+ * originating call does not improve the set of dispatch targets for any
+ * method call in the current callable and was therefore not recorded.
+ * - `TReturn(DataFlowCallable c, DataFlowCall call)` : Flow reached `call` from `c` and
+ * this dispatch target of `call` implies a reduced set of dispatch origins
+ * to which data may flow if it should reach a `return` statement.
+ */
+abstract class CallContext extends TCallContext {
+ abstract string toString();
+
+ /** Holds if this call context is relevant for `callable`. */
+ abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+abstract class CallContextNoCall extends CallContext { }
+
+class CallContextAny extends CallContextNoCall, TAnyCallContext {
+ override string toString() { result = "CcAny" }
+
+ override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+abstract class CallContextCall extends CallContext {
+ /** Holds if this call context may be `call`. */
+ bindingset[call]
+ abstract predicate matchesCall(DataFlowCall call);
+}
+
+/** A call context recording the specific call through which flow entered (`TSpecificCall`). */
+class CallContextSpecificCall extends CallContextCall, TSpecificCall {
+ override string toString() { result = "CcCall(" + this.getCall() + ")" }
+
+ override predicate relevantFor(DataFlowCallable callable) {
+ recordDataFlowCallSite(this.getCall(), callable)
+ }
+
+ override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
+
+ /** Gets the call recorded by this call context. */
+ DataFlowCall getCall() { this = TSpecificCall(result) }
+}
+
+class CallContextSomeCall extends CallContextCall, TSomeCall {
+ override string toString() { "CcSomeCall" = result }
+
+ override predicate relevantFor(DataFlowCallable callable) {
+ exists(ParamNode param | callable = getNodeEnclosingCallable(param))
+ }
+
+ override predicate matchesCall(DataFlowCall call) { any() }
+}
+
+class CallContextReturn extends CallContextNoCall, TReturn {
+ override string toString() {
+ exists(DataFlowCall retCall | TReturn(_, retCall) = this | "CcReturn(" + retCall + ")" = result)
+ }
+
+ override predicate relevantFor(DataFlowCallable callable) {
+ exists(DataFlowCall retCall | callEnclosingCallable(retCall, callable) and TReturn(_, retCall) = this)
+ }
+}
+
+/**
+ * A call context that is relevant for pruning local flow.
+ */
+abstract class LocalCallContext extends TLocalFlowCallContext {
+ abstract string toString();
+
+ /** Holds if this call context is relevant for `callable`. */
+ abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+class LocalCallContextAny extends LocalCallContext, TAnyLocalCall {
+ override string toString() { result = "LocalCcAny" }
+
+ override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall {
+ DataFlowCall call;
+
+ LocalCallContextSpecificCall() { TSpecificLocalCall(call) = this }
+
+ DataFlowCall getCall() { call = result }
+
+ override string toString() { "LocalCcCall(" + call + ")" = result }
+
+ override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) }
+}
+
+private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
+ exists(Node n | isUnreachableInCallCached(n, call) | callable = getNodeEnclosingCallable(n))
+}
+
+/**
+ * Gets the local call context given the call context and the callable that
+ * the contexts apply to.
+ */
+LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) {
+ ctx.relevantFor(callable) and
+ if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable) // `ctx` is a specific call with unreachable nodes in `callable`
+ then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall() // keep that call
+ else result instanceof LocalCallContextAny // every other context maps to the unrestricted local context
+}
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+class ParamNode extends Node {
+ ParamNode() { parameterNode(this, _, _) }
+
+ /**
+ * Holds if this node is the parameter of callable `c` at the specified
+ * (zero-based) position.
+ */
+ predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
+}
+
+/** A data-flow node that represents a call argument. */
+class ArgNode extends Node {
+ ArgNode() { argumentNode(this, _, _) } // an argument of some call at some position
+
+ /** Holds if this argument occurs at the given position in the given call. */
+ final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
+}
+
+/**
+ * A node from which flow can return to the caller. This is either a regular
+ * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
+ */
+class ReturnNodeExt extends Node {
+ ReturnNodeExt() { returnNodeExt(this, _) } // has at least one extended return kind
+
+ /** Gets the kind of this returned value. */
+ ReturnKindExt getKind() { returnNodeExt(this, result) }
+}
+
+/**
+ * A node to which data can flow from a call. Either an ordinary out node
+ * or a post-update node associated with a call argument.
+ */
+class OutNodeExt extends Node {
+ OutNodeExt() { outNodeExt(this) } // membership is decided entirely by `outNodeExt`
+}
+
+/**
+ * An extended return kind. A return kind describes how data can be returned
+ * from a callable. This can either be through a returned value or an updated
+ * parameter.
+ */
+abstract class ReturnKindExt extends TReturnKindExt {
+ /** Gets a textual representation of this return kind. */
+ abstract string toString();
+
+ /** Gets a node that receives data flowing out of `call` with this return kind. */
+ final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
+}
+
+class ValueReturnKind extends ReturnKindExt, TValueReturn {
+ private ReturnKind wrapped; // the ordinary return kind carried by this extended kind
+
+ ValueReturnKind() { this = TValueReturn(wrapped) }
+
+ override string toString() { result = wrapped.toString() }
+
+ ReturnKind getKind() { result = wrapped }
+}
+
+class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
+ private int paramPos; // position of the updated parameter
+
+ ParamUpdateReturnKind() { this = TParamUpdate(paramPos) }
+
+ override string toString() { result = "param update " + paramPos }
+
+ int getPosition() { result = paramPos }
+}
+
+/** A callable tagged with a relevant return kind. */
+class ReturnPosition extends TReturnPosition0 {
+ private ReturnKindExt retKind;
+ private DataFlowCallable callable;
+
+ ReturnPosition() { this = TReturnPosition0(callable, retKind) }
+
+ /** Gets the return kind this position is tagged with. */
+ ReturnKindExt getKind() { result = retKind }
+
+ /** Gets the callable this position belongs to. */
+ DataFlowCallable getCallable() { result = callable }
+
+ /** Gets a textual representation of the form `[kind] callable`. */
+ string toString() { result = "[" + retKind + "] " + callable }
+}
+
+/**
+ * Gets the enclosing callable of `n`. Unlike `n.getEnclosingCallable()`, this
+ * predicate wraps its arguments in binding pragmas so that joins go from `n`
+ * to the result instead of the other way around.
+ */
+pragma[inline]
+DataFlowCallable getNodeEnclosingCallable(Node n) {
+ nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+/** Gets the type of `n` used for type pruning; uses the same binding pragmas as `getNodeEnclosingCallable`. */
+pragma[inline]
+DataFlowType getNodeDataFlowType(Node n) {
+ nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+pragma[noinline]
+private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) {
+ result = getNodeEnclosingCallable(ret) // non-inlined wrapper around the inline helper
+}
+
+pragma[noinline]
+private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) {
+ result.getCallable() = returnNodeGetEnclosingCallable(ret) and
+ kind = result.getKind() // `kind` is not tied to `ret` here; the caller passes `ret.getKind()`
+}
+
+pragma[noinline]
+ReturnPosition getReturnPosition(ReturnNodeExt ret) {
+ result = getReturnPosition0(ret, ret.getKind()) // position = (enclosing callable, return kind)
+}
+
+/**
+ * Checks whether `inner` can return to `call` in the call context `innercc`.
+ * Assumes a context of `inner = viableCallableExt(call)`.
+ */
+bindingset[innercc, inner, call]
+predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
+ exists(DataFlowCall ctxCall, DataFlowCallable ctxCallable |
+ innercc = TReturn(ctxCallable, ctxCall) and
+ callEnclosingCallable(ctxCall, inner) and
+ ctxCallable = prunedViableImplInCallContextReverse(ctxCall, call)
+ )
+ or
+ innercc instanceof CallContextAny
+}
+
+/**
+ * Checks whether `call` can resolve to `calltarget` in the call context `cc`.
+ * Assumes a context of `calltarget = viableCallableExt(call)`.
+ */
+bindingset[cc, call, calltarget]
+predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
+ cc instanceof CallContextAny
+ or
+ cc instanceof CallContextSomeCall
+ or
+ cc instanceof CallContextReturn
+ or
+ exists(DataFlowCall ctxCall | cc = TSpecificCall(ctxCall) |
+ if reducedViableImplInCallContext(call, _, ctxCall)
+ then calltarget = prunedViableImplInCallContext(call, ctxCall)
+ else any()
+ )
+}
+
+/**
+ * Resolves a return from `callable` in `cc` to `call`. This is equivalent to
+ * `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
+ */
+bindingset[cc, callable]
+predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
+ exists(DataFlowCall ctxCall, DataFlowCallable ctxCallable |
+ cc = TReturn(ctxCallable, ctxCall) and
+ callEnclosingCallable(ctxCall, callable) and
+ ctxCallable = prunedViableImplInCallContextReverse(ctxCall, call)
+ )
+ or
+ callable = viableCallableExt(call) and cc instanceof CallContextAny
+}
+
+/**
+ * Resolves a call from `call` in `cc` to `result`. This is equivalent to
+ * `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
+ */
+bindingset[call, cc]
+DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
+ cc instanceof CallContextAny and result = viableCallableExt(call)
+ or
+ cc instanceof CallContextSomeCall and result = viableCallableExt(call)
+ or
+ cc instanceof CallContextReturn and result = viableCallableExt(call)
+ or
+ exists(DataFlowCall ctxCall | cc = TSpecificCall(ctxCall) |
+ if reducedViableImplInCallContext(call, _, ctxCall)
+ then result = prunedViableImplInCallContext(call, ctxCall)
+ else result = viableCallableExt(call)
+ )
+}
+
+/** An optional Boolean value. */
+class BooleanOption extends TBooleanOption {
+ string toString() {
+ exists(boolean value | this = TBooleanSome(value) and result = value.toString())
+ or
+ this = TBooleanNone() and result = ""
+ }
+}
+
+/** An optional `DataFlowCall`. */
+class DataFlowCallOption extends TDataFlowCallOption {
+ string toString() {
+ exists(DataFlowCall wrapped |
+ result = wrapped.toString() and
+ this = TDataFlowCallSome(wrapped)
+ )
+ or
+ result = "(none)" and
+ this = TDataFlowCallNone()
+ }
+}
+
+/** Content tagged with the type of a containing object. */
+class TypedContent extends MkTypedContent {
+ private DataFlowType containerType;
+ private Content content;
+
+ TypedContent() { this = MkTypedContent(content, containerType) }
+
+ /** Gets the container type that the content is tagged with. */
+ DataFlowType getContainerType() { result = containerType }
+
+ /** Gets the underlying content. */
+ Content getContent() { result = content }
+
+ /** Gets a textual representation, which is that of the underlying content. */
+ string toString() { result = content.toString() }
+
+ /**
+ * Holds if access paths whose head is this `TypedContent` must always be
+ * tracked at high precision, which switches off adaptive access path
+ * precision for them. Delegates to the global `forceHighPrecision` predicate.
+ */
+ predicate forceHighPrecision() { forceHighPrecision(content) }
+}
+
+/**
+ * The front of an access path. This is either a head or a nil.
+ */
+abstract class AccessPathFront extends TAccessPathFront {
+ abstract string toString(); // textual representation of this front
+
+ abstract DataFlowType getType(); // type associated with this front
+
+ abstract boolean toBoolNonEmpty(); // `true` for a head, `false` for a nil (see the subclasses)
+
+ TypedContent getHead() { this = TFrontHead(result) } // has no result for a nil front
+
+ predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
+}
+
+class AccessPathFrontNil extends AccessPathFront, TFrontNil {
+ private DataFlowType nilType; // the type carried by this empty front
+
+ AccessPathFrontNil() { this = TFrontNil(nilType) }
+
+ override DataFlowType getType() { result = nilType }
+
+ override boolean toBoolNonEmpty() { result = false }
+
+ override string toString() { result = ppReprType(nilType) }
+}
+
+class AccessPathFrontHead extends AccessPathFront, TFrontHead {
+ private TypedContent head; // the typed content at the front of the access path
+
+ AccessPathFrontHead() { this = TFrontHead(head) }
+
+ override DataFlowType getType() { result = head.getContainerType() }
+
+ override boolean toBoolNonEmpty() { result = true }
+
+ override string toString() { result = head.toString() }
+}
+
+/** An optional access path front. */
+class AccessPathFrontOption extends TAccessPathFrontOption {
+ string toString() {
+ exists(AccessPathFront front | this = TAccessPathFrontSome(front) and result = front.toString())
+ or
+ this = TAccessPathFrontNone() and result = ""
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
new file mode 100644
index 00000000000..a55e65a81f6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
@@ -0,0 +1,181 @@
+/**
+ * Provides consistency queries for checking invariants in the language-specific
+ * data-flow classes and predicates.
+ */
+
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+private import tainttracking1.TaintTrackingParameter::Private
+private import tainttracking1.TaintTrackingParameter::Public
+
+module Consistency {
+ private class RelevantNode extends Node { // nodes with a call-related role or on some flow step
+ RelevantNode() {
+ this instanceof ArgumentNode or
+ this instanceof ParameterNode or
+ this instanceof ReturnNode or
+ this = getAnOutNode(_, _) or
+ simpleLocalFlowStep(this, _) or
+ simpleLocalFlowStep(_, this) or
+ jumpStep(this, _) or
+ jumpStep(_, this) or
+ storeStep(this, _, _) or
+ storeStep(_, _, this) or
+ readStep(this, _, _) or
+ readStep(_, _, this) or
+ defaultAdditionalTaintStep(this, _) or
+ defaultAdditionalTaintStep(_, this)
+ }
+ }
+
+ query predicate uniqueEnclosingCallable(Node n, string msg) {
+ exists(int c |
+ n instanceof RelevantNode and
+ c = count(n.getEnclosingCallable()) and
+ c != 1 and // reports both zero and several enclosing callables
+ msg = "Node should have one enclosing callable but has " + c + "."
+ )
+ }
+
+ query predicate uniqueType(Node n, string msg) {
+ exists(int c |
+ n instanceof RelevantNode and
+ c = count(getNodeType(n)) and
+ c != 1 and // reports both zero and several types
+ msg = "Node should have one type but has " + c + "."
+ )
+ }
+
+ query predicate uniqueNodeLocation(Node n, string msg) { // checked for every node, not only `RelevantNode`
+ exists(int c |
+ c =
+ count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+ n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ ) and
+ c != 1 and
+ msg = "Node should have one location but has " + c + "."
+ )
+ }
+
+ query predicate missingLocation(string msg) {
+ exists(int c |
+ c =
+ strictcount(Node n | // `strictcount` has no result for zero, so nothing is reported then
+ not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+ n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ )
+ ) and
+ msg = "Nodes without location: " + c
+ )
+ }
+
+ query predicate uniqueNodeToString(Node n, string msg) { // checked for every node
+ exists(int c |
+ c = count(n.toString()) and
+ c != 1 and
+ msg = "Node should have one toString but has " + c + "."
+ )
+ }
+
+ query predicate missingToString(string msg) {
+ exists(int c |
+ c = strictcount(Node n | not exists(n.toString())) and // no result when the count is zero
+ msg = "Nodes without toString: " + c
+ )
+ }
+
+ query predicate parameterCallable(ParameterNode p, string msg) {
+ exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
+ msg = "Callable mismatch for parameter."
+ }
+
+ query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
+ simpleLocalFlowStep(n1, n2) and
+ n1.getEnclosingCallable() != n2.getEnclosingCallable() and
+ msg = "Local flow step does not preserve enclosing callable."
+ }
+
+ private DataFlowType typeRepr() { result = getNodeType(_) } // every type that some node has
+
+ query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
+ t = typeRepr() and
+ not compatibleTypes(t, t) and
+ msg = "Type compatibility predicate is not reflexive."
+ }
+
+ query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
+ isUnreachableInCall(n, call) and
+ exists(DataFlowCallable c |
+ c = n.getEnclosingCallable() and
+ not viableCallable(call) = c // `call` cannot dispatch to the callable that contains `n`
+ ) and
+ msg = "Call context for isUnreachableInCall is inconsistent with call graph."
+ }
+
+ query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
+ (
+ n = getAnOutNode(call, _) and
+ msg = "OutNode and call does not share enclosing callable."
+ or
+ n.(ArgumentNode).argumentOf(call, _) and
+ msg = "ArgumentNode and call does not share enclosing callable."
+ ) and
+ n.getEnclosingCallable() != call.getEnclosingCallable() // shared condition for both messages
+ }
+
+ // This predicate helps the compiler forget that in some languages
+ // it is impossible for a result of `getPreUpdateNode` to be an
+ // instance of `PostUpdateNode`.
+ private Node getPre(PostUpdateNode n) {
+ result = n.getPreUpdateNode()
+ or
+ none()
+ }
+
+ query predicate postIsNotPre(PostUpdateNode n, string msg) {
+ getPre(n) = n and
+ msg = "PostUpdateNode should not equal its pre-update node."
+ }
+
+ query predicate postHasUniquePre(PostUpdateNode n, string msg) {
+ exists(int c |
+ c = count(n.getPreUpdateNode()) and
+ c != 1 and // reports both a missing and an ambiguous pre-update node
+ msg = "PostUpdateNode should have one pre-update node but has " + c + "."
+ )
+ }
+
+ query predicate uniquePostUpdate(Node n, string msg) {
+ 1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
+ msg = "Node has multiple PostUpdateNodes."
+ }
+
+ query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
+ n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
+ msg = "PostUpdateNode does not share callable with its pre-update node."
+ }
+
+ private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }
+
+ query predicate reverseRead(Node n, string msg) { // read target has a post-update node, origin does not
+ exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
+ msg = "Origin of readStep is missing a PostUpdateNode."
+ }
+
+ query predicate argHasPostUpdate(ArgumentNode n, string msg) {
+ not hasPost(n) and
+ not isImmutableOrUnobservable(n) and // nodes the language library exempts are never reported
+ msg = "ArgumentNode is missing PostUpdateNode."
+ }
+
+ // This predicate helps the compiler forget that in some languages
+ // it is impossible for a `PostUpdateNode` to be the target of
+ // `simpleLocalFlowStep`.
+ private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
+
+ query predicate postWithInFlow(Node n, string msg) {
+ isPostUpdateNode(n) and
+ simpleLocalFlowStep(_, n) and
+ msg = "PostUpdateNode should not be the target of local flow."
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll
new file mode 100644
index 00000000000..4ea383b20a1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll
@@ -0,0 +1,11 @@
+/**
+ * Provides IR-specific definitions for use in the data flow library.
+ */
+module Private {
+ import DataFlowPrivate // IR node classes plus the store/read/jump step predicates
+ import DataFlowDispatch
+}
+
+module Public {
+ import DataFlowUtil // defines `Node` and related classes
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
new file mode 100644
index 00000000000..00996a6ebfc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
@@ -0,0 +1,509 @@
+private import cpp
+private import DataFlowUtil
+private import semmle.code.cpp.ir.IR
+private import DataFlowDispatch
+
+/**
+ * A data flow node that occurs as the argument of a call and is passed as-is
+ * to the callable. Instance arguments (`this` pointer, position `-1`) and read
+ * side effects on parameters are also included.
+ */
+abstract class ArgumentNode extends OperandNode {
+ /**
+ * Holds if this argument occurs at the given position in the given call.
+ * The instance argument is considered to have index `-1`.
+ */
+ abstract predicate argumentOf(DataFlowCall call, int pos);
+
+ /** Gets the call in which this node is an argument, at any position. */
+ DataFlowCall getCall() { this.argumentOf(result, _) }
+}
+
+/**
+ * A data flow node that occurs as the argument to a call, or an
+ * implicit `this` pointer argument.
+ */
+private class PrimaryArgumentNode extends ArgumentNode {
+ override ArgumentOperand op;
+
+ PrimaryArgumentNode() { op = any(CallInstruction call).getAnArgumentOperand() }
+
+ override predicate argumentOf(DataFlowCall call, int pos) { call.getArgumentOperand(pos) = op }
+
+ override string toString() {
+ // Preferred name: the text of the unconverted expression that corresponds
+ // to the definition of the operand, whenever such an expression exists.
+ // This disjunct simply has no result when there is no such expression.
+ result = op.getDef().getUnconvertedResultExpression().toString()
+ or
+ // Otherwise fall back to a positional name. Definitions without an unconverted
+ // result expression can occur, for example, in IR-generated constructors.
+ not exists(op.getDef().getUnconvertedResultExpression()) and
+ (
+ op instanceof ThisArgumentOperand and result = "Argument this"
+ or
+ result = "Argument " + op.(PositionalArgumentOperand).getIndex()
+ )
+ }
+}
+
+/**
+ * A data flow node representing the read side effect of a call on a
+ * specific parameter.
+ */
+private class SideEffectArgumentNode extends ArgumentNode {
+ override SideEffectOperand op;
+ ReadSideEffectInstruction read; // the read side effect whose side-effect operand is `op`
+
+ SideEffectArgumentNode() { op = read.getSideEffectOperand() }
+
+ override predicate argumentOf(DataFlowCall call, int pos) {
+ read.getPrimaryInstruction() = call and // the call that the side effect is attached to
+ pos = getArgumentPosOfSideEffect(read.getIndex())
+ }
+
+ override string toString() {
+ result = read.getArgumentDef().getUnconvertedResultExpression().toString() + " indirection"
+ or
+ // Some instructions don't map to an unconverted result expression. For these cases
+ // we fall back to a simpler naming scheme. This can happen in IR-generated constructors.
+ not exists(read.getArgumentDef().getUnconvertedResultExpression()) and
+ (
+ if read.getIndex() = -1 // index `-1` is rendered as the `this` argument
+ then result = "Argument this indirection"
+ else result = "Argument " + read.getIndex() + " indirection"
+ )
+ }
+}
+
+private newtype TReturnKind =
+ TNormalReturnKind() or // rendered as "return" by `NormalReturnKind`
+ TIndirectReturnKind(ParameterIndex index) // rendered as "outparam[index]" by `IndirectReturnKind`
+
+/**
+ * A return kind. A return kind describes how a value can be returned from a
+ * callable: a normal function return, or an indirect return via a parameter index.
+ */
+class ReturnKind extends TReturnKind {
+ /** Gets a textual representation of this return kind. */
+ abstract string toString();
+}
+
+private class NormalReturnKind extends ReturnKind, TNormalReturnKind {
+ override string toString() { result = "return" } // the single normal-return kind
+}
+
+private class IndirectReturnKind extends ReturnKind, TIndirectReturnKind {
+ ParameterIndex index; // the parameter index carried by this kind
+
+ IndirectReturnKind() { this = TIndirectReturnKind(index) }
+
+ override string toString() { result = "outparam[" + index.toString() + "]" }
+}
+
+/** A data flow node whose value is returned, either directly or through an indirection. */
+class ReturnNode extends InstructionNode {
+ Instruction primary;
+
+ ReturnNode() {
+ exists(ReturnIndirectionInstruction indirection |
+ primary = indirection and instr = indirection.getSideEffectOperand().getAnyDef()
+ )
+ or
+ exists(ReturnValueInstruction ret | primary = ret and instr = ret.getReturnValue())
+ }
+
+ /** Gets the kind of this returned value. */
+ abstract ReturnKind getKind();
+}
+
+class ReturnValueNode extends ReturnNode {
+ override ReturnValueInstruction primary; // restricts `ReturnNode` to the return-value case
+
+ override ReturnKind getKind() { result = TNormalReturnKind() }
+}
+
+class ReturnIndirectionNode extends ReturnNode {
+ override ReturnIndirectionInstruction primary;
+
+ override ReturnKind getKind() {
+ exists(int paramIndex |
+ result = TIndirectReturnKind(paramIndex) and
+ primary.hasIndex(paramIndex)
+ )
+ }
+}
+
+/** A data flow node that represents the output of a call: the call itself or one of its write side effects. */
+class OutNode extends InstructionNode {
+ OutNode() {
+ instr instanceof CallInstruction or
+ instr instanceof WriteSideEffectInstruction
+ }
+
+ /** Gets the underlying call. */
+ abstract DataFlowCall getCall();
+
+ abstract ReturnKind getReturnKind(); // the return kind under which this node receives the value
+}
+
+private class CallOutNode extends OutNode {
+ override CallInstruction instr;
+
+ override ReturnKind getReturnKind() { result = TNormalReturnKind() }
+
+ override DataFlowCall getCall() { result = instr } // the node is the call instruction itself
+}
+
+private class SideEffectOutNode extends OutNode {
+ override WriteSideEffectInstruction instr;
+
+ override ReturnKind getReturnKind() { result = TIndirectReturnKind(instr.getIndex()) }
+
+ override DataFlowCall getCall() { result = instr.getPrimaryInstruction() }
+}
+
+/**
+ * Gets the node that can read the value returned from `call` with return kind
+ * `kind`. There is no result unless exactly one such `OutNode` exists.
+ */
+OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
+ // There should be only one `OutNode` for a given `(call, kind)` pair. Showing the optimizer that
+ // this is true helps it make better decisions downstream, especially in virtual dispatch.
+ result =
+ unique(OutNode outNode |
+ outNode.getCall() = call and
+ outNode.getReturnKind() = kind
+ )
+}
+
+/**
+ * Holds if data can flow from `n1` to `n2` in a way that loses the
+ * calling context. For example, this would happen with flow through a
+ * global or static variable. This implementation currently has no results.
+ */
+predicate jumpStep(Node n1, Node n2) { none() }
+
+private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode node2) {
+ exists(StoreInstruction store, Class c |
+ store = node2.asInstruction() and // `node2` is the store instruction itself
+ store.getSourceValueOperand() = node1.asOperand() and // `node1` is the stored value operand
+ getWrittenField(store, f.getAField(), c) and // the written field is one of the fields of `f`
+ f.hasOffset(c, _, _)
+ )
+}
+
+private FieldAddressInstruction getFieldInstruction(Instruction instr) {
+ // Either `instr` itself or, when `instr` is a `CopyValueInstruction`, its unary operand.
+ result = [instr, instr.(CopyValueInstruction).getUnary()]
+}
+
+pragma[noinline]
+private predicate getWrittenField(Instruction instr, Field f, Class c) {
+ exists(FieldAddressInstruction fa | // the field address that `instr` writes to
+ fa =
+ getFieldInstruction([
+ instr.(StoreInstruction).getDestinationAddress(),
+ instr.(WriteSideEffectInstruction).getDestinationAddress()
+ ]) and
+ f = fa.getField() and
+ c = f.getDeclaringType() // `c` is always the type that declares `f`
+ )
+}
+
+private predicate fieldStoreStepChi(Node node1, FieldContent f, PostUpdateNode node2) {
+ exists(ChiPartialOperand operand, ChiInstruction chi |
+ chi.getPartialOperand() = operand and
+ node1.asOperand() = operand and
+ node2.asInstruction() = chi and
+ exists(Class c | // two alternative ways of identifying the field `f`:
+ c = chi.getResultType() and // (1) by the bit interval the chi updates in its result type
+ exists(int startBit, int endBit |
+ chi.getUpdatedInterval(startBit, endBit) and
+ f.hasOffset(c, startBit, endBit)
+ )
+ or
+ getWrittenField(operand.getDef(), f.getAField(), c) and // (2) by the written field address
+ f.hasOffset(c, _, _)
+ )
+ )
+}
+
+private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode node2) {
+ exists(a) and // `a` is not constrained any further by this predicate
+ exists(ChiPartialOperand operand, ChiInstruction chi, StoreInstruction store |
+ chi.getPartialOperand() = operand and
+ store = operand.getDef() and
+ node1.asOperand() = operand and
+ // This `ChiInstruction` will always have a non-conflated result because both `ArrayStoreNode`
+ // and `PointerStoreNode` require it in their characteristic predicates.
+ node2.asInstruction() = chi and
+ (
+ // `x[i] = taint()`
+ // This matches the characteristic predicate in `ArrayStoreNode`.
+ store.getDestinationAddress() instanceof PointerAddInstruction
+ or
+ // `*p = taint()`
+ // This matches the characteristic predicate in `PointerStoreNode`.
+ store.getDestinationAddress().(CopyValueInstruction).getUnary() instanceof LoadInstruction
+ )
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` via an assignment to `f`.
+ * Thus, `node2` references an object with a field `f` that contains the
+ * value of `node1`. `f` may be field content or array content.
+ */
+predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
+ fieldStoreStepNoChi(node1, f, node2) or // `node2` is the store instruction itself
+ fieldStoreStepChi(node1, f, node2) or // `node2` is the chi instruction of a partial write
+ arrayStoreStepChi(node1, f, node2) or // `x[i] = ...` or `*p = ...`
+ fieldStoreStepAfterArraySuppression(node1, f, node2)
+}
+
+// This predicate pushes the correct `FieldContent` onto the access path when the
+// `suppressArrayRead` predicate has popped off an `ArrayContent`.
+private predicate fieldStoreStepAfterArraySuppression(
+ Node node1, FieldContent f, PostUpdateNode node2
+) {
+ exists(ChiInstruction chi, WriteSideEffectInstruction sideEffect, Class declaring |
+ chi = node1.asInstruction() and
+ chi = node2.asInstruction() and // the same chi instruction at both endpoints
+ sideEffect = chi.getPartial() and
+ not chi.isResultConflated() and
+ getWrittenField(sideEffect, f.getAField(), declaring) and
+ f.hasOffset(declaring, _, _)
+ )
+}
+
+bindingset[result, i]
+private int unbindInt(int i) { i <= result and i >= result } // same as `result = i`; presumably written as inequalities to keep the optimizer from binding through it
+
+pragma[noinline]
+private predicate getLoadedField(LoadInstruction load, Field f, Class c) {
+ exists(FieldAddressInstruction fieldAddr |
+ load.getSourceAddress() = fieldAddr and
+ fieldAddr.getField() = f and
+ f.getDeclaringType() = c
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` via a read of field content `f`.
+ * Here `node1` is a definition of a load operand and `node2` is that operand,
+ * so `node1` references an object with a field `f` whose value ends up in `node2`.
+ */
+private predicate fieldReadStep(Node node1, FieldContent f, Node node2) {
+ exists(LoadOperand operand |
+ node2.asOperand() = operand and
+ node1.asInstruction() = operand.getAnyDef() and
+ exists(Class c | // two alternative ways of identifying the field `f`:
+ c = operand.getAnyDef().getResultType() and // (1) by the bit interval the load uses
+ exists(int startBit, int endBit |
+ operand.getUsedInterval(unbindInt(startBit), unbindInt(endBit)) and
+ f.hasOffset(c, startBit, endBit)
+ )
+ or
+ getLoadedField(operand.getUse(), f.getAField(), c) and // (2) by the loaded field address
+ f.hasOffset(c, _, _)
+ )
+ )
+}
+
+/**
+ * When a store step happens in a function that looks like an array write such as:
+ * ```cpp
+ * void f(int* pa) {
+ * *pa = source();
+ * }
+ * ```
+ * it can be a write to an array, but it can also happen that `f` is called as `f(&a.x)`. If that is
+ * the case, the `ArrayContent` that was written by the call to `f` should be popped off the access
+ * path, and a `FieldContent` containing `x` should be pushed instead.
+ * So this case pops `ArrayContent` off the access path, and the `fieldStoreStepAfterArraySuppression`
+ * predicate in `storeStep` ensures that we push the right `FieldContent` onto the access path.
+ */
+predicate suppressArrayRead(Node node1, ArrayContent a, Node node2) {
+ exists(a) and // `a` is not constrained any further by this predicate
+ exists(WriteSideEffectInstruction write, ChiInstruction chi |
+ node1.asInstruction() = write and
+ node2.asInstruction() = chi and
+ chi.getPartial() = write and
+ getWrittenField(write, _, _) // only applies when the side effect writes to a field address
+ )
+}
+
+private class ArrayToPointerConvertInstruction extends ConvertInstruction {
+ ArrayToPointerConvertInstruction() {
+ this.getResultType() instanceof PointerType and // converts to a pointer type ...
+ this.getUnary().getResultType() instanceof ArrayType // ... from an operand of array type
+ }
+}
+
+private Instruction skipOneCopyValueInstructionRec(CopyValueInstruction copy) {
+ copy.getUnary() = result and not result instanceof CopyValueInstruction // base case: chain ends here
+ or
+ result = skipOneCopyValueInstructionRec(copy.getUnary()) // recurse while the operand is another copy
+}
+
+private Instruction skipCopyValueInstructions(Operand op) {
+ result = skipOneCopyValueInstructionRec(op.getDef()) // definition is a copy: look through the chain
+ or
+ result = op.getDef() and not result instanceof CopyValueInstruction
+}
+
+private predicate arrayReadStep(Node node1, ArrayContent a, Node node2) {
+ exists(a) and // `a` is not constrained any further by this predicate
+ // Explicit dereferences such as `*p` or `p[i]` where `p` is a pointer or array.
+ exists(LoadOperand operand, Instruction address |
+ operand.isDefinitionInexact() and
+ node1.asInstruction() = operand.getAnyDef() and
+ operand = node2.asOperand() and
+ address = skipCopyValueInstructions(operand.getAddressOperand()) and // look through value copies
+ (
+ address instanceof LoadInstruction or
+ address instanceof ArrayToPointerConvertInstruction or
+ address instanceof PointerOffsetInstruction
+ )
+ )
+}
+
+/**
+ * In cases such as:
+ * ```cpp
+ * void f(int* pa) {
+ * *pa = source();
+ * }
+ * ...
+ * int x;
+ * f(&x);
+ * use(x);
+ * ```
+ * the load on `x` in `use(x)` will exactly overlap with its definition (in this case the definition
+ * is a `WriteSideEffect`). This predicate pops the `ArrayContent` (pushed by the store in `f`)
+ * from the access path.
+ */
+private predicate exactReadStep(Node node1, ArrayContent a, Node node2) {
+ exists(a) and // `a` is not constrained any further by this predicate
+ exists(WriteSideEffectInstruction write, ChiInstruction chi |
+ not chi.isResultConflated() and
+ chi.getPartial() = write and
+ node1.asInstruction() = write and
+ node2.asInstruction() = chi and
+ // To distinguish this case from the `arrayReadStep` case we require that the entire variable was
+ // overwritten by the `WriteSideEffectInstruction` (i.e., there is a load that reads the
+ // entire variable).
+ exists(LoadInstruction load | load.getSourceValue() = chi)
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` via a read of `f`.
+ * Thus, `node1` references an object with a field `f` whose value ends up in
+ * `node2`. `f` may be field content or array content.
+ */
+predicate readStep(Node node1, Content f, Node node2) {
+ fieldReadStep(node1, f, node2) or // load of a field
+ arrayReadStep(node1, f, node2) or // explicit dereference such as `*p` or `p[i]`
+ exactReadStep(node1, f, node2) or // pops array content when a whole variable is overwritten
+ suppressArrayRead(node1, f, node2) // pops array content before a field content is pushed
+}
+
+/**
+ * Holds if values stored inside content `c` are cleared at node `n`.
+ */
+predicate clearsContent(Node n, Content c) {
+ none() // stub implementation: no content is ever cleared
+}
+
+/** Gets the type of `n` used for type pruning. */
+IRType getNodeType(Node n) {
+ suppressUnusedNode(n) and // always holds; it only mentions `n`
+ result instanceof IRVoidType // stub implementation: every node gets a void type
+}
+
+/** Gets a string representation of a type returned by `getNodeType`. */
+string ppReprType(IRType t) { none() } // stub implementation: never has a result
+
+/**
+ * Holds if `t1` and `t2` are compatible, that is, whether data can flow from
+ * a node of type `t1` to a node of type `t2`.
+ */
+pragma[inline]
+predicate compatibleTypes(IRType t1, IRType t2) {
+ any() // stub implementation: every pair of types is compatible
+}
+
+private predicate suppressUnusedNode(Node n) { any() } // holds for every node
+
+//////////////////////////////////////////////////////////////////////////////
+// Java QL library compatibility wrappers
+//////////////////////////////////////////////////////////////////////////////
+/** A node that performs a type cast. This stub has no members. */
+class CastNode extends InstructionNode {
+ CastNode() { none() } // stub implementation
+}
+
+/**
+ * A function that may contain code or a variable that may contain itself. When
+ * flow crosses from one _enclosing callable_ to another, the interprocedural
+ * data-flow library discards call contexts and inserts a node in the big-step
+ * relation used for human-readable path explanations.
+ */
+class DataFlowCallable = Declaration;
+
+class DataFlowExpr = Expr; // data-flow expressions are plain `Expr`s
+
+class DataFlowType = IRType; // the type used by `getNodeType` and `compatibleTypes` in this file
+
+/** A function call relevant for data flow; every `CallInstruction` is one. */
+class DataFlowCall extends CallInstruction {
+ Function getEnclosingCallable() { result = this.getEnclosingFunction() }
+}
+
+predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } // stub implementation: never holds
+
+int accessPathLimit() { result = 5 } // constant limit
+
+/**
+ * Holds if access paths with `c` at their head always should be tracked at high
+ * precision. This disables adaptive access path precision for such access paths.
+ */
+predicate forceHighPrecision(Content c) { none() } // never holds here
+
+/** The unit type. */
+private newtype TUnit = TMkUnit() // a single constructor without arguments, hence a single value
+
+/** The trivial type with a single element. */
+class Unit extends TUnit {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "unit" }
+}
+
+/**
+ * Holds if `n` does not require a `PostUpdateNode` as it either cannot be
+ * modified or its modification cannot be observed, for example if it is a
+ * freshly created object that is not saved in a variable.
+ *
+ * This predicate is only used for consistency checks.
+ */
+predicate isImmutableOrUnobservable(Node n) {
+ // The rules for whether an IR argument gets a post-update node are too
+ // complex to model here.
+ any() // holds for every node, so no node is required to have a post-update node
+}
+
+/** Holds if `n` should be hidden from path explanations: operand nodes that are not argument nodes. */
+predicate nodeIsHidden(Node n) { n instanceof OperandNode and not n instanceof ArgumentNode }
+
+class LambdaCallKind = Unit; // only one lambda call kind exists
+
+/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. Never holds here. */
+predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
+
+/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. Never holds here. */
+predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
+
+/** Extra data-flow steps needed for lambda flow analysis. There are none here. */
+predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
new file mode 100644
index 00000000000..9e7a95e010d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
@@ -0,0 +1,879 @@
+/**
+ * Provides C++-specific definitions for use in the data flow library.
+ */
+
+private import cpp
+// The `ValueNumbering` library has to be imported right after `cpp` to ensure
+// that the cached IR gets the same checksum here as it does in queries that use
+// `ValueNumbering` without `DataFlow`.
+private import semmle.code.cpp.ir.ValueNumbering
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.controlflow.IRGuards
+private import semmle.code.cpp.models.interfaces.DataFlow
+
+cached
+private module Cached {
+ cached
+ newtype TIRDataFlowNode =
+ TInstructionNode(Instruction i) or
+ TOperandNode(Operand op) or
+ TVariableNode(Variable var)
+
+ cached
+ predicate localFlowStepCached(Node nodeFrom, Node nodeTo) {
+ simpleLocalFlowStep(nodeFrom, nodeTo)
+ }
+}
+
+private import Cached
+
+/**
+ * A node in a data flow graph.
+ *
+ * A node can be either an expression, a parameter, or an uninitialized local
+ * variable. Such nodes are created with `DataFlow::exprNode`,
+ * `DataFlow::parameterNode`, and `DataFlow::uninitializedNode` respectively.
+ */
+class Node extends TIRDataFlowNode {
+ /**
+ * INTERNAL: Do not use.
+ */
+ Declaration getEnclosingCallable() { none() } // overridden in subclasses
+
+ /** Gets the function to which this node belongs, if any. */
+ Function getFunction() { none() } // overridden in subclasses
+
+ /** Gets the type of this node. */
+ IRType getType() { none() } // overridden in subclasses
+
+ /** Gets the instruction corresponding to this node, if any. */
+ Instruction asInstruction() { result = this.(InstructionNode).getInstruction() }
+
+ /** Gets the operand corresponding to this node, if any. */
+ Operand asOperand() { result = this.(OperandNode).getOperand() }
+
+ /**
+ * Gets the non-conversion expression corresponding to this node, if any.
+ * This predicate only has a result on nodes that represent the value of
+ * evaluating the expression. For data flowing _out of_ an expression, like
+ * when an argument is passed by reference, use `asDefiningArgument` instead
+ * of `asExpr`.
+ *
+ * If this node strictly (in the sense of `asConvertedExpr`) corresponds to
+ * a `Conversion`, then the result is the underlying non-`Conversion` base
+ * expression.
+ */
+ Expr asExpr() { result = this.(ExprNode).getExpr() }
+
+ /**
+ * Gets the expression corresponding to this node, if any. The returned
+ * expression may be a `Conversion`.
+ */
+ Expr asConvertedExpr() { result = this.(ExprNode).getConvertedExpr() }
+
+ /**
+ * Gets the argument that defines this `DefinitionByReferenceNode`, if any.
+ * This predicate should be used instead of `asExpr` when referring to the
+ * value of a reference argument _after_ the call has returned. For example,
+ * in `f(&x)`, this predicate will have `&x` as its result for the `Node`
+ * that represents the new value of `x`.
+ */
+ Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
+
+ /** Gets the positional parameter corresponding to this node, if any. */
+ Parameter asParameter() { result = this.(ExplicitParameterNode).getParameter() }
+
+ /**
+ * Gets the variable corresponding to this node, if any. This can be used for
+ * modeling flow in and out of global variables.
+ */
+ Variable asVariable() { result = this.(VariableNode).getVariable() }
+
+ /**
+ * Gets the expression that is partially defined by this node, if any.
+ *
+ * Partial definitions are created for field stores (`x.y = taint();` is a partial
+ * definition of `x`), and for calls that may change the value of an object (so
+ * `x.set(taint())` is a partial definition of `x`, and `transfer(&x, taint())` is
+ * a partial definition of `&x`).
+ */
+ Expr asPartialDefinition() { result = this.(PartialDefinitionNode).getDefinedExpr() }
+
+ /**
+ * DEPRECATED: See UninitializedNode.
+ *
+ * Gets the uninitialized local variable corresponding to this node, if
+ * any.
+ */
+ deprecated LocalVariable asUninitialized() { none() }
+
+ /**
+ * Gets an upper bound on the type of this node.
+ */
+ IRType getTypeBound() { result = this.getType() }
+
+ /** Gets the location of this element. */
+ Location getLocation() { none() } // overridden by subclasses
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /** Gets a textual representation of this element. */
+ string toString() { none() } // overridden by subclasses
+}
+
+/**
+ * An instruction, viewed as a node in a data flow graph.
+ */
+class InstructionNode extends Node, TInstructionNode {
+ Instruction instr;
+
+ InstructionNode() { this = TInstructionNode(instr) }
+
+ /** Gets the instruction corresponding to this node. */
+ Instruction getInstruction() { result = instr }
+
+ override Declaration getEnclosingCallable() { result = this.getFunction() }
+
+ override Function getFunction() { result = instr.getEnclosingFunction() }
+
+ override IRType getType() { result = instr.getResultIRType() }
+
+ override Location getLocation() { result = instr.getLocation() }
+
+ override string toString() {
+ // This predicate is overridden in subclasses. This default implementation
+ // does not use `Instruction.toString` because that's expensive to compute.
+ result = this.getInstruction().getOpcode().toString()
+ }
+}
+
+/**
+ * An operand, viewed as a node in a data flow graph.
+ */
+class OperandNode extends Node, TOperandNode {
+ Operand op;
+
+ OperandNode() { this = TOperandNode(op) }
+
+ /** Gets the operand corresponding to this node. */
+ Operand getOperand() { result = op }
+
+ override Declaration getEnclosingCallable() { result = this.getFunction() }
+
+ override Function getFunction() { result = op.getUse().getEnclosingFunction() }
+
+ override IRType getType() { result = op.getIRType() }
+
+ override Location getLocation() { result = op.getLocation() }
+
+ override string toString() { result = this.getOperand().toString() }
+}
+
+/**
+ * An expression, viewed as a node in a data flow graph.
+ */
+class ExprNode extends InstructionNode {
+ ExprNode() { exists(instr.getConvertedResultExpression()) }
+
+ /**
+ * Gets the non-conversion expression corresponding to this node, if any. If
+ * this node strictly (in the sense of `getConvertedExpr`) corresponds to a
+ * `Conversion`, then the result is that `Conversion`'s non-`Conversion` base
+ * expression.
+ */
+ Expr getExpr() { result = instr.getUnconvertedResultExpression() }
+
+ /**
+ * Gets the expression corresponding to this node, if any. The returned
+ * expression may be a `Conversion`.
+ */
+ Expr getConvertedExpr() { result = instr.getConvertedResultExpression() }
+
+ override string toString() { result = this.asConvertedExpr().toString() }
+}
+
+/**
+ * INTERNAL: do not use. Maps a parameter/argument index to the negative
+ * position used for that parameter's side effect (its pointer indirection).
+ */
+bindingset[index]
+int getArgumentPosOfSideEffect(int index) {
+ // index -1 maps to position -2,
+ // index 0 maps to position -3,
+ // index 1 maps to position -4,
+ // and so on for larger indices.
+ result = -index - 3
+}
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph. This includes both explicit parameters such as `x` in `f(x)`
+ * and implicit parameters such as `this` in `x.f()`.
+ *
+ * To match a specific kind of parameter, consider using one of the subclasses
+ * `ExplicitParameterNode`, `ThisParameterNode`, or
+ * `ParameterIndirectionNode`.
+ */
+class ParameterNode extends InstructionNode {
+ ParameterNode() {
+ // To avoid making this class abstract, we enumerate its values here
+ instr instanceof InitializeParameterInstruction
+ or
+ instr instanceof InitializeIndirectionInstruction
+ }
+
+ /**
+ * Holds if this node is the parameter of `f` at the specified position. The
+ * implicit `this` parameter is considered to have position `-1`, and
+ * pointer-indirection parameters are at further negative positions.
+ */
+ predicate isParameterOf(Function f, int pos) { none() } // overridden by subclasses
+}
+
+/** An explicit positional parameter, not including `this` or `...`. */
+private class ExplicitParameterNode extends ParameterNode {
+ override InitializeParameterInstruction instr;
+
+ ExplicitParameterNode() { exists(instr.getParameter()) }
+
+ override predicate isParameterOf(Function f, int pos) {
+ f.getParameter(pos) = instr.getParameter()
+ }
+
+ /** Gets the `Parameter` associated with this node. */
+ Parameter getParameter() { result = instr.getParameter() }
+
+ override string toString() { result = instr.getParameter().toString() }
+}
+
+/** An implicit `this` parameter. */
+class ThisParameterNode extends ParameterNode {
+ override InitializeParameterInstruction instr;
+
+ ThisParameterNode() { instr.getIRVariable() instanceof IRThisVariable }
+
+ override predicate isParameterOf(Function f, int pos) {
+ pos = -1 and instr.getEnclosingFunction() = f
+ }
+
+ override string toString() { result = "this" }
+}
+
+/** A synthetic parameter to model the pointed-to object of a pointer parameter. */
+class ParameterIndirectionNode extends ParameterNode {
+ override InitializeIndirectionInstruction instr;
+
+ override predicate isParameterOf(Function f, int pos) {
+ exists(int index |
+ instr.getEnclosingFunction() = f and
+ instr.hasIndex(index)
+ |
+ pos = getArgumentPosOfSideEffect(index)
+ )
+ }
+
+ override string toString() { result = "*" + instr.getIRVariable().toString() }
+}
+
+/**
+ * DEPRECATED: Data flow was never an accurate way to determine what
+ * expressions might be uninitialized. It errs on the side of saying that
+ * everything is uninitialized, and this is even worse in the IR because the IR
+ * doesn't use syntactic hints to rule out variables that are definitely
+ * initialized.
+ *
+ * The value of an uninitialized local variable, viewed as a node in a data
+ * flow graph.
+ */
+deprecated class UninitializedNode extends Node {
+ UninitializedNode() { none() }
+
+ LocalVariable getLocalVariable() { none() }
+}
+
+/**
+ * A node associated with an object after an operation that might have
+ * changed its state.
+ *
+ * This can be either the argument to a callable after the callable returns
+ * (which might have mutated the argument), or the qualifier of a field after
+ * an update to the field.
+ *
+ * Nodes corresponding to AST elements, for example `ExprNode`, usually refer
+ * to the value before the update with the exception of `ClassInstanceExpr`,
+ * which represents the value after the constructor has run.
+ *
+ * This class exists to match the interface used by Java. Concrete subclasses include
+ * the private field-, array- and pointer-store nodes declared later in this file.
+ */
+abstract class PostUpdateNode extends InstructionNode {
+ /**
+ * Gets the node before the state update.
+ */
+ abstract Node getPreUpdateNode();
+}
+
+/**
+ * The base class for nodes that perform "partial definitions".
+ *
+ * In contrast to a normal "definition", which provides a new value for
+ * something, a partial definition is an expression that may affect a
+ * value, but does not necessarily replace it entirely. For example:
+ * ```
+ * x.y = 1; // a partial definition of the object `x`.
+ * x.y.z = 1; // a partial definition of the object `x.y`.
+ * x.setY(1); // a partial definition of the object `x`.
+ * setY(&x); // a partial definition of the object `x`.
+ * ```
+ */
+abstract private class PartialDefinitionNode extends PostUpdateNode {
+ abstract Expr getDefinedExpr();
+}
+
+private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {
+ override ChiInstruction instr;
+ StoreInstruction store;
+
+ ExplicitFieldStoreQualifierNode() {
+ not instr.isResultConflated() and
+ instr.getPartial() = store and
+ (
+ instr.getUpdatedInterval(_, _) or
+ store.getDestinationAddress() instanceof FieldAddressInstruction
+ )
+ }
+
+ // By using an operand as the result of this predicate we avoid the dataflow inconsistency errors
+ // caused by having multiple nodes sharing the same pre update node. This inconsistency error can cause
+ // a tuple explosion in the big step dataflow relation since it can make many nodes be the entry node
+ // into a big step.
+ override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
+
+ override Expr getDefinedExpr() {
+ result =
+ store
+ .getDestinationAddress()
+ .(FieldAddressInstruction)
+ .getObjectAddress()
+ .getUnconvertedResultExpression()
+ }
+}
+
+/**
+ * Not every store instruction generates a chi instruction that we can attach a PostUpdateNode to.
+ * For instance, an update to a field of a struct containing only one field. Even if the store does
+ * have a chi instruction, a subsequent use of the result of the store may be linked directly to the
+ * result of the store as an inexact definition if the store totally overlaps the use. For these
+ * cases we attach the PostUpdateNode to the store instruction. There's no obvious pre update node
+ * for this case (as the entire memory is updated), so `getPreUpdateNode` is implemented as
+ * `none()`.
+ */
+private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNode {
+ override StoreInstruction instr;
+
+ ExplicitSingleFieldStoreQualifierNode() {
+ (
+ instr.getAUse().isDefinitionInexact()
+ or
+ not exists(ChiInstruction chi | chi.getPartial() = instr)
+ ) and
+ // Without this condition any store would create a `PostUpdateNode`.
+ instr.getDestinationAddress() instanceof FieldAddressInstruction
+ }
+
+ override Node getPreUpdateNode() { none() }
+
+ override Expr getDefinedExpr() {
+ result =
+ instr
+ .getDestinationAddress()
+ .(FieldAddressInstruction)
+ .getObjectAddress()
+ .getUnconvertedResultExpression()
+ }
+}
+
+private FieldAddressInstruction getFieldInstruction(Instruction instr) {
+ result = instr or
+ result = instr.(CopyValueInstruction).getUnary()
+}
+
+/**
+ * The target of a `fieldStoreStepAfterArraySuppression` store step, which is used to convert
+ * an `ArrayContent` to a `FieldContent` when the `WriteSideEffect` instruction stores
+ * into a field. See the QLDoc for `suppressArrayRead` for an example of where such a conversion
+ * is inserted.
+ */
+private class WriteSideEffectFieldStoreQualifierNode extends PartialDefinitionNode {
+ override ChiInstruction instr;
+ WriteSideEffectInstruction write;
+ FieldAddressInstruction field;
+
+ WriteSideEffectFieldStoreQualifierNode() {
+ not instr.isResultConflated() and
+ instr.getPartial() = write and
+ field = getFieldInstruction(write.getDestinationAddress())
+ }
+
+ override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
+
+ override Expr getDefinedExpr() {
+ result = field.getObjectAddress().getUnconvertedResultExpression()
+ }
+}
+
+/**
+ * The `PostUpdateNode` that is the target of an `arrayStoreStepChi` store step. The overridden
+ * `ChiInstruction` corresponds to the instruction represented by `node2` in `arrayStoreStepChi`.
+ */
+private class ArrayStoreNode extends PartialDefinitionNode {
+ override ChiInstruction instr;
+ PointerAddInstruction add;
+
+ ArrayStoreNode() {
+ not instr.isResultConflated() and
+ exists(StoreInstruction store |
+ instr.getPartial() = store and
+ add = store.getDestinationAddress()
+ )
+ }
+
+ override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
+
+ override Expr getDefinedExpr() { result = add.getLeft().getUnconvertedResultExpression() }
+}
+
+/**
+ * The `PostUpdateNode` that is the target of an `arrayStoreStepChi` store step. The overridden
+ * `ChiInstruction` corresponds to the instruction represented by `node2` in `arrayStoreStepChi`.
+ */
+private class PointerStoreNode extends PostUpdateNode {
+ override ChiInstruction instr;
+
+ PointerStoreNode() {
+ not instr.isResultConflated() and
+ exists(StoreInstruction store |
+ instr.getPartial() = store and
+ store.getDestinationAddress().(CopyValueInstruction).getUnary() instanceof LoadInstruction
+ )
+ }
+
+ override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
+}
+
+/**
+ * A node that represents the value of a variable after a function call that
+ * may have changed the variable because it's passed by reference.
+ *
+ * A typical example would be a call `f(&x)`. Firstly, there will be flow into
+ * `x` from previous definitions of `x`. Secondly, there will be a
+ * `DefinitionByReferenceNode` to represent the value of `x` after the call has
+ * returned. This node will have its `getArgument()` equal to `&x` and its
+ * `getVariableAccess()` equal to `x`.
+ */
+class DefinitionByReferenceNode extends InstructionNode {
+ override WriteSideEffectInstruction instr;
+
+ /** Gets the unconverted argument corresponding to this node. */
+ Expr getArgument() {
+ result =
+ instr
+ .getPrimaryInstruction()
+ .(CallInstruction)
+ .getArgument(instr.getIndex())
+ .getUnconvertedResultExpression()
+ }
+
+ /** Gets the parameter (of this side effect's own call target) through which this value is assigned. */
+ Parameter getParameter() {
+ result = instr.getPrimaryInstruction().(CallInstruction).getStaticCallTarget().getParameter(instr.getIndex())
+ }
+
+ override string toString() {
+ // This string should be unique enough to be helpful but common enough to
+ // avoid storing too many different strings.
+ result =
+ instr.getPrimaryInstruction().(CallInstruction).getStaticCallTarget().getName() +
+ " output argument"
+ or
+ not exists(instr.getPrimaryInstruction().(CallInstruction).getStaticCallTarget()) and
+ result = "output argument"
+ }
+}
+
+/**
+ * A `Node` corresponding to a variable in the program, as opposed to the
+ * value of that variable at some particular point. This can be used for
+ * modeling flow in and out of global variables.
+ */
+class VariableNode extends Node, TVariableNode {
+ Variable v;
+
+ VariableNode() { this = TVariableNode(v) }
+
+ /** Gets the variable corresponding to this node. */
+ Variable getVariable() { result = v }
+
+ override Function getFunction() { none() }
+
+ override Declaration getEnclosingCallable() {
+ // When flow crosses from one _enclosing callable_ to another, the
+ // interprocedural data-flow library discards call contexts and inserts a
+ // node in the big-step relation used for human-readable path explanations.
+ // Therefore we want a distinct enclosing callable for each `VariableNode`,
+ // and that can be the `Variable` itself.
+ result = v
+ }
+
+ override IRType getType() { result.getCanonicalLanguageType().hasUnspecifiedType(v.getType(), _) }
+
+ override Location getLocation() { result = v.getLocation() }
+
+ override string toString() { result = v.toString() }
+}
+
+/**
+ * Gets the node corresponding to `instr`.
+ */
+InstructionNode instructionNode(Instruction instr) { result.getInstruction() = instr }
+
+/**
+ * DEPRECATED: use `definitionByReferenceNodeFromArgument` instead.
+ *
+ * Gets the `Node` corresponding to a definition by reference of the variable
+ * that is passed as argument `e` of a call.
+ */
+deprecated DefinitionByReferenceNode definitionByReferenceNode(Expr e) { result.getArgument() = e }
+
+/**
+ * Gets the `Node` corresponding to the value of evaluating `e` or any of its
+ * conversions. There is no result if `e` is a `Conversion`. For data flowing
+ * _out of_ an expression, like when an argument is passed by reference, use
+ * `definitionByReferenceNodeFromArgument` instead.
+ */
+ExprNode exprNode(Expr e) { result.getExpr() = e }
+
+/**
+ * Gets the `Node` corresponding to the value of evaluating `e`. Here, `e` may
+ * be a `Conversion`. For data flowing _out of_ an expression, like when an
+ * argument is passed by reference, use
+ * `definitionByReferenceNodeFromArgument` instead.
+ */
+ExprNode convertedExprNode(Expr e) { result.getConvertedExpr() = e }
+
+/**
+ * Gets the `Node` corresponding to the value of `p` at function entry.
+ */
+ExplicitParameterNode parameterNode(Parameter p) { result.getParameter() = p }
+
+/**
+ * Gets the `Node` corresponding to a definition by reference of the variable
+ * that is passed as unconverted `argument` of a call.
+ */
+DefinitionByReferenceNode definitionByReferenceNodeFromArgument(Expr argument) {
+ result.getArgument() = argument
+}
+
+/** Gets the `VariableNode` corresponding to the variable `v`. */
+VariableNode variableNode(Variable v) { result.getVariable() = v }
+
+/**
+ * DEPRECATED: See UninitializedNode.
+ *
+ * Gets the `Node` corresponding to the value of an uninitialized local
+ * variable `v`. This predicate never has a result.
+ */
+deprecated Node uninitializedNode(LocalVariable v) { none() }
+
+/**
+ * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
+ * (intra-procedural) step.
+ */
+predicate localFlowStep = localFlowStepCached/2;
+
+/**
+ * INTERNAL: do not use.
+ *
+ * The local flow relation that global data flow is built on. It may contain
+ * fewer steps than the `localFlowStep` predicate.
+ */
+predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
+ // An instruction flowing to an operand
+ simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
+ or
+ // An operand flowing to an instruction
+ simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
+}
+
+pragma[noinline]
+private predicate getFieldSizeOfClass(Class c, Type type, int size) {
+ exists(Field f |
+ f.getDeclaringType() = c and
+ f.getUnderlyingType() = type and
+ type.getSize() = size
+ )
+}
+
+private predicate isSingleFieldClass(Type type, Operand op) {
+ exists(int size, Class c |
+ c = op.getType().getUnderlyingType() and
+ c.getSize() = size and
+ getFieldSizeOfClass(c, type, size)
+ )
+}
+
+private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) {
+ // Propagate flow from an instruction to its exact uses.
+ opTo.getDef() = iFrom
+ or
+ opTo = any(ReadSideEffectInstruction read).getSideEffectOperand() and
+ not iFrom.isResultConflated() and
+ iFrom = opTo.getAnyDef()
+ or
+ // Loading a single `int` from an `int *` parameter is not an exact load since
+ // the parameter may point to an entire array rather than a single `int`. The
+ // following rule ensures that any flow going into the
+ // `InitializeIndirectionInstruction`, even if it's for a different array
+ // element, will propagate to a load of the first element.
+ //
+ // Since we're linking `InitializeIndirectionInstruction` and
+ // `LoadInstruction` together directly, this rule will break if there's any
+ // reassignment of the parameter indirection, including a conditional one that
+ // leads to a phi node.
+ exists(InitializeIndirectionInstruction init |
+ iFrom = init and
+ opTo.(LoadOperand).getAnyDef() = init and
+ // Check that the types match. Otherwise we can get flow from an object to
+ // its fields, which leads to field conflation when there's flow from other
+ // fields to the object elsewhere.
+ init.getParameter().getType().getUnspecifiedType().(DerivedType).getBaseType() =
+ opTo.getType().getUnspecifiedType()
+ )
+ or
+ // Flow from stores to structs with a single field to a load of that field.
+ exists(LoadInstruction load |
+ load.getSourceValueOperand() = opTo and
+ opTo.getAnyDef() = iFrom and
+ isSingleFieldClass(pragma[only_bind_out](pragma[only_bind_out](iFrom).getResultType()), opTo)
+ )
+}
+
+private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
+ iTo.(CopyInstruction).getSourceValueOperand() = opFrom
+ or
+ iTo.(PhiInstruction).getAnInputOperand() = opFrom
+ or
+ // Treat all conversions as flow, even conversions between different numeric types.
+ iTo.(ConvertInstruction).getUnaryOperand() = opFrom
+ or
+ iTo.(CheckedConvertOrNullInstruction).getUnaryOperand() = opFrom
+ or
+ iTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom
+ or
+ // A chi instruction represents a point where a new value (the _partial_
+ // operand) may overwrite an old value (the _total_ operand), but the alias
+ // analysis couldn't determine that it surely will overwrite every bit of it or
+ // that it surely will overwrite no bit of it.
+ //
+ // By allowing flow through the total operand, we ensure that flow is not lost
+ // due to shortcomings of the alias analysis. We may get false flow in cases
+ // where the data is indeed overwritten.
+ //
+ // Flow through the partial operand belongs in the taint-tracking libraries
+ // for now.
+ iTo.getAnOperand().(ChiTotalOperand) = opFrom
+ or
+ // Add flow from write side-effects to non-conflated chi instructions through their
+ // partial operands. From there, a `readStep` will find subsequent reads of that field.
+ // Consider the following example:
+ // ```
+ // void setX(Point* p, int new_x) {
+ // p->x = new_x;
+ // }
+ // ...
+ // setX(&p, taint());
+ // ```
+ // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to
+ // `setX`, which will be melded into `p` through a chi instruction.
+ exists(ChiInstruction chi | chi = iTo |
+ opFrom.getAnyDef() instanceof WriteSideEffectInstruction and
+ chi.getPartialOperand() = opFrom and
+ not chi.isResultConflated() and
+ // In a call such as `set_value(&x->val);` we don't want the memory representing `x` to receive
+ // dataflow by a simple step. Instead, this is handled by field flow. If we add a simple step here
+ // we can get field-to-object flow.
+ not chi.isPartialUpdate()
+ )
+ or
+ // Flow through modeled functions
+ modelFlow(opFrom, iTo)
+}
+
+private predicate modelFlow(Operand opFrom, Instruction iTo) {
+ exists(
+ CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut
+ |
+ call.getStaticCallTarget() = func and
+ func.hasDataFlow(modelIn, modelOut)
+ |
+ (
+ modelOut.isReturnValue() and
+ iTo = call
+ or
+ // TODO: Add write side effects for return values
+ modelOut.isReturnValueDeref() and
+ iTo = call
+ or
+ exists(int index, WriteSideEffectInstruction outNode |
+ modelOut.isParameterDerefOrQualifierObject(index) and
+ iTo = outNode and
+ outNode = getSideEffectFor(call, index)
+ )
+ ) and
+ (
+ exists(int index |
+ modelIn.isParameterOrQualifierAddress(index) and
+ opFrom = call.getArgumentOperand(index)
+ )
+ or
+ exists(int index, ReadSideEffectInstruction read |
+ modelIn.isParameterDerefOrQualifierObject(index) and
+ read = getSideEffectFor(call, index) and
+ opFrom = read.getSideEffectOperand()
+ )
+ )
+ )
+}
+
+/**
+ * Holds if the result is a side effect for instruction `call` on argument
+ * index `argument`. This helper predicate makes it easy to join on both of
+ * these columns at once, avoiding pathological join orders in case the
+ * argument index should get joined first.
+ */
+pragma[noinline]
+SideEffectInstruction getSideEffectFor(CallInstruction call, int argument) {
+ call = result.getPrimaryInstruction() and
+ argument = result.(IndexedInstruction).getIndex()
+}
+
+/**
+ * Holds if data flows from `source` to `sink` in zero or more local
+ * (intra-procedural) steps.
+ */
+predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
+
+/**
+ * Holds if data can flow from `e1` to `e2` in zero or more
+ * local (intra-procedural) steps.
+ */
+predicate localInstructionFlow(Instruction e1, Instruction e2) {
+ localFlow(instructionNode(e1), instructionNode(e2))
+}
+
+/**
+ * Holds if data can flow from `e1` to `e2` in zero or more
+ * local (intra-procedural) steps.
+ */
+predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2)) }
+
+/**
+ * Gets a field corresponding to the bit range `[startBit..endBit)` of class `c`, if any.
+ */
+private Field getAField(Class c, int startBit, int endBit) {
+ result.getDeclaringType() = c and
+ startBit = 8 * result.getByteOffset() and
+ endBit = 8 * result.getType().getSize() + startBit
+ or
+ exists(Field f, Class cInner |
+ f = c.getAField() and
+ cInner = f.getUnderlyingType() and
+ result = getAField(cInner, startBit - 8 * f.getByteOffset(), endBit - 8 * f.getByteOffset())
+ )
+}
+
+private newtype TContent =
+ TFieldContent(Class c, int startBit, int endBit) { exists(getAField(c, startBit, endBit)) } or
+ TCollectionContent() or
+ TArrayContent()
+
+/**
+ * A description of the way data may be stored inside an object. Examples
+ * include instance fields, the contents of a collection object, or the contents
+ * of an array.
+ */
+class Content extends TContent {
+ /** Gets a textual representation of this element. */
+ abstract string toString();
+
+ predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
+ path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0
+ }
+}
+
+/** A reference through an instance field. */
+class FieldContent extends Content, TFieldContent {
+ Class c;
+ int startBit;
+ int endBit;
+
+ FieldContent() { this = TFieldContent(c, startBit, endBit) }
+
+ // Several fields may share this bit range, so pick the smallest name to get exactly 1 result.
+ override string toString() { result = min(Field f | f = this.getAField() | f.toString()) }
+
+ predicate hasOffset(Class cl, int start, int end) { cl = c and start = startBit and end = endBit }
+
+ Field getAField() { result = getAField(c, startBit, endBit) }
+}
+
+/** A reference through an array. */
+class ArrayContent extends Content, TArrayContent {
+ override string toString() { result = "[]" }
+}
+
+/** A reference through the contents of some collection-like container. */
+private class CollectionContent extends Content, TCollectionContent {
+ override string toString() { result = "" }
+}
+
+/**
+ * A guard that validates some instruction.
+ *
+ * To use this in a configuration, extend the class and provide a
+ * characteristic predicate precisely specifying the guard, and override
+ * `checks` to specify what is being validated and in which branch.
+ *
+ * It is important that all extending classes in scope are disjoint.
+ */
+class BarrierGuard extends IRGuardCondition {
+ /** Override this predicate to hold if this guard validates `instr` upon evaluating to `b`. */
+ predicate checksInstr(Instruction instr, boolean b) { none() }
+
+ /** Override this predicate to hold if this guard validates `e` upon evaluating to `b`. */
+ predicate checks(Expr e, boolean b) { none() }
+
+ /** Gets a node guarded by this guard. */
+ final Node getAGuardedNode() {
+ exists(ValueNumber value, boolean edge |
+ (
+ this.checksInstr(value.getAnInstruction(), edge)
+ or
+ this.checks(value.getAnInstruction().getConvertedResultExpression(), edge)
+ ) and
+ result.asInstruction() = value.getAnInstruction() and
+ this.controls(result.asInstruction().getBlock(), edge)
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll
new file mode 100644
index 00000000000..c7e61ea2e33
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll
@@ -0,0 +1,45 @@
+/**
+ * Provides predicates for mapping the `FunctionInput` and `FunctionOutput`
+ * classes used in function models to the corresponding instructions.
+ */
+
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+
+/**
+ * Gets the operand that goes into `input` for `call`.
+ */
+Operand callInput(CallInstruction call, FunctionInput input) {
+ // An argument or qualifier
+ exists(int index |
+ result = call.getArgumentOperand(index) and
+ input.isParameterOrQualifierAddress(index)
+ )
+ or
+ // A value pointed to by an argument or qualifier
+ exists(ReadSideEffectInstruction read |
+ result = read.getSideEffectOperand() and
+ read.getPrimaryInstruction() = call and
+ input.isParameterDerefOrQualifierObject(read.getIndex())
+ )
+}
+
+/**
+ * Gets the instruction that holds the `output` for `call`.
+ */
+Instruction callOutput(CallInstruction call, FunctionOutput output) {
+ // The return value
+ result = call and
+ output.isReturnValue()
+ or
+ // The side effect of a call on the value pointed to by an argument or qualifier
+ exists(WriteSideEffectInstruction effect |
+ result = effect and
+ effect.getPrimaryInstruction() = call and
+ output.isParameterDerefOrQualifierObject(effect.getIndex())
+ )
+ or
+ // TODO: modify this when we get return value dereferences
+ result = call and
+ output.isReturnValueDeref()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll
new file mode 100644
index 00000000000..16182296e40
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll
@@ -0,0 +1,142 @@
+private import cpp
+// The `ValueNumbering` library has to be imported right after `cpp` to ensure
+// that the cached IR gets the same checksum here as it does in queries that use
+// `ValueNumbering` without `DataFlow`.
+private import semmle.code.cpp.ir.ValueNumbering
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
+private import PrintIRUtilities
+
+/**
+ * Gets a label for a node with local flow into `useNode`; `order1` and `order2` are its sort keys.
+ */
+private string getFromFlow(DataFlow::Node useNode, int order1, int order2) {
+ exists(DataFlow::Node defNode, string prefix |
+ (
+ simpleLocalFlowStep(defNode, useNode) and prefix = ""
+ or
+ any(DataFlow::Configuration cfg).isAdditionalFlowStep(defNode, useNode) and
+ defNode.getEnclosingCallable() = useNode.getEnclosingCallable() and
+ prefix = "+"
+ ) and
+ if defNode.asInstruction() = useNode.asOperand().getAnyDef()
+ then
+ // Shorthand for flow from the def of this operand: printed as `def`, sorted first.
+ result = prefix + "def" and
+ order1 = -1 and
+ order2 = 0
+ else
+ if defNode.asOperand().getUse() = useNode.asInstruction()
+ then
+ // Shorthand for flow from an operand of this instruction: printed as the operand's dump ID.
+ result = prefix + defNode.asOperand().getDumpId() and
+ order1 = -1 and
+ order2 = defNode.asOperand().getDumpSortOrder()
+ else result = prefix + nodeId(defNode, order1, order2)
+ )
+}
+
+/**
+ * Gets a label for a node that `defNode` has local flow to; `order1` and `order2` are its sort keys.
+ */
+private string getToFlow(DataFlow::Node defNode, int order1, int order2) {
+ exists(DataFlow::Node useNode, string prefix |
+ (
+ simpleLocalFlowStep(defNode, useNode) and prefix = ""
+ or
+ any(DataFlow::Configuration cfg).isAdditionalFlowStep(defNode, useNode) and
+ defNode.getEnclosingCallable() = useNode.getEnclosingCallable() and
+ prefix = "+"
+ ) and
+ if useNode.asInstruction() = defNode.asOperand().getUse()
+ then
+ // Shorthand for flow to this operand's instruction: printed as `result`, sorted first.
+ result = prefix + "result" and
+ order1 = -1 and
+ order2 = 0
+ else result = prefix + nodeId(useNode, order1, order2)
+ )
+}
+
+/**
+ * Gets the value of the property named `key` (`flow`, `sink`, `source`, `barrier` or `pflow`) of `node`.
+ */
+private string getNodeProperty(DataFlow::Node node, string key) {
+ // List dataflow into and out of this node. Flow into this node is printed as `src->@`, and flow
+ // out of this node is printed as `@->dest`.
+ key = "flow" and
+ result =
+ strictconcat(string flow, boolean to, int order1, int order2 |
+ flow = getFromFlow(node, order1, order2) + "->@" and to = false
+ or
+ flow = "@->" + getToFlow(node, order1, order2) and to = true
+ |
+ flow, ", " order by to, order1, order2, flow
+ )
+ or
+ // Is this node a dataflow sink?
+ key = "sink" and
+ any(DataFlow::Configuration cfg).isSink(node) and
+ result = "true"
+ or
+ // Is this node a dataflow source?
+ key = "source" and
+ any(DataFlow::Configuration cfg).isSource(node) and
+ result = "true"
+ or
+ // Is this node a dataflow barrier, and if so, what kind?
+ key = "barrier" and
+ result =
+ strictconcat(string kind |
+ any(DataFlow::Configuration cfg).isBarrier(node) and kind = "full"
+ or
+ any(DataFlow::Configuration cfg).isBarrierIn(node) and kind = "in"
+ or
+ any(DataFlow::Configuration cfg).isBarrierOut(node) and kind = "out"
+ or
+ exists(DataFlow::BarrierGuard guard |
+ any(DataFlow::Configuration cfg).isBarrierGuard(guard) and
+ node = guard.getAGuardedNode() and
+ kind = "guard(" + guard.getResultId() + ")"
+ )
+ |
+ kind, ", "
+ )
+ or
+ // Is there partial flow from a source to this node?
+ // This property will only be emitted if partial flow is enabled by overriding
+ // `DataFlow::Configuration::explorationLimit()`.
+ key = "pflow" and
+ result =
+ strictconcat(DataFlow::PartialPathNode sourceNode, DataFlow::PartialPathNode destNode, int dist,
+ int order1, int order2 |
+ any(DataFlow::Configuration cfg).hasPartialFlow(sourceNode, destNode, dist) and
+ destNode.getNode() = node and
+ // Only print flow from a source in the same function.
+ sourceNode.getNode().getEnclosingCallable() = node.getEnclosingCallable()
+ |
+ nodeId(sourceNode.getNode(), order1, order2) + "+" + dist.toString(), ", "
+ order by
+ order1, order2, dist desc
+ )
+}
+
+/**
+ * An IR property provider that annotates operands and instructions with local dataflow.
+ */
+class LocalFlowPropertyProvider extends IRPropertyProvider {
+  override string getOperandProperty(Operand operand, string key) {
+    // Forward to the node-level properties of the dataflow node for `operand`.
+    exists(DataFlow::Node opNode | opNode.asOperand() = operand |
+      result = getNodeProperty(opNode, key)
+    )
+  }
+
+  override string getInstructionProperty(Instruction instruction, string key) {
+    // Forward to the node-level properties of the dataflow node for `instruction`.
+    exists(DataFlow::Node instrNode | instrNode.asInstruction() = instruction |
+      result = getNodeProperty(instrNode, key)
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll
new file mode 100644
index 00000000000..8c318216217
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll
@@ -0,0 +1,33 @@
+/**
+ * Print the dataflow local store steps in IR dumps.
+ */
+
+private import cpp
+// The `ValueNumbering` library has to be imported right after `cpp` to ensure
+// that the cached IR gets the same checksum here as it does in queries that use
+// `ValueNumbering` without `DataFlow`.
+private import semmle.code.cpp.ir.ValueNumbering
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
+private import PrintIRUtilities
+
+/**
+ * An IR property provider that annotates an instruction with the nodes stored into it, per content.
+ */
+class LocalFlowPropertyProvider extends IRPropertyProvider {
+  override string getInstructionProperty(Instruction instruction, string key) {
+    exists(DataFlow::Node target, Content c |
+      target.asInstruction() = instruction and
+      key = "content[" + c.toString() + "]" and
+      result =
+        strictconcat(string label, DataFlow::Node stored |
+          label = nodeId(stored, _, _) and
+          storeStep(stored, c, target)
+        |
+          label, ", "
+        )
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll
new file mode 100644
index 00000000000..5fc15cf986c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll
@@ -0,0 +1,39 @@
+/**
+ * Shared utilities used when printing dataflow annotations in IR dumps.
+ */
+
+private import cpp
+// The `ValueNumbering` library has to be imported right after `cpp` to ensure
+// that the cached IR gets the same checksum here as it does in queries that use
+// `ValueNumbering` without `DataFlow`.
+private import semmle.code.cpp.ir.ValueNumbering
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+
+/**
+ * Gets a short ID for the IR dataflow node `node`, with `order1` and `order2` as sort keys.
+ * - For an `Instruction`, the ID is the instruction's result ID (e.g. `m128`).
+ * - For an `Operand`, the ID is the result ID of the instruction that uses it, a dot, and
+ *   the operand's dump ID (e.g. `m128.left`).
+ * - For a `Variable`, the ID is the variable's qualified name wrapped in `var(...)`.
+ */
+string nodeId(DataFlow::Node node, int order1, int order2) {
+  order1 = 1000000 and
+  order2 = 0 and
+  result = "var(" + node.asVariable().getQualifiedName() + ")"
+  or
+  exists(Instruction instr | node.asInstruction() = instr |
+    order1 = instr.getBlock().getDisplayIndex() and
+    order2 = instr.getDisplayIndexInBlock() and
+    result = instr.getResultId()
+  )
+  or
+  // Operands are ordered by the position of the instruction that uses them.
+  exists(Operand op, Instruction user |
+    node.asOperand() = op and op.getUse() = user
+  |
+    order1 = user.getBlock().getDisplayIndex() and
+    order2 = user.getDisplayIndexInBlock() and
+    result = user.getResultId() + "." + op.getDumpId()
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll
new file mode 100644
index 00000000000..f563e47db9f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll
@@ -0,0 +1,223 @@
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import ModelUtil
+private import semmle.code.cpp.models.interfaces.DataFlow
+private import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * Holds if there is a single local (intra-procedural) step along which taint
+ * propagates from `nodeFrom` to `nodeTo`.
+ */
+predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  localAdditionalTaintStep(nodeFrom, nodeTo)
+  or
+  DataFlow::localFlowStep(nodeFrom, nodeTo)
+}
+
+/**
+ * Holds if taint can flow from `nodeFrom` to `nodeTo` in a single local step that
+ * is not itself a local data flow step. That is, `nodeFrom` and `nodeTo` are likely
+ * to represent different objects.
+ */
+cached
+predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  instructionToOperandTaintStep(nodeFrom.asInstruction(), nodeTo.asOperand())
+  or
+  operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
+}
+
+private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
+ // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
+ // We only do this in certain cases:
+ // 1. The instruction's result must not be conflated, and
+ // 2. The instruction's result type is one of the types where we expect element-to-object flow. Currently
+ // this is array types and union types. This matches the other two cases of element-to-object flow in
+ // `DefaultTaintTracking`.
+ toOperand.getAnyDef() = fromInstr and
+ not fromInstr.isResultConflated() and
+ (
+ fromInstr.getResultType() instanceof ArrayType or
+ fromInstr.getResultType() instanceof Union
+ )
+ or
+ exists(ReadSideEffectInstruction readInstr |
+ fromInstr = readInstr.getArgumentDef() and
+ toOperand = readInstr.getSideEffectOperand()
+ )
+ or
+ toOperand.(LoadOperand).getAnyDef() = fromInstr
+}
+
+/**
+ * Holds if taint propagates from the operand `opFrom` to the instruction `instrTo`
+ * in exactly one local (intra-procedural) step.
+ */
+private predicate operandToInstructionTaintStep(Operand opFrom, Instruction instrTo) {
+ // Taint can flow through expressions that alter the value but preserve
+ // more than one bit of it _or_ expressions that follow data through
+ // pointer indirections.
+ instrTo.getAnOperand() = opFrom and
+ (
+ instrTo instanceof ArithmeticInstruction
+ or
+ instrTo instanceof BitwiseInstruction
+ or
+ instrTo instanceof PointerArithmeticInstruction
+ or
+ // The `CopyInstruction` case is also present in non-taint data flow, but
+ // that uses `getDef` rather than `getAnyDef`. For taint, we want flow
+ // from a definition of `myStruct` to a `myStruct.myField` expression.
+ instrTo instanceof CopyInstruction
+ )
+ or
+ // Unary instructions tend to preserve enough information in practice that we
+ // want taint to flow through.
+ // The exception is `FieldAddressInstruction`. Together with the rules below for
+ // `LoadInstruction`s and `ChiInstruction`s, flow through `FieldAddressInstruction`
+ // could cause flow into one field to come out an unrelated field.
+ // This would happen across function boundaries, where the IR would not be able to
+ // match loads to stores.
+ instrTo.(UnaryInstruction).getUnaryOperand() = opFrom and
+ (
+ not instrTo instanceof FieldAddressInstruction
+ or
+ instrTo.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
+ )
+ or
+ instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
+ or
+ // Flow from an element to an array or union that contains it.
+ instrTo.(ChiInstruction).getPartialOperand() = opFrom and
+ not instrTo.isResultConflated() and
+ exists(Type t | instrTo.getResultLanguageType().hasType(t, false) |
+ t instanceof Union
+ or
+ t instanceof ArrayType
+ )
+ or
+ // Until we have flow through indirections across calls, we'll take flow out
+ // of the indirection and into the argument.
+ // When we get proper flow through indirections across calls, this code can be
+ // moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
+ exists(ReadSideEffectInstruction read |
+ read.getSideEffectOperand() = opFrom and
+ read.getArgumentDef() = instrTo
+ )
+ or
+ // Until we have flow through indirections across calls, we'll take flow out
+ // of the parameter and into its indirection.
+ // `InitializeIndirectionInstruction` only has a single operand: the address of the
+ // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
+ // the IR looks like this:
+ // ```
+ // m1 = InitializeParameter[p] : &r1
+ // r2 = Load[p] : r2, m1
+ // m3 = InitializeIndirection[p] : &r2
+ // ```
+ // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
+ // `LoadOperand`'s overlap being exact.
+ instrTo.(InitializeIndirectionInstruction).getAnOperand() = opFrom
+ or
+ modeledTaintStep(opFrom, instrTo)
+}
+
+/** Holds if taint may propagate from `source` to `sink` in zero or more local steps. */
+predicate localTaint(DataFlow::Node source, DataFlow::Node sink) {
+  // "Local" means intra-procedural; this is the reflexive, transitive closure of `localTaintStep`.
+  localTaintStep*(source, sink)
+}
+
+/** Holds if taint can flow from `i1` to `i2` in zero or more local (intra-procedural) steps. */
+predicate localInstructionTaint(Instruction i1, Instruction i2) {
+  exists(DataFlow::Node source, DataFlow::Node sink | localTaint(source, sink) |
+    source = DataFlow::instructionNode(i1) and
+    sink = DataFlow::instructionNode(i2)
+  )
+}
+
+/** Holds if taint can flow from `e1` to `e2` in zero or more local (intra-procedural) steps. */
+predicate localExprTaint(Expr e1, Expr e2) {
+  exists(DataFlow::Node source, DataFlow::Node sink | localTaint(source, sink) |
+    source = DataFlow::exprNode(e1) and
+    sink = DataFlow::exprNode(e2)
+  )
+}
+
+/**
+ * Holds if the additional step from `src` to `sink` should be included in all
+ * global taint flow configurations. These are exactly the `localAdditionalTaintStep`s.
+ */
+predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
+ localAdditionalTaintStep(src, sink)
+}
+
+/**
+ * Holds if default `TaintTracking::Configuration`s should allow implicit reads
+ * of `c` at sinks and inputs to additional taint steps. This never holds here.
+ */
+bindingset[node]
+predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
+
+/**
+ * Holds if `node` should be a sanitizer in all global taint flow configurations
+ * but not in local taint. No node is a default sanitizer here.
+ */
+predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
+
+/**
+ * Holds if taint can flow from `nodeIn` to `nodeOut` through a call to a
+ * modeled function.
+ */
+predicate modeledTaintStep(Operand nodeIn, Instruction nodeOut) {
+ exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
+ (
+ nodeIn = callInput(call, modelIn)
+ or
+ exists(int n |
+ modelIn.isParameterDerefOrQualifierObject(n) and
+ if n = -1
+ then nodeIn = callInput(call, any(InQualifierObject inQualifier))
+ else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
+ )
+ ) and
+ nodeOut = callOutput(call, modelOut) and
+ call.getStaticCallTarget() = func and
+ func.hasTaintFlow(modelIn, modelOut)
+ )
+ or
+ // Taint flow from one argument to another and data flow from an argument to a
+ // return value. This happens in functions like `strcat` and `memcpy`. We
+ // could model this flow in two separate steps, but that would add reverse
+ // flow from the write side-effect to the call instruction, which may not be
+ // desirable.
+ exists(
+ CallInstruction call, Function func, FunctionInput modelIn, OutParameterDeref modelMidOut,
+ int indexMid, InParameter modelMidIn, OutReturnValue modelOut
+ |
+ nodeIn = callInput(call, modelIn) and
+ nodeOut = callOutput(call, modelOut) and
+ call.getStaticCallTarget() = func and
+ func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
+ func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
+ modelMidOut.isParameterDeref(indexMid) and
+ modelMidIn.isParameter(indexMid)
+ )
+ or
+ // Taint flow from a pointer argument to an output, when the model specifies flow from the deref
+ // to that output, but the deref is not modeled in the IR for the caller.
+ exists(
+ CallInstruction call, ReadSideEffectInstruction read, Function func, FunctionInput modelIn,
+ FunctionOutput modelOut
+ |
+ read.getSideEffectOperand() = callInput(call, modelIn) and
+ read.getArgumentDef() = nodeIn.getDef() and
+ not read.getSideEffect().isResultModeled() and
+ call.getStaticCallTarget() = func and
+ (
+ func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
+ or
+ func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
+ ) and
+ nodeOut = callOutput(call, modelOut)
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
new file mode 100644
index 00000000000..f4f73b8247c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
@@ -0,0 +1,120 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isSanitizer`.
+ * // Optionally override `isSanitizerIn`.
+ * // Optionally override `isSanitizerOut`.
+ * // Optionally override `isSanitizerGuard`.
+ * // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * Keeping this predicate small makes `hasFlow()` converge faster.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * Keeping this predicate small makes `hasFlow()` converge faster.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node) {
+    defaultTaintSanitizer(node) or
+    this.isSanitizer(node)
+  }
+
+  /** Holds if taint must not propagate into `node`. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
+
+  /** Holds if taint must not propagate out of `node`. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
+
+  /** Holds if taint must not propagate through nodes guarded by `guard`. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { this.isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the analysis must also take into account the additional taint
+   * propagation step from `node1` to `node2`.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    defaultAdditionalTaintStep(node1, node2) or
+    this.isAdditionalTaintStep(node1, node2)
+  }
+
+  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
+    defaultImplicitTaintRead(node, c) and
+    (this.isAdditionalTaintStep(node, _) or this.isSink(node))
+  }
+
+  /**
+   * Holds if, for this configuration, taint may flow from `source` to `sink`.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll
new file mode 100644
index 00000000000..1a1d605bc74
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public
+
+module Private {
+ import semmle.code.cpp.ir.dataflow.DataFlow::DataFlow as DataFlow
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
new file mode 100644
index 00000000000..f4f73b8247c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
@@ -0,0 +1,120 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isSanitizer`.
+ * // Optionally override `isSanitizerIn`.
+ * // Optionally override `isSanitizerOut`.
+ * // Optionally override `isSanitizerGuard`.
+ * // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * Keeping this predicate small makes `hasFlow()` converge faster.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * Keeping this predicate small makes `hasFlow()` converge faster.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node) {
+    defaultTaintSanitizer(node) or
+    this.isSanitizer(node)
+  }
+
+  /** Holds if taint must not propagate into `node`. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
+
+  /** Holds if taint must not propagate out of `node`. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
+
+  /** Holds if taint must not propagate through nodes guarded by `guard`. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { this.isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the analysis must also take into account the additional taint
+   * propagation step from `node1` to `node2`.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    defaultAdditionalTaintStep(node1, node2) or
+    this.isAdditionalTaintStep(node1, node2)
+  }
+
+  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
+    defaultImplicitTaintRead(node, c) and
+    (this.isAdditionalTaintStep(node, _) or this.isSink(node))
+  }
+
+  /**
+   * Holds if, for this configuration, taint may flow from `source` to `sink`.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll
new file mode 100644
index 00000000000..ac0b79d067e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public
+
+module Private {
+ import semmle.code.cpp.ir.dataflow.DataFlow2::DataFlow2 as DataFlow
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll
new file mode 100644
index 00000000000..f4f73b8247c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll
@@ -0,0 +1,120 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isSanitizer`.
+ * // Optionally override `isSanitizerIn`.
+ * // Optionally override `isSanitizerOut`.
+ * // Optionally override `isSanitizerGuard`.
+ * // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * Keeping this predicate small makes `hasFlow()` converge faster.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * Keeping this predicate small makes `hasFlow()` converge faster.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node) {
+    defaultTaintSanitizer(node) or
+    this.isSanitizer(node)
+  }
+
+  /** Holds if taint must not propagate into `node`. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
+
+  /** Holds if taint must not propagate out of `node`. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
+
+  /** Holds if taint must not propagate through nodes guarded by `guard`. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { this.isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the analysis must also take into account the additional taint
+   * propagation step from `node1` to `node2`.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    defaultAdditionalTaintStep(node1, node2) or
+    this.isAdditionalTaintStep(node1, node2)
+  }
+
+  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
+    defaultImplicitTaintRead(node, c) and
+    (this.isAdditionalTaintStep(node, _) or this.isSink(node))
+  }
+
+  /**
+   * Holds if, for this configuration, taint may flow from `source` to `sink`.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll
new file mode 100644
index 00000000000..2a3b69f55cd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public
+
+module Private {
+ import semmle.code.cpp.ir.dataflow.DataFlow3::DataFlow3 as DataFlow
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/EdgeKind.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/EdgeKind.qll
new file mode 100644
index 00000000000..32e36bb6787
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/EdgeKind.qll
@@ -0,0 +1,139 @@
+/**
+ * Provides classes that specify the conditions under which control flows along a given edge.
+ */
+
+private import internal.EdgeKindInternal
+
+private newtype TEdgeKind =
+ TGotoEdge() or // Single successor (including fall-through)
+ TTrueEdge() or // 'true' edge of conditional branch
+ TFalseEdge() or // 'false' edge of conditional branch
+ TExceptionEdge() or // Thrown exception
+ TDefaultEdge() or // 'default' label of switch
+ TCaseEdge(string minValue, string maxValue) {
+ // Case label of switch
+ Language::hasCaseEdge(minValue, maxValue)
+ }
+
+/**
+ * Represents the kind of an edge in the IR control flow graph. Each
+ * `Instruction` or `IRBlock` has at most one successor of any single
+ * `EdgeKind`.
+ */
+abstract class EdgeKind extends TEdgeKind {
+ /** Gets a textual representation of this edge kind. */
+ abstract string toString();
+}
+
+/**
+ * A "goto" edge, representing the unconditional successor of an `Instruction`
+ * or `IRBlock`.
+ */
+class GotoEdge extends EdgeKind, TGotoEdge {
+ final override string toString() { result = "Goto" }
+}
+
+/**
+ * A "true" edge, representing the successor of a conditional branch when the
+ * condition is non-zero.
+ */
+class TrueEdge extends EdgeKind, TTrueEdge {
+ final override string toString() { result = "True" }
+}
+
+/**
+ * A "false" edge, representing the successor of a conditional branch when the
+ * condition is zero.
+ */
+class FalseEdge extends EdgeKind, TFalseEdge {
+ final override string toString() { result = "False" }
+}
+
+/**
+ * An "exception" edge, representing the successor of an instruction when that
+ * instruction's evaluation throws an exception.
+ */
+class ExceptionEdge extends EdgeKind, TExceptionEdge {
+ final override string toString() { result = "Exception" }
+}
+
+/**
+ * A "default" edge, representing the successor of a `Switch` instruction when
+ * none of the case values matches the condition value.
+ */
+class DefaultEdge extends EdgeKind, TDefaultEdge {
+ final override string toString() { result = "Default" }
+}
+
+/**
+ * A "case" edge, representing the successor of a `Switch` instruction when the
+ * condition value matches a corresponding `case` label.
+ */
+class CaseEdge extends EdgeKind, TCaseEdge {
+ string minValue;
+ string maxValue;
+
+ CaseEdge() { this = TCaseEdge(minValue, maxValue) }
+
+ final override string toString() {
+ if minValue = maxValue
+ then result = "Case[" + minValue + "]"
+ else result = "Case[" + minValue + ".." + maxValue + "]"
+ }
+
+ /**
+ * Gets the smallest value of the switch expression for which control will flow along this edge.
+ */
+ final string getMinValue() { result = minValue }
+
+ /**
+ * Gets the largest value of the switch expression for which control will flow along this edge.
+ */
+ final string getMaxValue() { result = maxValue }
+}
+
+/**
+ * Predicates to access the single instance of each `EdgeKind` class.
+ */
+module EdgeKind {
+ /**
+ * Gets the single instance of the `GotoEdge` class.
+ */
+ GotoEdge gotoEdge() { result = TGotoEdge() }
+
+ /**
+ * Gets the single instance of the `TrueEdge` class.
+ */
+ TrueEdge trueEdge() { result = TTrueEdge() }
+
+ /**
+ * Gets the single instance of the `FalseEdge` class.
+ */
+ FalseEdge falseEdge() { result = TFalseEdge() }
+
+ /**
+ * Gets the single instance of the `ExceptionEdge` class.
+ */
+ ExceptionEdge exceptionEdge() { result = TExceptionEdge() }
+
+ /**
+ * Gets the single instance of the `DefaultEdge` class.
+ */
+ DefaultEdge defaultEdge() { result = TDefaultEdge() }
+
+ /**
+ * Gets the `CaseEdge` representing a `case` label with the specified lower and upper bounds.
+ * For example:
+ * ```
+ * switch (x) {
+ * case 1: // Edge kind is `caseEdge("1", "1")`
+ * return x;
+ * case 2...8: // Edge kind is `caseEdge("2", "8")`
+ * return x - 1;
+ * default: // Edge kind is `defaultEdge()`
+ * return 0;
+ * }
+ * ```
+ */
+ CaseEdge caseEdge(string minValue, string maxValue) { result = TCaseEdge(minValue, maxValue) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/IRConfiguration.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/IRConfiguration.qll
new file mode 100644
index 00000000000..37ac2fccdd9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/IRConfiguration.qll
@@ -0,0 +1,45 @@
+/**
+ * Module used to configure the IR generation process.
+ */
+
+private import internal.IRConfigurationInternal
+
+private newtype TIRConfiguration = MkIRConfiguration()
+
+/**
+ * The query can extend this class to control which functions have IR generated for them.
+ */
+class IRConfiguration extends TIRConfiguration {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "IRConfiguration" }
+
+ /**
+ * Holds if IR should be created for function `func`. By default, holds for all functions.
+ */
+ predicate shouldCreateIRForFunction(Language::Function func) { any() }
+
+ /**
+ * Holds if the strings used as part of an IR dump should be generated for function `func`.
+ *
+ * This predicate is overridden in `PrintIR.qll` to avoid the expense of generating a large number
+ * of debug strings for IR that will not be dumped. We still generate the actual IR for these
+ * functions, however, to preserve the results of any interprocedural analysis.
+ */
+ predicate shouldEvaluateDebugStringsForFunction(Language::Function func) { any() }
+}
+
+private newtype TIREscapeAnalysisConfiguration = MkIREscapeAnalysisConfiguration()
+
+/**
+ * The query can extend this class to control what escape analysis is used when generating SSA.
+ */
+class IREscapeAnalysisConfiguration extends TIREscapeAnalysisConfiguration {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "IREscapeAnalysisConfiguration" }
+
+ /**
+ * Holds if the escape analysis done by SSA construction should be sound. By default, the SSA is
+ * built assuming that no variable's address ever escapes.
+ */
+ predicate useSoundEscapeAnalysis() { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/IRType.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/IRType.qll
new file mode 100644
index 00000000000..e0bccafae6b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/IRType.qll
@@ -0,0 +1,349 @@
+/**
+ * Minimal, language-neutral type system for the IR.
+ */
+
+private import internal.IRTypeInternal
+
+cached
+private newtype TIRType =
+ TIRVoidType() or
+ TIRUnknownType() or
+ TIRErrorType() { Language::hasErrorType() } or
+ TIRBooleanType(int byteSize) { Language::hasBooleanType(byteSize) } or
+ TIRSignedIntegerType(int byteSize) { Language::hasSignedIntegerType(byteSize) } or
+ TIRUnsignedIntegerType(int byteSize) { Language::hasUnsignedIntegerType(byteSize) } or
+ TIRFloatingPointType(int byteSize, int base, Language::TypeDomain domain) {
+ Language::hasFloatingPointType(byteSize, base, domain)
+ } or
+ TIRAddressType(int byteSize) { Language::hasAddressType(byteSize) } or
+ TIRFunctionAddressType(int byteSize) { Language::hasFunctionAddressType(byteSize) } or
+ TIROpaqueType(Language::OpaqueTypeTag tag, int byteSize) {
+ Language::hasOpaqueType(tag, byteSize)
+ }
+
+/**
+ * The language-neutral type of an IR `Instruction`, `Operand`, or `IRVariable`.
+ * The interface to `IRType` and its subclasses is the same across all languages for which the IR
+ * is supported, so analyses that expect to be used for multiple languages should generally use
+ * `IRType` rather than a language-specific type.
+ *
+ * Many types from the language-specific type system will map to a single canonical `IRType`. Two
+ * types that map to the same `IRType` are considered equivalent by the IR. As an example, in C++,
+ * all pointer types map to the same instance of `IRAddressType`.
+ */
+class IRType extends TIRType {
+ /** Gets a textual representation of this type. */
+ string toString() { none() }
+
+ /**
+ * Gets a string that uniquely identifies this `IRType`. This string is often the same as the
+ * result of `IRType.toString()`, but for some types it may be more verbose to ensure uniqueness.
+ */
+ string getIdentityString() { result = this.toString() }
+
+ /**
+ * Gets the size of the type, in bytes, if known.
+ *
+ * This will hold for all `IRType` objects except `IRUnknownType`.
+ */
+ // This predicate is overridden with `pragma[noinline]` in every leaf subclass.
+ // This allows callers to ask for things like _the_ floating-point type of
+ // size 4 without getting a join that first finds all types of size 4 and
+ // _then_ restricts them to floating-point types.
+ int getByteSize() { none() }
+
+ /**
+ * Gets a single instance of `LanguageType` that maps to this `IRType`.
+ */
+ Language::LanguageType getCanonicalLanguageType() { none() }
+}
+
+/**
+ * An unknown type. Generally used to represent results and operands that access an unknown set of
+ * memory locations, such as the side effects of a function call.
+ */
+class IRUnknownType extends IRType, TIRUnknownType {
+ final override string toString() { result = "unknown" }
+
+ final override int getByteSize() { none() }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalUnknownType()
+ }
+}
+
+/**
+ * A void type, which has no values. Used to represent the result type of an instruction that does
+ * not produce a result.
+ */
+class IRVoidType extends IRType, TIRVoidType {
+ final override string toString() { result = "void" }
+
+ final override int getByteSize() { result = 0 }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalVoidType()
+ }
+}
+
+/**
+ * An error type. Used when an error in the source code prevents the extractor from determining the
+ * proper type.
+ */
+class IRErrorType extends IRType, TIRErrorType {
+ final override string toString() { result = "error" }
+
+ final override int getByteSize() { result = 0 }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalErrorType()
+ }
+}
+
+private class IRSizedType extends IRType {
+ int byteSize;
+
+ IRSizedType() {
+ this = TIRBooleanType(byteSize) or
+ this = TIRSignedIntegerType(byteSize) or
+ this = TIRUnsignedIntegerType(byteSize) or
+ this = TIRFloatingPointType(byteSize, _, _) or
+ this = TIRAddressType(byteSize) or
+ this = TIRFunctionAddressType(byteSize) or
+ this = TIROpaqueType(_, byteSize)
+ }
+ // Don't override `getByteSize()` here. The optimizer seems to generate better code when this is
+ // overridden only in the leaf classes.
+}
+
+/**
+ * A Boolean type, which can hold the values `true` (non-zero) or `false` (zero).
+ */
+class IRBooleanType extends IRSizedType, TIRBooleanType {
+ final override string toString() { result = "bool" + byteSize.toString() }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalBooleanType(byteSize)
+ }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+}
+
+/**
+ * A numeric type. This includes `IRSignedIntegerType`, `IRUnsignedIntegerType`, and
+ * `IRFloatingPointType`.
+ */
+class IRNumericType extends IRSizedType {
+ IRNumericType() {
+ this = TIRSignedIntegerType(byteSize) or
+ this = TIRUnsignedIntegerType(byteSize) or
+ this = TIRFloatingPointType(byteSize, _, _)
+ }
+ // Don't override `getByteSize()` here. The optimizer seems to generate better code when this is
+ // overridden only in the leaf classes.
+}
+
+/**
+ * An integer type. This includes `IRSignedIntegerType` and `IRUnsignedIntegerType`.
+ */
+class IRIntegerType extends IRNumericType {
+ IRIntegerType() {
+ this = TIRSignedIntegerType(byteSize) or
+ this = TIRUnsignedIntegerType(byteSize)
+ }
+
+ /** Holds if this integer type is signed. */
+ predicate isSigned() { none() }
+
+ /** Holds if this integer type is unsigned. */
+ predicate isUnsigned() { none() }
+ // Don't override `getByteSize()` here. The optimizer seems to generate better code when this is
+ // overridden only in the leaf classes.
+}
+
+/**
+ * A signed two's-complement integer. Also used to represent enums whose underlying type is a signed
+ * integer, as well as character types whose representation is signed.
+ */
+class IRSignedIntegerType extends IRIntegerType, TIRSignedIntegerType {
+ final override string toString() { result = "int" + byteSize.toString() }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalSignedIntegerType(byteSize)
+ }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+
+ override predicate isSigned() { any() }
+}
+
+/**
+ * An unsigned two's-complement integer. Also used to represent enums whose underlying type is an
+ * unsigned integer, as well as character types whose representation is unsigned.
+ */
+class IRUnsignedIntegerType extends IRIntegerType, TIRUnsignedIntegerType {
+ final override string toString() { result = "uint" + byteSize.toString() }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalUnsignedIntegerType(byteSize)
+ }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+
+ override predicate isUnsigned() { any() }
+}
+
+/**
+ * A floating-point type.
+ */
+class IRFloatingPointType extends IRNumericType, TIRFloatingPointType {
+ final private int base;
+ final private Language::TypeDomain domain;
+
+ IRFloatingPointType() { this = TIRFloatingPointType(_, base, domain) }
+
+ final override string toString() {
+ result = this.getDomainPrefix() + this.getBaseString() + byteSize.toString()
+ }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalFloatingPointType(byteSize, base, domain)
+ }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+
+ /** Gets the numeric base of the type. Can be either 2 (binary) or 10 (decimal). */
+ final int getBase() { result = base }
+
+ /**
+ * Gets the type domain of the type. Can be `RealDomain`, `ComplexDomain`, or `ImaginaryDomain`.
+ */
+ final Language::TypeDomain getDomain() { result = domain }
+
+ private string getBaseString() {
+ base = 2 and result = "float"
+ or
+ base = 10 and result = "decimal"
+ }
+
+ private string getDomainPrefix() {
+ domain instanceof Language::RealDomain and result = ""
+ or
+ domain instanceof Language::ComplexDomain and result = "c"
+ or
+ domain instanceof Language::ImaginaryDomain and result = "i"
+ }
+}
+
+/**
+ * An address type, representing the memory address of data. Used to represent pointers, references,
+ * and lvalues, including those that are garbage collected.
+ *
+ * The address of a function is represented by the separate `IRFunctionAddressType`.
+ */
+class IRAddressType extends IRSizedType, TIRAddressType {
+ final override string toString() { result = "addr" + byteSize.toString() }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalAddressType(byteSize)
+ }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+}
+
+/**
+ * An address type, representing the memory address of code. Used to represent function pointers,
+ * function references, and the target of a direct function call.
+ */
+class IRFunctionAddressType extends IRSizedType, TIRFunctionAddressType {
+ final override string toString() { result = "func" + byteSize.toString() }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalFunctionAddressType(byteSize)
+ }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+}
+
+/**
+ * A type with known size that does not fit any of the other kinds of type. Used to represent
+ * classes, structs, unions, fixed-size arrays, pointers-to-member, and more.
+ */
+class IROpaqueType extends IRSizedType, TIROpaqueType {
+ Language::OpaqueTypeTag tag;
+
+ IROpaqueType() { this = TIROpaqueType(tag, byteSize) }
+
+ final override string toString() {
+ result = "opaque" + byteSize.toString() + "{" + tag.toString() + "}"
+ }
+
+ final override string getIdentityString() {
+ result = "opaque" + byteSize.toString() + "{" + Language::getOpaqueTagIdentityString(tag) + "}"
+ }
+
+ final override Language::LanguageType getCanonicalLanguageType() {
+ result = Language::getCanonicalOpaqueType(tag, byteSize)
+ }
+
+ /**
+ * Gets the "tag" that differentiates this type from other incompatible opaque types that have the
+ * same size.
+ */
+ final Language::OpaqueTypeTag getTag() { result = tag }
+
+ pragma[noinline]
+ final override int getByteSize() { result = byteSize }
+}
+
+/**
+ * INTERNAL: Do not use.
+ * Query predicates used to check invariants that should hold for all `IRType` objects. To run all
+ * consistency queries for the IR, including the ones below, run
+ * "semmle/code/cpp/IR/IRConsistency.ql".
+ */
+module IRTypeConsistency {
+ /**
+ * Holds if the type has no result for `IRType.getCanonicalLanguageType()`.
+ */
+ query predicate missingCanonicalLanguageType(IRType type, string message) {
+ not exists(type.getCanonicalLanguageType()) and
+ message = "Type does not have a canonical `LanguageType`"
+ }
+
+ /**
+ * Holds if the type has more than one result for `IRType.getCanonicalLanguageType()`.
+ */
+ query predicate multipleCanonicalLanguageTypes(IRType type, string message) {
+ strictcount(type.getCanonicalLanguageType()) > 1 and
+ message =
+ "Type has multiple canonical `LanguageType`s: " +
+ concat(type.getCanonicalLanguageType().toString(), ", ")
+ }
+
+ /**
+ * Holds if the type has no result for `LanguageType.getIRType()`.
+ */
+ query predicate missingIRType(Language::LanguageType type, string message) {
+ not exists(type.getIRType()) and
+ message = "`LanguageType` does not have a corresponding `IRType`."
+ }
+
+ /**
+ * Holds if the type has more than one result for `LanguageType.getIRType()`.
+ */
+ query predicate multipleIRTypes(Language::LanguageType type, string message) {
+ strictcount(type.getIRType()) > 1 and
+ message =
+ "`LanguageType` " + type + " has multiple `IRType`s: " +
+ concat(type.getIRType().toString(), ", ")
+ }
+
+ import Language::LanguageTypeConsistency
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll
new file mode 100644
index 00000000000..5e11a310e2f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll
@@ -0,0 +1,101 @@
+/**
+ * Provides classes that describe how a particular `Instruction` or its operands access memory.
+ */
+
+private import IRConfiguration
+
+private newtype TMemoryAccessKind =
+ TIndirectMemoryAccess() or
+ TBufferMemoryAccess() or
+ TEntireAllocationMemoryAccess() or
+ TEscapedMemoryAccess() or
+ TNonLocalMemoryAccess() or
+ TPhiMemoryAccess() or
+ TUnmodeledMemoryAccess() or
+ TChiTotalMemoryAccess() or
+ TChiPartialMemoryAccess()
+
+/**
+ * Describes the set of memory locations accessed by a memory operand or
+ * memory result.
+ */
+class MemoryAccessKind extends TMemoryAccessKind {
+ /** Gets a textual representation of this access kind. */
+ string toString() { none() }
+
+ /**
+ * Holds if the operand or result accesses memory pointed to by the `AddressOperand` on the
+ * same instruction.
+ */
+ predicate usesAddressOperand() { none() }
+}
+
+/**
+ * The operand or result accesses memory at the address specified by the `AddressOperand` on the
+ * same instruction.
+ */
+class IndirectMemoryAccess extends MemoryAccessKind, TIndirectMemoryAccess {
+ override string toString() { result = "indirect" }
+
+ final override predicate usesAddressOperand() { any() }
+}
+
+/**
+ * The operand or result accesses memory starting at the address specified by the `AddressOperand`
+ * on the same instruction, accessing a number of consecutive elements given by the
+ * `BufferSizeOperand`.
+ */
+class BufferMemoryAccess extends MemoryAccessKind, TBufferMemoryAccess {
+ override string toString() { result = "buffer" }
+
+ final override predicate usesAddressOperand() { any() }
+}
+
+/**
+ * The operand or result accesses all memory in the contiguous allocation that contains the address
+ * specified by the `AddressOperand` on the same instruction.
+ */
+class EntireAllocationMemoryAccess extends MemoryAccessKind, TEntireAllocationMemoryAccess {
+ override string toString() { result = "alloc" }
+
+ final override predicate usesAddressOperand() { any() }
+}
+
+/**
+ * The operand or result accesses all memory whose address has escaped.
+ */
+class EscapedMemoryAccess extends MemoryAccessKind, TEscapedMemoryAccess {
+ override string toString() { result = "escaped" }
+}
+
+/**
+ * The operand or result accesses all memory whose address has escaped, other than data on the stack
+ * frame of the current function.
+ */
+class NonLocalMemoryAccess extends MemoryAccessKind, TNonLocalMemoryAccess {
+ override string toString() { result = "nonlocal" }
+}
+
+/**
+ * The operand is a Phi operand, which accesses the same memory as its
+ * definition.
+ */
+class PhiMemoryAccess extends MemoryAccessKind, TPhiMemoryAccess {
+ override string toString() { result = "phi" }
+}
+
+/**
+ * The operand is a ChiTotal operand, which accesses the same memory as its
+ * definition.
+ */
+class ChiTotalMemoryAccess extends MemoryAccessKind, TChiTotalMemoryAccess {
+ override string toString() { result = "chi(total)" }
+}
+
+/**
+ * The operand is a ChiPartial operand, which accesses the same memory as its
+ * definition.
+ */
+class ChiPartialMemoryAccess extends MemoryAccessKind, TChiPartialMemoryAccess {
+ override string toString() { result = "chi(partial)" }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/Opcode.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/Opcode.qll
new file mode 100644
index 00000000000..c4134d240ab
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/Opcode.qll
@@ -0,0 +1,1239 @@
+/**
+ * Provides `Opcode`s that specify the operation performed by an `Instruction`, as well as metadata
+ * about those opcodes, such as operand kinds and memory accesses.
+ */
+
+private import internal.OpcodeImports as Imports
+private import internal.OperandTag
+import Imports::MemoryAccessKind
+
+private newtype TOpcode =
+ TNoOp() or
+ TUninitialized() or
+ TError() or
+ TInitializeParameter() or
+ TInitializeIndirection() or
+ TInitializeThis() or
+ TEnterFunction() or
+ TExitFunction() or
+ TReturnValue() or
+ TReturnVoid() or
+ TReturnIndirection() or
+ TCopyValue() or
+ TLoad() or
+ TStore() or
+ TAdd() or
+ TSub() or
+ TMul() or
+ TDiv() or
+ TRem() or
+ TNegate() or
+ TShiftLeft() or
+ TShiftRight() or
+ TBitAnd() or
+ TBitOr() or
+ TBitXor() or
+ TBitComplement() or
+ TLogicalNot() or
+ TCompareEQ() or
+ TCompareNE() or
+ TCompareLT() or
+ TCompareGT() or
+ TCompareLE() or
+ TCompareGE() or
+ TPointerAdd() or
+ TPointerSub() or
+ TPointerDiff() or
+ TConvert() or
+ TConvertToNonVirtualBase() or
+ TConvertToVirtualBase() or
+ TConvertToDerived() or
+ TCheckedConvertOrNull() or
+ TCheckedConvertOrThrow() or
+ TCompleteObjectAddress() or
+ TVariableAddress() or
+ TFieldAddress() or
+ TFunctionAddress() or
+ TElementsAddress() or
+ TConstant() or
+ TStringConstant() or
+ TConditionalBranch() or
+ TSwitch() or
+ TCall() or
+ TCatchByType() or
+ TCatchAny() or
+ TThrowValue() or
+ TReThrow() or
+ TUnwind() or
+ TAliasedDefinition() or
+ TInitializeNonLocal() or
+ TAliasedUse() or
+ TPhi() or
+ TBuiltIn() or
+ TVarArgsStart() or
+ TVarArgsEnd() or
+ TVarArg() or
+ TNextVarArg() or
+ TCallSideEffect() or
+ TCallReadSideEffect() or
+ TIndirectReadSideEffect() or
+ TIndirectMustWriteSideEffect() or
+ TIndirectMayWriteSideEffect() or
+ TBufferReadSideEffect() or
+ TBufferMustWriteSideEffect() or
+ TBufferMayWriteSideEffect() or
+ TSizedBufferReadSideEffect() or
+ TSizedBufferMustWriteSideEffect() or
+ TSizedBufferMayWriteSideEffect() or
+ TInitializeDynamicAllocation() or
+ TChi() or
+ TInlineAsm() or
+ TUnreached() or
+ TNewObj()
+
+/**
+ * An opcode that specifies the operation performed by an `Instruction`.
+ */
+class Opcode extends TOpcode {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "UnknownOpcode" }
+
+ /**
+ * Gets the kind of memory access performed by this instruction's result.
+ * Holds only for opcodes with a memory result.
+ */
+ MemoryAccessKind getWriteMemoryAccess() { none() }
+
+ /**
+ * Gets the kind of memory access performed by this instruction's `MemoryOperand`. Holds only for
+ * opcodes that read from memory.
+ */
+ MemoryAccessKind getReadMemoryAccess() { none() }
+
+ /**
+ * Holds if the instruction has an `AddressOperand`.
+ */
+ predicate hasAddressOperand() { none() }
+
+ /**
+ * Holds if the instruction has a `BufferSizeOperand`.
+ */
+ predicate hasBufferSizeOperand() { none() }
+
+ /**
+ * Holds if the instruction's write memory access is a `may` write, as opposed to a `must` write.
+ */
+ predicate hasMayWriteMemoryAccess() { none() }
+
+ /**
+ * Holds if the instruction's read memory access is a `may` read, as opposed to a `must` read.
+ */
+ predicate hasMayReadMemoryAccess() { none() }
+
+ /**
+ * Holds if the instruction must have an operand with the specified `OperandTag`.
+ */
+ final predicate hasOperand(OperandTag tag) {
+ this.hasOperandInternal(tag)
+ or
+ this.hasAddressOperand() and tag instanceof AddressOperandTag
+ or
+ this.hasBufferSizeOperand() and tag instanceof BufferSizeOperandTag
+ }
+
+ /**
+ * Holds if the instruction must have an operand with the specified `OperandTag`, ignoring
+ * `AddressOperandTag` and `BufferSizeOperandTag`.
+ */
+ predicate hasOperandInternal(OperandTag tag) { none() }
+}
+
+/**
+ * The `Opcode` for a `UnaryInstruction`.
+ *
+ * See the `UnaryInstruction` documentation for more details.
+ */
+abstract class UnaryOpcode extends Opcode {
+ final override predicate hasOperandInternal(OperandTag tag) { tag instanceof UnaryOperandTag }
+}
+
+/**
+ * The `Opcode` for a `BinaryInstruction`.
+ *
+ * See the `BinaryInstruction` documentation for more details.
+ */
+abstract class BinaryOpcode extends Opcode {
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof LeftOperandTag or
+ tag instanceof RightOperandTag
+ }
+}
+
+/**
+ * The `Opcode` for a `PointerArithmeticInstruction`.
+ *
+ * See the `PointerArithmeticInstruction` documentation for more details.
+ */
+abstract class PointerArithmeticOpcode extends BinaryOpcode { }
+
+/**
+ * The `Opcode` for a `PointerOffsetInstruction`.
+ *
+ * See the `PointerOffsetInstruction` documentation for more details.
+ */
+abstract class PointerOffsetOpcode extends PointerArithmeticOpcode { }
+
+/**
+ * The `Opcode` for an `ArithmeticInstruction`.
+ *
+ * See the `ArithmeticInstruction` documentation for more details.
+ */
+abstract class ArithmeticOpcode extends Opcode { }
+
+/**
+ * The `Opcode` for a `BinaryArithmeticInstruction`.
+ *
+ * See the `BinaryArithmeticInstruction` documentation for more details.
+ */
+abstract class BinaryArithmeticOpcode extends BinaryOpcode, ArithmeticOpcode { }
+
+/**
+ * The `Opcode` for a `UnaryArithmeticInstruction`.
+ *
+ * See the `UnaryArithmeticInstruction` documentation for more details.
+ */
+abstract class UnaryArithmeticOpcode extends UnaryOpcode, ArithmeticOpcode { }
+
+/**
+ * The `Opcode` for a `BitwiseInstruction`.
+ *
+ * See the `BitwiseInstruction` documentation for more details.
+ */
+abstract class BitwiseOpcode extends Opcode { }
+
+/**
+ * The `Opcode` for a `BinaryBitwiseInstruction`.
+ *
+ * See the `BinaryBitwiseInstruction` documentation for more details.
+ */
+abstract class BinaryBitwiseOpcode extends BinaryOpcode, BitwiseOpcode { }
+
+/**
+ * The `Opcode` for a `UnaryBitwiseInstruction`.
+ *
+ * See the `UnaryBitwiseInstruction` documentation for more details.
+ */
+abstract class UnaryBitwiseOpcode extends UnaryOpcode, BitwiseOpcode { }
+
+/**
+ * The `Opcode` for a `CompareInstruction`.
+ *
+ * See the `CompareInstruction` documentation for more details.
+ */
+abstract class CompareOpcode extends BinaryOpcode { }
+
+/**
+ * The `Opcode` for a `RelationalInstruction`.
+ *
+ * See the `RelationalInstruction` documentation for more details.
+ */
+abstract class RelationalOpcode extends CompareOpcode { }
+
+/**
+ * The `Opcode` for a `CopyInstruction`.
+ *
+ * See the `CopyInstruction` documentation for more details.
+ */
+abstract class CopyOpcode extends Opcode { }
+
+/**
+ * The `Opcode` for a `ConvertToBaseInstruction`.
+ *
+ * See the `ConvertToBaseInstruction` documentation for more details.
+ */
+abstract class ConvertToBaseOpcode extends UnaryOpcode { }
+
+/**
+ * The `Opcode` for a `ReturnInstruction`.
+ *
+ * See the `ReturnInstruction` documentation for more details.
+ */
+abstract class ReturnOpcode extends Opcode { }
+
+/**
+ * The `Opcode` for a `ThrowInstruction`.
+ *
+ * See the `ThrowInstruction` documentation for more details.
+ */
+abstract class ThrowOpcode extends Opcode { }
+
+/**
+ * The `Opcode` for a `CatchInstruction`.
+ *
+ * See the `CatchInstruction` documentation for more details.
+ */
+abstract class CatchOpcode extends Opcode { }
+
+abstract private class OpcodeWithCondition extends Opcode {
+ final override predicate hasOperandInternal(OperandTag tag) { tag instanceof ConditionOperandTag }
+}
+
+/**
+ * The `Opcode` for a `BuiltInOperationInstruction`.
+ *
+ * See the `BuiltInOperationInstruction` documentation for more details.
+ */
+abstract class BuiltInOperationOpcode extends Opcode { }
+
+/**
+ * The `Opcode` for a `SideEffectInstruction`.
+ *
+ * See the `SideEffectInstruction` documentation for more details.
+ */
+abstract class SideEffectOpcode extends Opcode { }
+
+/**
+ * An opcode that accesses a single memory location via an `AddressOperand`.
+ */
+abstract class IndirectMemoryAccessOpcode extends Opcode {
+ final override predicate hasAddressOperand() { any() }
+}
+
+/**
+ * An opcode that writes to a single memory location via an `AddressOperand`.
+ */
+abstract class IndirectWriteOpcode extends IndirectMemoryAccessOpcode {
+ final override MemoryAccessKind getWriteMemoryAccess() { result instanceof IndirectMemoryAccess }
+}
+
+/**
+ * An opcode that reads from a single memory location via an `AddressOperand`.
+ */
+abstract class IndirectReadOpcode extends IndirectMemoryAccessOpcode {
+ final override MemoryAccessKind getReadMemoryAccess() { result instanceof IndirectMemoryAccess }
+}
+
+/**
+ * An opcode that accesses a memory buffer.
+ */
+abstract class BufferAccessOpcode extends Opcode {
+ final override predicate hasAddressOperand() { any() }
+}
+
+/**
+ * An opcode that accesses a memory buffer of unknown size.
+ */
+abstract class UnsizedBufferAccessOpcode extends BufferAccessOpcode { }
+
+/**
+ * An opcode that writes to a memory buffer of unknown size.
+ */
+abstract class UnsizedBufferWriteOpcode extends UnsizedBufferAccessOpcode {
+ final override MemoryAccessKind getWriteMemoryAccess() { result instanceof BufferMemoryAccess }
+}
+
+/**
+ * An opcode that reads from a memory buffer of unknown size.
+ */
+abstract class UnsizedBufferReadOpcode extends UnsizedBufferAccessOpcode {
+ final override MemoryAccessKind getReadMemoryAccess() { result instanceof BufferMemoryAccess }
+}
+
+/**
+ * An opcode that accesses an entire memory allocation.
+ */
+abstract class EntireAllocationAccessOpcode extends Opcode {
+ final override predicate hasAddressOperand() { any() }
+}
+
+/**
+ * An opcode that writes to an entire memory allocation.
+ */
+abstract class EntireAllocationWriteOpcode extends EntireAllocationAccessOpcode {
+ final override MemoryAccessKind getWriteMemoryAccess() {
+ result instanceof EntireAllocationMemoryAccess
+ }
+}
+
+/**
+ * An opcode that reads from an entire memory allocation.
+ */
+abstract class EntireAllocationReadOpcode extends EntireAllocationAccessOpcode {
+ final override MemoryAccessKind getReadMemoryAccess() {
+ result instanceof EntireAllocationMemoryAccess
+ }
+}
+
+/**
+ * An opcode that accesses a memory buffer whose size is determined by a `BufferSizeOperand`.
+ */
+abstract class SizedBufferAccessOpcode extends BufferAccessOpcode {
+ final override predicate hasBufferSizeOperand() { any() }
+}
+
+/**
+ * An opcode that writes to a memory buffer whose size is determined by a `BufferSizeOperand`.
+ */
+abstract class SizedBufferWriteOpcode extends SizedBufferAccessOpcode {
+ final override MemoryAccessKind getWriteMemoryAccess() {
+ result instanceof BufferMemoryAccess //TODO: SizedBufferMemoryAccess
+ }
+}
+
+/**
+ * An opcode that reads from a memory buffer whose size is determined by a `BufferSizeOperand`.
+ */
+abstract class SizedBufferReadOpcode extends SizedBufferAccessOpcode {
+ final override MemoryAccessKind getReadMemoryAccess() {
+ result instanceof BufferMemoryAccess //TODO: SizedBufferMemoryAccess
+ }
+}
+
+/**
+ * An opcode that might write to any escaped memory location.
+ */
+abstract class EscapedWriteOpcode extends Opcode {
+ final override MemoryAccessKind getWriteMemoryAccess() { result instanceof EscapedMemoryAccess }
+}
+
+/**
+ * An opcode that might read from any escaped memory location.
+ */
+abstract class EscapedReadOpcode extends Opcode {
+ final override MemoryAccessKind getReadMemoryAccess() { result instanceof EscapedMemoryAccess }
+}
+
+/**
+ * An opcode whose write memory access is a `may` write, as opposed to a `must` write.
+ */
+abstract class MayWriteOpcode extends Opcode {
+ final override predicate hasMayWriteMemoryAccess() { any() }
+}
+
+/**
+ * An opcode whose read memory access is a `may` read, as opposed to a `must` read.
+ */
+abstract class MayReadOpcode extends Opcode {
+ final override predicate hasMayReadMemoryAccess() { any() }
+}
+
+/**
+ * An opcode that reads a value from memory.
+ */
+abstract class OpcodeWithLoad extends IndirectReadOpcode {
+ final override predicate hasOperandInternal(OperandTag tag) { tag instanceof LoadOperandTag }
+}
+
+/**
+ * The `Opcode` for a `ReadSideEffectInstruction`.
+ *
+ * See the `ReadSideEffectInstruction` documentation for more details.
+ */
+abstract class ReadSideEffectOpcode extends SideEffectOpcode {
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof SideEffectOperandTag
+ }
+}
+
+/**
+ * The `Opcode` for a `WriteSideEffectInstruction`.
+ *
+ * See the `WriteSideEffectInstruction` documentation for more details.
+ */
+abstract class WriteSideEffectOpcode extends SideEffectOpcode { }
+
+/**
+ * Provides `Opcode`s that specify the operation performed by an `Instruction`.
+ */
+module Opcode {
+ /**
+ * The `Opcode` for a `NoOpInstruction`.
+ *
+ * See the `NoOpInstruction` documentation for more details.
+ */
+ class NoOp extends Opcode, TNoOp {
+ final override string toString() { result = "NoOp" }
+ }
+
+ /**
+ * The `Opcode` for an `UninitializedInstruction`.
+ *
+ * See the `UninitializedInstruction` documentation for more details.
+ */
+ class Uninitialized extends IndirectWriteOpcode, TUninitialized {
+ final override string toString() { result = "Uninitialized" }
+ }
+
+ /**
+ * The `Opcode` for an `ErrorInstruction`.
+ *
+ * See the `ErrorInstruction` documentation for more details.
+ */
+ class Error extends Opcode, TError {
+ final override string toString() { result = "Error" }
+ }
+
+ /**
+ * The `Opcode` for an `InitializeParameterInstruction`.
+ *
+ * See the `InitializeParameterInstruction` documentation for more details.
+ */
+ class InitializeParameter extends IndirectWriteOpcode, TInitializeParameter {
+ final override string toString() { result = "InitializeParameter" }
+ }
+
+ /**
+ * The `Opcode` for an `InitializeIndirectionInstruction`.
+ *
+ * See the `InitializeIndirectionInstruction` documentation for more details.
+ */
+ class InitializeIndirection extends EntireAllocationWriteOpcode, TInitializeIndirection {
+ final override string toString() { result = "InitializeIndirection" }
+ }
+
+ /**
+ * The `Opcode` for an `InitializeThisInstruction`.
+ *
+ * See the `InitializeThisInstruction` documentation for more details.
+ */
+ class InitializeThis extends Opcode, TInitializeThis {
+ final override string toString() { result = "InitializeThis" }
+ }
+
+ /**
+ * The `Opcode` for an `EnterFunctionInstruction`.
+ *
+ * See the `EnterFunctionInstruction` documentation for more details.
+ */
+ class EnterFunction extends Opcode, TEnterFunction {
+ final override string toString() { result = "EnterFunction" }
+ }
+
+ /**
+ * The `Opcode` for an `ExitFunctionInstruction`.
+ *
+ * See the `ExitFunctionInstruction` documentation for more details.
+ */
+ class ExitFunction extends Opcode, TExitFunction {
+ final override string toString() { result = "ExitFunction" }
+ }
+
+ /**
+ * The `Opcode` for a `ReturnValueInstruction`.
+ *
+ * See the `ReturnValueInstruction` documentation for more details.
+ */
+ class ReturnValue extends ReturnOpcode, OpcodeWithLoad, TReturnValue {
+ final override string toString() { result = "ReturnValue" }
+ }
+
+ /**
+ * The `Opcode` for a `ReturnVoidInstruction`.
+ *
+ * See the `ReturnVoidInstruction` documentation for more details.
+ */
+ class ReturnVoid extends ReturnOpcode, TReturnVoid {
+ final override string toString() { result = "ReturnVoid" }
+ }
+
+ /**
+ * The `Opcode` for a `ReturnIndirectionInstruction`.
+ *
+ * See the `ReturnIndirectionInstruction` documentation for more details.
+ */
+ class ReturnIndirection extends EntireAllocationReadOpcode, TReturnIndirection {
+ final override string toString() { result = "ReturnIndirection" }
+
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof SideEffectOperandTag
+ }
+ }
+
+ /**
+ * The `Opcode` for a `CopyValueInstruction`.
+ *
+ * See the `CopyValueInstruction` documentation for more details.
+ */
+ class CopyValue extends UnaryOpcode, CopyOpcode, TCopyValue {
+ final override string toString() { result = "CopyValue" }
+ }
+
+ /**
+ * The `Opcode` for a `LoadInstruction`.
+ *
+ * See the `LoadInstruction` documentation for more details.
+ */
+ class Load extends CopyOpcode, OpcodeWithLoad, TLoad {
+ final override string toString() { result = "Load" }
+ }
+
+ /**
+ * The `Opcode` for a `StoreInstruction`.
+ *
+ * See the `StoreInstruction` documentation for more details.
+ */
+ class Store extends CopyOpcode, IndirectWriteOpcode, TStore {
+ final override string toString() { result = "Store" }
+
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof StoreValueOperandTag
+ }
+ }
+
+ /**
+ * The `Opcode` for an `AddInstruction`.
+ *
+ * See the `AddInstruction` documentation for more details.
+ */
+ class Add extends BinaryArithmeticOpcode, TAdd {
+ final override string toString() { result = "Add" }
+ }
+
+ /**
+ * The `Opcode` for a `SubInstruction`.
+ *
+ * See the `SubInstruction` documentation for more details.
+ */
+ class Sub extends BinaryArithmeticOpcode, TSub {
+ final override string toString() { result = "Sub" }
+ }
+
+ /**
+ * The `Opcode` for a `MulInstruction`.
+ *
+ * See the `MulInstruction` documentation for more details.
+ */
+ class Mul extends BinaryArithmeticOpcode, TMul {
+ final override string toString() { result = "Mul" }
+ }
+
+ /**
+ * The `Opcode` for a `DivInstruction`.
+ *
+ * See the `DivInstruction` documentation for more details.
+ */
+ class Div extends BinaryArithmeticOpcode, TDiv {
+ final override string toString() { result = "Div" }
+ }
+
+ /**
+ * The `Opcode` for a `RemInstruction`.
+ *
+ * See the `RemInstruction` documentation for more details.
+ */
+ class Rem extends BinaryArithmeticOpcode, TRem {
+ final override string toString() { result = "Rem" }
+ }
+
+ /**
+ * The `Opcode` for a `NegateInstruction`.
+ *
+ * See the `NegateInstruction` documentation for more details.
+ */
+ class Negate extends UnaryArithmeticOpcode, TNegate {
+ final override string toString() { result = "Negate" }
+ }
+
+ /**
+ * The `Opcode` for a `ShiftLeftInstruction`.
+ *
+ * See the `ShiftLeftInstruction` documentation for more details.
+ */
+ class ShiftLeft extends BinaryBitwiseOpcode, TShiftLeft {
+ final override string toString() { result = "ShiftLeft" }
+ }
+
+ /**
+ * The `Opcode` for a `ShiftRightInstruction`.
+ *
+ * See the `ShiftRightInstruction` documentation for more details.
+ */
+ class ShiftRight extends BinaryBitwiseOpcode, TShiftRight {
+ final override string toString() { result = "ShiftRight" }
+ }
+
+ /**
+ * The `Opcode` for a `BitAndInstruction`.
+ *
+ * See the `BitAndInstruction` documentation for more details.
+ */
+ class BitAnd extends BinaryBitwiseOpcode, TBitAnd {
+ final override string toString() { result = "BitAnd" }
+ }
+
+ /**
+ * The `Opcode` for a `BitOrInstruction`.
+ *
+ * See the `BitOrInstruction` documentation for more details.
+ */
+ class BitOr extends BinaryBitwiseOpcode, TBitOr {
+ final override string toString() { result = "BitOr" }
+ }
+
+ /**
+ * The `Opcode` for a `BitXorInstruction`.
+ *
+ * See the `BitXorInstruction` documentation for more details.
+ */
+ class BitXor extends BinaryBitwiseOpcode, TBitXor {
+ final override string toString() { result = "BitXor" }
+ }
+
+ /**
+ * The `Opcode` for a `BitComplementInstruction`.
+ *
+ * See the `BitComplementInstruction` documentation for more details.
+ */
+ class BitComplement extends UnaryBitwiseOpcode, TBitComplement {
+ final override string toString() { result = "BitComplement" }
+ }
+
+ /**
+ * The `Opcode` for a `LogicalNotInstruction`.
+ *
+ * See the `LogicalNotInstruction` documentation for more details.
+ */
+ class LogicalNot extends UnaryOpcode, TLogicalNot {
+ final override string toString() { result = "LogicalNot" }
+ }
+
+ /**
+ * The `Opcode` for a `CompareEQInstruction`.
+ *
+ * See the `CompareEQInstruction` documentation for more details.
+ */
+ class CompareEQ extends CompareOpcode, TCompareEQ {
+ final override string toString() { result = "CompareEQ" }
+ }
+
+ /**
+ * The `Opcode` for a `CompareNEInstruction`.
+ *
+ * See the `CompareNEInstruction` documentation for more details.
+ */
+ class CompareNE extends CompareOpcode, TCompareNE {
+ final override string toString() { result = "CompareNE" }
+ }
+
+ /**
+ * The `Opcode` for a `CompareLTInstruction`.
+ *
+ * See the `CompareLTInstruction` documentation for more details.
+ */
+ class CompareLT extends RelationalOpcode, TCompareLT {
+ final override string toString() { result = "CompareLT" }
+ }
+
+ /**
+ * The `Opcode` for a `CompareGTInstruction`.
+ *
+ * See the `CompareGTInstruction` documentation for more details.
+ */
+ class CompareGT extends RelationalOpcode, TCompareGT {
+ final override string toString() { result = "CompareGT" }
+ }
+
+ /**
+ * The `Opcode` for a `CompareLEInstruction`.
+ *
+ * See the `CompareLEInstruction` documentation for more details.
+ */
+ class CompareLE extends RelationalOpcode, TCompareLE {
+ final override string toString() { result = "CompareLE" }
+ }
+
+ /**
+ * The `Opcode` for a `CompareGEInstruction`.
+ *
+ * See the `CompareGEInstruction` documentation for more details.
+ */
+ class CompareGE extends RelationalOpcode, TCompareGE {
+ final override string toString() { result = "CompareGE" }
+ }
+
+ /**
+ * The `Opcode` for a `PointerAddInstruction`.
+ *
+ * See the `PointerAddInstruction` documentation for more details.
+ */
+ class PointerAdd extends PointerOffsetOpcode, TPointerAdd {
+ final override string toString() { result = "PointerAdd" }
+ }
+
+ /**
+ * The `Opcode` for a `PointerSubInstruction`.
+ *
+ * See the `PointerSubInstruction` documentation for more details.
+ */
+ class PointerSub extends PointerOffsetOpcode, TPointerSub {
+ final override string toString() { result = "PointerSub" }
+ }
+
+ /**
+ * The `Opcode` for a `PointerDiffInstruction`.
+ *
+ * See the `PointerDiffInstruction` documentation for more details.
+ */
+ class PointerDiff extends PointerArithmeticOpcode, TPointerDiff {
+ final override string toString() { result = "PointerDiff" }
+ }
+
+ /**
+ * The `Opcode` for a `ConvertInstruction`.
+ *
+ * See the `ConvertInstruction` documentation for more details.
+ */
+ class Convert extends UnaryOpcode, TConvert {
+ final override string toString() { result = "Convert" }
+ }
+
+ /**
+ * The `Opcode` for a `ConvertToNonVirtualBaseInstruction`.
+ *
+ * See the `ConvertToNonVirtualBaseInstruction` documentation for more details.
+ */
+ class ConvertToNonVirtualBase extends ConvertToBaseOpcode, TConvertToNonVirtualBase {
+ final override string toString() { result = "ConvertToNonVirtualBase" }
+ }
+
+ /**
+ * The `Opcode` for a `ConvertToVirtualBaseInstruction`.
+ *
+ * See the `ConvertToVirtualBaseInstruction` documentation for more details.
+ */
+ class ConvertToVirtualBase extends ConvertToBaseOpcode, TConvertToVirtualBase {
+ final override string toString() { result = "ConvertToVirtualBase" }
+ }
+
+ /**
+ * The `Opcode` for a `ConvertToDerivedInstruction`.
+ *
+ * See the `ConvertToDerivedInstruction` documentation for more details.
+ */
+ class ConvertToDerived extends UnaryOpcode, TConvertToDerived {
+ final override string toString() { result = "ConvertToDerived" }
+ }
+
+ /**
+ * The `Opcode` for a `CheckedConvertOrNullInstruction`.
+ *
+ * See the `CheckedConvertOrNullInstruction` documentation for more details.
+ */
+ class CheckedConvertOrNull extends UnaryOpcode, TCheckedConvertOrNull {
+ final override string toString() { result = "CheckedConvertOrNull" }
+ }
+
+ /**
+ * The `Opcode` for a `CheckedConvertOrThrowInstruction`.
+ *
+ * See the `CheckedConvertOrThrowInstruction` documentation for more details.
+ */
+ class CheckedConvertOrThrow extends UnaryOpcode, TCheckedConvertOrThrow {
+ final override string toString() { result = "CheckedConvertOrThrow" }
+ }
+
+ /**
+ * The `Opcode` for a `CompleteObjectAddressInstruction`.
+ *
+ * See the `CompleteObjectAddressInstruction` documentation for more details.
+ */
+ class CompleteObjectAddress extends UnaryOpcode, TCompleteObjectAddress {
+ final override string toString() { result = "CompleteObjectAddress" }
+ }
+
+ /**
+ * The `Opcode` for a `VariableAddressInstruction`.
+ *
+ * See the `VariableAddressInstruction` documentation for more details.
+ */
+ class VariableAddress extends Opcode, TVariableAddress {
+ final override string toString() { result = "VariableAddress" }
+ }
+
+ /**
+ * The `Opcode` for a `FieldAddressInstruction`.
+ *
+ * See the `FieldAddressInstruction` documentation for more details.
+ */
+ class FieldAddress extends UnaryOpcode, TFieldAddress {
+ final override string toString() { result = "FieldAddress" }
+ }
+
+ /**
+ * The `Opcode` for an `ElementsAddressInstruction`.
+ *
+ * See the `ElementsAddressInstruction` documentation for more details.
+ */
+ class ElementsAddress extends UnaryOpcode, TElementsAddress {
+ final override string toString() { result = "ElementsAddress" }
+ }
+
+ /**
+ * The `Opcode` for a `FunctionAddressInstruction`.
+ *
+ * See the `FunctionAddressInstruction` documentation for more details.
+ */
+ class FunctionAddress extends Opcode, TFunctionAddress {
+ final override string toString() { result = "FunctionAddress" }
+ }
+
+ /**
+ * The `Opcode` for a `ConstantInstruction`.
+ *
+ * See the `ConstantInstruction` documentation for more details.
+ */
+ class Constant extends Opcode, TConstant {
+ final override string toString() { result = "Constant" }
+ }
+
+ /**
+ * The `Opcode` for a `StringConstantInstruction`.
+ *
+ * See the `StringConstantInstruction` documentation for more details.
+ */
+ class StringConstant extends Opcode, TStringConstant {
+ final override string toString() { result = "StringConstant" }
+ }
+
+ /**
+ * The `Opcode` for a `ConditionalBranchInstruction`.
+ *
+ * See the `ConditionalBranchInstruction` documentation for more details.
+ */
+ class ConditionalBranch extends OpcodeWithCondition, TConditionalBranch {
+ final override string toString() { result = "ConditionalBranch" }
+ }
+
+ /**
+ * The `Opcode` for a `SwitchInstruction`.
+ *
+ * See the `SwitchInstruction` documentation for more details.
+ */
+ class Switch extends OpcodeWithCondition, TSwitch {
+ final override string toString() { result = "Switch" }
+ }
+
+ /**
+ * The `Opcode` for a `CallInstruction`.
+ *
+ * See the `CallInstruction` documentation for more details.
+ */
+ class Call extends Opcode, TCall {
+ final override string toString() { result = "Call" }
+
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof CallTargetOperandTag
+ }
+ }
+
+ /**
+ * The `Opcode` for a `CatchByTypeInstruction`.
+ *
+ * See the `CatchByTypeInstruction` documentation for more details.
+ */
+ class CatchByType extends CatchOpcode, TCatchByType {
+ final override string toString() { result = "CatchByType" }
+ }
+
+ /**
+ * The `Opcode` for a `CatchAnyInstruction`.
+ *
+ * See the `CatchAnyInstruction` documentation for more details.
+ */
+ class CatchAny extends CatchOpcode, TCatchAny {
+ final override string toString() { result = "CatchAny" }
+ }
+
+ /**
+ * The `Opcode` for a `ThrowValueInstruction`.
+ *
+ * See the `ThrowValueInstruction` documentation for more details.
+ */
+ class ThrowValue extends ThrowOpcode, OpcodeWithLoad, TThrowValue {
+ final override string toString() { result = "ThrowValue" }
+ }
+
+ /**
+ * The `Opcode` for a `ReThrowInstruction`.
+ *
+ * See the `ReThrowInstruction` documentation for more details.
+ */
+ class ReThrow extends ThrowOpcode, TReThrow {
+ final override string toString() { result = "ReThrow" }
+ }
+
+ /**
+ * The `Opcode` for an `UnwindInstruction`.
+ *
+ * See the `UnwindInstruction` documentation for more details.
+ */
+ class Unwind extends Opcode, TUnwind {
+ final override string toString() { result = "Unwind" }
+ }
+
+ /**
+ * The `Opcode` for an `AliasedDefinitionInstruction`.
+ *
+ * See the `AliasedDefinitionInstruction` documentation for more details.
+ */
+ class AliasedDefinition extends Opcode, TAliasedDefinition {
+ final override string toString() { result = "AliasedDefinition" }
+
+ final override MemoryAccessKind getWriteMemoryAccess() { result instanceof EscapedMemoryAccess }
+ }
+
+ /**
+ * The `Opcode` for an `InitializeNonLocalInstruction`.
+ *
+ * See the `InitializeNonLocalInstruction` documentation for more details.
+ */
+ class InitializeNonLocal extends Opcode, TInitializeNonLocal {
+ final override string toString() { result = "InitializeNonLocal" }
+
+ final override MemoryAccessKind getWriteMemoryAccess() {
+ result instanceof NonLocalMemoryAccess
+ }
+ }
+
+ /**
+ * The `Opcode` for an `AliasedUseInstruction`.
+ *
+ * See the `AliasedUseInstruction` documentation for more details.
+ */
+ class AliasedUse extends Opcode, TAliasedUse {
+ final override string toString() { result = "AliasedUse" }
+
+ final override MemoryAccessKind getReadMemoryAccess() { result instanceof NonLocalMemoryAccess }
+
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof SideEffectOperandTag
+ }
+ }
+
+ /**
+ * The `Opcode` for a `PhiInstruction`.
+ *
+ * See the `PhiInstruction` documentation for more details.
+ */
+ class Phi extends Opcode, TPhi {
+ final override string toString() { result = "Phi" }
+
+ final override MemoryAccessKind getWriteMemoryAccess() { result instanceof PhiMemoryAccess }
+ }
+
+ /**
+ * The `Opcode` for a `BuiltInInstruction`.
+ *
+ * See the `BuiltInInstruction` documentation for more details.
+ */
+ class BuiltIn extends BuiltInOperationOpcode, TBuiltIn {
+ final override string toString() { result = "BuiltIn" }
+ }
+
+ /**
+ * The `Opcode` for a `VarArgsStartInstruction`.
+ *
+ * See the `VarArgsStartInstruction` documentation for more details.
+ */
+ class VarArgsStart extends UnaryOpcode, TVarArgsStart {
+ final override string toString() { result = "VarArgsStart" }
+ }
+
+ /**
+ * The `Opcode` for a `VarArgsEndInstruction`.
+ *
+ * See the `VarArgsEndInstruction` documentation for more details.
+ */
+ class VarArgsEnd extends UnaryOpcode, TVarArgsEnd {
+ final override string toString() { result = "VarArgsEnd" }
+ }
+
+ /**
+ * The `Opcode` for a `VarArgInstruction`.
+ *
+ * See the `VarArgInstruction` documentation for more details.
+ */
+ class VarArg extends UnaryOpcode, TVarArg {
+ final override string toString() { result = "VarArg" }
+ }
+
+ /**
+ * The `Opcode` for a `NextVarArgInstruction`.
+ *
+ * See the `NextVarArgInstruction` documentation for more details.
+ */
+ class NextVarArg extends UnaryOpcode, TNextVarArg {
+ final override string toString() { result = "NextVarArg" }
+ }
+
+ /**
+ * The `Opcode` for a `CallSideEffectInstruction`.
+ *
+ * See the `CallSideEffectInstruction` documentation for more details.
+ */
+ class CallSideEffect extends WriteSideEffectOpcode, EscapedWriteOpcode, MayWriteOpcode,
+ ReadSideEffectOpcode, EscapedReadOpcode, MayReadOpcode, TCallSideEffect {
+ final override string toString() { result = "CallSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `CallReadSideEffectInstruction`.
+ *
+ * See the `CallReadSideEffectInstruction` documentation for more details.
+ */
+ class CallReadSideEffect extends ReadSideEffectOpcode, EscapedReadOpcode, MayReadOpcode,
+ TCallReadSideEffect {
+ final override string toString() { result = "CallReadSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for an `IndirectReadSideEffectInstruction`.
+ *
+ * See the `IndirectReadSideEffectInstruction` documentation for more details.
+ */
+ class IndirectReadSideEffect extends ReadSideEffectOpcode, IndirectReadOpcode,
+ TIndirectReadSideEffect {
+ final override string toString() { result = "IndirectReadSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for an `IndirectMustWriteSideEffectInstruction`.
+ *
+ * See the `IndirectMustWriteSideEffectInstruction` documentation for more details.
+ */
+ class IndirectMustWriteSideEffect extends WriteSideEffectOpcode, IndirectWriteOpcode,
+ TIndirectMustWriteSideEffect {
+ final override string toString() { result = "IndirectMustWriteSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for an `IndirectMayWriteSideEffectInstruction`.
+ *
+ * See the `IndirectMayWriteSideEffectInstruction` documentation for more details.
+ */
+ class IndirectMayWriteSideEffect extends WriteSideEffectOpcode, IndirectWriteOpcode,
+ MayWriteOpcode, TIndirectMayWriteSideEffect {
+ final override string toString() { result = "IndirectMayWriteSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `BufferReadSideEffectInstruction`.
+ *
+ * See the `BufferReadSideEffectInstruction` documentation for more details.
+ */
+ class BufferReadSideEffect extends ReadSideEffectOpcode, UnsizedBufferReadOpcode,
+ TBufferReadSideEffect {
+ final override string toString() { result = "BufferReadSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `BufferMustWriteSideEffectInstruction`.
+ *
+ * See the `BufferMustWriteSideEffectInstruction` documentation for more details.
+ */
+ class BufferMustWriteSideEffect extends WriteSideEffectOpcode, UnsizedBufferWriteOpcode,
+ TBufferMustWriteSideEffect {
+ final override string toString() { result = "BufferMustWriteSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `BufferMayWriteSideEffectInstruction`.
+ *
+ * See the `BufferMayWriteSideEffectInstruction` documentation for more details.
+ */
+ class BufferMayWriteSideEffect extends WriteSideEffectOpcode, UnsizedBufferWriteOpcode,
+ MayWriteOpcode, TBufferMayWriteSideEffect {
+ final override string toString() { result = "BufferMayWriteSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `SizedBufferReadSideEffectInstruction`.
+ *
+ * See the `SizedBufferReadSideEffectInstruction` documentation for more details.
+ */
+ class SizedBufferReadSideEffect extends ReadSideEffectOpcode, SizedBufferReadOpcode,
+ TSizedBufferReadSideEffect {
+ final override string toString() { result = "SizedBufferReadSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `SizedBufferMustWriteSideEffectInstruction`.
+ *
+ * See the `SizedBufferMustWriteSideEffectInstruction` documentation for more details.
+ */
+ class SizedBufferMustWriteSideEffect extends WriteSideEffectOpcode, SizedBufferWriteOpcode,
+ TSizedBufferMustWriteSideEffect {
+ final override string toString() { result = "SizedBufferMustWriteSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for a `SizedBufferMayWriteSideEffectInstruction`.
+ *
+ * See the `SizedBufferMayWriteSideEffectInstruction` documentation for more details.
+ */
+ class SizedBufferMayWriteSideEffect extends WriteSideEffectOpcode, SizedBufferWriteOpcode,
+ MayWriteOpcode, TSizedBufferMayWriteSideEffect {
+ final override string toString() { result = "SizedBufferMayWriteSideEffect" }
+ }
+
+ /**
+ * The `Opcode` for an `InitializeDynamicAllocationInstruction`.
+ *
+ * See the `InitializeDynamicAllocationInstruction` documentation for more details.
+ */
+ class InitializeDynamicAllocation extends SideEffectOpcode, EntireAllocationWriteOpcode,
+ TInitializeDynamicAllocation {
+ final override string toString() { result = "InitializeDynamicAllocation" }
+ }
+
+ /**
+ * The `Opcode` for a `ChiInstruction`.
+ *
+ * See the `ChiInstruction` documentation for more details.
+ */
+ class Chi extends Opcode, TChi {
+ final override string toString() { result = "Chi" }
+
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof ChiTotalOperandTag
+ or
+ tag instanceof ChiPartialOperandTag
+ }
+
+ final override MemoryAccessKind getWriteMemoryAccess() {
+ result instanceof ChiTotalMemoryAccess
+ }
+ }
+
+ /**
+ * The `Opcode` for an `InlineAsmInstruction`.
+ *
+ * See the `InlineAsmInstruction` documentation for more details.
+ */
+ class InlineAsm extends Opcode, EscapedWriteOpcode, MayWriteOpcode, EscapedReadOpcode,
+ MayReadOpcode, TInlineAsm {
+ final override string toString() { result = "InlineAsm" }
+
+ final override predicate hasOperandInternal(OperandTag tag) {
+ tag instanceof SideEffectOperandTag
+ }
+ }
+
+ /**
+ * The `Opcode` for an `UnreachedInstruction`.
+ *
+ * See the `UnreachedInstruction` documentation for more details.
+ */
+ class Unreached extends Opcode, TUnreached {
+ final override string toString() { result = "Unreached" }
+ }
+
+ /**
+ * The `Opcode` for a `NewObjInstruction`.
+ *
+ * See the `NewObjInstruction` documentation for more details.
+ */
+ class NewObj extends Opcode, TNewObj {
+ final override string toString() { result = "NewObj" }
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/TempVariableTag.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/TempVariableTag.qll
new file mode 100644
index 00000000000..5f230de560d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/TempVariableTag.qll
@@ -0,0 +1,17 @@
+/**
+ * Defines the public interface to temporary variable tags, which describe the reason a particular
+ * `IRTempVariable` was generated.
+ */
+
+private import internal.TempVariableTagInternal
+private import Imports::TempVariableTag
+
+/**
+ * A reason that a particular IR temporary variable was generated. For example, it could be
+ * generated to hold the return value of a function, or to hold the result of a `?:` operator
+ * computed on each branch. The set of possible `TempVariableTag`s is language-dependent.
+ */
+class TempVariableTag extends TTempVariableTag {
+ /** Gets a textual representation of this tag. */
+ string toString() { result = getTempVariableTagId(this) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/UseSoundEscapeAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/UseSoundEscapeAnalysis.qll
new file mode 100644
index 00000000000..b9b1dc243b1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/UseSoundEscapeAnalysis.qll
@@ -0,0 +1,9 @@
+import IRConfiguration
+
+/**
+ * Overrides the default IR configuration to use sound escape analysis, instead of assuming that
+ * variable addresses never escape.
+ */
+class SoundEscapeAnalysisConfiguration extends IREscapeAnalysisConfiguration {
+ override predicate useSoundEscapeAnalysis() { any() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll
new file mode 100644
index 00000000000..c96783fe6e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll
@@ -0,0 +1,80 @@
+/**
+ * Provides classes that describe the Intermediate Representation (IR) of the program.
+ *
+ * The IR is a representation of the semantics of the program, with very little dependence on the
+ * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`,
+ * and `++i` all have the same semantic effect, but appear in the AST as three different types of
+ * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental
+ * operations similar to:
+ *
+ * ```
+ * r1(int*) = VariableAddress[i] // Compute the address of variable `i`
+ * r2(int) = Load &:r1, m0 // Load the value of `i`
+ * r3(int) = Constant[1] // An integer constant with the value `1`
+ * r4(int) = Add r2, r3 // Add `1` to the value of `i`
+ * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i`
+ * ```
+ *
+ * This allows IR-based analysis to focus on the fundamental operations, rather than having to be
+ * concerned with the various ways of expressing those operations in source code.
+ *
+ * The key classes in the IR are:
+ *
+ * - `IRFunction` - Contains the IR for an entire function definition, including all of that
+ * function's `Instruction`s, `IRBlock`s, and `IRVariable`s.
+ * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be
+ * performed, the operands that produce the inputs to that operation, and the type of the result
+ * of the operation. Control flows from an `Instruction` to one of a set of successor
+ * `Instruction`s.
+ * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly
+ * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has
+ * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction`
+ * that produces its value (its "definition").
+ * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is
+ * created for each variable directly accessed by the function. In addition, `IRVariable`s are
+ * created to represent certain temporary storage locations that do not have explicitly declared
+ * variables in the source code, such as the return value of the function.
+ * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a
+ * sequence of instructions such that control flow can only enter the block at the first
+ * instruction, and can only leave the block from the last instruction.
+ * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType`
+ * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all
+ * be represented as the `IRType` `uint4`, a four-byte unsigned integer.
+ */
+
+import IRFunction
+import Instruction
+import IRBlock
+import IRVariable
+import Operand
+private import internal.IRImports as Imports
+import Imports::EdgeKind
+import Imports::IRType
+import Imports::MemoryAccessKind
+
+private newtype TIRPropertyProvider = MkIRPropertyProvider()
+
+/**
+ * A class that provides additional properties to be dumped for IR instructions and blocks when using
+ * the PrintIR module. Libraries that compute additional facts about IR elements can extend the
+ * single instance of this class to specify the additional properties computed by the library.
+ */
+class IRPropertyProvider extends TIRPropertyProvider {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "IRPropertyProvider" }
+
+ /**
+ * Gets the value of the property named `key` for the specified instruction.
+ */
+ string getInstructionProperty(Instruction instruction, string key) { none() }
+
+ /**
+ * Gets the value of the property named `key` for the specified block.
+ */
+ string getBlockProperty(IRBlock block, string key) { none() }
+
+ /**
+ * Gets the value of the property named `key` for the specified operand.
+ */
+ string getOperandProperty(Operand operand, string key) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll
new file mode 100644
index 00000000000..4b86f9a7cec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll
@@ -0,0 +1,331 @@
+/**
+ * Provides classes describing basic blocks in the IR of a function.
+ */
+
+private import internal.IRInternal
+import Instruction
+private import internal.IRBlockImports as Imports
+import Imports::EdgeKind
+private import Cached
+
+/**
+ * A basic block in the IR. A basic block consists of a sequence of `Instruction`s with the only
+ * incoming edges at the beginning of the sequence and the only outgoing edges at the end of the
+ * sequence.
+ *
+ * This class does not contain any members that query the predecessor or successor edges of the
+ * block. This allows different classes that extend `IRBlockBase` to expose different subsets of
+ * edges (e.g. ignoring unreachable edges).
+ *
+ * Most consumers should use the class `IRBlock`.
+ */
+class IRBlockBase extends TIRBlock {
+ /** Gets a textual representation of this block, taken from its first instruction. */
+ final string toString() { result = getFirstInstruction(this).toString() }
+
+ /** Gets the source location of the first non-`Phi` instruction in this block. */
+ final Language::Location getLocation() { result = getFirstInstruction().getLocation() }
+
+ /**
+ * INTERNAL: Do not use.
+ *
+ * Gets the zero-based index of the block within its function. Blocks that start with
+ * `EnterFunction` sort first. Has a result only if some `IRConfiguration` enables debug strings
+ * for the enclosing function. This predicate is used by debugging and printing code only.
+ */
+ int getDisplayIndex() {
+ exists(IRConfiguration::IRConfiguration config |
+ config.shouldEvaluateDebugStringsForFunction(this.getEnclosingFunction())
+ ) and
+ this =
+ rank[result + 1](IRBlock funcBlock, int sortOverride, int sortKey1, int sortKey2 |
+ funcBlock.getEnclosingFunction() = getEnclosingFunction() and
+ funcBlock.getFirstInstruction().hasSortKeys(sortKey1, sortKey2) and
+ // Ensure that the block containing `EnterFunction` always comes first.
+ if funcBlock.getFirstInstruction() instanceof EnterFunctionInstruction
+ then sortOverride = 0
+ else sortOverride = 1
+ |
+ funcBlock order by sortOverride, sortKey1, sortKey2
+ )
+ }
+
+ /**
+ * Gets the `index`th non-`Phi` instruction in this block.
+ */
+ final Instruction getInstruction(int index) { result = getInstruction(this, index) }
+
+ /**
+ * Gets a `Phi` instruction that appears at the start of this block.
+ */
+ final PhiInstruction getAPhiInstruction() {
+ Construction::getPhiInstructionBlockStart(result) = getFirstInstruction()
+ }
+
+ /**
+ * Gets an instruction in this block. This includes `Phi` instructions.
+ */
+ final Instruction getAnInstruction() {
+ result = getInstruction(_) or
+ result = getAPhiInstruction()
+ }
+
+ /**
+ * Gets the first non-`Phi` instruction in this block.
+ */
+ final Instruction getFirstInstruction() { result = getFirstInstruction(this) }
+
+ /**
+ * Gets the last instruction in this block, i.e. the one at index `getInstructionCount() - 1`.
+ */
+ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) }
+
+ /**
+ * Gets the number of non-`Phi` instructions in this block.
+ */
+ final int getInstructionCount() { result = getInstructionCount(this) }
+
+ /**
+ * Gets the `IRFunction` that contains this block.
+ */
+ final IRFunction getEnclosingIRFunction() {
+ result = getFirstInstruction(this).getEnclosingIRFunction()
+ }
+
+ /**
+ * Gets the `Function` that contains this block.
+ */
+ final Language::Function getEnclosingFunction() {
+ result = getFirstInstruction(this).getEnclosingFunction()
+ }
+}
+
+/**
+ * A basic block with additional information about its predecessor and successor edges. Each edge
+ * corresponds to the control flow between the last instruction of one block and the first
+ * instruction of another block.
+ */
+class IRBlock extends IRBlockBase {
+ /**
+ * Gets a block to which control flows directly from this block.
+ */
+ final IRBlock getASuccessor() { blockSuccessor(this, result) }
+
+ /**
+ * Gets a block from which control flows directly to this block.
+ */
+ final IRBlock getAPredecessor() { blockSuccessor(result, this) }
+
+ /**
+ * Gets the block to which control flows directly from this block along an edge of kind `kind`.
+ */
+ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) }
+
+ /**
+ * Gets the block to which control flows directly from this block along a back edge of kind
+ * `kind`.
+ */
+ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) }
+
+ /**
+ * Holds if this block immediately dominates `block`.
+ *
+ * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and every
+ * other block that strictly dominates `B` also dominates `A`. `B` need not be a successor of `A`.
+ */
+ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) }
+
+ /**
+ * Holds if this block strictly dominates `block`.
+ *
+ * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B`
+ * are not the same block.
+ */
+ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) }
+
+ /**
+ * Holds if this block dominates `block`.
+ *
+ * Block `A` dominates block `B` if any control flow path from the entry block of the function to
+ * block `B` must pass through block `A`. A block always dominates itself.
+ */
+ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block }
+
+ /**
+ * Gets a block on the dominance frontier of this block.
+ *
+ * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not
+ * strictly dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`.
+ */
+ pragma[noinline]
+ final IRBlock dominanceFrontier() {
+ dominates(result.getAPredecessor()) and
+ not strictlyDominates(result)
+ }
+
+ /**
+ * Holds if this block immediately post-dominates `block`.
+ *
+ * Block `A` immediately post-dominates block `B` if block `A` strictly post-dominates block `B`
+ * and every other block that strictly post-dominates `B` also post-dominates `A`.
+ */
+ final predicate immediatelyPostDominates(IRBlock block) {
+ blockImmediatelyPostDominates(this, block)
+ }
+
+ /**
+ * Holds if this block strictly post-dominates `block`.
+ *
+ * Block `A` strictly post-dominates block `B` if block `A` post-dominates block `B` and blocks `A`
+ * and `B` are not the same block.
+ */
+ final predicate strictlyPostDominates(IRBlock block) {
+ blockImmediatelyPostDominates+(this, block)
+ }
+
+ /**
+ * Holds if this block is a post-dominator of `block`.
+ *
+ * Block `A` post-dominates block `B` if any control flow path from `B` to the exit block of the
+ * function must pass through block `A`. A block always post-dominates itself.
+ */
+ final predicate postDominates(IRBlock block) { strictlyPostDominates(block) or this = block }
+
+ /**
+ * Gets a block on the post-dominance frontier of this block.
+ * NOTE(review): the predicate name misspells "Dominance"; renaming it would break callers.
+ * The post-dominance frontier of block `A` is the set of blocks `B` such that `A` does not strictly
+ * post-dominate block `B`, but block `A` does post-dominate an immediate successor of block `B`.
+ */
+ pragma[noinline]
+ final IRBlock postPominanceFrontier() {
+ postDominates(result.getASuccessor()) and
+ not strictlyPostDominates(result)
+ }
+
+ /**
+ * Holds if this block is reachable from the entry block of its function.
+ */
+ final predicate isReachableFromFunctionEntry() {
+ this = getEnclosingIRFunction().getEntryBlock() or
+ getAPredecessor().isReachableFromFunctionEntry()
+ }
+}
+
+private predicate startsBasicBlock(Instruction instr) {
+ not instr instanceof PhiInstruction and // a `Phi` instruction never starts a block
+ not adjacentInBlock(_, instr) // nothing is chained to `instr` from within a block
+}
+
+/** Holds if `i2` follows `i1` in an `IRBlock`. */
+private predicate adjacentInBlock(Instruction i1, Instruction i2) {
+ // - i2 must be the only successor of i1
+ i2 = unique(Instruction i | i = i1.getASuccessor()) and
+ // - i1 must be the only predecessor of i2
+ i1 = unique(Instruction i | i.getASuccessor() = i2) and
+ // - The edge between the two must be a GotoEdge. We just check that one
+ // exists since we've already checked that it's unique.
+ exists(GotoEdge edgeKind | exists(i1.getSuccessor(edgeKind))) and
+ // - The edge must not be a back edge. This means we get the same back edges
+ // in the basic-block graph as we do in the raw CFG.
+ not exists(Construction::getInstructionBackEdgeSuccessor(i1, _))
+ // This predicate could be simplified to remove one of the `unique`s if we
+ // were willing to rely on the CFG being well-formed and thus never having
+ // more than one successor to an instruction that has a `GotoEdge` out of it.
+}
+
+private predicate isEntryBlock(TIRBlock block) {
+ block = MkIRBlock(any(EnterFunctionInstruction enter)) // block starting with `EnterFunction`
+}
+
+cached
+private module Cached {
+ cached
+ newtype TIRBlock = MkIRBlock(Instruction firstInstr) { startsBasicBlock(firstInstr) }
+
+ /** Gets the instruction reached from block start `first` after `index` `adjacentInBlock` steps. */
+ private Instruction getInstructionFromFirst(Instruction first, int index) =
+ shortestDistances(startsBasicBlock/1, adjacentInBlock/2)(first, result, index)
+
+ /** Gets the `index`th instruction in `block`, counting from the block's first instruction. */
+ cached
+ Instruction getInstruction(TIRBlock block, int index) {
+ result = getInstructionFromFirst(getFirstInstruction(block), index)
+ }
+
+ cached
+ int getInstructionCount(TIRBlock block) { result = strictcount(getInstruction(block, _)) }
+
+ cached
+ predicate blockSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ exists(Instruction predLast, Instruction succFirst |
+ predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
+ succFirst = predLast.getSuccessor(kind) and
+ succ = MkIRBlock(succFirst)
+ )
+ }
+
+ pragma[noinline]
+ private predicate blockIdentity(TIRBlock b1, TIRBlock b2) { b1 = b2 }
+
+ pragma[noopt]
+ cached
+ predicate backEdgeSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ backEdgeSuccessorRaw(pred, succ, kind)
+ or
+ // See the QLDoc on `backEdgeSuccessorRaw`.
+ exists(TIRBlock pred2 |
+ // Joining with `blockIdentity` is a performance trick to get
+ // `forwardEdgeRaw` on the RHS of a join, where it's fast.
+ blockIdentity(pred, pred2) and
+ forwardEdgeRaw+(pred, pred2)
+ ) and
+ blockSuccessor(pred, succ, kind)
+ }
+
+ /**
+ * Holds if there is an edge from `pred` to `succ` that is not a raw back edge (see
+ * `backEdgeSuccessorRaw`).
+ */
+ private predicate forwardEdgeRaw(TIRBlock pred, TIRBlock succ) {
+ exists(EdgeKind kind |
+ blockSuccessor(pred, succ, kind) and
+ not backEdgeSuccessorRaw(pred, succ, kind)
+ )
+ }
+
+ /**
+ * Holds if the `kind`-edge from `pred` to `succ` is a back edge according to
+ * `Construction`.
+ *
+ * There could be loops of non-back-edges if there is a flaw in the IR
+ * construction or back-edge detection, and this could cause non-termination
+ * of subsequent analysis. To prevent that, a subsequent predicate further
+ * classifies all edges as back edges if they are involved in a loop of
+ * non-back-edges.
+ */
+ private predicate backEdgeSuccessorRaw(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ exists(Instruction predLast, Instruction succFirst |
+ predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
+ succFirst = Construction::getInstructionBackEdgeSuccessor(predLast, kind) and
+ succ = MkIRBlock(succFirst)
+ )
+ }
+
+ cached
+ predicate blockSuccessor(TIRBlock pred, TIRBlock succ) { blockSuccessor(pred, succ, _) }
+
+ cached
+ predicate blockImmediatelyDominates(TIRBlock dominator, TIRBlock block) =
+ idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block)
+}
+
+private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) }
+
+private predicate blockFunctionExit(IRBlock exit) {
+ exit.getLastInstruction() instanceof ExitFunctionInstruction // root for post-dominance
+}
+
+private predicate blockPredecessor(IRBlock src, IRBlock pred) { src.getAPredecessor() = pred }
+
+private predicate blockImmediatelyPostDominates(IRBlock postDominator, IRBlock block) =
+ idominance(blockFunctionExit/1, blockPredecessor/2)(_, postDominator, block) // reversed edges
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRConsistency.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRConsistency.ql
new file mode 100644
index 00000000000..0b49f422bab
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRConsistency.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Aliased SSA IR Consistency Check
+ * @description Performs consistency checks on the Intermediate Representation. This query should have no results.
+ * @kind table
+ * @id cpp/aliased-ssa-ir-consistency-check
+ */
+
+import IRConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRConsistency.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRConsistency.qll
new file mode 100644
index 00000000000..31983d34247
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRConsistency.qll
@@ -0,0 +1,527 @@
+private import IR
+import InstructionConsistency // module is below
+import IRTypeConsistency // module is in IRType.qll
+
+module InstructionConsistency {
+ private import internal.InstructionImports as Imports
+ private import Imports::OperandTag
+ private import Imports::Overlap
+ private import internal.IRInternal
+
+ private newtype TOptionalIRFunction =
+ TPresentIRFunction(IRFunction irFunc) or
+ TMissingIRFunction()
+
+ /**
+ * An `IRFunction` that might not exist. This is used so that we can produce consistency failures
+ * for IR that also incorrectly lacks a `getEnclosingIRFunction()`.
+ */
+ abstract private class OptionalIRFunction extends TOptionalIRFunction {
+ abstract string toString();
+
+ abstract Language::Location getLocation();
+ }
+
+ private class PresentIRFunction extends OptionalIRFunction, TPresentIRFunction {
+ private IRFunction irFunc;
+
+ PresentIRFunction() { this = TPresentIRFunction(irFunc) }
+
+ override string toString() {
+ result = concat(Language::getIdentityString(irFunc.getFunction()), "; ")
+ }
+
+ override Language::Location getLocation() {
+ // To avoid an overwhelming number of results when the extractor merges functions with the
+ // same name, just pick a single location.
+ result =
+ min(Language::Location loc | loc = irFunc.getLocation() | loc order by loc.toString())
+ }
+ }
+
+ private class MissingIRFunction extends OptionalIRFunction, TMissingIRFunction {
+ override string toString() { result = "" }
+
+ override Language::Location getLocation() { result instanceof Language::UnknownDefaultLocation }
+ }
+
+ private OptionalIRFunction getInstructionIRFunction(Instruction instr) {
+ result = TPresentIRFunction(instr.getEnclosingIRFunction())
+ or
+ not exists(instr.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ pragma[inline]
+ private OptionalIRFunction getInstructionIRFunction(Instruction instr, string irFuncText) {
+ result = getInstructionIRFunction(instr) and
+ irFuncText = result.toString()
+ }
+
+ private OptionalIRFunction getOperandIRFunction(Operand operand) {
+ result = TPresentIRFunction(operand.getEnclosingIRFunction())
+ or
+ not exists(operand.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ pragma[inline]
+ private OptionalIRFunction getOperandIRFunction(Operand operand, string irFuncText) {
+ result = getOperandIRFunction(operand) and
+ irFuncText = result.toString()
+ }
+
+ private OptionalIRFunction getBlockIRFunction(IRBlock block) {
+ result = TPresentIRFunction(block.getEnclosingIRFunction())
+ or
+ not exists(block.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ /**
+ * Holds if instruction `instr` is missing an operand with a tag that its opcode expects.
+ */
+ query predicate missingOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag |
+ instr.getOpcode().hasOperand(tag) and
+ not exists(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ message =
+ "Instruction '" + instr.getOpcode().toString() +
+ "' is missing an expected operand with tag '" + tag.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` has an operand with a tag that its opcode does not expect.
+ */
+ query predicate unexpectedOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag |
+ exists(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ not instr.getOpcode().hasOperand(tag) and
+ not (instr instanceof CallInstruction and tag instanceof ArgumentOperandTag) and
+ not (
+ instr instanceof BuiltInOperationInstruction and tag instanceof PositionalArgumentOperandTag
+ ) and
+ not (instr instanceof InlineAsmInstruction and tag instanceof AsmOperandTag) and
+ message =
+ "Instruction '" + instr.toString() + "' has unexpected operand '" + tag.toString() +
+ "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` has multiple operands with the same tag.
+ */
+ query predicate duplicateOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag, int operandCount |
+ operandCount =
+ strictcount(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ operandCount > 1 and
+ message =
+ "Instruction has " + operandCount + " operands with tag '" + tag.toString() + "'" +
+ " in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if `Phi` instruction `instr` is missing an operand corresponding to
+ * some predecessor of its block.
+ */
+ query predicate missingPhiOperand(
+ PhiInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRBlock pred |
+ pred = instr.getBlock().getAPredecessor() and
+ not exists(PhiInputOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getPredecessorBlock() = pred
+ ) and
+ message =
+ "Instruction '" + instr.toString() + "' is missing an operand for predecessor block '" +
+ pred.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ query predicate missingOperandType(
+ Operand operand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Instruction use |
+ not exists(operand.getType()) and
+ use = operand.getUse() and
+ message =
+ "Operand '" + operand.toString() + "' of instruction '" + use.getOpcode().toString() +
+ "' is missing a type in function '$@'." and
+ irFunc = getOperandIRFunction(operand, irFuncText)
+ )
+ }
+
+ query predicate duplicateChiOperand(
+ ChiInstruction chi, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ chi.getTotal() = chi.getPartial() and
+ message =
+ "Chi instruction for " + chi.getPartial().toString() +
+ " has duplicate operands in function '$@'." and
+ irFunc = getInstructionIRFunction(chi, irFuncText)
+ }
+
+ query predicate sideEffectWithoutPrimary(
+ SideEffectInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(instr.getPrimaryInstruction()) and
+ message =
+ "Side effect instruction '" + instr + "' is missing a primary instruction in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if an instruction, other than `ExitFunction`, `Phi`, or `Unreached`, has no successors.
+ */
+ query predicate instructionWithoutSuccessor(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(instr.getASuccessor()) and
+ not instr instanceof ExitFunctionInstruction and
+ // Phi instructions aren't linked into the instruction-level flow graph.
+ not instr instanceof PhiInstruction and
+ not instr instanceof UnreachedInstruction and
+ message = "Instruction '" + instr.toString() + "' has no successors in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if there are multiple edges of the same kind from `source`.
+ */
+ query predicate ambiguousSuccessors(
+ Instruction source, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(EdgeKind kind, int n |
+ n = strictcount(Instruction t | source.getSuccessor(kind) = t) and
+ n > 1 and
+ message =
+ "Instruction '" + source.toString() + "' has " + n.toString() + " successors of kind '" +
+ kind.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(source, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if `instr` is part of a loop even though the AST of `instr`'s enclosing function
+ * contains no element that can cause loops.
+ */
+ query predicate unexplainedLoop(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Language::Function f |
+ exists(IRBlock block |
+ instr.getBlock() = block and
+ block.getEnclosingFunction() = f and
+ block.getASuccessor+() = block
+ ) and
+ not Language::hasPotentialLoop(f) and
+ message =
+ "Instruction '" + instr.toString() + "' is part of an unexplained loop in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if a `Phi` instruction is present in a block with fewer than two
+ * predecessors.
+ */
+ query predicate unnecessaryPhiInstruction(
+ PhiInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int n |
+ n = count(instr.getBlock().getAPredecessor()) and
+ n < 2 and
+ message =
+ "Instruction '" + instr.toString() + "' is in a block with only " + n.toString() +
+ " predecessors in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if a memory operand is connected to a definition with an unmodeled result.
+ */
+ query predicate memoryOperandDefinitionIsUnmodeled(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(MemoryOperand operand, Instruction def |
+ operand = instr.getAnOperand() and
+ def = operand.getAnyDef() and
+ not def.isResultModeled() and
+ message =
+ "Memory operand definition on instruction '" + instr.toString() +
+ "' has unmodeled result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if operand `operand` consumes a value that was defined in
+ * a different function.
+ */
+ query predicate operandAcrossFunctions(
+ Operand operand, string message, OptionalIRFunction useIRFunc, string useIRFuncText,
+ OptionalIRFunction defIRFunc, string defIRFuncText
+ ) {
+ exists(Instruction useInstr, Instruction defInstr |
+ operand.getUse() = useInstr and
+ operand.getAnyDef() = defInstr and
+ useIRFunc = getInstructionIRFunction(useInstr, useIRFuncText) and
+ defIRFunc = getInstructionIRFunction(defInstr, defIRFuncText) and
+ useIRFunc != defIRFunc and
+ message =
+ "Operand '" + operand.toString() + "' is used on instruction '" + useInstr.toString() +
+ "' in function '$@', but is defined on instruction '" + defInstr.toString() +
+ "' in function '$@'."
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` is not in exactly one block.
+ */
+ query predicate instructionWithoutUniqueBlock(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int blockCount |
+ blockCount = count(instr.getBlock()) and
+ blockCount != 1 and
+ message =
+ "Instruction '" + instr.toString() + "' is a member of " + blockCount.toString() +
+ " blocks in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ private predicate forwardEdge(IRBlock b1, IRBlock b2) {
+ b1.getASuccessor() = b2 and
+ not b1.getBackEdgeSuccessor(_) = b2
+ }
+
+ /**
+ * Holds if `f` contains a loop in which no edge is a back edge.
+ *
+ * This check ensures we don't have too _few_ back edges.
+ */
+ query predicate containsLoopOfForwardEdges(IRFunction f, string message) {
+ exists(IRBlock block |
+ forwardEdge+(block, block) and
+ block.getEnclosingIRFunction() = f and
+ message = "Function contains a loop consisting of only forward edges."
+ )
+ }
+
+ /**
+ * Holds if `block` is reachable from its function entry point but would not
+ * be reachable by traversing only forward edges. This check is skipped for
+ * functions containing `goto` statements as the property does not generally
+ * hold there.
+ *
+ * This check ensures we don't have too _many_ back edges.
+ */
+ query predicate lostReachability(
+ IRBlock block, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRFunction f, IRBlock entry |
+ entry = f.getEntryBlock() and
+ entry.getASuccessor+() = block and
+ not forwardEdge+(entry, block) and
+ not Language::hasGoto(f.getFunction()) and
+ message =
+ "Block '" + block.toString() +
+ "' is not reachable by traversing only forward edges in function '$@'." and
+ irFunc = TPresentIRFunction(f) and
+ irFuncText = irFunc.toString()
+ )
+ }
+
+ /**
+ * Holds if the number of back edges differs between the `Instruction` graph
+ * and the `IRBlock` graph.
+ */
+ query predicate backEdgeCountMismatch(OptionalIRFunction irFunc, string message) {
+ exists(int fromInstr, int fromBlock |
+ fromInstr =
+ count(Instruction i1, Instruction i2 |
+ getInstructionIRFunction(i1) = irFunc and i1.getBackEdgeSuccessor(_) = i2
+ ) and
+ fromBlock =
+ count(IRBlock b1, IRBlock b2 |
+ getBlockIRFunction(b1) = irFunc and b1.getBackEdgeSuccessor(_) = b2
+ ) and
+ fromInstr != fromBlock and
+ message =
+ "The instruction graph for function '" + irFunc.toString() + "' contains " +
+ fromInstr.toString() + " back edges, but the block graph contains " + fromBlock.toString()
+ + " back edges."
+ )
+ }
+
+ /**
+ * Holds if the specified operand is evaluated at position `index` of `block`. For most operands,
+ * this is at the instruction that consumes the use. For a `PhiInputOperand`, the effective point
+ * of evaluation is at the end of the corresponding predecessor block.
+ */
+ private predicate pointOfEvaluation(Operand operand, IRBlock block, int index) {
+ block = operand.(PhiInputOperand).getPredecessorBlock() and
+ index = block.getInstructionCount() // one past the index of the block's last instruction
+ or
+ exists(Instruction use |
+ use = operand.(NonPhiOperand).getUse() and
+ block.getInstruction(index) = use
+ )
+ }
+
+ /**
+ * Holds if `useOperand` has a definition that does not dominate the use.
+ */
+ query predicate useNotDominatedByDefinition(
+ Operand useOperand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRBlock useBlock, int useIndex, Instruction defInstr, IRBlock defBlock, int defIndex |
+ pointOfEvaluation(useOperand, useBlock, useIndex) and
+ defInstr = useOperand.getAnyDef() and
+ (
+ defInstr instanceof PhiInstruction and
+ defBlock = defInstr.getBlock() and
+ defIndex = -1 // a `Phi` result is placed before every indexed instruction of its block
+ or
+ defBlock.getInstruction(defIndex) = defInstr
+ ) and
+ not (
+ defBlock.strictlyDominates(useBlock)
+ or
+ defBlock = useBlock and
+ defIndex < useIndex
+ ) and
+ message =
+ "Operand '" + useOperand.toString() +
+ "' is not dominated by its definition in function '$@'." and
+ irFunc = getOperandIRFunction(useOperand, irFuncText)
+ )
+ }
+
+ query predicate switchInstructionWithoutDefaultEdge(
+ SwitchInstruction switchInstr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(switchInstr.getDefaultSuccessor()) and
+ message =
+ "SwitchInstruction " + switchInstr.toString() + " without a DefaultEdge in function '$@'." and
+ irFunc = getInstructionIRFunction(switchInstr, irFuncText)
+ }
+
+ /**
+ * Holds if `instr` is on the chain of chi/phi instructions for all aliased
+ * memory.
+ */
+ private predicate isOnAliasedDefinitionChain(Instruction instr) {
+ instr instanceof AliasedDefinitionInstruction
+ or
+ isOnAliasedDefinitionChain(instr.(ChiInstruction).getTotal())
+ or
+ isOnAliasedDefinitionChain(instr.(PhiInstruction).getAnInputOperand().getAnyDef())
+ }
+
+ private predicate shouldBeConflated(Instruction instr) {
+ isOnAliasedDefinitionChain(instr)
+ or
+ instr.getOpcode() instanceof Opcode::InitializeNonLocal
+ }
+
+ query predicate notMarkedAsConflated(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ shouldBeConflated(instr) and
+ not instr.isResultConflated() and
+ message =
+ "Instruction '" + instr.toString() +
+ "' should be marked as having a conflated result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ query predicate wronglyMarkedAsConflated(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ instr.isResultConflated() and
+ not shouldBeConflated(instr) and
+ message =
+ "Instruction '" + instr.toString() +
+ "' should not be marked as having a conflated result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ query predicate invalidOverlap(
+ MemoryOperand useOperand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Overlap overlap |
+ overlap = useOperand.getDefinitionOverlap() and
+ overlap instanceof MayPartiallyOverlap and
+ message =
+ "MemoryOperand '" + useOperand.toString() + "' has a `getDefinitionOverlap()` of '" +
+ overlap.toString() + "'." and // NOTE(review): no '$@' placeholder, unlike sibling messages
+ irFunc = getOperandIRFunction(useOperand, irFuncText)
+ )
+ }
+
+ query predicate nonUniqueEnclosingIRFunction(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int irFuncCount |
+ irFuncCount = count(instr.getEnclosingIRFunction()) and
+ irFuncCount != 1 and
+ message =
+ "Instruction '" + instr.toString() + "' has " + irFuncCount.toString() +
+ " results for `getEnclosingIRFunction()` in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if the object address operand for the given `FieldAddress` instruction does not have an
+ * address type.
+ */
+ query predicate fieldAddressOnNonPointer(
+ FieldAddressInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not instr.getObjectAddressOperand().getIRType() instanceof IRAddressType and
+ message =
+ "FieldAddress instruction '" + instr.toString() +
+ "' has an object address operand that is not an address, in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if the `this` argument operand for the given `Call` instruction does not have an address
+ * type.
+ */
+ query predicate thisArgumentIsNonPointer(
+ CallInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(ThisArgumentOperand thisOperand | thisOperand = instr.getThisArgumentOperand() |
+ not thisOperand.getIRType() instanceof IRAddressType
+ ) and
+ message =
+ "Call instruction '" + instr.toString() +
+ "' has a `this` argument operand that is not an address, in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll
new file mode 100644
index 00000000000..5968e58f90b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll
@@ -0,0 +1,59 @@
+/**
+ * Provides the class `IRFunction`, which represents the Intermediate Representation for the
+ * definition of a function.
+ */
+
+private import internal.IRInternal
+private import internal.IRFunctionImports as Imports
+import Imports::IRFunctionBase
+import Instruction
+
+/**
+ * The IR for a function.
+ */
+class IRFunction extends IRFunctionBase {
+ /**
+ * Gets the entry point for this function.
+ */
+ pragma[noinline]
+ final EnterFunctionInstruction getEnterFunctionInstruction() {
+ result.getEnclosingIRFunction() = this
+ }
+
+ /**
+ * Gets the exit point for this function.
+ */
+ pragma[noinline]
+ final ExitFunctionInstruction getExitFunctionInstruction() {
+ result.getEnclosingIRFunction() = this
+ }
+
+ /**
+ * Gets the single return instruction for this function.
+ */
+ pragma[noinline]
+ final ReturnInstruction getReturnInstruction() { result.getEnclosingIRFunction() = this }
+
+ /**
+ * Gets the variable used to hold the return value of this function. If this
+ * function does not return a value, this predicate does not hold.
+ */
+ pragma[noinline]
+ final IRReturnVariable getReturnVariable() { result.getEnclosingIRFunction() = this }
+
+ /**
+ * Gets the block whose first instruction is the `EnterFunction` instruction of this function.
+ */
+ pragma[noinline]
+ final IRBlock getEntryBlock() { result.getFirstInstruction() = getEnterFunctionInstruction() }
+
+ /**
+ * Gets an instruction in this function.
+ */
+ final Instruction getAnInstruction() { result.getEnclosingIRFunction() = this }
+
+ /**
+ * Gets a block in this function.
+ */
+ final IRBlock getABlock() { result.getEnclosingIRFunction() = this }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll
new file mode 100644
index 00000000000..146fc270738
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll
@@ -0,0 +1,327 @@
+/**
+ * Provides classes that represent variables accessed by the IR.
+ */
+
+private import internal.IRInternal
+import IRFunction
+private import internal.IRVariableImports as Imports
+import Imports::TempVariableTag
+private import Imports::IRUtilities
+private import Imports::TTempVariableTag
+private import Imports::TIRVariable
+private import Imports::IRType
+
+/**
+ * A variable referenced by the IR for a function.
+ *
+ * This is either a user-declared variable (`IRUserVariable`) or a variable generated by the
+ * AST-to-IR translation (`IRGeneratedVariable`), such as a temporary (`IRTempVariable`).
+ */
+class IRVariable extends TIRVariable {
+  Language::Function func;
+
+  IRVariable() {
+    this = TIRUserVariable(_, _, func) or
+    this = TIRTempVariable(func, _, _, _) or
+    this = TIRStringLiteral(func, _, _, _) or
+    this = TIRDynamicInitializationFlag(func, _, _)
+  }
+
+  /** Gets a textual representation of this element. */
+  string toString() { none() }
+
+  /**
+   * Holds if the value of this variable cannot be changed within a function. Currently used for
+   * string literals, but could also apply to `const` global and static variables.
+   */
+  predicate isReadOnly() { none() }
+
+  /**
+   * Gets the type of the variable.
+   */
+  final Language::Type getType() { this.getLanguageType().hasType(result, false) }
+
+  /**
+   * Gets the language-neutral type of the variable.
+   */
+  final IRType getIRType() { this.getLanguageType().getIRType() = result }
+
+  /**
+   * Gets the language-specific type of the variable.
+   */
+  Language::LanguageType getLanguageType() { none() }
+
+  /**
+   * Gets the AST node that declared this variable, or the AST node that
+   * introduced it as part of the AST-to-IR translation.
+   */
+  Language::AST getAST() { none() }
+
+  /**
+   * Gets an identifier string for the variable. The identifier is unique
+   * within the function.
+   */
+  string getUniqueId() { none() }
+
+  /**
+   * Gets the source location of this variable.
+   */
+  final Language::Location getLocation() { result = this.getAST().getLocation() }
+
+  /**
+   * Gets the IR for the function that references this variable.
+   */
+  final IRFunction getEnclosingIRFunction() { func = result.getFunction() }
+
+  /**
+   * Gets the function that references this variable.
+   */
+  final Language::Function getEnclosingFunction() { func = result }
+}
+
+/**
+ * A variable declared by the user that is referenced by the IR for a function.
+ */
+class IRUserVariable extends IRVariable, TIRUserVariable {
+  Language::Variable var;
+  Language::LanguageType type;
+
+  IRUserVariable() { this = TIRUserVariable(var, type, func) }
+
+  final override string toString() { result = this.getVariable().toString() }
+
+  final override Language::AST getAST() { var = result }
+
+  final override string getUniqueId() {
+    result = this.getVariable().toString() + " " + this.getVariable().getLocation().toString()
+  }
+
+  final override Language::LanguageType getLanguageType() { type = result }
+
+  /**
+   * Gets the user-declared variable that this IR variable represents.
+   */
+  Language::Variable getVariable() { var = result }
+}
+
+/**
+ * A stack-allocated variable, either user-declared or temporary. This covers all parameters,
+ * non-static local variables, and temporary variables.
+ */
+class IRAutomaticVariable extends IRVariable {
+  IRAutomaticVariable() {
+    exists(Language::Variable autoVar |
+      Language::isVariableAutomatic(autoVar) and
+      this = TIRUserVariable(autoVar, _, func)
+    )
+    or
+    this = TIRTempVariable(func, _, _, _)
+  }
+}
+
+/**
+ * A stack-allocated variable declared by the user. This covers all parameters and non-static
+ * local variables.
+ */
+class IRAutomaticUserVariable extends IRUserVariable, IRAutomaticVariable {
+  override Language::AutomaticVariable var;
+
+  final override Language::AutomaticVariable getVariable() { var = result }
+}
+
+/**
+ * A user-declared variable that does not live on the stack. This covers all global variables,
+ * namespace-scope variables, static fields, and static local variables.
+ */
+class IRStaticUserVariable extends IRUserVariable {
+  override Language::StaticVariable var;
+
+  IRStaticUserVariable() { not Language::isVariableAutomatic(var) }
+
+  final override Language::StaticVariable getVariable() { var = result }
+}
+
+/**
+ * A variable that is not user-declared. This includes temporary variables generated as part of IR
+ * construction, string literals, and dynamic initialization flags.
+ */
+class IRGeneratedVariable extends IRVariable {
+  Language::AST ast;
+  Language::LanguageType type;
+
+  IRGeneratedVariable() {
+    this = TIRTempVariable(func, ast, _, type) or
+    this = TIRStringLiteral(func, ast, type, _) or
+    this = TIRDynamicInitializationFlag(func, ast, type)
+  }
+
+  final override Language::LanguageType getLanguageType() { type = result }
+
+  final override Language::AST getAST() { ast = result }
+
+  override string toString() { result = this.getBaseString() + this.getLocationString() }
+
+  override string getUniqueId() { none() }
+
+  /**
+   * INTERNAL: Do not use.
+   *
+   * Gets a `line:column` string for the start of the AST node that generated this variable.
+   *
+   * Intended only for debugging and printing code.
+   */
+  final string getLocationString() {
+    result =
+      ast.getLocation().getStartLine().toString() + ":" +
+        ast.getLocation().getStartColumn().toString()
+  }
+
+  /**
+   * INTERNAL: Do not use.
+   *
+   * Gets the prefix that is concatenated with the location string of the variable to form its
+   * textual representation.
+   *
+   * Intended only for debugging and printing code.
+   */
+  string getBaseString() { none() }
+}
+
+/**
+ * A temporary variable created during IR construction. Typical examples are the variable that
+ * holds the return value of a function and the variable that holds the result of a conditional
+ * operator (`a ? b : c`).
+ */
+class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVariable {
+  TempVariableTag tag;
+
+  IRTempVariable() { this = TIRTempVariable(func, ast, tag, type) }
+
+  final override string getUniqueId() {
+    result = "Temp: " + Construction::getTempVariableUniqueId(this)
+  }
+
+  /**
+   * Gets the "tag" object that distinguishes this temporary variable from the other temporary
+   * variables generated for the same AST.
+   */
+  final TempVariableTag getTag() { tag = result }
+
+  override string getBaseString() { result = "#temp" }
+}
+
+/**
+ * A temporary variable that is generated to hold a function's return value.
+ */
+class IRReturnVariable extends IRTempVariable {
+  IRReturnVariable() { ReturnValueTempVar() = tag }
+
+  final override string toString() { result = "#return" }
+}
+
+/**
+ * A temporary variable that is generated to hold the exception thrown by a `ThrowValue` instruction.
+ */
+class IRThrowVariable extends IRTempVariable {
+  IRThrowVariable() { ThrowTempVar() = tag }
+
+  final override string getBaseString() { result = "#throw" }
+}
+
+/**
+ * A temporary variable that holds the contents of every argument passed to the `...` of a
+ * function that accepts a variable number of arguments.
+ */
+class IREllipsisVariable extends IRTempVariable, IRParameter {
+  IREllipsisVariable() { EllipsisTempVar() = tag }
+
+  final override string toString() { result = "#ellipsis" }
+
+  final override int getIndex() { func.getNumberOfParameters() = result }
+}
+
+/**
+ * A temporary variable that is generated to hold the `this` pointer.
+ */
+class IRThisVariable extends IRTempVariable, IRParameter {
+  IRThisVariable() { ThisTempVar() = tag }
+
+  final override string toString() { result = "#this" }
+
+  final override int getIndex() { result = -1 }
+}
+
+/**
+ * A variable that stands for the contents of a string literal. It behaves much like a read-only
+ * global variable.
+ */
+class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral {
+  Language::StringLiteral literal;
+
+  IRStringLiteral() { this = TIRStringLiteral(func, ast, type, literal) }
+
+  final override predicate isReadOnly() { any() }
+
+  final override string getUniqueId() {
+    result = "String: " + this.getLocationString() + "=" + Language::getStringLiteralText(literal)
+  }
+
+  final override string getBaseString() { result = "#string" }
+
+  /**
+   * Gets the AST node of the string literal that this `IRStringLiteral` represents.
+   */
+  final Language::StringLiteral getLiteral() { literal = result }
+}
+
+/**
+ * A variable generated to record whether a particular non-stack variable has been initialized.
+ * It is used to model the runtime initialization of static local variables in C++, as well as
+ * static fields in C#.
+ */
+class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitializationFlag {
+  Language::Variable var;
+
+  IRDynamicInitializationFlag() {
+    ast = var and this = TIRDynamicInitializationFlag(func, var, type)
+  }
+
+  final override string toString() { result = var.toString() + "#init" }
+
+  /**
+   * Gets the variable whose initialization is guarded by this flag.
+   */
+  final Language::Variable getVariable() { var = result }
+
+  final override string getUniqueId() {
+    result = "Init: " + this.getVariable().toString() + " " + this.getVariable().getLocation().toString()
+  }
+
+  final override string getBaseString() { result = "#init:" + var.toString() + ":" }
+}
+
+/**
+ * An IR variable that behaves like a function parameter. This covers positional parameters as
+ * well as the temporary variables generated for `this` and ellipsis parameters.
+ */
+class IRParameter extends IRAutomaticVariable {
+  IRParameter() {
+    this = TIRTempVariable(_, _, ThisTempVar(), _)
+    or
+    this = TIRTempVariable(_, _, EllipsisTempVar(), _)
+    or
+    this.(IRAutomaticUserVariable).getVariable() instanceof Language::Parameter
+  }
+
+  /**
+   * Gets the zero-based index of this parameter. For the `this` parameter the index is -1.
+   */
+  int getIndex() { none() }
+}
+
+/**
+ * An IR variable that stands for a positional parameter.
+ */
+class IRPositionalParameter extends IRParameter, IRAutomaticUserVariable {
+  final override int getIndex() { this.getVariable().(Language::Parameter).getIndex() = result }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll
new file mode 100644
index 00000000000..6f471d8a7e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll
@@ -0,0 +1,2184 @@
+/**
+ * Provides classes that represent the individual instructions in the IR for a function.
+ */
+
+private import internal.IRInternal
+import IRFunction
+import IRBlock
+import IRVariable
+import Operand
+private import internal.InstructionImports as Imports
+import Imports::EdgeKind
+import Imports::IRType
+import Imports::MemoryAccessKind
+import Imports::Opcode
+private import Imports::OperandTag
+
+/**
+ * Gets an `Instruction` in `irFunc` whose location starts on line `line` of `file`, provided debug
+ * strings are evaluated for that function. Used for assigning register names when printing IR.
+ */
+private Instruction getAnInstructionAtLine(IRFunction irFunc, Language::File file, int line) {
+  exists(IRConfiguration::IRConfiguration cfg |
+    cfg.shouldEvaluateDebugStringsForFunction(irFunc.getFunction())
+  ) and
+  exists(Language::Location loc |
+    loc = result.getLocation() and
+    result.getEnclosingIRFunction() = irFunc and
+    loc.getFile() = file and
+    loc.getStartLine() = line
+  )
+}
+
+/**
+ * A single instruction in the IR.
+ */
+class Instruction extends Construction::TStageInstruction {
+ Instruction() {
+ // The base `TStageInstruction` type is a superset of the actual instructions appearing in this
+ // stage. This call lets the stage filter out the ones that are not reused from raw IR.
+ Construction::hasInstruction(this)
+ }
+
+ /** Gets a textual representation of this element. */
+ final string toString() { result = getOpcode().toString() + ": " + getAST().toString() }
+
+ /**
+ * Gets a string showing the result, opcode, and operands of the instruction, equivalent to what
+ * would be printed by PrintIR.ql. For example:
+ *
+ * `mu0_28(int) = Store r0_26, r0_27`
+ */
+ final string getDumpString() {
+ result = getResultString() + " = " + getOperationString() + " " + getOperandsString()
+ }
+
+ private predicate shouldGenerateDumpStrings() {
+ exists(IRConfiguration::IRConfiguration config |
+ config.shouldEvaluateDebugStringsForFunction(this.getEnclosingFunction())
+ )
+ }
+
+ /**
+ * Gets a string describing the operation of this instruction. This includes
+ * the opcode and the immediate value, if any. For example:
+ *
+ * VariableAddress[x]
+ */
+ final string getOperationString() {
+ shouldGenerateDumpStrings() and
+ if exists(getImmediateString())
+ then result = getOperationPrefix() + getOpcode().toString() + "[" + getImmediateString() + "]"
+ else result = getOperationPrefix() + getOpcode().toString()
+ }
+
+ /**
+ * Gets a string describing the immediate value of this instruction, if any.
+ */
+ string getImmediateString() { none() }
+
+ private string getOperationPrefix() {
+ shouldGenerateDumpStrings() and
+ if this instanceof SideEffectInstruction then result = "^" else result = ""
+ }
+
+ private string getResultPrefix() {
+ shouldGenerateDumpStrings() and
+ if getResultIRType() instanceof IRVoidType
+ then result = "v"
+ else
+ if hasMemoryResult()
+ then if isResultModeled() then result = "m" else result = "mu"
+ else result = "r"
+ }
+
+ /**
+ * Gets the zero-based index of this instruction within its block. This is
+ * used by debugging and printing code only.
+ */
+ int getDisplayIndexInBlock() {
+ shouldGenerateDumpStrings() and
+ exists(IRBlock block |
+ this = block.getInstruction(result)
+ or
+ this =
+ rank[-result - 1](PhiInstruction phiInstr |
+ phiInstr = block.getAPhiInstruction()
+ |
+ phiInstr order by phiInstr.getUniqueId()
+ )
+ )
+ }
+
+ private int getLineRank() {
+ shouldGenerateDumpStrings() and
+ this =
+ rank[result](Instruction instr |
+ instr =
+ getAnInstructionAtLine(getEnclosingIRFunction(), getLocation().getFile(),
+ getLocation().getStartLine())
+ |
+ instr order by instr.getBlock().getDisplayIndex(), instr.getDisplayIndexInBlock()
+ )
+ }
+
+ /**
+ * Gets a human-readable string that uniquely identifies this instruction
+ * within the function. This string is used to refer to this instruction when
+ * printing IR dumps.
+ *
+ * Example: `r1_1`
+ */
+ string getResultId() {
+ shouldGenerateDumpStrings() and
+ result = getResultPrefix() + getAST().getLocation().getStartLine() + "_" + getLineRank()
+ }
+
+ /**
+ * Gets a string describing the result of this instruction, suitable for
+ * display in IR dumps. This consists of the result ID plus the type of the
+ * result.
+ *
+ * Example: `r1_1(int*)`
+ */
+ final string getResultString() {
+ shouldGenerateDumpStrings() and
+ result = getResultId() + "(" + getResultLanguageType().getDumpString() + ")"
+ }
+
+ /**
+ * Gets a string describing the operands of this instruction, suitable for
+ * display in IR dumps.
+ *
+ * Example: `func:r3_4, this:r3_5`
+ */
+ string getOperandsString() {
+ shouldGenerateDumpStrings() and
+ result =
+ concat(Operand operand |
+ operand = getAnOperand()
+ |
+ operand.getDumpString(), ", " order by operand.getDumpSortOrder()
+ )
+ }
+
+ /**
+ * Gets a string identifier for this instruction that is unique among all
+ * instructions in the same function.
+ *
+ * This is used for sorting IR output for tests, and is likely to be
+ * inefficient for any other use.
+ */
+ final string getUniqueId() { result = Construction::getInstructionUniqueId(this) }
+
+ /**
+ * INTERNAL: Do not use.
+ *
+ * Gets two sort keys for this instruction - used to order instructions for printing
+ * in test outputs.
+ */
+ final predicate hasSortKeys(int key1, int key2) {
+ Construction::instructionHasSortKeys(this, key1, key2)
+ }
+
+ /**
+ * Gets the basic block that contains this instruction.
+ */
+ final IRBlock getBlock() { result.getAnInstruction() = this }
+
+ /**
+ * Gets the function that contains this instruction.
+ */
+ final Language::Function getEnclosingFunction() {
+ result = getEnclosingIRFunction().getFunction()
+ }
+
+ /**
+ * Gets the IRFunction object that contains the IR for this instruction.
+ */
+ final IRFunction getEnclosingIRFunction() {
+ result = Construction::getInstructionEnclosingIRFunction(this)
+ }
+
+ /**
+ * Gets the AST that caused this instruction to be generated.
+ */
+ final Language::AST getAST() { result = Construction::getInstructionAST(this) }
+
+ /**
+ * Gets the location of the source code for this instruction.
+ */
+ final Language::Location getLocation() { result = getAST().getLocation() }
+
+ /**
+ * Gets the `Expr` whose result is computed by this instruction, if any. The `Expr` may be a
+ * conversion.
+ */
+ final Language::Expr getConvertedResultExpression() {
+ result = Raw::getInstructionConvertedResultExpression(this)
+ }
+
+ /**
+ * Gets the unconverted form of the `Expr` whose result is computed by this instruction, if any.
+ */
+ final Language::Expr getUnconvertedResultExpression() {
+ result = Raw::getInstructionUnconvertedResultExpression(this)
+ }
+
+ /**
+ * Gets the language-specific type of the result produced by this instruction.
+ *
+ * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a
+ * less complex, language-neutral type system in which all semantically equivalent types share the
+ * same `IRType` instance. For example, in C++, four different `Instruction`s might have three
+ * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`,
+ * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`.
+ */
+ final Language::LanguageType getResultLanguageType() {
+ result = Construction::getInstructionResultType(this)
+ }
+
+ /**
+ * Gets the type of the result produced by this instruction. If the instruction does not produce
+ * a result, its result type will be `IRVoidType`.
+ */
+ cached
+ final IRType getResultIRType() { result = getResultLanguageType().getIRType() }
+
+ /**
+ * Gets the type of the result produced by this instruction. If the
+ * instruction does not produce a result, its result type will be `VoidType`.
+ *
+ * If `isGLValue()` holds, then the result type of this instruction should be
+ * thought of as "pointer to `getResultType()`".
+ */
+ final Language::Type getResultType() {
+ exists(Language::LanguageType resultType |
+ resultType = getResultLanguageType() and
+ (
+ resultType.hasUnspecifiedType(result, _)
+ or
+ not resultType.hasUnspecifiedType(_, _) and result instanceof Language::UnknownType
+ )
+ )
+ }
+
+ /**
+ * Holds if the result produced by this instruction is a glvalue. If this
+ * holds, the result of the instruction represents the address of a location,
+ * and the type of the location is given by `getResultType()`. If this does
+ * not hold, the result of the instruction represents a value whose type is
+ * given by `getResultType()`.
+ *
+ * For example, the statement `y = x;` generates the following IR:
+ * ```
+ * r1_0(glval: int) = VariableAddress[x]
+ * r1_1(int) = Load r1_0, mu0_1
+ * r1_2(glval: int) = VariableAddress[y]
+ * mu1_3(int) = Store r1_2, r1_1
+ * ```
+ *
+ * The result of each `VariableAddress` instruction is a glvalue of type
+ * `int`, representing the address of the corresponding integer variable. The
+ * result of the `Load` instruction is a prvalue of type `int`, representing
+ * the integer value loaded from variable `x`.
+ */
+ final predicate isGLValue() { getResultLanguageType().hasType(_, true) }
+
+ /**
+ * Gets the size of the result produced by this instruction, in bytes. If the
+ * result does not have a known constant size, this predicate does not hold.
+ *
+ * If `this.isGLValue()` holds for this instruction, the value of
+ * `getResultSize()` will always be the size of a pointer.
+ */
+ final int getResultSize() { result = getResultLanguageType().getByteSize() }
+
+ /**
+ * Gets the opcode that specifies the operation performed by this instruction.
+ */
+ pragma[inline]
+ final Opcode getOpcode() { Construction::getInstructionOpcode(result, this) }
+
+ /**
+ * Gets all direct uses of the result of this instruction. The result can be
+ * an `Operand` for which `isDefinitionInexact` holds.
+ */
+ final Operand getAUse() { result.getAnyDef() = this }
+
+ /**
+ * Gets all of this instruction's operands.
+ */
+ final Operand getAnOperand() { result.getUse() = this }
+
+ /**
+ * Holds if this instruction produces a memory result.
+ */
+ final predicate hasMemoryResult() { exists(getResultMemoryAccess()) }
+
+ /**
+ * Gets the kind of memory access performed by this instruction's result.
+ * Holds only for instructions with a memory result.
+ */
+ pragma[inline]
+ final MemoryAccessKind getResultMemoryAccess() { result = getOpcode().getWriteMemoryAccess() }
+
+ /**
+ * Holds if the memory access performed by this instruction's result will not always write to
+ * every bit in the memory location. This is most commonly used for memory accesses that may or
+ * may not actually occur depending on runtime state (for example, the write side effect of an
+ * output parameter that is not written to on all paths), or for accesses where the memory
+ * location is a conservative estimate of the memory that might actually be accessed at runtime
+ * (for example, the global side effects of a function call).
+ */
+ pragma[inline]
+ final predicate hasResultMayMemoryAccess() { getOpcode().hasMayWriteMemoryAccess() }
+
+ /**
+ * Gets the operand that holds the memory address to which this instruction stores its
+ * result, if any. For example, in `m3 = Store r1, r2`, the result of `getResultAddressOperand()`
+ * is `r1`.
+ */
+ final AddressOperand getResultAddressOperand() {
+ getResultMemoryAccess().usesAddressOperand() and
+ result.getUse() = this
+ }
+
+ /**
+ * Gets the instruction that holds the exact memory address to which this instruction stores its
+ * result, if any. For example, in `m3 = Store r1, r2`, the result of `getResultAddress()`
+ * is the instruction that defines `r1`.
+ */
+ final Instruction getResultAddress() { result = getResultAddressOperand().getDef() }
+
+ /**
+ * Holds if the result of this instruction is precisely modeled in SSA. Always
+ * holds for a register result. For a memory result, a modeled result is
+ * connected to its actual uses. An unmodeled result has no uses.
+ *
+ * For example:
+ * ```
+ * int x = 1;
+ * int *p = &x;
+ * int y = *p;
+ * ```
+ * In non-aliased SSA, `x` will not be modeled because it has its address
+ * taken. In that case, `isResultModeled()` would not hold for the result of
+ * the `Store` to `x`.
+ */
+ final predicate isResultModeled() {
+ // Register results are always in SSA form.
+ not hasMemoryResult() or
+ Construction::hasModeledMemoryResult(this)
+ }
+
+ /**
+ * Holds if this is an instruction with a memory result that represents a
+ * conflation of more than one memory allocation.
+ *
+ * This happens in practice when dereferencing a pointer that cannot be
+ * tracked back to a single local allocation. Such memory is instead modeled
+ * as originating on the `AliasedDefinitionInstruction` at the entry of the
+ * function.
+ */
+ final predicate isResultConflated() { Construction::hasConflatedMemoryResult(this) }
+
+ /**
+ * Gets the successor of this instruction along the control flow edge
+ * specified by `kind`.
+ */
+ final Instruction getSuccessor(EdgeKind kind) {
+ result = Construction::getInstructionSuccessor(this, kind)
+ }
+
+ /**
+ * Gets a _back-edge successor_ of this instruction along the control
+ * flow edge specified by `kind`. A back edge in the control-flow graph is
+ * intuitively the edge that goes back around a loop. If all back edges are
+ * removed from the control-flow graph, it becomes acyclic.
+ */
+ final Instruction getBackEdgeSuccessor(EdgeKind kind) {
+ // We don't take these edges from
+ // `Construction::getInstructionBackEdgeSuccessor` since that relation has
+ // not been treated to remove any loops that might be left over due to
+ // flaws in the IR construction or back-edge detection.
+ exists(IRBlock block |
+ block = this.getBlock() and
+ this = block.getLastInstruction() and
+ result = block.getBackEdgeSuccessor(kind).getFirstInstruction()
+ )
+ }
+
+ /**
+ * Gets all direct successors of this instruction.
+ */
+ final Instruction getASuccessor() { result = getSuccessor(_) }
+
+ /**
+ * Gets a predecessor of this instruction such that the predecessor reaches
+ * this instruction along the control flow edge specified by `kind`.
+ */
+ final Instruction getPredecessor(EdgeKind kind) { result.getSuccessor(kind) = this }
+
+ /**
+ * Gets all direct predecessors of this instruction.
+ */
+ final Instruction getAPredecessor() { result = getPredecessor(_) }
+}
+
+/**
+ * An instruction that refers to a variable.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * variable. For example, it is used for `VariableAddress`, which returns the address of a specific
+ * variable, and `InitializeParameter`, which returns the value that was passed to the specified
+ * parameter by the caller. `VariableInstruction` is not used for `Load` or `Store` instructions
+ * that happen to load from or store to a particular variable; in those cases, the memory location
+ * being accessed is specified by the `AddressOperand` on the instruction, which may or may not be
+ * defined by the result of a `VariableAddress` instruction.
+ */
+class VariableInstruction extends Instruction {
+  IRVariable var;
+
+  VariableInstruction() { Raw::getInstructionVariable(this) = var }
+
+  override string getImmediateString() { result = var.toString() }
+
+  /**
+   * Gets the IR variable referenced by this instruction.
+   */
+  final IRVariable getIRVariable() { var = result }
+
+  /**
+   * Gets the AST variable to which this instruction's IR variable refers, if there is one.
+   */
+  final Language::Variable getASTVariable() { var.(IRUserVariable).getVariable() = result }
+}
+
+/**
+ * An instruction that refers to a field of a class, struct, or union.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * field. For example, it is used for `FieldAddress`, which computes the address of a specific
+ * field on an object. `FieldInstruction` is not used for `Load` or `Store` instructions that happen
+ * to load from or store to a particular field; in those cases, the memory location being accessed
+ * is specified by the `AddressOperand` on the instruction, which may or may not be defined by the
+ * result of a `FieldAddress` instruction.
+ */
+class FieldInstruction extends Instruction {
+  Language::Field field;
+
+  FieldInstruction() { Raw::getInstructionField(this) = field }
+
+  final override string getImmediateString() { result = field.toString() }
+
+  /**
+   * Gets the field referenced by this instruction.
+   */
+  final Language::Field getField() { field = result }
+}
+
+/**
+ * An instruction that refers to a function.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * function. For example, it is used for `FunctionAddress`, which returns the address of a specific
+ * function. `FunctionInstruction` is not used for `Call` instructions that happen to call a
+ * particular function; in that case, the function being called is specified by the
+ * `CallTargetOperand` on the instruction, which may or may not be defined by the result of a
+ * `FunctionAddress` instruction.
+ */
+class FunctionInstruction extends Instruction {
+  Language::Function funcSymbol;
+
+  FunctionInstruction() { Raw::getInstructionFunction(this) = funcSymbol }
+
+  final override string getImmediateString() { result = funcSymbol.toString() }
+
+  /**
+   * Gets the function referenced by this instruction.
+   */
+  final Language::Function getFunctionSymbol() { funcSymbol = result }
+}
+
+/**
+ * An instruction that produces a compile-time constant value as its result.
+ */
+class ConstantValueInstruction extends Instruction {
+  string value;
+
+  ConstantValueInstruction() { Raw::getInstructionConstantValue(this) = value }
+
+  final override string getImmediateString() { value = result }
+
+  /**
+   * Gets the constant value produced as this instruction's result.
+   */
+  final string getValue() { value = result }
+}
+
+/**
+ * An instruction that refers to an argument of a `Call` instruction.
+ *
+ * It is used for the side effects of a `Call` instruction that read or write memory
+ * pointed to by one of the arguments of the call.
+ */
+class IndexedInstruction extends Instruction {
+  int index;
+
+  IndexedInstruction() { Raw::getInstructionIndex(this) = index }
+
+  final override string getImmediateString() { result = index.toString() }
+
+  /**
+   * Gets the zero-based index of the argument referenced by this instruction.
+   */
+  final int getIndex() { index = result }
+}
+
+/**
+ * An instruction that represents the entry point to a function.
+ *
+ * Every `IRFunction` has exactly one `EnterFunction` instruction, and execution of the function
+ * starts there. This instruction has no predecessors.
+ */
+class EnterFunctionInstruction extends Instruction {
+  EnterFunctionInstruction() { this.getOpcode() instanceof Opcode::EnterFunction }
+}
+
+/**
+ * An instruction whose result is the address of a variable.
+ *
+ * The variable may be a local variable, parameter, static field, namespace-scope variable, or
+ * global variable. For the address of a non-static field of a class, struct, or union, see
+ * `FieldAddressInstruction`.
+ */
+class VariableAddressInstruction extends VariableInstruction {
+  VariableAddressInstruction() { this.getOpcode() instanceof Opcode::VariableAddress }
+}
+
+/**
+ * An instruction whose result is the address of a function.
+ *
+ * The function may be a non-member function, a static member function, or a non-static member
+ * function.
+ *
+ * The result has an `IRFunctionAddress` type.
+ */
+class FunctionAddressInstruction extends FunctionInstruction {
+  FunctionAddressInstruction() { this.getOpcode() instanceof Opcode::FunctionAddress }
+}
+
+/**
+ * An instruction that initializes a parameter of the enclosing function with the value of the
+ * corresponding argument passed by the caller.
+ *
+ * Every parameter of a function has exactly one `InitializeParameter` instruction that
+ * initializes it.
+ */
+class InitializeParameterInstruction extends VariableInstruction {
+  InitializeParameterInstruction() { this.getOpcode() instanceof Opcode::InitializeParameter }
+
+  /**
+   * Gets the parameter that this instruction initializes.
+   */
+  final Language::Parameter getParameter() { var.(IRUserVariable).getVariable() = result }
+
+  /**
+   * Holds if this instruction initializes the parameter whose index is `index`, or
+   * if `index` is `-1` and this instruction initializes `this`.
+   */
+  pragma[noinline]
+  final predicate hasIndex(int index) {
+    this.getParameter().getIndex() = index and index >= 0
+    or
+    this.getIRVariable() instanceof IRThisVariable and index = -1
+  }
+}
+
+/**
+ * An instruction that initializes all memory that existed before this function was called.
+ *
+ * It supplies a definition for memory that was actually allocated and initialized elsewhere,
+ * and that would therefore not otherwise have a definition in this function.
+ */
+class InitializeNonLocalInstruction extends Instruction {
+  InitializeNonLocalInstruction() { this.getOpcode() instanceof Opcode::InitializeNonLocal }
+}
+
+/**
+ * An instruction that initializes the memory pointed to by a parameter of the enclosing function
+ * with the value that memory had on entry to the function.
+ */
+class InitializeIndirectionInstruction extends VariableInstruction {
+  InitializeIndirectionInstruction() { this.getOpcode() instanceof Opcode::InitializeIndirection }
+
+  /**
+   * Gets the parameter that this instruction initializes.
+   */
+  final Language::Parameter getParameter() { var.(IRUserVariable).getVariable() = result }
+
+  /**
+   * Holds if this instruction initializes the memory pointed to by the parameter whose
+   * index is `index`, or if `index` is `-1` and this instruction initializes the memory
+   * pointed to by `this`.
+   */
+  pragma[noinline]
+  final predicate hasIndex(int index) {
+    this.getParameter().getIndex() = index and index >= 0
+    or
+    this.getIRVariable() instanceof IRThisVariable and index = -1
+  }
+}
+
+/**
+ * An instruction that initializes the enclosing function's `this` pointer parameter.
+ */
+class InitializeThisInstruction extends Instruction {
+  InitializeThisInstruction() { this.getOpcode() instanceof Opcode::InitializeThis }
+}
+
+/**
+ * An instruction that computes the address of a non-static field within an object.
+ */
+class FieldAddressInstruction extends FieldInstruction {
+  FieldAddressInstruction() { this.getOpcode() instanceof Opcode::FieldAddress }
+
+  /**
+   * Gets the operand that supplies the address of the object containing the field.
+   */
+  final UnaryOperand getObjectAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the address of the object containing the field.
+   */
+  final Instruction getObjectAddress() { result = this.getObjectAddressOperand().getDef() }
+}
+
+/**
+ * An instruction that computes the address of the first element of a managed array.
+ *
+ * It is used for element accesses on C# arrays.
+ */
+class ElementsAddressInstruction extends UnaryInstruction {
+  ElementsAddressInstruction() { this.getOpcode() instanceof Opcode::ElementsAddress }
+
+  /** Gets the operand that supplies the address of the array object. */
+  final UnaryOperand getArrayObjectAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the address of the array object.
+   */
+  final Instruction getArrayObjectAddress() {
+    result = this.getArrayObjectAddressOperand().getDef()
+  }
+}
+
+/**
+ * An instruction whose result is well-defined but unknown, and whose side effects
+ * are likewise unknown, possibly including side effects that the SSA graph does
+ * not model conservatively.
+ *
+ * Instructions of this type appear when the AST contains an `ErrorExpr`, which
+ * means the extractor could not understand the expression and therefore emitted
+ * a partial AST. A query that raises an alert when some action is _not_ taken
+ * may want to ignore every function containing an `ErrorInstruction`.
+ */
+class ErrorInstruction extends Instruction {
+  ErrorInstruction() { this.getOpcode() instanceof Opcode::Error }
+}
+
+/**
+ * An instruction whose result is an uninitialized value.
+ *
+ * It supplies the initial definition of a stack variable that has no initializer, or whose
+ * initializer initializes the variable only partially.
+ */
+class UninitializedInstruction extends VariableInstruction {
+  UninitializedInstruction() { this.getOpcode() instanceof Opcode::Uninitialized }
+
+  /**
+   * Gets the variable left uninitialized.
+   */
+  final Language::Variable getLocalVariable() { result = var.(IRUserVariable).getVariable() }
+}
+
+/**
+ * An instruction that does nothing.
+ *
+ * It is typically inserted so that a given AST has at least one instruction associated with it,
+ * even if that AST has no semantic effect.
+ */
+class NoOpInstruction extends Instruction {
+  NoOpInstruction() { this.getOpcode() instanceof Opcode::NoOp }
+}
+
+/**
+ * An instruction that hands control back to the caller of the function.
+ *
+ * It models the normal (non-exception) return from a function, whether caused by an explicit
+ * `return` statement or by control flow reaching the end of the function's body.
+ *
+ * Every function has exactly one `ReturnInstruction`. Each `return` statement in a function is
+ * modeled as an initialization of the temporary variable holding the return value, after which
+ * control flows to the function's common `ReturnInstruction`. Exception: a function that never
+ * returns has no `ReturnInstruction`.
+ *
+ * The `ReturnInstruction` of a function has a control-flow successor edge to a block that
+ * contains the `ExitFunction` instruction of that function.
+ *
+ * There are two different return instructions: `ReturnValueInstruction`, which returns a value
+ * from a non-`void`-returning function, and `ReturnVoidInstruction`, which returns from a
+ * `void`-returning function.
+ */
+class ReturnInstruction extends Instruction {
+  ReturnInstruction() { this.getOpcode() instanceof ReturnOpcode }
+}
+
+/**
+ * An instruction that hands control back to the function's caller without returning a value.
+ */
+class ReturnVoidInstruction extends ReturnInstruction {
+  ReturnVoidInstruction() { this.getOpcode() instanceof Opcode::ReturnVoid }
+}
+
+/**
+ * An instruction that hands control back to the function's caller together with a return value.
+ */
+class ReturnValueInstruction extends ReturnInstruction {
+  ReturnValueInstruction() { this.getOpcode() instanceof Opcode::ReturnValue }
+
+  /**
+   * Gets the operand that supplies the value the function returns.
+   */
+  final LoadOperand getReturnValueOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the value the function returns, if an exact
+   * definition is available.
+   */
+  final Instruction getReturnValue() { result = this.getReturnValueOperand().getDef() }
+}
+
+/**
+ * An instruction that models the use, after the function has returned control to its caller, of
+ * the value a parameter of the function points to.
+ *
+ * The instruction itself does not return control to the caller. It only models the possibility
+ * that a caller uses the memory the parameter points to some time after the call returns. It
+ * is the counterpart of the `InitializeIndirection` instruction, which models the possibility
+ * that the caller initialized the memory the parameter points to before the call.
+ */
+class ReturnIndirectionInstruction extends VariableInstruction {
+  ReturnIndirectionInstruction() { this.getOpcode() instanceof Opcode::ReturnIndirection }
+
+  /**
+   * Gets the operand that supplies the value of the pointed-to memory.
+   */
+  final SideEffectOperand getSideEffectOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the value of the pointed-to memory, if an exact
+   * definition is available.
+   */
+  final Instruction getSideEffect() { result = this.getSideEffectOperand().getDef() }
+
+  /**
+   * Gets the operand that supplies the address of the pointed-to memory.
+   */
+  final AddressOperand getSourceAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the address of the pointed-to memory.
+   */
+  final Instruction getSourceAddress() { result = this.getSourceAddressOperand().getDef() }
+
+  /**
+   * Gets the parameter whose final pointed-to value within the function is read by this
+   * instruction.
+   */
+  final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() }
+
+  /**
+   * Holds if this instruction is the return indirection belonging to `this`.
+   */
+  final predicate isThisIndirection() { var instanceof IRThisVariable }
+
+  /**
+   * Holds if this instruction is the return indirection of the parameter whose index is `index`,
+   * or if `index` is `-1` and this instruction is the return indirection of `this`.
+   */
+  pragma[noinline]
+  final predicate hasIndex(int index) {
+    index = -1 and this.isThisIndirection()
+    or
+    index = this.getParameter().getIndex() and index >= 0
+  }
+}
+
+/**
+ * An instruction whose result is a copy of its operand.
+ *
+ * Several copy instructions exist, distinguished by the source and the destination of the copy
+ * operation:
+ * - `CopyValueInstruction` - Copies a register operand to a register result.
+ * - `LoadInstruction` - Copies a memory operand to a register result.
+ * - `StoreInstruction` - Copies a register operand to a memory result.
+ */
+class CopyInstruction extends Instruction {
+  CopyInstruction() { this.getOpcode() instanceof CopyOpcode }
+
+  /**
+   * Gets the operand that supplies the value to be copied.
+   */
+  Operand getSourceValueOperand() { none() }
+
+  /**
+   * Gets the instruction whose result supplies the value to be copied, if an exact definition
+   * is available.
+   */
+  final Instruction getSourceValue() { result = this.getSourceValueOperand().getDef() }
+}
+
+/**
+ * An instruction whose register result is a copy of its register operand.
+ */
+class CopyValueInstruction extends CopyInstruction, UnaryInstruction {
+  CopyValueInstruction() { this.getOpcode() instanceof Opcode::CopyValue }
+
+  final override UnaryOperand getSourceValueOperand() { result = this.getAnOperand() }
+}
+
+/**
+ * Gets a textual description of the location that the given address operand points to.
+ */
+private string getAddressOperandDescription(AddressOperand operand) {
+  not operand.getDef() instanceof VariableAddressInstruction and
+  result = "?"
+  or
+  result = operand.getDef().(VariableAddressInstruction).getIRVariable().toString()
+}
+
+/**
+ * An instruction whose register result is a copy of its memory operand.
+ */
+class LoadInstruction extends CopyInstruction {
+  LoadInstruction() { this.getOpcode() instanceof Opcode::Load }
+
+  final override string getImmediateString() {
+    result = getAddressOperandDescription(this.getSourceAddressOperand())
+  }
+
+  /**
+   * Gets the operand that supplies the address of the loaded value.
+   */
+  final AddressOperand getSourceAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the address of the loaded value.
+   */
+  final Instruction getSourceAddress() { result = this.getSourceAddressOperand().getDef() }
+
+  final override LoadOperand getSourceValueOperand() { result = this.getAnOperand() }
+}
+
+/**
+ * An instruction whose memory result is a copy of its register operand.
+ */
+class StoreInstruction extends CopyInstruction {
+  StoreInstruction() { this.getOpcode() instanceof Opcode::Store }
+
+  final override string getImmediateString() {
+    result = getAddressOperandDescription(this.getDestinationAddressOperand())
+  }
+
+  /** Gets the operand that supplies the address of the location the value is stored to. */
+  final AddressOperand getDestinationAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the address of the location the value is stored
+   * to, if an exact definition is available.
+   */
+  final Instruction getDestinationAddress() {
+    result = this.getDestinationAddressOperand().getDef()
+  }
+
+  final override StoreValueOperand getSourceValueOperand() { result = this.getAnOperand() }
+}
+
+/**
+ * An instruction that transfers control to one of two successor instructions, chosen by the
+ * value of a Boolean operand.
+ */
+class ConditionalBranchInstruction extends Instruction {
+  ConditionalBranchInstruction() { this.getOpcode() instanceof Opcode::ConditionalBranch }
+
+  /**
+   * Gets the operand that supplies the Boolean condition governing the branch.
+   */
+  final ConditionOperand getConditionOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the Boolean condition governing the branch.
+   */
+  final Instruction getCondition() { result = this.getConditionOperand().getDef() }
+
+  /**
+   * Gets the instruction that receives control when the condition is true.
+   */
+  final Instruction getTrueSuccessor() { result = this.getSuccessor(EdgeKind::trueEdge()) }
+
+  /**
+   * Gets the instruction that receives control when the condition is false.
+   */
+  final Instruction getFalseSuccessor() { result = this.getSuccessor(EdgeKind::falseEdge()) }
+}
+
+/**
+ * An instruction that marks the exit point of a function.
+ *
+ * Every `IRFunction` has exactly one `ExitFunction` instruction, except when the function neither
+ * returns nor throws an exception. Control reaches the `ExitFunction` instruction both from
+ * normal returns (`ReturnVoid`, `ReturnValue`) and from propagated exceptions (`Unwind`). The
+ * instruction has no successors.
+ */
+class ExitFunctionInstruction extends Instruction {
+  ExitFunctionInstruction() { this.getOpcode() instanceof Opcode::ExitFunction }
+}
+
+/**
+ * An instruction that produces a constant value as its result.
+ */
+class ConstantInstruction extends ConstantValueInstruction {
+  ConstantInstruction() { this.getOpcode() instanceof Opcode::Constant }
+}
+
+/**
+ * An instruction that produces a constant value of integer or Boolean type as its result.
+ */
+class IntegerConstantInstruction extends ConstantInstruction {
+  IntegerConstantInstruction() {
+    exists(IRType type | type = this.getResultIRType() |
+      type instanceof IRBooleanType or
+      type instanceof IRIntegerType
+    )
+  }
+}
+
+/**
+ * An instruction that produces a constant value of floating-point type as its result.
+ */
+class FloatConstantInstruction extends ConstantInstruction {
+  FloatConstantInstruction() { this.getResultIRType() instanceof IRFloatingPointType }
+}
+
+/**
+ * An instruction that produces the address of a string literal as its result.
+ */
+class StringConstantInstruction extends VariableInstruction {
+  override IRStringLiteral var;
+
+  final override string getImmediateString() {
+    result = Language::getStringLiteralText(this.getValue())
+  }
+
+  /** Gets the string literal whose address this instruction returns. */
+  final Language::StringLiteral getValue() { result = var.getLiteral() }
+}
+
+/**
+ * An instruction that computes its result from two operands.
+ */
+class BinaryInstruction extends Instruction {
+  BinaryInstruction() { this.getOpcode() instanceof BinaryOpcode }
+
+  /**
+   * Gets the left-hand operand of this binary instruction.
+   */
+  final LeftOperand getLeftOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the right-hand operand of this binary instruction.
+   */
+  final RightOperand getRightOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the value of this binary instruction's left
+   * operand.
+   */
+  final Instruction getLeft() { result = this.getLeftOperand().getDef() }
+
+  /**
+   * Gets the instruction whose result supplies the value of this binary instruction's right
+   * operand.
+   */
+  final Instruction getRight() { result = this.getRightOperand().getDef() }
+
+  /**
+   * Holds if `op1` and `op2` are the two operands of this instruction, in either order.
+   */
+  final predicate hasOperands(Operand op1, Operand op2) {
+    op1 = this.getRightOperand() and op2 = this.getLeftOperand()
+    or
+    op1 = this.getLeftOperand() and op2 = this.getRightOperand()
+  }
+}
+
+/**
+ * An instruction whose result is produced by an arithmetic operation.
+ */
+class ArithmeticInstruction extends Instruction {
+  ArithmeticInstruction() { this.getOpcode() instanceof ArithmeticOpcode }
+}
+
+/**
+ * An `ArithmeticInstruction` that is also a `BinaryInstruction`, taking two numeric operands.
+ */
+class BinaryArithmeticInstruction extends ArithmeticInstruction, BinaryInstruction { }
+
+/**
+ * An `ArithmeticInstruction` that is also a `UnaryInstruction`: its result is computed by
+ * performing an arithmetic operation on a single numeric operand.
+ */
+class UnaryArithmeticInstruction extends ArithmeticInstruction, UnaryInstruction { }
+
+/**
+ * An instruction that adds two numeric operands.
+ *
+ * The operands must share one numeric type, which is also the type of the result. On integer
+ * overflow, the result is the infinite-precision result modulo 2^n. Floating-point addition
+ * follows IEEE-754.
+ */
+class AddInstruction extends BinaryArithmeticInstruction {
+  AddInstruction() { this.getOpcode() instanceof Opcode::Add }
+}
+
+/**
+ * An instruction that subtracts its right numeric operand from its left numeric operand.
+ *
+ * The operands must share one numeric type, which is also the type of the result. On integer
+ * overflow, the result is the infinite-precision result modulo 2^n. Floating-point subtraction
+ * follows IEEE-754.
+ */
+class SubInstruction extends BinaryArithmeticInstruction {
+  SubInstruction() { this.getOpcode() instanceof Opcode::Sub }
+}
+
+/**
+ * An instruction that multiplies two numeric operands.
+ *
+ * The operands must share one numeric type, which is also the type of the result. On integer
+ * overflow, the result is the infinite-precision result modulo 2^n. Floating-point
+ * multiplication follows IEEE-754.
+ */
+class MulInstruction extends BinaryArithmeticInstruction {
+  MulInstruction() { this.getOpcode() instanceof Opcode::Mul }
+}
+
+/**
+ * An instruction that divides its left numeric operand by its right numeric operand.
+ *
+ * The operands must share one numeric type, which is also the type of the result. Division by
+ * zero and integer overflow both have an undefined result. Floating-point division follows
+ * IEEE-754.
+ */
+class DivInstruction extends BinaryArithmeticInstruction {
+  DivInstruction() { this.getOpcode() instanceof Opcode::Div }
+}
+
+/**
+ * An instruction that computes the remainder of dividing two integer operands.
+ *
+ * The operands must share one integer type, which is also the type of the result. Division by
+ * zero and integer overflow both have an undefined result.
+ */
+class RemInstruction extends BinaryArithmeticInstruction {
+  RemInstruction() { this.getOpcode() instanceof Opcode::Rem }
+}
+
+/**
+ * An instruction that computes the negation of a single numeric operand.
+ *
+ * The operand must have a numeric type, which is also the type of the result. Integer negation
+ * uses two's complement and is computed modulo 2^n. Floating-point negation follows
+ * IEEE-754.
+ */
+class NegateInstruction extends UnaryArithmeticInstruction {
+  NegateInstruction() { this.getOpcode() instanceof Opcode::Negate }
+}
+
+/**
+ * An instruction whose result is produced by a bitwise operation.
+ */
+class BitwiseInstruction extends Instruction {
+  BitwiseInstruction() { this.getOpcode() instanceof BitwiseOpcode }
+}
+
+/**
+ * A `BitwiseInstruction` that is also a `BinaryInstruction`, taking two integer operands.
+ */
+class BinaryBitwiseInstruction extends BitwiseInstruction, BinaryInstruction { }
+
+/**
+ * A `BitwiseInstruction` that is also a `UnaryInstruction`, taking a single integer operand.
+ */
+class UnaryBitwiseInstruction extends BitwiseInstruction, UnaryInstruction { }
+
+/**
+ * An instruction whose result is the bitwise "and" of two integer operands.
+ *
+ * The operands must share one integer type, which is also the type of the result.
+ */
+class BitAndInstruction extends BinaryBitwiseInstruction {
+  BitAndInstruction() { this.getOpcode() instanceof Opcode::BitAnd }
+}
+
+/**
+ * An instruction whose result is the bitwise "or" of two integer operands.
+ *
+ * The operands must share one integer type, which is also the type of the result.
+ */
+class BitOrInstruction extends BinaryBitwiseInstruction {
+  BitOrInstruction() { this.getOpcode() instanceof Opcode::BitOr }
+}
+
+/**
+ * An instruction whose result is the bitwise "xor" of two integer operands.
+ *
+ * The operands must share one integer type, which is also the type of the result.
+ */
+class BitXorInstruction extends BinaryBitwiseInstruction {
+  BitXorInstruction() { this.getOpcode() instanceof Opcode::BitXor }
+}
+
+/**
+ * An instruction that shifts its left operand leftwards by the number of bits given by its right
+ * operand.
+ *
+ * Both operands must have an integer type, and the result has the type of the left operand.
+ * Zeros are shifted into the rightmost bits.
+ */
+class ShiftLeftInstruction extends BinaryBitwiseInstruction {
+  ShiftLeftInstruction() { this.getOpcode() instanceof Opcode::ShiftLeft }
+}
+
+/**
+ * An instruction that shifts its left operand rightwards by the number of bits given by its
+ * right operand.
+ *
+ * Both operands must have an integer type, and the result has the type of the left operand.
+ * When the left operand has an unsigned integer type, zeros are shifted into the leftmost bits.
+ * When the left operand has a signed integer type, the leftmost bits are instead filled with
+ * copies of the most significant bit of the left operand.
+ */
+class ShiftRightInstruction extends BinaryBitwiseInstruction {
+  ShiftRightInstruction() { this.getOpcode() instanceof Opcode::ShiftRight }
+}
+
+/**
+ * An instruction that applies a binary arithmetic operation in which at least one operand is a
+ * pointer.
+ */
+class PointerArithmeticInstruction extends BinaryInstruction {
+  int elementSize;
+
+  PointerArithmeticInstruction() {
+    elementSize = Raw::getInstructionElementSize(this) and
+    this.getOpcode() instanceof PointerArithmeticOpcode
+  }
+
+  final override string getImmediateString() { result = elementSize.toString() }
+
+  /**
+   * Gets the size, in bytes, of the elements that the pointer operands point to.
+   *
+   * For the addition of an integer offset to a pointer (`PointerAddInstruction`) and for the
+   * subtraction of an integer offset from a pointer (`PointerSubInstruction`), the integer offset
+   * is multiplied by the element size to obtain the number of bytes actually added to or
+   * subtracted from the pointer address. For the integer difference between two pointers
+   * (`PointerDiffInstruction`), the difference between the two pointer byte addresses is computed
+   * first, and that byte count is then divided by the element size.
+   */
+  final int getElementSize() { result = elementSize }
+}
+
+/**
+ * An instruction that offsets a pointer by adding or subtracting an integer.
+ */
+class PointerOffsetInstruction extends PointerArithmeticInstruction {
+  PointerOffsetInstruction() { this.getOpcode() instanceof PointerOffsetOpcode }
+}
+
+/**
+ * An instruction that computes the sum of a pointer and an integer offset.
+ *
+ * The result is the byte address obtained by adding the value of the right (integer) operand,
+ * multiplied by the element size, to the value of the left (pointer) operand. Pointer overflow
+ * has an undefined result.
+ */
+class PointerAddInstruction extends PointerOffsetInstruction {
+  PointerAddInstruction() { this.getOpcode() instanceof Opcode::PointerAdd }
+}
+
+/**
+ * An instruction that computes the difference of a pointer and an integer offset.
+ *
+ * The result is the byte address obtained by subtracting the value of the right (integer)
+ * operand, multiplied by the element size, from the value of the left (pointer) operand.
+ * Pointer underflow has an undefined result.
+ */
+class PointerSubInstruction extends PointerOffsetInstruction {
+  PointerSubInstruction() { this.getOpcode() instanceof Opcode::PointerSub }
+}
+
+/**
+ * An instruction that subtracts one pointer from another.
+ *
+ * The two operands must share one pointer type, and the result must have an integer type of the
+ * same size as the pointer operands. To compute the result, the byte address in the right
+ * operand is subtracted from the byte address in the left operand, and the difference is divided
+ * by the element size. The result is undefined if the difference in byte addresses is not
+ * divisible by the element size.
+ */
+class PointerDiffInstruction extends PointerArithmeticInstruction {
+  PointerDiffInstruction() { this.getOpcode() instanceof Opcode::PointerDiff }
+}
+
+/**
+ * An instruction that computes its result from a single operand.
+ */
+class UnaryInstruction extends Instruction {
+  UnaryInstruction() { this.getOpcode() instanceof UnaryOpcode }
+
+  /**
+   * Gets the only operand of this instruction.
+   */
+  final UnaryOperand getUnaryOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result supplies the only operand of this instruction.
+   */
+  final Instruction getUnary() { result = this.getUnaryOperand().getDef() }
+}
+
+/**
+ * An instruction that converts its operand's value into a value of a different type.
+ */
+class ConvertInstruction extends UnaryInstruction {
+  ConvertInstruction() { this.getOpcode() instanceof Opcode::Convert }
+}
+
+/**
+ * An instruction that converts the address of a polymorphic object into the address of another
+ * subobject of that same polymorphic object, yielding a null address when the dynamic type of
+ * the object is incompatible with the result type.
+ *
+ * A null operand address yields a null result address.
+ *
+ * It is used to model a C++ `dynamic_cast<>` to a pointer type, as well as a C# `is` or `as`
+ * expression.
+ */
+class CheckedConvertOrNullInstruction extends UnaryInstruction {
+  CheckedConvertOrNullInstruction() { this.getOpcode() instanceof Opcode::CheckedConvertOrNull }
+}
+
+/**
+ * An instruction that converts the address of a polymorphic object into the address of another
+ * subobject of that same polymorphic object, throwing an exception when the dynamic type of the
+ * object is incompatible with the result type.
+ *
+ * A null operand address yields a null result address.
+ *
+ * It is used to model a C++ `dynamic_cast<>` to a reference type, as well as a C# cast
+ * expression.
+ */
+class CheckedConvertOrThrowInstruction extends UnaryInstruction {
+  CheckedConvertOrThrowInstruction() { this.getOpcode() instanceof Opcode::CheckedConvertOrThrow }
+}
+
+/**
+ * An instruction whose result is the address of the complete object containing the subobject
+ * that its operand points to.
+ *
+ * A null operand address yields a null result address.
+ *
+ * It is used to model `dynamic_cast<void*>` in C++, which returns the pointer to the
+ * most-derived object.
+ */
+class CompleteObjectAddressInstruction extends UnaryInstruction {
+  CompleteObjectAddressInstruction() { this.getOpcode() instanceof Opcode::CompleteObjectAddress }
+}
+
+/**
+ * An instruction that converts the address of an object into the address of another subobject
+ * of that same object, performing no type check at runtime.
+ */
+class InheritanceConversionInstruction extends UnaryInstruction {
+  Language::Class baseClass;
+  Language::Class derivedClass;
+
+  InheritanceConversionInstruction() {
+    Raw::getInstructionInheritance(this, baseClass, derivedClass)
+  }
+
+  final override string getImmediateString() {
+    result = derivedClass.toString() + " : " + baseClass.toString()
+  }
+
+  /**
+   * Gets the `ClassDerivation` describing the inheritance relationship between
+   * the base class and the derived class. This predicate has no result when the
+   * conversion targets an indirect virtual base class.
+   */
+  final Language::ClassDerivation getDerivation() {
+    result.getDerivedClass() = derivedClass and result.getBaseClass() = baseClass
+  }
+
+  /**
+   * Gets the base class of the conversion, which is either a direct base
+   * class of the derived class or a virtual base class of the derived
+   * class.
+   */
+  final Language::Class getBaseClass() { result = baseClass }
+
+  /**
+   * Gets the derived class of the conversion.
+   */
+  final Language::Class getDerivedClass() { result = derivedClass }
+}
+
+/**
+ * An instruction that converts the address of a derived class into the address of a base class.
+ */
+class ConvertToBaseInstruction extends InheritanceConversionInstruction {
+  ConvertToBaseInstruction() { this.getOpcode() instanceof ConvertToBaseOpcode }
+}
+
+/**
+ * An instruction that converts the address of a derived class into the address of one of its
+ * direct non-virtual base classes. A null operand address yields a null result address.
+ */
+class ConvertToNonVirtualBaseInstruction extends ConvertToBaseInstruction {
+  ConvertToNonVirtualBaseInstruction() {
+    this.getOpcode() instanceof Opcode::ConvertToNonVirtualBase
+  }
+}
+
+/**
+ * An instruction that converts the address of a derived class into the address of one of its
+ * virtual base classes.
+ *
+ * A null operand address yields a null result address.
+ */
+class ConvertToVirtualBaseInstruction extends ConvertToBaseInstruction {
+  ConvertToVirtualBaseInstruction() { this.getOpcode() instanceof Opcode::ConvertToVirtualBase }
+}
+
+/**
+ * An instruction that converts the address of a base class into the address of a direct
+ * non-virtual derived class.
+ *
+ * A null operand address yields a null result address.
+ */
+class ConvertToDerivedInstruction extends InheritanceConversionInstruction {
+  ConvertToDerivedInstruction() { this.getOpcode() instanceof Opcode::ConvertToDerived }
+}
+
+/**
+ * An instruction whose result is the bitwise complement of its operand.
+ *
+ * The operand must have an integer type, which is also the type of the result.
+ */
+class BitComplementInstruction extends UnaryBitwiseInstruction {
+  BitComplementInstruction() { this.getOpcode() instanceof Opcode::BitComplement }
+}
+
+/**
+ * An instruction whose result is the logical complement of its operand.
+ *
+ * The operand must have a Boolean type, which is also the type of the result.
+ */
+class LogicalNotInstruction extends UnaryInstruction {
+  LogicalNotInstruction() { this.getOpcode() instanceof Opcode::LogicalNot }
+}
+
+/**
+ * An instruction that performs a comparison of two numeric operands.
+ */
+class CompareInstruction extends BinaryInstruction {
+  CompareInstruction() { this.getOpcode() instanceof CompareOpcode }
+}
+
+/**
+ * An instruction whose result is `true` if its operands are equal.
+ *
+ * The operands must share one numeric or address type, and the result must have a Boolean type.
+ * The result is `true` if `left == right`, and `false` if `left != right` or if the two operands
+ * are unordered. Floating-point comparison follows IEEE-754.
+ */
+class CompareEQInstruction extends CompareInstruction {
+  CompareEQInstruction() { this.getOpcode() instanceof Opcode::CompareEQ }
+}
+
+/**
+ * An instruction whose result is `true` if its operands are not equal.
+ *
+ * The operands must share one numeric or address type, and the result must have a Boolean type.
+ * The result is `true` if `left != right` or if the two operands are unordered, and `false` if
+ * `left == right`. Floating-point comparison follows IEEE-754.
+ */
+class CompareNEInstruction extends CompareInstruction {
+  CompareNEInstruction() { this.getOpcode() instanceof Opcode::CompareNE }
+}
+
+/**
+ * An instruction that compares two values relationally, for example with `<` or `>=`.
+ */
+class RelationalInstruction extends CompareInstruction {
+  RelationalInstruction() { this.getOpcode() instanceof RelationalOpcode }
+
+  /**
+   * Gets the operand on the "greater" (or "greater-or-equal") side of
+   * this relational instruction, i.e. the side that is the larger one
+   * whenever the instruction as a whole evaluates to `true`; for
+   * `x <= 20` this is the `20`, and for `y > 0` it is `y`.
+   */
+  Instruction getGreater() { none() }
+
+  /**
+   * Gets the operand on the "lesser" (or "lesser-or-equal") side of
+   * this relational instruction, i.e. the side that is the smaller one
+   * whenever the instruction as a whole evaluates to `true`; for
+   * `x <= 20` this is `x`, and for `y > 0` it is the `0`.
+   */
+  Instruction getLesser() { none() }
+
+  /**
+   * Holds if this relational instruction is strict, i.e. it is not an "or-equal" instruction.
+   */
+  predicate isStrict() { none() }
+}
+
+/**
+ * An instruction whose result is `true` if its left operand is less than its right operand.
+ *
+ * The operands must share one numeric or address type, and the result must have a Boolean type.
+ * The result is `true` if `left < right`, and `false` if `left >= right` or if the two operands
+ * are unordered. Floating-point comparison follows IEEE-754.
+ */
+class CompareLTInstruction extends RelationalInstruction {
+  CompareLTInstruction() { this.getOpcode() instanceof Opcode::CompareLT }
+
+  override Instruction getGreater() { result = this.getRight() }
+
+  override Instruction getLesser() { result = this.getLeft() }
+
+  override predicate isStrict() { any() }
+}
+
+/**
+ * An instruction whose result is `true` if its left operand is greater than its right operand.
+ *
+ * The operands must share one numeric or address type, and the result must have a Boolean type.
+ * The result is `true` if `left > right`, and `false` if `left <= right` or if the two operands
+ * are unordered. Floating-point comparison follows IEEE-754.
+ */
+class CompareGTInstruction extends RelationalInstruction {
+  CompareGTInstruction() { this.getOpcode() instanceof Opcode::CompareGT }
+
+  override Instruction getGreater() { result = this.getLeft() }
+
+  override Instruction getLesser() { result = this.getRight() }
+
+  override predicate isStrict() { any() }
+}
+
+/**
+ * An instruction whose result is `true` if its left operand is less than or equal to its
+ * right operand.
+ *
+ * Both operands must have the same numeric or address type. The result must have a Boolean type.
+ * The result is `true` if `left <= right`, and `false` if `left > right` or if the two operands
+ * are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareLEInstruction extends RelationalInstruction {
+  CompareLEInstruction() { this.getOpcode() instanceof Opcode::CompareLE }
+
+  override Instruction getGreater() { result = this.getRight() }
+
+  override Instruction getLesser() { result = this.getLeft() }
+
+  override predicate isStrict() { none() }
+}
+
+/**
+ * An instruction whose result is `true` if its left operand is greater than or equal to its
+ * right operand.
+ *
+ * Both operands must have the same numeric or address type. The result must have a Boolean type.
+ * The result is `true` if `left >= right`, and `false` if `left < right` or if the two operands
+ * are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareGEInstruction extends RelationalInstruction {
+  CompareGEInstruction() { this.getOpcode() instanceof Opcode::CompareGE }
+
+  override Instruction getGreater() { result = this.getLeft() }
+
+  override Instruction getLesser() { result = this.getRight() }
+
+  override predicate isStrict() { none() }
+}
+
+/**
+ * An instruction that branches to one of several successor instructions based on the value of an
+ * integer operand.
+ *
+ * This instruction will have zero or more successors whose edge kind is `CaseEdge`, each
+ * representing the branch that will be taken if the controlling expression is within the range
+ * specified for that case edge. The range of a case edge must be disjoint from the range of each
+ * other case edge.
+ *
+ * The instruction may optionally have a successor edge whose edge kind is `DefaultEdge`,
+ * representing the branch that will be taken if the controlling expression is not within the range
+ * of any case edge.
+ */
+class SwitchInstruction extends Instruction {
+  SwitchInstruction() { this.getOpcode() instanceof Opcode::Switch }
+
+  /** Gets the operand that provides the integer value controlling the switch. */
+  final ConditionOperand getExpressionOperand() { result = this.getAnOperand() }
+
+  /** Gets the instruction whose result provides the integer value controlling the switch. */
+  final Instruction getExpression() { result = this.getExpressionOperand().getDef() }
+
+  /** Gets a successor instruction along one of the case edges of the switch. */
+  final Instruction getACaseSuccessor() { result = this.getSuccessor(any(CaseEdge edge)) }
+
+  /** Gets the successor instruction along the default edge of the switch, if any. */
+  final Instruction getDefaultSuccessor() { result = this.getSuccessor(EdgeKind::defaultEdge()) }
+}
+
+/**
+ * An instruction that performs a function call.
+ */
+class CallInstruction extends Instruction {
+  CallInstruction() { this.getOpcode() instanceof Opcode::Call }
+
+  final override string getImmediateString() {
+    if exists(this.getStaticCallTarget())
+    then result = this.getStaticCallTarget().toString()
+    else result = "?"
+  }
+
+  /**
+   * Gets the operand that specifies the target function of the call.
+   */
+  final CallTargetOperand getCallTargetOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the `Instruction` that computes the target function of the call. This is usually a
+   * `FunctionAddress` instruction, but can also be an arbitrary instruction that produces a
+   * function pointer.
+   */
+  final Instruction getCallTarget() { result = this.getCallTargetOperand().getDef() }
+
+  /**
+   * Gets an argument operand of the call, including the `this` pointer operand, if any.
+   */
+  final ArgumentOperand getAnArgumentOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the `Function` that the call targets, if this is statically known.
+   */
+  final Language::Function getStaticCallTarget() {
+    result = this.getCallTarget().(FunctionAddressInstruction).getFunctionSymbol()
+  }
+
+  /**
+   * Gets an argument of the call, including the `this` pointer, if any.
+   */
+  final Instruction getAnArgument() { result = this.getAnArgumentOperand().getDef() }
+
+  /**
+   * Gets the `this` pointer argument operand of the call, if any.
+   */
+  final ThisArgumentOperand getThisArgumentOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the `this` pointer argument of the call, if any.
+   */
+  final Instruction getThisArgument() { result = this.getThisArgumentOperand().getDef() }
+
+  /**
+   * Gets the positional argument operand at the specified index.
+   */
+  pragma[noinline]
+  final PositionalArgumentOperand getPositionalArgumentOperand(int index) {
+    result = this.getAnOperand() and
+    index = result.getIndex()
+  }
+
+  /**
+   * Gets the positional argument at the specified index.
+   */
+  pragma[noinline]
+  final Instruction getPositionalArgument(int index) {
+    result = this.getPositionalArgumentOperand(index).getDef()
+  }
+
+  /**
+   * Gets the argument operand at the specified index, or the `this` operand if `index` is `-1`.
+   */
+  pragma[noinline]
+  final ArgumentOperand getArgumentOperand(int index) {
+    index >= 0 and result = this.getPositionalArgumentOperand(index)
+    or
+    index = -1 and result = this.getThisArgumentOperand()
+  }
+
+  /**
+   * Gets the argument at the specified index, or the `this` argument if `index` is `-1`.
+   */
+  pragma[noinline]
+  final Instruction getArgument(int index) { result = this.getArgumentOperand(index).getDef() }
+
+  /**
+   * Gets the number of arguments of the call, including the `this` pointer, if any.
+   */
+  final int getNumberOfArguments() { result = count(this.getAnArgumentOperand()) }
+
+  /**
+   * Gets a side effect instruction of this call for the argument at the specified index, or
+   * for `this` if `index` is `-1`.
+   *
+   * This helper predicate makes it easy to join on both of these columns at once, avoiding
+   * pathological join orders in case the argument index should get joined first.
+   */
+  pragma[noinline]
+  final SideEffectInstruction getAParameterSideEffect(int index) {
+    result.getPrimaryInstruction() = this and
+    result.(IndexedInstruction).getIndex() = index
+  }
+}
+
+/**
+ * An instruction that models a side effect of a function call.
+ */
+class SideEffectInstruction extends Instruction {
+  SideEffectInstruction() { this.getOpcode() instanceof SideEffectOpcode }
+
+  /**
+   * Gets the instruction whose execution causes this side effect.
+   */
+  final Instruction getPrimaryInstruction() {
+    Construction::getPrimaryInstructionForSideEffect(this) = result
+  }
+}
+
+/**
+ * An instruction that models the side effect of a function call on any memory that might be
+ * accessed by that call.
+ */
+class CallSideEffectInstruction extends SideEffectInstruction {
+  CallSideEffectInstruction() { this.getOpcode() instanceof Opcode::CallSideEffect }
+}
+
+/**
+ * An instruction that models the side effect of a function call on any memory
+ * that might be read by that call.
+ *
+ * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the
+ * call target cannot write to escaped memory.
+ */
+class CallReadSideEffectInstruction extends SideEffectInstruction {
+  CallReadSideEffectInstruction() { this.getOpcode() instanceof Opcode::CallReadSideEffect }
+}
+
+/**
+ * An instruction that models a read side effect of a function call on a
+ * specific parameter.
+ */
+class ReadSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
+  ReadSideEffectInstruction() { this.getOpcode() instanceof ReadSideEffectOpcode }
+
+  /** Gets the operand for the value that will be read by this instruction, if known. */
+  final SideEffectOperand getSideEffectOperand() { result = this.getAnOperand() }
+
+  /** Gets the instruction defining the value that will be read by this instruction, if known. */
+  final Instruction getSideEffect() { result = this.getSideEffectOperand().getDef() }
+
+  /** Gets the operand for the address from which this instruction may read. */
+  final AddressOperand getArgumentOperand() { result = this.getAnOperand() }
+
+  /** Gets the instruction defining the address from which this instruction may read. */
+  final Instruction getArgumentDef() { result = this.getArgumentOperand().getDef() }
+}
+
+/**
+ * An instruction that models the read of an indirect parameter within a function call.
+ */
+class IndirectReadSideEffectInstruction extends ReadSideEffectInstruction {
+  IndirectReadSideEffectInstruction() { this.getOpcode() instanceof Opcode::IndirectReadSideEffect }
+}
+
+/**
+ * An instruction that models the read of an indirect buffer parameter within a function call.
+ */
+class BufferReadSideEffectInstruction extends ReadSideEffectInstruction {
+  BufferReadSideEffectInstruction() { this.getOpcode() instanceof Opcode::BufferReadSideEffect }
+}
+
+/**
+ * An instruction that models a call's read of an indirect buffer parameter with a size operand.
+ */
+class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction {
+  SizedBufferReadSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::SizedBufferReadSideEffect
+  }
+
+  /**
+   * Gets the operand that holds the number of bytes read from the buffer.
+   */
+  final BufferSizeOperand getBufferSizeOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the number of bytes read from the buffer.
+   */
+  final Instruction getBufferSize() { result = this.getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction that models a write side effect of a function call on a
+ * specific parameter.
+ */
+class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
+  WriteSideEffectInstruction() { this.getOpcode() instanceof WriteSideEffectOpcode }
+
+  /**
+   * Gets the operand that holds the address of the memory to be written.
+   */
+  final AddressOperand getDestinationAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the address of the memory to be written.
+   */
+  Instruction getDestinationAddress() { result = this.getDestinationAddressOperand().getDef() }
+}
+
+/**
+ * An instruction that models the write of an indirect parameter within a function call.
+ */
+class IndirectMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  IndirectMustWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::IndirectMustWriteSideEffect
+  }
+}
+
+/**
+ * An instruction that models the write of an indirect buffer parameter within a function call.
+ * The entire buffer is overwritten.
+ */
+class BufferMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  BufferMustWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::BufferMustWriteSideEffect
+  }
+}
+
+/**
+ * An instruction that models a call's write of an indirect buffer parameter with a size operand.
+ * The entire buffer is overwritten.
+ */
+class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  SizedBufferMustWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect
+  }
+
+  /**
+   * Gets the operand that holds the number of bytes written to the buffer.
+   */
+  final BufferSizeOperand getBufferSizeOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the number of bytes written to the buffer.
+   */
+  final Instruction getBufferSize() { result = this.getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction representing the potential write of an indirect parameter within a function call.
+ *
+ * Unlike `IndirectMustWriteSideEffectInstruction`, the location might not be completely
+ * overwritten.
+ */
+class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  IndirectMayWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::IndirectMayWriteSideEffect
+  }
+}
+
+/**
+ * An instruction representing the write of an indirect buffer parameter within a function call.
+ *
+ * Unlike `BufferMustWriteSideEffectInstruction`, the buffer might not be completely overwritten.
+ */
+class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  BufferMayWriteSideEffectInstruction() { getOpcode() instanceof Opcode::BufferMayWriteSideEffect }
+}
+
+/**
+ * An instruction representing a call's write of an indirect buffer parameter with a size operand.
+ *
+ * Unlike `SizedBufferMustWriteSideEffectInstruction`, the buffer might not be fully overwritten.
+ */
+class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  SizedBufferMayWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect
+  }
+
+  /**
+   * Gets the operand that holds the number of bytes written to the buffer.
+   */
+  final BufferSizeOperand getBufferSizeOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the number of bytes written to the buffer.
+   */
+  final Instruction getBufferSize() { result = this.getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction that models the initial value of newly allocated memory, such as the result of
+ * a call to `malloc`.
+ */
+class InitializeDynamicAllocationInstruction extends SideEffectInstruction {
+  InitializeDynamicAllocationInstruction() {
+    this.getOpcode() instanceof Opcode::InitializeDynamicAllocation
+  }
+
+  /**
+   * Gets the operand that represents the address of the allocation this instruction is initializing.
+   */
+  final AddressOperand getAllocationAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction defining the address of the allocation this instruction is initializing.
+   */
+  final Instruction getAllocationAddress() { result = this.getAllocationAddressOperand().getDef() }
+}
+
+/**
+ * An instruction that models a GNU or MSVC inline assembly statement.
+ */
+class InlineAsmInstruction extends Instruction {
+  InlineAsmInstruction() { this.getOpcode() instanceof Opcode::InlineAsm }
+}
+
+/**
+ * An instruction that throws an exception (its opcode is a `ThrowOpcode`).
+ */
+class ThrowInstruction extends Instruction {
+  ThrowInstruction() { this.getOpcode() instanceof ThrowOpcode }
+}
+
+/**
+ * An instruction that throws a new exception.
+ */
+class ThrowValueInstruction extends ThrowInstruction {
+  ThrowValueInstruction() { this.getOpcode() instanceof Opcode::ThrowValue }
+
+  /**
+   * Gets the address operand of the exception thrown by this instruction.
+   */
+  final AddressOperand getExceptionAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction defining the address of the exception thrown by this instruction.
+   */
+  final Instruction getExceptionAddress() { result = this.getExceptionAddressOperand().getDef() }
+
+  /**
+   * Gets the load operand for the exception thrown by this instruction.
+   */
+  final LoadOperand getExceptionOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction defining the exception thrown by this instruction.
+   */
+  final Instruction getException() { result = this.getExceptionOperand().getDef() }
+}
+
+/**
+ * An instruction that rethrows the current exception.
+ */
+class ReThrowInstruction extends ThrowInstruction {
+  ReThrowInstruction() { this.getOpcode() instanceof Opcode::ReThrow }
+}
+
+/**
+ * An instruction that leaves the current function by propagating an exception.
+ */
+class UnwindInstruction extends Instruction {
+  UnwindInstruction() { this.getOpcode() instanceof Opcode::Unwind }
+}
+
+/**
+ * An instruction that begins a `catch` handler.
+ */
+class CatchInstruction extends Instruction {
+  CatchInstruction() { this.getOpcode() instanceof CatchOpcode }
+}
+
+/**
+ * An instruction that catches exceptions of one specific type.
+ */
+class CatchByTypeInstruction extends CatchInstruction {
+  Language::LanguageType exceptionType;
+
+  CatchByTypeInstruction() {
+    this.getOpcode() instanceof Opcode::CatchByType and
+    Raw::getInstructionExceptionType(this) = exceptionType
+  }
+
+  final override string getImmediateString() { result = this.getExceptionType().toString() }
+
+  /**
+   * Gets the type of exception to be caught.
+   */
+  final Language::LanguageType getExceptionType() { result = exceptionType }
+}
+
+/**
+ * An instruction that catches an exception of any type.
+ */
+class CatchAnyInstruction extends CatchInstruction {
+  CatchAnyInstruction() { this.getOpcode() instanceof Opcode::CatchAny }
+}
+
+/**
+ * An instruction that provides the initial definition of all escaped memory.
+ */
+class AliasedDefinitionInstruction extends Instruction {
+  AliasedDefinitionInstruction() { this.getOpcode() instanceof Opcode::AliasedDefinition }
+}
+
+/**
+ * An instruction that uses all escaped memory on exit from the function.
+ */
+class AliasedUseInstruction extends Instruction {
+  AliasedUseInstruction() { this.getOpcode() instanceof Opcode::AliasedUse }
+}
+
+/**
+ * An instruction that selects one of several input values depending on control flow.
+ *
+ * A `PhiInstruction` is inserted at the beginning of a block whenever two different definitions of
+ * the same variable reach that block. The `PhiInstruction` will have one operand corresponding to
+ * each control flow predecessor of the block, with that operand representing the version of the
+ * variable that flows from that predecessor. The result value of the `PhiInstruction` will be
+ * a copy of whichever operand corresponds to the actual predecessor that entered the block at
+ * runtime.
+ */
+class PhiInstruction extends Instruction {
+  PhiInstruction() { this.getOpcode() instanceof Opcode::Phi }
+
+  /**
+   * Gets one of this instruction's `PhiInputOperand`s; each represents the value that flows from one predecessor block.
+   */
+  final PhiInputOperand getAnInputOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets an instruction that defines the input to one of the operands of this
+   * instruction. It's possible for more than one operand to have the same
+   * defining instruction, so this predicate will have the same number of
+   * results as `getAnInputOperand()` or fewer.
+   */
+  pragma[noinline]
+  final Instruction getAnInput() { result = this.getAnInputOperand().getDef() }
+
+  /**
+   * Gets the input operand representing the value that flows from the specified predecessor block.
+   */
+  final PhiInputOperand getInputOperand(IRBlock predecessorBlock) {
+    result = this.getAnInputOperand() and
+    predecessorBlock = result.getPredecessorBlock()
+  }
+}
+
+/**
+ * An instruction representing the effect that a write to memory may have on potential aliases of
+ * that memory.
+ *
+ * A `ChiInstruction` is inserted immediately after an instruction that writes to memory. The
+ * `ChiInstruction` has two operands. The first operand, given by `getTotalOperand()`, represents
+ * the previous state of all of the memory that might be aliased by the memory write. The second
+ * operand, given by `getPartialOperand()`, represents the memory that was actually modified by the
+ * memory write. The result of the `ChiInstruction` represents the same memory as
+ * `getTotalOperand()`, updated to include the changes due to the value that was actually stored by
+ * the memory write.
+ *
+ * As an example, suppose that variables `p` and `q` are pointers that may or may not point to the
+ * same memory:
+ * ```
+ * *p = 5;
+ * x = *q;
+ * ```
+ *
+ * The IR would look like:
+ * ```
+ * r1_1  = VariableAddress[p]
+ * r1_2  = Load r1_1, m0_0  // Load the value of `p`
+ * r1_3  = Constant[5]
+ * m1_4  = Store r1_2, r1_3 // Store to `*p`
+ * m1_5  = ^Chi m0_1, m1_4  // Side effect of the previous Store on aliased memory
+ * r1_6  = VariableAddress[x]
+ * r1_7  = VariableAddress[q]
+ * r1_8  = Load r1_7, m0_2  // Load the value of `q`
+ * r1_9  = Load r1_8, m1_5  // Load the value of `*q`
+ * m1_10 = Store r1_6, r1_9 // Store to x
+ * ```
+ *
+ * Note the `Chi` instruction after the store to `*p`. This indicates that the previous contents of
+ * aliased memory (`m0_1`) are merged with the new value written by the store (`m1_4`), producing a
+ * new version of aliased memory (`m1_5`). On the subsequent load from `*q`, the source operand of
+ * `*q` is `m1_5`, indicating that the store to `*p` may (or may not) have updated the memory
+ * pointed to by `q`.
+ *
+ * For more information about how `Chi` instructions are used to model memory side effects, see
+ * https://link.springer.com/content/pdf/10.1007%2F3-540-61053-7_66.pdf.
+ */
+class ChiInstruction extends Instruction {
+  ChiInstruction() { this.getOpcode() instanceof Opcode::Chi }
+
+  /**
+   * Gets the operand that represents the previous state of all memory that might be aliased by the
+   * memory write.
+   */
+  final ChiTotalOperand getTotalOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction that defines the total operand, that is, the previous state of all
+   * memory that might be aliased by the memory write.
+   */
+  final Instruction getTotal() { result = this.getTotalOperand().getDef() }
+
+  /**
+   * Gets the operand that represents the new value written by the memory write.
+   */
+  final ChiPartialOperand getPartialOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction that defines the partial operand, that is, the new value written.
+   */
+  final Instruction getPartial() { result = this.getPartialOperand().getDef() }
+
+  /**
+   * Holds if the partial operand of this `ChiInstruction` updates the bit range `[startBit, endBit)`, relative to the start address of the total operand.
+   */
+  final predicate getUpdatedInterval(int startBit, int endBit) {
+    Construction::getIntervalUpdatedByChi(this, startBit, endBit)
+  }
+
+  /**
+   * Holds if the `ChiPartialOperand` totally, but not exactly, overlaps with the `ChiTotalOperand`.
+   * This means that the `ChiPartialOperand` will not overwrite the entire memory associated with
+   * the `ChiTotalOperand`.
+   */
+  final predicate isPartialUpdate() { Construction::chiOnlyPartiallyUpdatesLocation(this) }
+}
+
+/**
+ * An instruction that stands for unreachable code.
+ *
+ * This instruction is inserted in place of the original target instruction of a `ConditionalBranch`
+ * or `Switch` instruction where that particular edge is infeasible.
+ */
+class UnreachedInstruction extends Instruction {
+  UnreachedInstruction() { this.getOpcode() instanceof Opcode::Unreached }
+}
+
+/**
+ * An instruction that performs a built-in operation.
+ *
+ * This is used to represent a variety of intrinsic operations provided by the compiler
+ * implementation, such as vector arithmetic.
+ */
+class BuiltInOperationInstruction extends Instruction {
+  Language::BuiltInOperation operation;
+
+  BuiltInOperationInstruction() {
+    this.getOpcode() instanceof BuiltInOperationOpcode and
+    Raw::getInstructionBuiltInOperation(this) = operation
+  }
+
+  /**
+   * Gets the language-specific `BuiltInOperation` object that specifies the operation that is
+   * performed by this instruction.
+   */
+  final Language::BuiltInOperation getBuiltInOperation() { result = operation }
+}
+
+/**
+ * An instruction that performs a built-in operation which has no opcode of its own. The
+ * actual operation is specified by the `getBuiltInOperation()` predicate.
+ */
+class BuiltInInstruction extends BuiltInOperationInstruction {
+  BuiltInInstruction() { this.getOpcode() instanceof Opcode::BuiltIn }
+
+  final override string getImmediateString() { result = this.getBuiltInOperation().toString() }
+}
+
+/**
+ * An instruction that yields a `va_list` for accessing the arguments passed to the `...` parameter.
+ *
+ * The operand specifies the address of the `IREllipsisVariable` used to represent the `...`
+ * parameter. The result is a `va_list` that initially refers to the first argument that was passed
+ * to the `...` parameter.
+ */
+class VarArgsStartInstruction extends UnaryInstruction {
+  VarArgsStartInstruction() { this.getOpcode() instanceof Opcode::VarArgsStart }
+}
+
+/**
+ * An instruction that cleans up a `va_list` once it is no longer in use.
+ *
+ * The operand specifies the address of the `va_list` to clean up. This instruction does not return
+ * a result.
+ */
+class VarArgsEndInstruction extends UnaryInstruction {
+  VarArgsEndInstruction() { this.getOpcode() instanceof Opcode::VarArgsEnd }
+}
+
+/**
+ * An instruction that yields the address of the argument currently pointed to by a `va_list`.
+ *
+ * The operand is the `va_list` that points to the argument. The result is the address of the
+ * argument.
+ */
+class VarArgInstruction extends UnaryInstruction {
+  VarArgInstruction() { this.getOpcode() instanceof Opcode::VarArg }
+}
+
+/**
+ * An instruction that advances a `va_list` to point to the next argument that was passed to the
+ * `...` parameter.
+ *
+ * The operand is the current `va_list`. The result is an updated `va_list` that points to the next
+ * argument of the `...` parameter.
+ */
+class NextVarArgInstruction extends UnaryInstruction {
+  NextVarArgInstruction() { this.getOpcode() instanceof Opcode::NextVarArg }
+}
+
+/**
+ * An instruction that allocates a new object on the managed heap.
+ *
+ * This instruction is used to represent the allocation of a new object in C# using the `new`
+ * expression. This instruction does not invoke a constructor for the object. Instead, there will be
+ * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the
+ * result of the `NewObj` as the `this` argument.
+ *
+ * The result is the address of the newly allocated object.
+ */
+class NewObjInstruction extends Instruction {
+  NewObjInstruction() { this.getOpcode() instanceof Opcode::NewObj }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll
new file mode 100644
index 00000000000..d7cf89ca9aa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll
@@ -0,0 +1,484 @@
+/**
+ * Provides classes that represent the input values of IR instructions.
+ */
+
+private import internal.IRInternal
+private import Instruction
+private import IRBlock
+private import internal.OperandImports as Imports
+private import Imports::MemoryAccessKind
+private import Imports::IRType
+private import Imports::Overlap
+private import Imports::OperandTag
+private import Imports::TOperand
+private import internal.OperandInternal
+
+/**
+ * An operand of an `Instruction` in this stage of the IR. Implemented as the union of the four
+ * `TOperand` branches used in this stage: register, non-SSA memory, `Phi`, and `Chi` operands.
+ */
+private class TStageOperand =
+ TRegisterOperand or TNonSSAMemoryOperand or TPhiOperand or TChiOperand;
+
+/**
+ * An operand of an `Instruction`. The operand represents a use of the result of one instruction
+ * (the defining instruction) in another instruction (the use instruction).
+ */
+class Operand extends TStageOperand {
+  cached
+  Operand() {
+    // Exclude operands that refer to instructions from earlier stages that are unreachable here
+    exists(Instruction user, Instruction source | this = registerOperand(user, _, source))
+    or
+    exists(Instruction user | this = nonSSAMemoryOperand(user, _))
+    or
+    exists(Instruction user, Instruction source, IRBlock fromBlock |
+      this = phiOperand(user, source, fromBlock, _) or
+      this = reusedPhiOperand(user, source, fromBlock, _)
+    )
+    or
+    exists(Instruction user | this = chiOperand(user, _))
+  }
+
+  /** Gets a textual representation of this operand; the base class always yields `Operand`. */
+  string toString() { result = "Operand" }
+
+  /**
+   * Gets the source location of this operand, which is the location of its use instruction.
+   */
+  final Language::Location getLocation() { result = this.getUse().getLocation() }
+
+  /**
+   * Gets the IR function containing this operand, i.e. the one enclosing its use instruction.
+   */
+  final IRFunction getEnclosingIRFunction() { result = this.getUse().getEnclosingIRFunction() }
+
+  /**
+   * Gets the `Instruction` that consumes this operand. Has no result here; subclasses override it.
+   */
+  Instruction getUse() { none() }
+
+  /**
+   * Gets the `Instruction` whose result supplies the value of this operand. In
+   * contrast to `getDef`, a result is produced even when `isDefinitionInexact`
+   * holds, in which case the returned instruction may only _partially_ or
+   * _potentially_ define the value of this operand.
+   */
+  Instruction getAnyDef() { none() }
+
+  /**
+   * Gets the `Instruction` whose result is the value of the operand. Unlike
+   * `getAnyDef`, this has no result when `isDefinitionInexact` holds, which
+   * means that the resulting instruction always defines exactly the value of
+   * this operand.
+   */
+  final Instruction getDef() {
+    this.getDefinitionOverlap() instanceof MustExactlyOverlap and
+    result = this.getAnyDef()
+  }
+
+  /**
+   * DEPRECATED: renamed to `getUse`.
+   *
+   * Gets the `Instruction` that consumes this operand.
+   */
+  deprecated final Instruction getUseInstruction() { result = this.getUse() }
+
+  /**
+   * DEPRECATED: use `getAnyDef` or `getDef`. `getAnyDef` is the exact replacement
+   * for this predicate, but most callers should probably switch to `getDef`
+   * instead.
+   *
+   * Gets the `Instruction` whose result is the value of the operand.
+   */
+  deprecated final Instruction getDefinitionInstruction() { result = this.getAnyDef() }
+
+  /**
+   * Gets the overlap relationship between this operand's definition and its use.
+   */
+  Overlap getDefinitionOverlap() { none() }
+
+  /** Holds if the result of the definition instruction does not exactly overlap this use. */
+  final predicate isDefinitionInexact() {
+    not this.getDefinitionOverlap() instanceof MustExactlyOverlap
+  }
+
+  /**
+   * Gets the prefix used for this operand when it is dumped as part of an operand list.
+   */
+  string getDumpLabel() { result = "" }
+
+  /**
+   * Gets a string identifying this operand uniquely among the operands of its use instruction.
+   */
+  string getDumpId() { result = "" }
+
+  /**
+   * Gets a string describing this operand, suitable for display in IR dumps. It is the dump label
+   * identifying the operand kind, then the inexactness specifier, then the ID of the definition
+   * consumed by the operand.
+   *
+   * For example: `this:r3_5`
+   */
+  final string getDumpString() {
+    result = this.getDumpLabel() + this.getInexactSpecifier() + this.getDefinitionId()
+  }
+
+  /**
+   * Gets the result ID of this operand's definition, or `m?` if the definition is not modeled in
+   * SSA (that is, when `getAnyDef` has no result).
+   */
+  private string getDefinitionId() {
+    not exists(this.getAnyDef()) and result = "m?"
+    or
+    result = this.getAnyDef().getResultId()
+  }
+
+  /**
+   * Gets the prefix to prepend to the operand's definition ID in an IR dump, indicating whether the
+   * operand is an exact or inexact use of its definition: "~" for an inexact use, and the empty
+   * string for an exact use.
+   */
+  private string getInexactSpecifier() {
+    if this.isDefinitionInexact() then result = "~" else result = ""
+  }
+
+  /**
+   * Gets the position at which this operand should be sorted within the operand list.
+   */
+  int getDumpSortOrder() { result = -1 }
+
+  /**
+   * Gets the type of the value consumed by this operand. By default this is the result type of
+   * the definition instruction returned by `getAnyDef`. For register operands, that is always
+   * the right answer. For some memory operands, the operand type may differ from the definition
+   * type, such as in the case of a partial read or a read from a pointer that has been cast to
+   * a different type.
+   */
+  Language::LanguageType getLanguageType() { result = this.getAnyDef().getResultLanguageType() }
+
+  /**
+   * Gets the language-neutral type of the value consumed by this operand, derived from
+   * `getLanguageType`. This is usually the same as the result type of the definition instruction
+   * consumed by this operand. For register operands, this is always the case. For some memory
+   * operands, the operand type may differ from the definition type, such as in the case of a
+   * partial read or a read from a pointer that has been cast to a different type.
+   */
+  final IRType getIRType() { result = this.getLanguageType().getIRType() }
+
+  /**
+   * Gets the language type of the value consumed by this operand, as reported by
+   * `getLanguageType`. This is usually the same as the result type of the definition instruction
+   * consumed by this operand. For register operands, this is always the case. For some memory
+   * operands, the operand type may differ from the definition type, such as in the case of a
+   * partial read or a read from a pointer that has been cast to a different type.
+   */
+  final Language::Type getType() { this.getLanguageType().hasType(result, _) }
+
+  /**
+   * Holds if the value consumed by this operand is a glvalue. When this
+   * holds, the value of the operand represents the address of a location,
+   * and `getType()` gives the type of that location. When this does not
+   * hold, the value of the operand represents a value whose type is given
+   * by `getType()`.
+   */
+  final predicate isGLValue() { this.getLanguageType().hasType(_, true) }
+
+  /**
+   * Gets the size, in bytes, of the value consumed by this operand. This predicate has no result
+   * when the operand does not have a known constant size.
+   */
+  final int getSize() { result = this.getLanguageType().getByteSize() }
+}
+
+/**
+ * An operand whose value is a memory result (e.g. the `LoadOperand` on a `Load` instruction).
+ */
+class MemoryOperand extends Operand {
+  cached
+  MemoryOperand() {
+    this instanceof TChiOperand or
+    this instanceof TPhiOperand or
+    this instanceof TNonSSAMemoryOperand
+  }
+
+  /**
+   * Gets the kind of memory access performed by the operand, taken from its use's opcode.
+   */
+  MemoryAccessKind getMemoryAccess() { result = this.getUse().getOpcode().getReadMemoryAccess() }
+
+  /**
+   * Holds if the memory access performed by this operand will not always read from every bit in
+   * the memory location. This is most commonly used for memory accesses that may or may not
+   * actually occur depending on runtime state (for example, the write side effect of an output
+   * parameter that is not written to on all paths), or for accesses where the memory location is
+   * a conservative estimate of the memory that might actually be accessed at runtime (for
+   * example, the global side effects of a function call).
+   */
+  predicate hasMayReadMemoryAccess() { this.getUse().getOpcode().hasMayReadMemoryAccess() }
+
+  /**
+   * Gets the operand that holds the memory address from which this operand loads its value, if
+   * any. For example, in `r3 = Load r1, m2`, the result of `getAddressOperand()` for `m2` is
+   * `r1`.
+   */
+  final AddressOperand getAddressOperand() {
+    result.getUse() = this.getUse() and
+    this.getMemoryAccess().usesAddressOperand()
+  }
+}
+
+/**
+ * An operand that does not belong to a `PhiInstruction`. Each one carries an `OperandTag`.
+ */
+class NonPhiOperand extends Operand {
+  Instruction useInstr;
+  OperandTag tag;
+
+  NonPhiOperand() {
+    this = chiOperand(useInstr, tag) or
+    this = nonSSAMemoryOperand(useInstr, tag) or
+    this = registerOperand(useInstr, tag, _)
+  }
+
+  final override Instruction getUse() { result = useInstr }
+
+  final override string getDumpLabel() { result = tag.getLabel() }
+
+  final override string getDumpId() { result = tag.getId() }
+
+  final override int getDumpSortOrder() { result = tag.getSortOrder() }
+
+  /**
+   * Gets the `OperandTag` describing the role this operand plays on its use `Instruction`.
+   */
+  final OperandTag getOperandTag() { result = tag }
+}
+
+/**
+ * An operand that consumes a register (non-memory) result. Its definition overlap is always exact.
+ */
+class RegisterOperand extends NonPhiOperand, TRegisterOperand {
+ override RegisterOperandTag tag;
+ Instruction defInstr;
+
+ cached
+ RegisterOperand() { this = registerOperand(useInstr, tag, defInstr) }
+
+ final override string toString() { result = tag.toString() }
+
+ final override Instruction getAnyDef() { result = defInstr }
+
+ final override Overlap getDefinitionOverlap() {
+ // A register result always overlaps its use exactly, so any `MustExactlyOverlap` qualifies.
+ result instanceof MustExactlyOverlap
+ }
+}
+
+/**
+ * A memory operand that belongs to an instruction other than a `Phi` instruction.
+ */
+class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, TNonPhiMemoryOperand {
+  override MemoryOperandTag tag;
+
+  cached
+  NonPhiMemoryOperand() {
+    this = chiOperand(useInstr, tag)
+    or
+    this = nonSSAMemoryOperand(useInstr, tag)
+  }
+
+  final override string toString() { result = tag.toString() }
+
+  final override Instruction getAnyDef() {
+    result = unique(Instruction def | this.hasDefinition(def, _))
+  }
+
+  final override Overlap getDefinitionOverlap() { this.hasDefinition(_, result) }
+
+  pragma[noinline]
+  private predicate hasDefinition(Instruction defInstr, Overlap overlap) {
+    defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
+    not Construction::isInCycle(useInstr) and
+    strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1 // one definition
+  }
+
+  /**
+   * Holds if this operand totally overlaps with its definition and consumes the bit range
+   * `[startBitOffset, endBitOffset)`, measured from the start address of the definition.
+   */
+  predicate getUsedInterval(int startBitOffset, int endBitOffset) {
+    Construction::getUsedInterval(this, startBitOffset, endBitOffset)
+  }
+}
+
+/**
+ * A memory operand whose type may differ from the type of the result of its definition.
+ */
+class TypedOperand extends NonPhiMemoryOperand {
+ override TypedOperandTag tag;
+
+ final override Language::LanguageType getLanguageType() {
+ result = Construction::getInstructionOperandType(useInstr, tag) // keyed on use and tag
+ }
+}
+
+/**
+ * The address operand of an instruction that loads or stores a value from memory (e.g. `Load`,
+ * `Store`). It is a `RegisterOperand` whose tag is an `AddressOperandTag`.
+ */
+class AddressOperand extends RegisterOperand {
+ override AddressOperandTag tag;
+}
+
+/**
+ * The buffer size operand of an instruction that represents a read or write of a buffer. It is
+ * a `RegisterOperand` whose tag is a `BufferSizeOperandTag`.
+ */
+class BufferSizeOperand extends RegisterOperand {
+ override BufferSizeOperandTag tag;
+}
+
+/**
+ * The source value operand of an instruction that loads a value from memory (e.g. `Load`,
+ * `ReturnValue`, `ThrowValue`). It is a `TypedOperand` whose tag is a `LoadOperandTag`.
+ */
+class LoadOperand extends TypedOperand {
+ override LoadOperandTag tag;
+}
+
+/**
+ * The source value operand of a `Store` instruction; its tag is a `StoreValueOperandTag`.
+ */
+class StoreValueOperand extends RegisterOperand {
+ override StoreValueOperandTag tag;
+}
+
+/**
+ * The sole operand of a unary instruction (e.g. `Convert`, `Negate`, `Copy`). A register operand.
+ */
+class UnaryOperand extends RegisterOperand {
+ override UnaryOperandTag tag;
+}
+
+/**
+ * The left operand of a binary instruction (e.g. `Add`, `CompareEQ`). A register operand.
+ */
+class LeftOperand extends RegisterOperand {
+ override LeftOperandTag tag;
+}
+
+/**
+ * The right operand of a binary instruction (e.g. `Add`, `CompareEQ`). A register operand.
+ */
+class RightOperand extends RegisterOperand {
+ override RightOperandTag tag;
+}
+
+/**
+ * The condition operand of a `ConditionalBranch` or `Switch` instruction. A register operand.
+ */
+class ConditionOperand extends RegisterOperand {
+ override ConditionOperandTag tag;
+}
+
+/**
+ * The operand representing the target function of a `Call` instruction. A register operand.
+ */
+class CallTargetOperand extends RegisterOperand {
+ override CallTargetOperandTag tag;
+}
+
+/**
+ * A register operand representing an argument to a function call. This covers positional
+ * arguments (represented by `PositionalArgumentOperand`) as well as the implicit `this`
+ * argument, if any (represented by `ThisArgumentOperand`).
+ */
+class ArgumentOperand extends RegisterOperand {
+ override ArgumentOperandTag tag;
+}
+
+/**
+ * An operand representing the implicit `this` argument to a member function call. It is an
+ * `ArgumentOperand` whose tag is a `ThisArgumentOperandTag`.
+ */
+class ThisArgumentOperand extends ArgumentOperand {
+ override ThisArgumentOperandTag tag;
+}
+
+/**
+ * An operand representing a positional argument to a function call.
+ */
+class PositionalArgumentOperand extends ArgumentOperand {
+ override PositionalArgumentOperandTag tag;
+
+ /**
+ * Gets the zero-based index of the argument, as reported by the operand's tag.
+ */
+ final int getIndex() { result = tag.getArgIndex() }
+}
+
+/**
+ * A `TypedOperand` representing memory read as a side effect of evaluating another instruction.
+ */
+class SideEffectOperand extends TypedOperand {
+ override SideEffectOperandTag tag;
+}
+
+/**
+ * An operand of a `PhiInstruction`, associated with one particular predecessor block.
+ */
+class PhiInputOperand extends MemoryOperand, TPhiOperand {
+  PhiInstruction useInstr;
+  Instruction defInstr;
+  IRBlock predecessorBlock;
+  Overlap overlap;
+
+  cached
+  PhiInputOperand() {
+    this = reusedPhiOperand(useInstr, defInstr, predecessorBlock, overlap)
+    or
+    this = phiOperand(useInstr, defInstr, predecessorBlock, overlap)
+  }
+
+  override string toString() { result = "Phi" }
+
+  final override PhiInstruction getUse() { result = useInstr }
+
+  final override Instruction getAnyDef() { result = defInstr }
+
+  final override Overlap getDefinitionOverlap() { result = overlap }
+
+  final override int getDumpSortOrder() { result = predecessorBlock.getDisplayIndex() + 11 }
+
+  final override string getDumpLabel() {
+    result = "from " + predecessorBlock.getDisplayIndex().toString() + ":"
+  }
+
+  final override string getDumpId() { result = predecessorBlock.getDisplayIndex().toString() }
+
+  /**
+   * Gets the predecessor block through which this input value reaches the `Phi` instruction.
+   */
+  final IRBlock getPredecessorBlock() { result = predecessorBlock }
+
+  final override MemoryAccessKind getMemoryAccess() { result instanceof PhiMemoryAccess }
+}
+
+/**
+ * The total operand of a Chi node (the previous value of the memory); a `ChiTotalMemoryAccess`.
+ */
+class ChiTotalOperand extends NonPhiMemoryOperand {
+ override ChiTotalOperandTag tag;
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof ChiTotalMemoryAccess }
+}
+
+/**
+ * The partial operand of a Chi node, i.e. the value that is written to part of the memory.
+ */
+class ChiPartialOperand extends NonPhiMemoryOperand {
+ override ChiPartialOperandTag tag;
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof ChiPartialMemoryAccess }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.ql
new file mode 100644
index 00000000000..6adf2a3978c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Print Aliased SSA IR
+ * @description Outputs a representation of the Aliased SSA IR graph
+ * @id cpp/print-aliased-ssa-ir
+ * @kind graph
+ */
+
+import PrintIR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll
new file mode 100644
index 00000000000..59dadee7154
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll
@@ -0,0 +1,329 @@
+/**
+ * Outputs a representation of the IR as a control flow graph.
+ *
+ * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small
+ * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most
+ * uses, however, it is better to write a query that imports `PrintIR.qll`, extends
+ * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to
+ * dump.
+ */
+
+private import internal.IRInternal
+private import IR
+private import internal.PrintIRImports as Imports
+import Imports::IRConfiguration
+
+private newtype TPrintIRConfiguration = MkPrintIRConfiguration() // single-branch newtype
+
+/**
+ * A query can extend this class and override `shouldPrintFunction` to choose what is printed.
+ */
+class PrintIRConfiguration extends TPrintIRConfiguration {
+ /** Gets a textual representation of this configuration, the string `PrintIRConfiguration`. */
+ string toString() { result = "PrintIRConfiguration" }
+
+ /**
+ * Holds if the IR for `func` should be printed. The default implementation is `any()`,
+ * so it holds for all functions.
+ */
+ predicate shouldPrintFunction(Language::Function func) { any() }
+}
+
+/**
+ * An `IRConfiguration` that evaluates debug strings only for functions selected for printing.
+ */
+private class FilteredIRConfiguration extends IRConfiguration {
+ override predicate shouldEvaluateDebugStringsForFunction(Language::Function func) {
+ shouldPrintFunction(func) // the module-level predicate of this file, not a member call
+ }
+}
+
+private predicate shouldPrintFunction(Language::Function func) {
+  any(PrintIRConfiguration config).shouldPrintFunction(func) // some configuration selects `func`
+}
+
+private string getAdditionalInstructionProperty(Instruction instr, string key) {
+  result = any(IRPropertyProvider provider).getInstructionProperty(instr, key)
+}
+
+private string getAdditionalBlockProperty(IRBlock block, string key) {
+  result = any(IRPropertyProvider provider).getBlockProperty(block, key)
+}
+
+/**
+ * Gets the value that some active property provider reports for property `key` of `operand`.
+ */
+private string getAdditionalOperandProperty(Operand operand, string key) {
+  result = any(IRPropertyProvider provider).getOperandProperty(operand, key)
+}
+
+/**
+ * Gets the properties of `operand` rendered as `key:value` pairs separated by `, `. Because the
+ * aggregate is a `strictconcat`, there is no result when the operand has no properties.
+ */
+private string getOperandPropertyListString(Operand operand) {
+  result =
+    strictconcat(string propName, string propValue |
+      propValue = getAdditionalOperandProperty(operand, propName)
+    |
+      propName + ":" + propValue, ", "
+    )
+}
+
+/**
+ * Gets the property list of `operand` wrapped in curly braces, as produced by
+ * `getOperandPropertyListString`. If the operand has no properties, the result is the empty
+ * string instead.
+ */
+private string getOperandPropertyString(Operand operand) {
+  not exists(getOperandPropertyListString(operand)) and result = ""
+  or
+  result = "{" + getOperandPropertyListString(operand) + "}"
+}
+
+private newtype TPrintableIRNode = // every branch is gated by `shouldPrintFunction`
+ TPrintableIRFunction(IRFunction irFunc) { shouldPrintFunction(irFunc.getFunction()) } or
+ TPrintableIRBlock(IRBlock block) { shouldPrintFunction(block.getEnclosingFunction()) } or
+ TPrintableInstruction(Instruction instr) { shouldPrintFunction(instr.getEnclosingFunction()) }
+
+/**
+ * A node that is emitted as part of the IR graph output.
+ */
+abstract private class PrintableIRNode extends TPrintableIRNode {
+  abstract string toString();
+
+  /**
+   * Gets the location that is emitted for this node.
+   */
+  abstract Language::Location getLocation();
+
+  /**
+   * Gets the label that is emitted for this node (reported as property `semmle.label`).
+   */
+  abstract string getLabel();
+
+  /**
+   * Gets the position of this node within its parent node (reported as `semmle.order`).
+   */
+  abstract int getOrder();
+
+  /**
+   * Gets the parent node of this node.
+   */
+  abstract PrintableIRNode getParent();
+
+  /**
+   * Gets the kind of graph represented by this node ("graph" or "tree"). No result by default.
+   */
+  string getGraphKind() { none() }
+
+  /**
+   * Holds if this node should always be rendered as text, even in a graphical
+   * viewer. Does not hold by default.
+   */
+  predicate forceText() { none() }
+
+  /**
+   * Gets the value of the node property named `key`, for the `semmle.*` keys handled below.
+   */
+  string getProperty(string key) {
+    key = "semmle.forceText" and this.forceText() and result = "true"
+    or
+    key = "semmle.graphKind" and result = this.getGraphKind()
+    or
+    key = "semmle.order" and result = this.getOrder().toString()
+    or
+    key = "semmle.label" and result = this.getLabel()
+  }
+}
+
+/**
+ * An IR graph node representing an `IRFunction` object.
+ */
+private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction {
+  IRFunction irFunc;
+
+  PrintableIRFunction() { this = TPrintableIRFunction(irFunc) }
+
+  override string toString() { result = irFunc.toString() }
+
+  override Language::Location getLocation() { result = irFunc.getLocation() }
+
+  override string getLabel() { result = Language::getIdentityString(irFunc.getFunction()) }
+
+  override int getOrder() {
+    this =
+      rank[result + 1](PrintableIRFunction candidate, Language::Location loc |
+        loc = candidate.getIRFunction().getLocation()
+      |
+        candidate
+        order by
+          loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(),
+          candidate.getLabel()
+      )
+  }
+
+  final override PrintableIRNode getParent() { none() }
+
+  final IRFunction getIRFunction() { result = irFunc }
+}
+
+/**
+ * An IR graph node that stands for an `IRBlock` object.
+ */
+private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock {
+  IRBlock block;
+
+  PrintableIRBlock() { this = TPrintableIRBlock(block) }
+
+  override string toString() { result = this.getLabel() }
+
+  override Language::Location getLocation() { result = block.getLocation() }
+
+  override string getLabel() { result = "Block " + block.getDisplayIndex().toString() }
+
+  override int getOrder() { result = block.getDisplayIndex() }
+
+  final override string getGraphKind() { result = "tree" }
+
+  final override predicate forceText() { any() }
+
+  final override PrintableIRFunction getParent() {
+    block.getEnclosingIRFunction() = result.getIRFunction()
+  }
+
+  override string getProperty(string key) {
+    result = getAdditionalBlockProperty(block, key) or
+    result = PrintableIRNode.super.getProperty(key)
+  }
+
+  final IRBlock getBlock() { result = block }
+}
+
+/**
+ * An IR graph node that stands for an `Instruction`.
+ */
+private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction {
+  Instruction instr;
+
+  PrintableInstruction() { this = TPrintableInstruction(instr) }
+
+  override string toString() { result = instr.toString() }
+
+  override Language::Location getLocation() { result = instr.getLocation() }
+
+  override string getLabel() {
+    exists(
+      IRBlock block, string resultText, string operationText, string operandsText,
+      int resultWidth, int operationWidth
+    |
+      instr = block.getAnInstruction() and
+      columnWidths(block, resultWidth, operationWidth) and
+      resultText = instr.getResultString() and
+      operationText = instr.getOperationString() and
+      operandsText = this.getOperandsString() and
+      // Pad the result and operation columns to the widths that `columnWidths` reports for the
+      // block, so that the ` = ` and ` : ` separators line up within the block.
+      result =
+        resultText + getPaddingString(resultWidth - resultText.length()) + " = " +
+          operationText + getPaddingString(operationWidth - operationText.length()) + " : " +
+          operandsText
+    )
+  }
+
+  override int getOrder() { result = instr.getDisplayIndexInBlock() }
+
+  final override PrintableIRBlock getParent() { result.getBlock() = instr.getBlock() }
+
+  final Instruction getInstruction() { result = instr }
+
+  override string getProperty(string key) {
+    result = getAdditionalInstructionProperty(instr, key) or
+    result = PrintableIRNode.super.getProperty(key)
+  }
+
+  /**
+   * Gets the string representation of the operand list. This mirrors
+   * `Instruction::getOperandsString()`, except that each operand is additionally annotated with
+   * the properties reported for it by active `IRPropertyProvider` instances.
+   */
+  private string getOperandsString() {
+    result =
+      concat(Operand op |
+        op = instr.getAnOperand()
+      |
+        op.getDumpString() + getOperandPropertyString(op), ", "
+        order by
+          op.getDumpSortOrder()
+      )
+  }
+}
+
+private predicate columnWidths(IRBlock block, int resultWidth, int operationWidth) {
+  // Each width is the longest corresponding string among the instructions in `block`.
+  resultWidth = max(Instruction i | i.getBlock() = block | i.getResultString().length()) and
+  operationWidth = max(Instruction i | i.getBlock() = block | i.getOperationString().length())
+}
+
+private int maxColumnWidth() {
+  result = // longest result, operation, or operands string of any instruction
+    max(Instruction i, int len |
+      len = i.getOperandsString().length() or
+      len = i.getOperationString().length() or
+      len = i.getResultString().length()
+    |
+      len
+    )
+}
+
+private string getPaddingString(int n) {
+  n in [1 .. maxColumnWidth()] and result = " " + getPaddingString(n - 1)
+  or
+  n = 0 and result = ""
+}
+
+/**
+ * Holds if `node` is part of the output graph and `value` is the value of its property `key`.
+ */
+query predicate nodes(PrintableIRNode node, string key, string value) {
+  node.getProperty(key) = value
+}
+
+private int getSuccessorIndex(IRBlock pred, IRBlock succ) {
+  succ =
+    rank[result + 1](IRBlock candidate, EdgeKind edge |
+      candidate = pred.getSuccessor(edge)
+    |
+      candidate order by edge.toString()
+    )
+}
+
+/**
+ * Holds if the output graph has an edge from `pred` to `succ` whose property `key` has the
+ * given `value`. The keys produced are `semmle.label` and `semmle.order`.
+ */
+query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) {
+  exists(EdgeKind edge, IRBlock fromBlock, IRBlock toBlock |
+    fromBlock = pred.getBlock() and
+    toBlock = succ.getBlock() and
+    toBlock = fromBlock.getSuccessor(edge) and
+    (
+      key = "semmle.order" and
+      value = getSuccessorIndex(fromBlock, toBlock).toString()
+      or
+      key = "semmle.label" and
+      (
+        if toBlock = fromBlock.getBackEdgeSuccessor(edge)
+        then value = edge.toString() + " (back edge)"
+        else value = edge.toString()
+      )
+    )
+  )
+}
+
+/**
+ * Holds if `parent` is the node that `child` reports as its parent in the output graph.
+ */
+query predicate parents(PrintableIRNode child, PrintableIRNode parent) {
+  child.getParent() = parent
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/ConstantAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/ConstantAnalysis.qll
new file mode 100644
index 00000000000..76f52f8334a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/ConstantAnalysis.qll
@@ -0,0 +1,53 @@
+private import internal.ConstantAnalysisInternal
+private import semmle.code.cpp.ir.internal.IntegerPartial
+private import IR
+
+language[monotonicAggregates]
+int getConstantValue(Instruction instr) {
+ result = instr.(IntegerConstantInstruction).getValue().toInt() // an integer constant itself
+ or
+ result = getBinaryInstructionValue(instr) // a binary operation on two constant operands
+ or
+ result = neg(getConstantValue(instr.(NegateInstruction).getUnary())) // negation of a constant
+ or
+ result = getConstantValue(instr.(CopyInstruction).getSourceValue()) // copy of a constant
+ or
+ exists(PhiInstruction phi |
+ phi = instr and // Phi: only when its inputs' exact definitions yield exactly one constant
+ result = unique(Operand op | op = phi.getAnInputOperand() | getConstantValue(op.getDef()))
+ )
+}
+
+pragma[noinline]
+private predicate binaryInstructionOperands(BinaryInstruction instr, int left, int right) {
+  getConstantValue(instr.getRight()) = right and
+  getConstantValue(instr.getLeft()) = left
+}
+
+pragma[noinline]
+private int getBinaryInstructionValue(BinaryInstruction instr) {
+  exists(int lhs, int rhs |
+    binaryInstructionOperands(instr, lhs, rhs) and
+    (
+      result = compareEQ(lhs, rhs) and instr instanceof CompareEQInstruction
+      or
+      result = compareNE(lhs, rhs) and instr instanceof CompareNEInstruction
+      or
+      result = compareLT(lhs, rhs) and instr instanceof CompareLTInstruction
+      or
+      result = compareGT(lhs, rhs) and instr instanceof CompareGTInstruction
+      or
+      result = compareLE(lhs, rhs) and instr instanceof CompareLEInstruction
+      or
+      result = compareGE(lhs, rhs) and instr instanceof CompareGEInstruction
+      or
+      result = add(lhs, rhs) and instr instanceof AddInstruction
+      or
+      result = sub(lhs, rhs) and instr instanceof SubInstruction
+      or
+      result = mul(lhs, rhs) and instr instanceof MulInstruction
+      or
+      result = div(lhs, rhs) and instr instanceof DivInstruction
+    )
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/PrintConstantAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/PrintConstantAnalysis.qll
new file mode 100644
index 00000000000..57a7cf594ca
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/PrintConstantAnalysis.qll
@@ -0,0 +1,11 @@
+private import internal.ConstantAnalysisInternal
+private import semmle.code.cpp.ir.internal.IntegerConstant
+private import ConstantAnalysis
+import IR
+
+private class ConstantAnalysisPropertyProvider extends IRPropertyProvider {
+  override string getInstructionProperty(Instruction instr, string key) {
+    result = getValue(getConstantValue(instr)).toString() and
+    key = "ConstantValue"
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/internal/ConstantAnalysisInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/internal/ConstantAnalysisInternal.qll
new file mode 100644
index 00000000000..d55844c0471
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/constant/internal/ConstantAnalysisInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.aliased_ssa.IR as IR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/PrintValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/PrintValueNumbering.qll
new file mode 100644
index 00000000000..a7fb1b3c07e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/PrintValueNumbering.qll
@@ -0,0 +1,17 @@
+private import internal.ValueNumberingImports
+private import ValueNumbering
+
+/**
+ * Adds a `valnum` property describing each instruction's value number to IR dumps.
+ */
+class ValueNumberPropertyProvider extends IRPropertyProvider {
+  override string getInstructionProperty(Instruction instr, string key) {
+    exists(ValueNumber number |
+      key = "valnum" and
+      number = valueNumber(instr) and
+      if strictcount(number.getAnInstruction()) > 1
+      then result = number.getDebugString()
+      else result = "unique"
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll
new file mode 100644
index 00000000000..796fb792366
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll
@@ -0,0 +1,88 @@
+private import internal.ValueNumberingInternal
+private import internal.ValueNumberingImports
+
+/**
+ * The value number assigned to a particular set of instructions that produce equivalent results.
+ */
+class ValueNumber extends TValueNumber {
+ final string toString() { result = "GVN" }
+
+ final string getDebugString() { result = strictconcat(getAnInstruction().getResultId(), ", ") }
+
+ final Language::Location getLocation() {
+ if
+ exists(Instruction i |
+ i = getAnInstruction() and not i.getLocation() instanceof Language::UnknownLocation
+ )
+ then
+ result =
+ min(Language::Location l |
+ l = getAnInstruction().getLocation() and not l instanceof Language::UnknownLocation
+ |
+ l
+ order by
+ l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
+ l.getEndColumn()
+ )
+ else result instanceof Language::UnknownDefaultLocation
+ }
+
+ /**
+ * Gets an instruction that has been assigned this value number. This will always produce at
+ * least one result.
+ */
+ final Instruction getAnInstruction() { this = valueNumber(result) }
+
+ /**
+ * Gets one of the instructions that was assigned this value number. The chosen instruction is
+ * deterministic but arbitrary. Intended for use only in debugging.
+ */
+ final Instruction getExampleInstruction() {
+ result =
+ min(Instruction instr |
+ instr = getAnInstruction()
+ |
+ instr order by instr.getBlock().getDisplayIndex(), instr.getDisplayIndexInBlock()
+ )
+ }
+
+ /**
+ * Gets an `Operand` whose definition is exact and has this value number.
+ */
+ final Operand getAUse() { this = valueNumber(result.getDef()) }
+
+ final string getKind() {
+ this instanceof TVariableAddressValueNumber and result = "VariableAddress"
+ or
+ this instanceof TInitializeParameterValueNumber and result = "InitializeParameter"
+ or
+ this instanceof TConstantValueNumber and result = "Constant"
+ or
+ this instanceof TStringConstantValueNumber and result = "StringConstant"
+ or
+ this instanceof TFieldAddressValueNumber and result = "FieldAddress"
+ or
+ this instanceof TBinaryValueNumber and result = "Binary"
+ or
+ this instanceof TPointerArithmeticValueNumber and result = "PointerArithmetic"
+ or
+ this instanceof TUnaryValueNumber and result = "Unary"
+ or
+ this instanceof TInheritanceConversionValueNumber and result = "InheritanceConversion"
+ or
+ this instanceof TLoadTotalOverlapValueNumber and result = "LoadTotalOverlap"
+ or
+ this instanceof TUniqueValueNumber and result = "Unique"
+ }
+}
+
+/**
+ * Gets the value number assigned to `instr`, if any. Returns at most one result.
+ */
+ValueNumber valueNumber(Instruction instr) { result = tvalueNumber(instr) }
+
+/**
+ * Gets the value number assigned to the exact definition of `op`, if any.
+ * Returns at most one result.
+ */
+ValueNumber valueNumberOfOperand(Operand op) { result = tvalueNumberOfOperand(op) }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingImports.qll
new file mode 100644
index 00000000000..8482a5e4b14
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.aliased_ssa.IR
+import semmle.code.cpp.ir.internal.Overlap
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingInternal.qll
new file mode 100644
index 00000000000..2467d961892
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingInternal.qll
@@ -0,0 +1,311 @@
+private import ValueNumberingImports
+
+newtype TValueNumber =
+ TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
+ variableAddressValueNumber(_, irFunc, ast)
+ } or
+ TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
+ initializeParameterValueNumber(_, irFunc, var)
+ } or
+ TConstantValueNumber(IRFunction irFunc, IRType type, string value) {
+ constantValueNumber(_, irFunc, type, value)
+ } or
+ TStringConstantValueNumber(IRFunction irFunc, IRType type, string value) {
+ stringConstantValueNumber(_, irFunc, type, value)
+ } or
+ TFieldAddressValueNumber(IRFunction irFunc, Language::Field field, TValueNumber objectAddress) {
+ fieldAddressValueNumber(_, irFunc, field, objectAddress)
+ } or
+ TBinaryValueNumber(
+ IRFunction irFunc, Opcode opcode, TValueNumber leftOperand, TValueNumber rightOperand
+ ) {
+ binaryValueNumber(_, irFunc, opcode, leftOperand, rightOperand)
+ } or
+ TPointerArithmeticValueNumber(
+ IRFunction irFunc, Opcode opcode, int elementSize, TValueNumber leftOperand,
+ TValueNumber rightOperand
+ ) {
+ pointerArithmeticValueNumber(_, irFunc, opcode, elementSize, leftOperand, rightOperand)
+ } or
+ TUnaryValueNumber(IRFunction irFunc, Opcode opcode, TValueNumber operand) {
+ unaryValueNumber(_, irFunc, opcode, operand)
+ } or
+ TInheritanceConversionValueNumber(
+ IRFunction irFunc, Opcode opcode, Language::Class baseClass, Language::Class derivedClass,
+ TValueNumber operand
+ ) {
+ inheritanceConversionValueNumber(_, irFunc, opcode, baseClass, derivedClass, operand)
+ } or
+ TLoadTotalOverlapValueNumber(
+ IRFunction irFunc, IRType type, TValueNumber memOperand, TValueNumber operand
+ ) {
+ loadTotalOverlapValueNumber(_, irFunc, type, memOperand, operand)
+ } or
+ TUniqueValueNumber(IRFunction irFunc, Instruction instr) { uniqueValueNumber(instr, irFunc) }
+
+/**
+ * A `CopyInstruction` whose source operand's value is congruent to the definition of that source
+ * operand.
+ * For example:
+ * ```
+ * Point p = { 1, 2 };
+ * Point q = p;
+ * int a = p.x;
+ * ```
+ * The use of `p` on line 2 is linked to the definition of `p` on line 1, and is congruent to that
+ * definition because it accesses the exact same memory.
+ * The use of `p.x` on line 3 is linked to the definition of `p` on line 1 as well, but is not
+ * congruent to that definition because `p.x` accesses only a subset of the memory defined by `p`.
+ */
+class CongruentCopyInstruction extends CopyInstruction {
+ CongruentCopyInstruction() {
+ this.getSourceValueOperand().getDefinitionOverlap() instanceof MustExactlyOverlap
+ }
+}
+
+class LoadTotalOverlapInstruction extends LoadInstruction {
+ LoadTotalOverlapInstruction() {
+ this.getSourceValueOperand().getDefinitionOverlap() instanceof MustTotallyOverlap
+ }
+}
+
+/**
+ * Holds if this library knows how to assign a value number to the specified instruction, other than
+ * a `unique` value number that is never shared by multiple instructions.
+ */
+private predicate numberableInstruction(Instruction instr) {
+ instr instanceof VariableAddressInstruction
+ or
+ instr instanceof InitializeParameterInstruction
+ or
+ instr instanceof ConstantInstruction
+ or
+ instr instanceof StringConstantInstruction
+ or
+ instr instanceof FieldAddressInstruction
+ or
+ instr instanceof BinaryInstruction
+ or
+ instr instanceof UnaryInstruction and not instr instanceof CopyInstruction
+ or
+ instr instanceof PointerArithmeticInstruction
+ or
+ instr instanceof CongruentCopyInstruction
+ or
+ instr instanceof LoadTotalOverlapInstruction
+}
+
+private predicate filteredNumberableInstruction(Instruction instr) {
+ // Use `count` rather than `strictcount` so that missing AST elements (a count of 0) are handled.
+ // Keep `instanceof` separate from the inline casts to avoid failed casts with a count of 0.
+ instr instanceof VariableAddressInstruction and
+ count(instr.(VariableAddressInstruction).getIRVariable().getAST()) != 1
+ or
+ instr instanceof ConstantInstruction and
+ count(instr.getResultIRType()) != 1
+ or
+ instr instanceof FieldAddressInstruction and
+ count(instr.(FieldAddressInstruction).getField()) != 1
+}
+
+private predicate variableAddressValueNumber(
+ VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ // The underlying AST element is used as value-numbering key instead of the
+ // `IRVariable` to work around a problem where a variable or expression with
+ // multiple types gives rise to multiple `IRVariable`s.
+ instr.getIRVariable().getAST() = ast and
+ strictcount(instr.getIRVariable().getAST()) = 1
+}
+
+private predicate initializeParameterValueNumber(
+ InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ // The underlying AST element is used as value-numbering key instead of the
+ // `IRVariable` to work around a problem where a variable or expression with
+ // multiple types gives rise to multiple `IRVariable`s.
+ instr.getIRVariable().getAST() = var
+}
+
+private predicate constantValueNumber(
+ ConstantInstruction instr, IRFunction irFunc, IRType type, string value
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ strictcount(instr.getResultIRType()) = 1 and
+ instr.getResultIRType() = type and
+ instr.getValue() = value
+}
+
+private predicate stringConstantValueNumber(
+ StringConstantInstruction instr, IRFunction irFunc, IRType type, string value
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getResultIRType() = type and
+ instr.getValue().getValue() = value
+}
+
+private predicate fieldAddressValueNumber(
+ FieldAddressInstruction instr, IRFunction irFunc, Language::Field field,
+ TValueNumber objectAddress
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getField() = field and
+ strictcount(instr.getField()) = 1 and
+ tvalueNumber(instr.getObjectAddress()) = objectAddress
+}
+
+private predicate binaryValueNumber(
+ BinaryInstruction instr, IRFunction irFunc, Opcode opcode, TValueNumber leftOperand,
+ TValueNumber rightOperand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ not instr instanceof PointerArithmeticInstruction and
+ instr.getOpcode() = opcode and
+ tvalueNumber(instr.getLeft()) = leftOperand and
+ tvalueNumber(instr.getRight()) = rightOperand
+}
+
+private predicate pointerArithmeticValueNumber(
+ PointerArithmeticInstruction instr, IRFunction irFunc, Opcode opcode, int elementSize,
+ TValueNumber leftOperand, TValueNumber rightOperand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getOpcode() = opcode and
+ instr.getElementSize() = elementSize and
+ tvalueNumber(instr.getLeft()) = leftOperand and
+ tvalueNumber(instr.getRight()) = rightOperand
+}
+
+private predicate unaryValueNumber(
+ UnaryInstruction instr, IRFunction irFunc, Opcode opcode, TValueNumber operand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ not instr instanceof InheritanceConversionInstruction and
+ not instr instanceof CopyInstruction and
+ not instr instanceof FieldAddressInstruction and
+ instr.getOpcode() = opcode and
+ tvalueNumber(instr.getUnary()) = operand
+}
+
+private predicate inheritanceConversionValueNumber(
+ InheritanceConversionInstruction instr, IRFunction irFunc, Opcode opcode,
+ Language::Class baseClass, Language::Class derivedClass, TValueNumber operand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getOpcode() = opcode and
+ instr.getBaseClass() = baseClass and
+ instr.getDerivedClass() = derivedClass and
+ tvalueNumber(instr.getUnary()) = operand
+}
+
+private predicate loadTotalOverlapValueNumber(
+ LoadTotalOverlapInstruction instr, IRFunction irFunc, IRType type, TValueNumber memOperand,
+ TValueNumber operand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ tvalueNumber(instr.getAnOperand().(MemoryOperand).getAnyDef()) = memOperand and
+ tvalueNumberOfOperand(instr.getAnOperand().(AddressOperand)) = operand and
+ instr.getResultIRType() = type
+}
+
+/**
+ * Holds if `instr` should be assigned a unique value number because this library does not know how
+ * to determine if two instances of that instruction are equivalent.
+ */
+private predicate uniqueValueNumber(Instruction instr, IRFunction irFunc) {
+ instr.getEnclosingIRFunction() = irFunc and
+ not instr.getResultIRType() instanceof IRVoidType and
+ (
+ not numberableInstruction(instr)
+ or
+ filteredNumberableInstruction(instr)
+ )
+}
+
+/**
+ * Gets the value number assigned to `instr`, if any. Returns at most one result.
+ */
+cached
+TValueNumber tvalueNumber(Instruction instr) {
+ result = nonUniqueValueNumber(instr)
+ or
+ exists(IRFunction irFunc |
+ uniqueValueNumber(instr, irFunc) and
+ result = TUniqueValueNumber(irFunc, instr)
+ )
+}
+
+/**
+ * Gets the value number assigned to the exact definition of `op`, if any.
+ * Returns at most one result.
+ */
+TValueNumber tvalueNumberOfOperand(Operand op) { result = tvalueNumber(op.getDef()) }
+
+/**
+ * Gets the value number assigned to `instr`, if any, unless that instruction is assigned a unique
+ * value number.
+ */
+private TValueNumber nonUniqueValueNumber(Instruction instr) {
+ exists(IRFunction irFunc |
+ irFunc = instr.getEnclosingIRFunction() and
+ (
+ exists(Language::AST ast |
+ variableAddressValueNumber(instr, irFunc, ast) and
+ result = TVariableAddressValueNumber(irFunc, ast)
+ )
+ or
+ exists(Language::AST var |
+ initializeParameterValueNumber(instr, irFunc, var) and
+ result = TInitializeParameterValueNumber(irFunc, var)
+ )
+ or
+ exists(string value, IRType type |
+ constantValueNumber(instr, irFunc, type, value) and
+ result = TConstantValueNumber(irFunc, type, value)
+ )
+ or
+ exists(IRType type, string value |
+ stringConstantValueNumber(instr, irFunc, type, value) and
+ result = TStringConstantValueNumber(irFunc, type, value)
+ )
+ or
+ exists(Language::Field field, TValueNumber objectAddress |
+ fieldAddressValueNumber(instr, irFunc, field, objectAddress) and
+ result = TFieldAddressValueNumber(irFunc, field, objectAddress)
+ )
+ or
+ exists(Opcode opcode, TValueNumber leftOperand, TValueNumber rightOperand |
+ binaryValueNumber(instr, irFunc, opcode, leftOperand, rightOperand) and
+ result = TBinaryValueNumber(irFunc, opcode, leftOperand, rightOperand)
+ )
+ or
+ exists(Opcode opcode, TValueNumber operand |
+ unaryValueNumber(instr, irFunc, opcode, operand) and
+ result = TUnaryValueNumber(irFunc, opcode, operand)
+ )
+ or
+ exists(
+ Opcode opcode, Language::Class baseClass, Language::Class derivedClass, TValueNumber operand
+ |
+ inheritanceConversionValueNumber(instr, irFunc, opcode, baseClass, derivedClass, operand) and
+ result = TInheritanceConversionValueNumber(irFunc, opcode, baseClass, derivedClass, operand)
+ )
+ or
+ exists(Opcode opcode, int elementSize, TValueNumber leftOperand, TValueNumber rightOperand |
+ pointerArithmeticValueNumber(instr, irFunc, opcode, elementSize, leftOperand, rightOperand) and
+ result =
+ TPointerArithmeticValueNumber(irFunc, opcode, elementSize, leftOperand, rightOperand)
+ )
+ or
+ exists(IRType type, TValueNumber memOperand, TValueNumber operand |
+ loadTotalOverlapValueNumber(instr, irFunc, type, memOperand, operand) and
+ result = TLoadTotalOverlapValueNumber(irFunc, type, memOperand, operand)
+ )
+ or
+ // The value number of a copy is just the value number of its source value.
+ result = tvalueNumber(instr.(CongruentCopyInstruction).getSourceValue())
+ )
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll
new file mode 100644
index 00000000000..9997b5b49a7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll
@@ -0,0 +1,461 @@
+private import AliasAnalysisInternal
+private import InputIR
+private import AliasAnalysisImports
+
+private class IntValue = Ints::IntValue;
+
+/**
+ * If `instr` is a `SideEffectInstruction`, gets the primary `CallInstruction` that caused the side
+ * effect. If `instr` is a `CallInstruction`, gets that same `CallInstruction`.
+ */
+private CallInstruction getPrimaryCall(Instruction instr) {
+ result = instr
+ or
+ result = instr.(SideEffectInstruction).getPrimaryInstruction()
+}
+
+/**
+ * Holds if `operand` serves as an input argument (or indirection) to `call`, in the position
+ * specified by `input`.
+ */
+private predicate isCallInput(
+ CallInstruction call, Operand operand, AliasModels::FunctionInput input
+) {
+ call = getPrimaryCall(operand.getUse()) and
+ (
+ exists(int index |
+ input.isParameterOrQualifierAddress(index) and
+ operand = call.getArgumentOperand(index)
+ )
+ or
+ exists(int index, ReadSideEffectInstruction read |
+ input.isParameterDerefOrQualifierObject(index) and
+ read = call.getAParameterSideEffect(index) and
+ operand = read.getSideEffectOperand()
+ )
+ )
+}
+
+/**
+ * Holds if `instr` serves as a return value or output argument indirection for `call`, in the
+ * position specified by `output`.
+ */
+private predicate isCallOutput(
+ CallInstruction call, Instruction instr, AliasModels::FunctionOutput output
+) {
+ call = getPrimaryCall(instr) and
+ (
+ output.isReturnValue() and instr = call
+ or
+ exists(int index, WriteSideEffectInstruction write |
+ output.isParameterDerefOrQualifierObject(index) and
+ write = call.getAParameterSideEffect(index) and
+ instr = write
+ )
+ )
+}
+
+/**
+ * Holds if the address in `operand` flows directly to the result of `resultInstr` due to modeled
+ * address flow through a function call.
+ */
+private predicate hasAddressFlowThroughCall(Operand operand, Instruction resultInstr) {
+ exists(
+ CallInstruction call, AliasModels::FunctionInput input, AliasModels::FunctionOutput output
+ |
+ call.getStaticCallTarget().(AliasModels::AliasFunction).hasAddressFlow(input, output) and
+ isCallInput(call, operand, input) and
+ isCallOutput(call, resultInstr, output)
+ )
+}
+
+/**
+ * Holds if `operand` is used in a way that does not allow any address held in
+ * that operand to escape beyond the instruction that uses it (that is, beyond
+ * `operand.getUse()`).
+ */
+private predicate operandIsConsumedWithoutEscaping(Operand operand) {
+ // The source/destination address of a Load/Store does not escape (but the
+ // loaded/stored value could).
+ operand instanceof AddressOperand
+ or
+ exists(Instruction instr |
+ instr = operand.getUse() and
+ (
+ // Neither operand of a Compare escapes.
+ instr instanceof CompareInstruction
+ or
+ // Neither operand of a PointerDiff escapes.
+ instr instanceof PointerDiffInstruction
+ or
+ // Converting an address to a `bool` does not escape the address.
+ instr.(ConvertInstruction).getResultIRType() instanceof IRBooleanType
+ or
+ instr instanceof CallInstruction and
+ not exists(IREscapeAnalysisConfiguration config | config.useSoundEscapeAnalysis())
+ )
+ )
+ or
+ // Some standard function arguments never escape.
+ isNeverEscapesArgument(operand)
+}
+
+private predicate operandEscapesDomain(Operand operand) {
+ not operandIsConsumedWithoutEscaping(operand) and
+ not operandIsPropagated(operand, _, _) and
+ not isArgumentForParameter(_, operand, _) and
+ not isOnlyEscapesViaReturnArgument(operand) and
+ not operand.getUse() instanceof ReturnValueInstruction and
+ not operand.getUse() instanceof ReturnIndirectionInstruction and
+ not operand instanceof PhiInputOperand
+}
+
+/**
+ * If the result of instruction `instr` is an integer constant, returns the
+ * value of that constant. Otherwise, returns unknown.
+ */
+IntValue getConstantValue(Instruction instr) {
+ if instr instanceof IntegerConstantInstruction
+ then result = instr.(IntegerConstantInstruction).getValue().toInt()
+ else result = Ints::unknown()
+}
+
+/**
+ * Computes the offset, in bits, by which the result of `instr` differs from the
+ * pointer argument to `instr`, if that offset is a constant. Otherwise, returns
+ * unknown.
+ */
+IntValue getPointerBitOffset(PointerOffsetInstruction instr) {
+ exists(IntValue bitOffset |
+ bitOffset = Ints::mul(Ints::mul(getConstantValue(instr.getRight()), instr.getElementSize()), 8) and
+ (
+ instr instanceof PointerAddInstruction and result = bitOffset
+ or
+ instr instanceof PointerSubInstruction and result = Ints::neg(bitOffset)
+ )
+ )
+}
+
+/**
+ * Holds if any address held in operand `operand` is propagated to the result of `instr`, offset by
+ * the number of bits in `bitOffset`. If the address is propagated, but the offset is not known to
+ * be a constant, then `bitOffset` is `unknown()`.
+ */
+private predicate operandIsPropagated(Operand operand, IntValue bitOffset, Instruction instr) {
+ // Some functions are known to propagate an argument
+ hasAddressFlowThroughCall(operand, instr) and
+ bitOffset = 0
+ or
+ instr = operand.getUse() and
+ (
+ // Converting to a non-virtual base class adds the offset of the base class.
+ exists(ConvertToNonVirtualBaseInstruction convert |
+ convert = instr and
+ bitOffset = Ints::mul(convert.getDerivation().getByteOffset(), 8)
+ )
+ or
+ // Conversion using dynamic_cast results in an unknown offset
+ instr instanceof CheckedConvertOrNullInstruction and
+ bitOffset = Ints::unknown()
+ or
+ // Converting to a derived class subtracts the offset of the base class.
+ exists(ConvertToDerivedInstruction convert |
+ convert = instr and
+ bitOffset = Ints::neg(Ints::mul(convert.getDerivation().getByteOffset(), 8))
+ )
+ or
+ // Converting to a virtual base class adds an unknown offset.
+ instr instanceof ConvertToVirtualBaseInstruction and
+ bitOffset = Ints::unknown()
+ or
+ // Conversion to another pointer type propagates the source address.
+ exists(ConvertInstruction convert, IRType resultType |
+ convert = instr and
+ resultType = convert.getResultIRType() and
+ resultType instanceof IRAddressType and
+ bitOffset = 0
+ )
+ or
+ // Adding an integer to or subtracting an integer from a pointer propagates
+ // the address with an offset.
+ exists(PointerOffsetInstruction ptrOffset |
+ ptrOffset = instr and
+ operand = ptrOffset.getLeftOperand() and
+ bitOffset = getPointerBitOffset(ptrOffset)
+ )
+ or
+ // Computing a field address from a pointer propagates the address plus the
+ // offset of the field.
+ bitOffset = Language::getFieldBitOffset(instr.(FieldAddressInstruction).getField())
+ or
+ // A copy propagates the source value.
+ operand = instr.(CopyInstruction).getSourceValueOperand() and bitOffset = 0
+ )
+}
+
+private predicate operandEscapesNonReturn(Operand operand) {
+ exists(Instruction instr |
+ // The address is propagated to the result of the instruction, and that result itself escapes
+ operandIsPropagated(operand, _, instr) and resultEscapesNonReturn(instr)
+ )
+ or
+ // The operand is an argument of a call, and escapes via the call's result or inside the callee
+ exists(CallInstruction ci, Instruction init |
+ isArgumentForParameter(ci, operand, init) and
+ (
+ resultMayReachReturn(init) and
+ resultEscapesNonReturn(ci)
+ or
+ resultEscapesNonReturn(init)
+ )
+ )
+ or
+ isOnlyEscapesViaReturnArgument(operand) and resultEscapesNonReturn(operand.getUse())
+ or
+ operand instanceof PhiInputOperand and
+ resultEscapesNonReturn(operand.getUse())
+ or
+ operandEscapesDomain(operand)
+}
+
+private predicate operandMayReachReturn(Operand operand) {
+ exists(Instruction instr |
+ // The address is propagated to the result of the instruction, and that result itself is returned
+ operandIsPropagated(operand, _, instr) and
+ resultMayReachReturn(instr)
+ )
+ or
+ // The operand is used in a function call which returns it, and the return value is then returned
+ exists(CallInstruction ci, Instruction init |
+ isArgumentForParameter(ci, operand, init) and
+ resultMayReachReturn(init) and
+ resultMayReachReturn(ci)
+ )
+ or
+ // The address is returned
+ operand.getUse() instanceof ReturnValueInstruction
+ or
+ isOnlyEscapesViaReturnArgument(operand) and resultMayReachReturn(operand.getUse())
+ or
+ operand instanceof PhiInputOperand and
+ resultMayReachReturn(operand.getUse())
+}
+
+private predicate operandReturned(Operand operand, IntValue bitOffset) {
+ // The address is propagated to the result of the instruction, and that result itself is returned
+ exists(Instruction instr, IntValue bitOffset1, IntValue bitOffset2 |
+ operandIsPropagated(operand, bitOffset1, instr) and
+ resultReturned(instr, bitOffset2) and
+ bitOffset = Ints::add(bitOffset1, bitOffset2)
+ )
+ or
+ // The operand is used in a function call which returns it, and the return value is then returned
+ exists(CallInstruction ci, Instruction init, IntValue bitOffset1, IntValue bitOffset2 |
+ isArgumentForParameter(ci, operand, init) and
+ resultReturned(init, bitOffset1) and
+ resultReturned(ci, bitOffset2) and
+ bitOffset = Ints::add(bitOffset1, bitOffset2)
+ )
+ or
+ // The address is returned
+ operand.getUse() instanceof ReturnValueInstruction and
+ bitOffset = 0
+ or
+ isOnlyEscapesViaReturnArgument(operand) and
+ resultReturned(operand.getUse(), _) and
+ bitOffset = Ints::unknown()
+}
+
+private predicate isArgumentForParameter(
+ CallInstruction ci, Operand operand, InitializeParameterInstruction init
+) {
+ exists(Language::Function f |
+ ci = operand.getUse() and
+ f = ci.getStaticCallTarget() and
+ (
+ init.getParameter() = f.getParameter(operand.(PositionalArgumentOperand).getIndex())
+ or
+ init.getIRVariable() instanceof IRThisVariable and
+ unique( | | init.getEnclosingFunction()) = f and
+ operand instanceof ThisArgumentOperand
+ ) and
+ not Language::isFunctionVirtual(f) and
+ not f instanceof AliasModels::AliasFunction
+ )
+}
+
+private predicate isOnlyEscapesViaReturnArgument(Operand operand) {
+ exists(AliasModels::AliasFunction f |
+ f = operand.getUse().(CallInstruction).getStaticCallTarget() and
+ (
+ f.parameterEscapesOnlyViaReturn(operand.(PositionalArgumentOperand).getIndex())
+ or
+ f.parameterEscapesOnlyViaReturn(-1) and
+ operand instanceof ThisArgumentOperand
+ )
+ )
+}
+
+private predicate isNeverEscapesArgument(Operand operand) {
+ exists(AliasModels::AliasFunction f |
+ f = operand.getUse().(CallInstruction).getStaticCallTarget() and
+ (
+ f.parameterNeverEscapes(operand.(PositionalArgumentOperand).getIndex())
+ or
+ f.parameterNeverEscapes(-1) and
+ operand instanceof ThisArgumentOperand
+ )
+ )
+}
+
+private predicate resultReturned(Instruction instr, IntValue bitOffset) {
+ operandReturned(instr.getAUse(), bitOffset)
+}
+
+private predicate resultMayReachReturn(Instruction instr) { operandMayReachReturn(instr.getAUse()) }
+
+/**
+ * Holds if any address held in the result of instruction `instr` escapes
+ * outside the domain of the analysis.
+ */
+private predicate resultEscapesNonReturn(Instruction instr) {
+ // The result escapes if it has at least one use that escapes.
+ operandEscapesNonReturn(instr.getAUse())
+ or
+ // The result also escapes if it is not modeled in SSA, because we do not know where it might be
+ // used.
+ not instr.isResultModeled()
+}
+
+/**
+ * Holds if the address of `allocation` escapes outside the domain of the analysis. This holds if
+ * the allocation is marked as always escaping via `alwaysEscapes()`, or if sound escape analysis is
+ * in effect (via configuration or phase) and the result of one of its base instructions escapes.
+ */
+predicate allocationEscapes(Configuration::Allocation allocation) {
+ allocation.alwaysEscapes()
+ or
+ exists(IREscapeAnalysisConfiguration config |
+ config.useSoundEscapeAnalysis() and resultEscapesNonReturn(allocation.getABaseInstruction())
+ )
+ or
+ Configuration::phaseNeedsSoundEscapeAnalysis() and
+ resultEscapesNonReturn(allocation.getABaseInstruction())
+}
+
+/**
+ * Equivalent to `operandIsPropagated()`, but includes interprocedural propagation.
+ */
+private predicate operandIsPropagatedIncludingByCall(
+ Operand operand, IntValue bitOffset, Instruction instr
+) {
+ operandIsPropagated(operand, bitOffset, instr)
+ or
+ exists(CallInstruction call, Instruction init |
+ isArgumentForParameter(call, operand, init) and
+ resultReturned(init, bitOffset) and
+ instr = call
+ )
+}
+
+/**
+ * Holds if `addrOperand` is at offset `bitOffset` from the value of instruction `base`. The offset
+ * may be `unknown()`.
+ */
+private predicate hasBaseAndOffset(AddressOperand addrOperand, Instruction base, IntValue bitOffset) {
+ base = addrOperand.getDef() and bitOffset = 0 // Base case
+ or
+ exists(
+ Instruction middle, int previousBitOffset, Operand middleOperand, IntValue additionalBitOffset
+ |
+ // We already have an offset from `middle`.
+ hasBaseAndOffset(addrOperand, middle, previousBitOffset) and
+ // `middle` is propagated from `base`.
+ operandIsPropagatedIncludingByCall(middleOperand, additionalBitOffset, middle) and
+ base = middleOperand.getDef() and
+ bitOffset = Ints::add(previousBitOffset, additionalBitOffset)
+ )
+}
+
+/**
+ * Holds if `addrOperand` is at constant offset `bitOffset` from the value of instruction `base`.
+ * Only holds for the `base` with the longest chain of propagation to `addrOperand`.
+ */
+predicate addressOperandBaseAndConstantOffset(
+ AddressOperand addrOperand, Instruction base, int bitOffset
+) {
+ hasBaseAndOffset(addrOperand, base, bitOffset) and
+ Ints::hasValue(bitOffset) and
+ not exists(Instruction previousBase, int previousBitOffset |
+ hasBaseAndOffset(addrOperand, previousBase, previousBitOffset) and
+ previousBase = base.getAnOperand().getDef() and
+ Ints::hasValue(previousBitOffset)
+ )
+}
+
+/**
+ * Gets the allocation into which `addrOperand` points, if known.
+ */
+Configuration::Allocation getAddressOperandAllocation(AddressOperand addrOperand) {
+ addressOperandAllocationAndOffset(addrOperand, result, _)
+}
+
+/**
+ * Holds if `addrOperand` is at offset `bitOffset` from a base instruction of `allocation`. The
+ * offset may be `unknown()`.
+ */
+predicate addressOperandAllocationAndOffset(
+ AddressOperand addrOperand, Configuration::Allocation allocation, IntValue bitOffset
+) {
+ exists(Instruction base |
+ allocation.getABaseInstruction() = base and
+ hasBaseAndOffset(addrOperand, base, bitOffset) and
+ not exists(Instruction previousBase |
+ hasBaseAndOffset(addrOperand, pragma[only_bind_out](previousBase), _) and
+ previousBase = base.getAnOperand().getDef()
+ )
+ )
+}
+
+/**
+ * Predicates used only for printing annotated IR dumps. These should not be used in production
+ * queries.
+ */
+module Print {
+ string getOperandProperty(Operand operand, string key) {
+ key = "alloc" and
+ result =
+ strictconcat(Configuration::Allocation allocation, IntValue bitOffset |
+ addressOperandAllocationAndOffset(operand, allocation, bitOffset)
+ |
+ allocation.toString() + Ints::getBitOffsetString(bitOffset), ", "
+ )
+ or
+ key = "prop" and
+ result =
+ strictconcat(Instruction destInstr, IntValue bitOffset, string value |
+ operandIsPropagatedIncludingByCall(operand, bitOffset, destInstr) and
+ if destInstr = operand.getUse()
+ then value = "@" + Ints::getBitOffsetString(bitOffset) + "->result"
+ else value = "@" + Ints::getBitOffsetString(bitOffset) + "->" + destInstr.getResultId()
+ |
+ value, ", "
+ )
+ }
+
+ string getInstructionProperty(Instruction instr, string key) {
+ key = "prop" and
+ result =
+ strictconcat(IntValue bitOffset, Operand sourceOperand, string value |
+ operandIsPropagatedIncludingByCall(sourceOperand, bitOffset, instr) and
+ if instr = sourceOperand.getUse()
+ then value = sourceOperand.getDumpId() + Ints::getBitOffsetString(bitOffset) + "->@"
+ else
+ value =
+ sourceOperand.getUse().getResultId() + "." + sourceOperand.getDumpId() +
+ Ints::getBitOffsetString(bitOffset) + "->@"
+ |
+ value, ", "
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysisImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysisImports.qll
new file mode 100644
index 00000000000..c4aeaf93cce
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysisImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.IRConfiguration
+import semmle.code.cpp.ir.internal.IntegerConstant as Ints
+import semmle.code.cpp.models.interfaces.Alias as AliasModels
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysisInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysisInternal.qll
new file mode 100644
index 00000000000..8a407105080
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysisInternal.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as InputIR
+import AliasConfiguration as Configuration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasConfiguration.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasConfiguration.qll
new file mode 100644
index 00000000000..8ba91d70087
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasConfiguration.qll
@@ -0,0 +1,146 @@
+private import AliasConfigurationInternal
+private import semmle.code.cpp.ir.implementation.unaliased_ssa.IR
+private import cpp
+private import AliasAnalysis
+private import semmle.code.cpp.ir.implementation.unaliased_ssa.internal.SimpleSSA as UnaliasedSSA
+
+private newtype TAllocation = // one branch per kind of allocation modeled in this file
+ TVariableAllocation(IRVariable var) {
+ // Only model variables for which unaliased SSA could not already be reused.
+ not UnaliasedSSA::canReuseSSAForVariable(var)
+ } or
+ TIndirectParameterAllocation(IRAutomaticVariable var) { // `var` has an `InitializeIndirectionInstruction`
+ exists(InitializeIndirectionInstruction instr | instr.getIRVariable() = var)
+ } or
+ TDynamicAllocation(CallInstruction call) { // `call` is the primary instruction of a dynamic-allocation init
+ exists(InitializeDynamicAllocationInstruction instr | instr.getPrimaryInstruction() = call)
+ }
+
+/**
+ * A memory allocation that can be tracked by the AliasedSSA alias analysis.
+ */
+abstract class Allocation extends TAllocation {
+ abstract string toString();
+
+ final string getAllocationString() { result = toString() } // same string as `toString()`
+
+ abstract Instruction getABaseInstruction(); // NOTE(review): presumably yields the allocation's address — confirm
+
+ abstract IRFunction getEnclosingIRFunction();
+
+ abstract Language::Location getLocation();
+
+ abstract string getUniqueId();
+
+ abstract IRType getIRType();
+
+ abstract predicate isReadOnly();
+
+ abstract predicate alwaysEscapes();
+
+ abstract predicate isAlwaysAllocatedOnStack();
+
+ final predicate isUnaliased() { not allocationEscapes(this) } // exact negation of `allocationEscapes`
+}
+
+class VariableAllocation extends Allocation, TVariableAllocation { // the allocation for one `IRVariable`
+ IRVariable var;
+
+ VariableAllocation() { this = TVariableAllocation(var) }
+
+ final override string toString() { result = var.toString() }
+
+ final override VariableInstruction getABaseInstruction() { // address-of / string-constant instrs of `var`
+ result.getIRVariable() = var and
+ (result instanceof VariableAddressInstruction or result instanceof StringConstantInstruction)
+ }
+
+ final override IRFunction getEnclosingIRFunction() { result = var.getEnclosingIRFunction() }
+
+ final override Language::Location getLocation() { result = var.getLocation() }
+
+ final override string getUniqueId() { result = var.getUniqueId() }
+
+ final override IRType getIRType() { result = var.getIRType() }
+
+ final override predicate isReadOnly() { var.isReadOnly() }
+
+ final override predicate isAlwaysAllocatedOnStack() { var instanceof IRAutomaticVariable }
+
+ final override predicate alwaysEscapes() {
+ // Every variable that is not an `IRAutomaticVariable` (that is, one with static storage duration) has
+ // its address escape, even when escape analysis is allowed to be unsound. Otherwise, we won't have a
+ // definition for any non-escaped global variable. Normally, we rely on `AliasedDefinition` to handle that.
+ not var instanceof IRAutomaticVariable
+ }
+
+ final IRVariable getIRVariable() { result = var }
+}
+
+class IndirectParameterAllocation extends Allocation, TIndirectParameterAllocation { // keyed by a variable
+ IRAutomaticVariable var;
+
+ IndirectParameterAllocation() { this = TIndirectParameterAllocation(var) }
+
+ final override string toString() { result = "*" + var.toString() } // the variable's name prefixed with `*`
+
+ final override InitializeParameterInstruction getABaseInstruction() {
+ result.getIRVariable() = var
+ }
+
+ final override IRFunction getEnclosingIRFunction() { result = var.getEnclosingIRFunction() }
+
+ final override Language::Location getLocation() { result = var.getLocation() }
+
+ final override string getUniqueId() { result = var.getUniqueId() }
+
+ final override IRType getIRType() { result instanceof IRUnknownType } // the type is always unknown
+
+ final override predicate isReadOnly() { none() } // never holds
+
+ final override predicate isAlwaysAllocatedOnStack() { none() } // never holds
+
+ final override predicate alwaysEscapes() { none() } // never holds
+}
+
+class DynamicAllocation extends Allocation, TDynamicAllocation { // keyed by the allocating call
+ CallInstruction call;
+
+ DynamicAllocation() { this = TDynamicAllocation(call) }
+
+ final override string toString() {
+ // This isn't performant, but it's only used in test/dump code right now.
+ // Dynamic allocations within a function are numbered in order of their start
+ // line number. This keeps the names stable when the function moves within the
+ // file, or when non-allocating lines are added and removed within the
+ // function. NOTE(review): calls that share a start line are not tie-broken here.
+ exists(int i |
+ result = "dynamic{" + i.toString() + "}" and
+ call =
+ rank[i](CallInstruction rangeCall |
+ exists(TDynamicAllocation(rangeCall)) and
+ rangeCall.getEnclosingIRFunction() = call.getEnclosingIRFunction()
+ |
+ rangeCall order by rangeCall.getLocation().getStartLine()
+ )
+ )
+ }
+
+ final override CallInstruction getABaseInstruction() { result = call }
+
+ final override IRFunction getEnclosingIRFunction() { result = call.getEnclosingIRFunction() }
+
+ final override Language::Location getLocation() { result = call.getLocation() }
+
+ final override string getUniqueId() { result = call.getUniqueId() }
+
+ final override IRType getIRType() { result instanceof IRUnknownType } // the type is always unknown
+
+ final override predicate isReadOnly() { none() } // never holds
+
+ final override predicate isAlwaysAllocatedOnStack() { none() } // never holds
+
+ final override predicate alwaysEscapes() { none() } // never holds
+}
+
+predicate phaseNeedsSoundEscapeAnalysis() { none() } // never holds in this configuration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasConfigurationInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasConfigurationInternal.qll
new file mode 100644
index 00000000000..bd6c2f4c151
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasConfigurationInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll
new file mode 100644
index 00000000000..acdae2b758a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll
@@ -0,0 +1,650 @@
+import AliasAnalysis
+import semmle.code.cpp.ir.internal.Overlap
+private import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+private import semmle.code.cpp.Print
+private import semmle.code.cpp.ir.implementation.unaliased_ssa.IR
+private import semmle.code.cpp.ir.implementation.unaliased_ssa.internal.SSAConstruction as OldSSA
+private import semmle.code.cpp.ir.internal.IntegerConstant as Ints
+private import semmle.code.cpp.ir.internal.IntegerInterval as Interval
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import AliasConfiguration
+
+private class IntValue = Ints::IntValue;
+
+private predicate isIndirectOrBufferMemoryAccess(MemoryAccessKind kind) {
+  // Holds exactly for the `BufferMemoryAccess` and `IndirectMemoryAccess` kinds.
+  kind instanceof BufferMemoryAccess or kind instanceof IndirectMemoryAccess
+}
+
+private predicate hasResultMemoryAccess(
+  Instruction instr, Allocation var, IRType type, Language::LanguageType languageType,
+  IntValue startBitOffset, IntValue endBitOffset, boolean isMayAccess
+) {
+  // The result is an indirect/buffer access whose address operand maps to `var` at `startBitOffset`.
+  isIndirectOrBufferMemoryAccess(instr.getResultMemoryAccess()) and
+  languageType = instr.getResultLanguageType() and
+  type = languageType.getIRType() and
+  exists(AddressOperand addr | addr = instr.getResultAddressOperand() |
+    addressOperandAllocationAndOffset(addr, var, startBitOffset)
+  ) and
+  (if instr.hasResultMayMemoryAccess() then isMayAccess = true else isMayAccess = false) and
+  if not exists(type.getByteSize())
+  then endBitOffset = Ints::unknown()
+  else endBitOffset = Ints::add(startBitOffset, Ints::mul(type.getByteSize(), 8))
+}
+
+private predicate hasOperandMemoryAccess(
+  MemoryOperand operand, Allocation var, IRType type, Language::LanguageType languageType,
+  IntValue startBitOffset, IntValue endBitOffset, boolean isMayAccess
+) {
+  // The operand is an indirect/buffer access whose address operand maps to `var` at `startBitOffset`.
+  isIndirectOrBufferMemoryAccess(operand.getMemoryAccess()) and
+  languageType = operand.getLanguageType() and
+  type = languageType.getIRType() and
+  exists(AddressOperand addr | addr = operand.getAddressOperand() |
+    addressOperandAllocationAndOffset(addr, var, startBitOffset)
+  ) and
+  (if operand.hasMayReadMemoryAccess() then isMayAccess = true else isMayAccess = false) and
+  if not exists(type.getByteSize())
+  then endBitOffset = Ints::unknown()
+  else endBitOffset = Ints::add(startBitOffset, Ints::mul(type.getByteSize(), 8))
+}
+
+private newtype TMemoryLocation = // one branch per kind of memory location; see the classes below
+ TVariableMemoryLocation(
+ Allocation var, IRType type, Language::LanguageType languageType, IntValue startBitOffset,
+ IntValue endBitOffset, boolean isMayAccess
+ ) {
+ (
+ hasResultMemoryAccess(_, var, type, _, startBitOffset, endBitOffset, isMayAccess)
+ or
+ hasOperandMemoryAccess(_, var, type, _, startBitOffset, endBitOffset, isMayAccess)
+ or
+ // For a stack variable, always create a ("must") memory location for the entire variable.
+ var.isAlwaysAllocatedOnStack() and
+ type = var.getIRType() and
+ startBitOffset = 0 and
+ endBitOffset = type.getByteSize() * 8 and
+ isMayAccess = false
+ ) and
+ languageType = type.getCanonicalLanguageType() // only the canonical language type is stored in the key
+ } or
+ TEntireAllocationMemoryLocation(Allocation var, boolean isMayAccess) {
+ (
+ var instanceof IndirectParameterAllocation or
+ var instanceof DynamicAllocation
+ ) and
+ (isMayAccess = false or isMayAccess = true)
+ } or
+ TUnknownMemoryLocation(IRFunction irFunc, boolean isMayAccess) { // a "must" and a "may" value per function
+ isMayAccess = false or isMayAccess = true
+ } or
+ TAllNonLocalMemory(IRFunction irFunc, boolean isMayAccess) { // a "must" and a "may" value per function
+ isMayAccess = false or isMayAccess = true
+ } or
+ TAllAliasedMemory(IRFunction irFunc, boolean isMayAccess) { // a "must" and a "may" value per function
+ isMayAccess = false or isMayAccess = true
+ }
+
+/**
+ * Represents the memory location accessed by a memory operand or memory result. The location is one of:
+ * - `VariableMemoryLocation` - A location within a known `Allocation`, at a constant or unknown bit offset.
+ * - `EntireAllocationMemoryLocation` - An entire indirect-parameter or dynamic `Allocation`.
+ * - `UnknownMemoryLocation` - A location not known to be within a specific allocation.
+ * - `AllNonLocalMemory` / `AllAliasedMemory` - All non-local memory, or all aliased memory, of a function.
+ */
+abstract class MemoryLocation extends TMemoryLocation {
+ final string toString() { // "may" accesses are printed with a leading `?`
+ if isMayAccess() then result = "?" + toStringInternal() else result = toStringInternal()
+ }
+
+ abstract string toStringInternal();
+
+ abstract VirtualVariable getVirtualVariable();
+
+ abstract Language::LanguageType getType();
+
+ abstract string getUniqueId();
+
+ abstract IRFunction getIRFunction();
+
+ abstract Location getLocation();
+
+ final IRType getIRType() { result = getType().getIRType() }
+
+ abstract predicate isMayAccess();
+
+ Allocation getAllocation() { none() } // no result by default; overridden by `AllocationMemoryLocation`
+
+ /**
+ * Holds if the location cannot be overwritten except by definition of a `MemoryLocation` for
+ * which `def.canDefineReadOnly()` holds.
+ */
+ predicate isReadOnly() { none() }
+
+ /**
+ * Holds if a definition of this location can be the definition of a read-only use location.
+ */
+ predicate canDefineReadOnly() { none() }
+
+ /**
+ * Holds if the location always represents memory allocated on the stack (for example, a variable
+ * with automatic storage duration).
+ */
+ predicate isAlwaysAllocatedOnStack() { none() }
+
+ final predicate canReuseSSA() { none() } // never holds for any location in this implementation
+}
+
+/**
+ * Represents a set of `MemoryLocation`s that cannot overlap with
+ * `MemoryLocation`s outside of the set. The `VirtualVariable` will be
+ * represented by a `MemoryLocation` that totally overlaps all other
+ * `MemoryLocation`s in the set.
+ */
+abstract class VirtualVariable extends MemoryLocation { }
+
+abstract class AllocationMemoryLocation extends MemoryLocation { // a location tied to one `Allocation`
+ Allocation var;
+ boolean isMayAccess;
+
+ AllocationMemoryLocation() {
+ this instanceof TMemoryLocation and
+ isMayAccess = false
+ or
+ isMayAccess = true // Just ensures that `isMayAccess` is bound.
+ }
+
+ final override VirtualVariable getVirtualVariable() {
+ if allocationEscapes(var)
+ then result = TAllAliasedMemory(var.getEnclosingIRFunction(), false) // the function's aliased variable
+ else result.(AllocationMemoryLocation).getAllocation() = var // a virtual variable of the same allocation
+ }
+
+ final override IRFunction getIRFunction() { result = var.getEnclosingIRFunction() }
+
+ final override Location getLocation() { result = var.getLocation() }
+
+ final override Allocation getAllocation() { result = var }
+
+ final override predicate isMayAccess() { isMayAccess = true }
+
+ final override predicate isReadOnly() { var.isReadOnly() } // delegates to the allocation
+}
+
+/**
+ * An access to memory within a single known `Allocation`. The allocation may be either unescaped (with its
+ * own virtual variable) or escaped (assigned to the function's `AliasedVirtualVariable`).
+ */
+class VariableMemoryLocation extends TVariableMemoryLocation, AllocationMemoryLocation {
+ IRType type;
+ Language::LanguageType languageType;
+ IntValue startBitOffset;
+ IntValue endBitOffset;
+
+ VariableMemoryLocation() {
+ this =
+ TVariableMemoryLocation(var, type, languageType, startBitOffset, endBitOffset, isMayAccess)
+ }
+
+ private string getIntervalString() { // empty when the location covers the entire variable
+ if coversEntireVariable()
+ then result = ""
+ else result = Interval::getIntervalString(startBitOffset, endBitOffset)
+ }
+
+ private string getTypeString() { // empty when the location is the entire variable at its own type
+ if coversEntireVariable() and type = var.getIRType()
+ then result = ""
+ else result = "<" + languageType.toString() + ">"
+ }
+
+ final override string toStringInternal() {
+ result = var.toString() + getIntervalString() + getTypeString()
+ }
+
+ final override Language::LanguageType getType() {
+ if
+ strictcount(Language::LanguageType accessType |
+ hasResultMemoryAccess(_, var, type, accessType, startBitOffset, endBitOffset, _) or
+ hasOperandMemoryAccess(_, var, type, accessType, startBitOffset, endBitOffset, _)
+ ) = 1
+ then
+ // All of the accesses have the same `LanguageType`, so just use that.
+ hasResultMemoryAccess(_, var, type, result, startBitOffset, endBitOffset, _) or
+ hasOperandMemoryAccess(_, var, type, result, startBitOffset, endBitOffset, _)
+ else
+ // There is no single type for all accesses, so just use the canonical one for this `IRType`.
+ result = type.getCanonicalLanguageType()
+ }
+
+ final IntValue getStartBitOffset() { result = startBitOffset }
+
+ final IntValue getEndBitOffset() { result = endBitOffset }
+
+ final override string getUniqueId() {
+ result =
+ var.getUniqueId() + Interval::getIntervalString(startBitOffset, endBitOffset) + "<" +
+ type.getIdentityString() + ">"
+ }
+
+ final override predicate isAlwaysAllocatedOnStack() { var.isAlwaysAllocatedOnStack() }
+
+ /**
+ * Holds if this memory location covers the entire variable, that is, bits 0 up to the size of its IR type.
+ */
+ final predicate coversEntireVariable() { varIRTypeHasBitRange(startBitOffset, endBitOffset) }
+
+ pragma[noinline]
+ private predicate varIRTypeHasBitRange(int start, int end) {
+ start = 0 and
+ end = var.getIRType().getByteSize() * 8
+ }
+}
+
+class EntireAllocationMemoryLocation extends TEntireAllocationMemoryLocation,
+ AllocationMemoryLocation {
+ EntireAllocationMemoryLocation() { this = TEntireAllocationMemoryLocation(var, isMayAccess) }
+
+ final override string toStringInternal() { result = var.toString() } // printed as the allocation itself
+
+ final override Language::LanguageType getType() { // always the canonical unknown type
+ result = any(IRUnknownType unknownType).getCanonicalLanguageType()
+ }
+
+ final override string getUniqueId() { result = var.getUniqueId() }
+}
+
+class EntireAllocationVirtualVariable extends EntireAllocationMemoryLocation, VirtualVariable {
+ EntireAllocationVirtualVariable() {
+ not allocationEscapes(var) and // escaped allocations do not get a virtual variable of their own
+ not isMayAccess() // only the "must" location is the virtual variable
+ }
+}
+
+/**
+ * Represents the `MemoryLocation` for an allocation that acts as its own `VirtualVariable`. Includes any
+ * "must" `VariableMemoryLocation` that exactly covers its entire allocation at the allocation's own type,
+ * and only if that allocation does not escape.
+ */
+class VariableVirtualVariable extends VariableMemoryLocation, VirtualVariable {
+ VariableVirtualVariable() {
+ not allocationEscapes(var) and
+ type = var.getIRType() and
+ coversEntireVariable() and
+ not isMayAccess()
+ }
+}
+
+/**
+ * An access to memory that is not known to be confined to a specific allocation.
+ */
+class UnknownMemoryLocation extends TUnknownMemoryLocation, MemoryLocation {
+ IRFunction irFunc;
+ boolean isMayAccess;
+
+ UnknownMemoryLocation() { this = TUnknownMemoryLocation(irFunc, isMayAccess) }
+
+ final override string toStringInternal() { result = "{Unknown}" }
+
+ final override VirtualVariable getVirtualVariable() { result = TAllAliasedMemory(irFunc, false) }
+
+ final override Language::LanguageType getType() { // always the canonical unknown type
+ result = any(IRUnknownType type).getCanonicalLanguageType()
+ }
+
+ final override IRFunction getIRFunction() { result = irFunc }
+
+ final override Location getLocation() { result = irFunc.getLocation() } // the function's location
+
+ final override string getUniqueId() { result = "{Unknown}" }
+
+ final override predicate isMayAccess() { isMayAccess = true }
+}
+
+/**
+ * An access to memory that is not known to be confined to a specific allocation, but is known to
+ * not access memory on the current function's stack frame.
+ */
+class AllNonLocalMemory extends TAllNonLocalMemory, MemoryLocation {
+ IRFunction irFunc;
+ boolean isMayAccess;
+
+ AllNonLocalMemory() { this = TAllNonLocalMemory(irFunc, isMayAccess) }
+
+ final override string toStringInternal() { result = "{AllNonLocal}" }
+
+ final override AliasedVirtualVariable getVirtualVariable() { result.getIRFunction() = irFunc }
+
+ final override Language::LanguageType getType() { // always the canonical unknown type
+ result = any(IRUnknownType type).getCanonicalLanguageType()
+ }
+
+ final override IRFunction getIRFunction() { result = irFunc }
+
+ final override Location getLocation() { result = irFunc.getLocation() } // the function's location
+
+ final override string getUniqueId() { result = "{AllNonLocal}" }
+
+ final override predicate isMayAccess() { isMayAccess = true }
+
+ override predicate canDefineReadOnly() {
+ // A "must" access that defines all non-local memory appears only on the `InitializeNonLocal`
+ // instruction, which provides the initial definition for all memory outside of the current
+ // function's stack frame. This memory includes string literals and other read-only globals, so
+ // we allow such an access to be the definition for a use of a read-only location.
+ not isMayAccess()
+ }
+}
+
+/**
+ * An access to all aliased memory of a function.
+ */
+class AllAliasedMemory extends TAllAliasedMemory, MemoryLocation {
+ IRFunction irFunc;
+ boolean isMayAccess;
+
+ AllAliasedMemory() { this = TAllAliasedMemory(irFunc, isMayAccess) }
+
+ final override string toStringInternal() { result = "{AllAliased}" }
+
+ final override Language::LanguageType getType() { // always the canonical unknown type
+ result = any(IRUnknownType type).getCanonicalLanguageType()
+ }
+
+ final override IRFunction getIRFunction() { result = irFunc }
+
+ final override Location getLocation() { result = irFunc.getLocation() } // the function's location
+
+ final override string getUniqueId() { result = " " + toString() } // NOTE(review): leading space — confirm intent
+
+ final override VirtualVariable getVirtualVariable() { result = TAllAliasedMemory(irFunc, false) }
+
+ final override predicate isMayAccess() { isMayAccess = true }
+}
+
+/** A virtual variable that groups all escaped memory within a function. */
+class AliasedVirtualVariable extends AllAliasedMemory, VirtualVariable {
+ AliasedVirtualVariable() { not isMayAccess() } // only the "must" `AllAliasedMemory` location qualifies
+}
+
+/**
+ * Gets the overlap relationship between the definition location `def` and the use location `use`,
+ * after accounting for read-only locations, "may" accesses and differing IR types.
+ */
+Overlap getOverlap(MemoryLocation def, MemoryLocation use) {
+ exists(Overlap overlap |
+ // Compute the overlap based only on the extent.
+ overlap = getExtentOverlap(def, use) and
+ // Filter out attempts to write to read-only memory.
+ (def.canDefineReadOnly() or not use.isReadOnly()) and
+ if def.isMayAccess()
+ then
+ // No matter what kind of extent overlap we have, the final overlap is still
+ // `MayPartiallyOverlap`, because the def might not have written all of the bits of the use
+ // location.
+ result instanceof MayPartiallyOverlap
+ else
+ if
+ overlap instanceof MustExactlyOverlap and
+ (use.isMayAccess() or not def.getIRType() = use.getIRType())
+ then
+ // Can't exactly overlap with a "may" use or a use of a different type.
+ result instanceof MustTotallyOverlap
+ else result = overlap
+ )
+}
+
+/**
+ * Gets the overlap relationship between the definition location `def` and the use location `use`,
+ * based only on the set of memory locations accessed. Handling of "may" accesses and read-only
+ * locations occurs in `getOverlap()`.
+ */
+private Overlap getExtentOverlap(MemoryLocation def, MemoryLocation use) {
+ // The def and the use must have the same virtual variable, or no overlap is possible.
+ (
+ // AllAliasedMemory must totally overlap any location within the same virtual variable.
+ def.getVirtualVariable() = use.getVirtualVariable() and
+ def instanceof AllAliasedMemory and
+ result instanceof MustTotallyOverlap
+ or
+ // An UnknownMemoryLocation may partially overlap any location within the same virtual variable,
+ // even itself.
+ def.getVirtualVariable() = use.getVirtualVariable() and
+ def instanceof UnknownMemoryLocation and
+ result instanceof MayPartiallyOverlap
+ or
+ def.getVirtualVariable() = use.getVirtualVariable() and
+ def instanceof AllNonLocalMemory and // case: the definition is all non-local memory
+ (
+ // AllNonLocalMemory exactly overlaps itself.
+ use instanceof AllNonLocalMemory and
+ result instanceof MustExactlyOverlap
+ or
+ not use instanceof AllNonLocalMemory and
+ not use.isAlwaysAllocatedOnStack() and
+ if use instanceof VariableMemoryLocation
+ then
+ // AllNonLocalMemory totally overlaps any non-local variable.
+ result instanceof MustTotallyOverlap
+ else
+ // AllNonLocalMemory may partially overlap any other location within the same virtual
+ // variable, except a stack variable.
+ result instanceof MayPartiallyOverlap
+ )
+ or
+ def.getVirtualVariable() = use.getVirtualVariable() and
+ def instanceof EntireAllocationMemoryLocation and // case: the definition is an entire allocation
+ (
+ // EntireAllocationMemoryLocation exactly overlaps itself.
+ use instanceof EntireAllocationMemoryLocation and
+ result instanceof MustExactlyOverlap
+ or
+ not use instanceof EntireAllocationMemoryLocation and
+ if def.getAllocation() = use.getAllocation()
+ then
+ // EntireAllocationMemoryLocation totally overlaps any location within
+ // the same allocation.
+ result instanceof MustTotallyOverlap
+ else (
+ // There is no overlap with a location that's known to belong to a
+ // different allocation, but all other locations may partially overlap.
+ not exists(use.getAllocation()) and
+ result instanceof MayPartiallyOverlap
+ )
+ )
+ or
+ exists(VariableMemoryLocation defVariableLocation | // case: the definition is a variable location
+ defVariableLocation = def and
+ (
+ // A VariableMemoryLocation may partially overlap an unknown location within the same
+ // virtual variable.
+ def.getVirtualVariable() = use.getVirtualVariable() and
+ (
+ use instanceof UnknownMemoryLocation or
+ use instanceof AllAliasedMemory or
+ use instanceof EntireAllocationMemoryLocation
+ ) and
+ result instanceof MayPartiallyOverlap
+ or
+ // A VariableMemoryLocation that is not a local variable may partially overlap an
+ // AllNonLocalMemory within the same virtual variable.
+ def.getVirtualVariable() = use.getVirtualVariable() and
+ use instanceof AllNonLocalMemory and
+ result instanceof MayPartiallyOverlap and
+ not defVariableLocation.isAlwaysAllocatedOnStack()
+ or
+ // A VariableMemoryLocation overlaps another location within the same variable based on the
+ // relationship of the two offset intervals.
+ exists(Overlap intervalOverlap |
+ intervalOverlap = getVariableMemoryLocationOverlap(def, use) and
+ if intervalOverlap instanceof MustExactlyOverlap
+ then result instanceof MustExactlyOverlap
+ else
+ if defVariableLocation.coversEntireVariable()
+ then
+ // The definition covers the entire variable, so assume that it totally overlaps the use, even if the
+ // interval for the use is unknown or outside the bounds of the variable.
+ result instanceof MustTotallyOverlap
+ else
+ // Just use the overlap relation of the interval.
+ result = intervalOverlap
+ )
+ )
+ )
+ )
+}
+
+/*
+ * The following predicates compute the overlap relation between `VariableMemoryLocation`s in the
+ * same `VirtualVariable` as follows:
+ * 1. In `isRelevantOffset`, compute the set of offsets within each virtual variable (linear in
+ * the number of VMLs)
+ * 2. In `isCoveredOffset`, rank the offsets within each virtual variable (linear in the number
+ * of VMLs)
+ * 3. In `isCoveredOffset`, compute the set of ranks that each VML with known start and end
+ * offsets covers (linear in the size of the overlap set)
+ * 4. In `overlappingIRVariableMemoryLocations`, compute the set of overlapping pairs of VMLs using
+ * a join on `isCoveredOffset` (linear in the size of the overlap set)
+ * 5. In that same join, restrict to only the pairs that share an `Allocation`; a VML with an
+ * unknown offset is paired with every VML of its `Allocation` (linear in the size of the overlap set)
+ * 6. In `getVariableMemoryLocationOverlap`, compute the precise overlap relation for each
+ * overlapping pair of VMLs (linear in the size of the overlap set)
+ */
+
+private predicate isRelevantOffset(VirtualVariable vv, IntValue offset) {
+  // An offset is relevant when some variable location of `vv` starts or ends at it.
+  exists(VariableMemoryLocation loc |
+    vv = loc.getVirtualVariable() and
+    (offset = loc.getEndBitOffset() or offset = loc.getStartBitOffset())
+  )
+}
+
+private predicate isRelatableMemoryLocation(VariableMemoryLocation vml) {
+  // Neither end of the bit range may be `Ints::unknown()`.
+  Ints::unknown() != vml.getStartBitOffset() and Ints::unknown() != vml.getEndBitOffset()
+}
+
+private predicate isCoveredOffset(Allocation var, int offsetRank, VariableMemoryLocation vml) {
+ exists(int startRank, int endRank, VirtualVariable vvar |
+ vml.getStartBitOffset() = rank[startRank](IntValue offset_ | isRelevantOffset(vvar, offset_)) and
+ vml.getEndBitOffset() = rank[endRank](IntValue offset_ | isRelevantOffset(vvar, offset_)) and
+ var = vml.getAllocation() and
+ vvar = vml.getVirtualVariable() and // offsets are ranked among the relevant offsets of `vvar`
+ isRelatableMemoryLocation(vml) and // locations with an unknown start or end are excluded here
+ offsetRank in [startRank .. endRank] // every rank from the start rank to the end rank, inclusive
+ )
+}
+
+private predicate hasUnknownOffset(Allocation var, VariableMemoryLocation vml) {
+  // `vml` belongs to `var`, and its start or end bit offset is `Ints::unknown()`.
+  var = vml.getAllocation() and
+  exists(IntValue unknownOffset | unknownOffset = Ints::unknown() |
+    unknownOffset = vml.getEndBitOffset() or unknownOffset = vml.getStartBitOffset()
+  )
+}
+
+private predicate overlappingIRVariableMemoryLocations(
+  VariableMemoryLocation def, VariableMemoryLocation use
+) {
+  hasUnknownOffset(def.getAllocation(), use)
+  or
+  hasUnknownOffset(use.getAllocation(), def)
+  or
+  // Both locations cover a common offset rank of the same allocation.
+  exists(int sharedRank, Allocation alloc |
+    isCoveredOffset(alloc, sharedRank, use) and isCoveredOffset(alloc, sharedRank, def)
+  )
+}
+
+private Overlap getVariableMemoryLocationOverlap(
+ VariableMemoryLocation def, VariableMemoryLocation use
+) {
+ overlappingIRVariableMemoryLocations(def, use) and // only candidate pairs are compared
+ result =
+ Interval::getOverlap(def.getStartBitOffset(), def.getEndBitOffset(), use.getStartBitOffset(),
+ use.getEndBitOffset()) // the relation itself comes from the two bit intervals
+}
+
+/**
+ * Holds if the def/use information for the result of `instr` can be reused from the previous
+ * iteration of the IR. This simply forwards to `OldSSA::canReuseSSAForMemoryResult`.
+ */
+predicate canReuseSSAForOldResult(Instruction instr) { OldSSA::canReuseSSAForMemoryResult(instr) }
+
+bindingset[result, b] // both `result` and `b` must already be bound where this is used
+private boolean unbindBool(boolean b) { result != b.booleanNot() } // holds when `result` and `b` are equal
+
+MemoryLocation getResultMemoryLocation(Instruction instr) { // chosen by the result's memory access kind
+ not canReuseSSAForOldResult(instr) and
+ exists(MemoryAccessKind kind, boolean isMayAccess |
+ kind = instr.getResultMemoryAccess() and
+ (if instr.hasResultMayMemoryAccess() then isMayAccess = true else isMayAccess = false) and
+ (
+ (
+ isIndirectOrBufferMemoryAccess(kind) and
+ if hasResultMemoryAccess(instr, _, _, _, _, _, _)
+ then
+ exists(Allocation var, IRType type, IntValue startBitOffset, IntValue endBitOffset |
+ hasResultMemoryAccess(instr, var, type, _, startBitOffset, endBitOffset, isMayAccess) and
+ result =
+ TVariableMemoryLocation(var, type, _, startBitOffset, endBitOffset,
+ unbindBool(isMayAccess))
+ )
+ else result = TUnknownMemoryLocation(instr.getEnclosingIRFunction(), isMayAccess) // no known allocation
+ )
+ or
+ kind instanceof EntireAllocationMemoryAccess and
+ result =
+ TEntireAllocationMemoryLocation(getAddressOperandAllocation(instr.getResultAddressOperand()),
+ unbindBool(isMayAccess))
+ or
+ kind instanceof EscapedMemoryAccess and
+ result = TAllAliasedMemory(instr.getEnclosingIRFunction(), isMayAccess)
+ or
+ kind instanceof NonLocalMemoryAccess and
+ result = TAllNonLocalMemory(instr.getEnclosingIRFunction(), isMayAccess)
+ )
+ )
+}
+
+MemoryLocation getOperandMemoryLocation(MemoryOperand operand) { // chosen by the operand's memory access kind
+ not canReuseSSAForOldResult(operand.getAnyDef()) and
+ exists(MemoryAccessKind kind, boolean isMayAccess |
+ kind = operand.getMemoryAccess() and
+ (if operand.hasMayReadMemoryAccess() then isMayAccess = true else isMayAccess = false) and
+ (
+ (
+ isIndirectOrBufferMemoryAccess(kind) and
+ if hasOperandMemoryAccess(operand, _, _, _, _, _, _)
+ then
+ exists(Allocation var, IRType type, IntValue startBitOffset, IntValue endBitOffset |
+ hasOperandMemoryAccess(operand, var, type, _, startBitOffset, endBitOffset, isMayAccess) and
+ result =
+ TVariableMemoryLocation(var, type, _, startBitOffset, endBitOffset, isMayAccess)
+ )
+ else result = TUnknownMemoryLocation(operand.getEnclosingIRFunction(), isMayAccess) // no known allocation
+ )
+ or
+ kind instanceof EntireAllocationMemoryAccess and
+ result =
+ TEntireAllocationMemoryLocation(getAddressOperandAllocation(operand.getAddressOperand()),
+ isMayAccess)
+ or
+ kind instanceof EscapedMemoryAccess and
+ result = TAllAliasedMemory(operand.getEnclosingIRFunction(), isMayAccess)
+ or
+ kind instanceof NonLocalMemoryAccess and
+ result = TAllNonLocalMemory(operand.getEnclosingIRFunction(), isMayAccess)
+ )
+ )
+}
+
+/** Gets the start bit offset of `location`, provided that `Ints::hasValue` holds for it. */
+int getStartBitOffset(VariableMemoryLocation location) {
+  Ints::hasValue(result) and result = location.getStartBitOffset()
+}
+
+/** Gets the end bit offset of `location`, provided that `Ints::hasValue` holds for it. */
+int getEndBitOffset(VariableMemoryLocation location) {
+  Ints::hasValue(result) and result = location.getEndBitOffset()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRBlockImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRBlockImports.qll
new file mode 100644
index 00000000000..d1b46ed35c8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRBlockImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRFunctionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRFunctionImports.qll
new file mode 100644
index 00000000000..8ec63b7c1cb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRFunctionImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.internal.IRFunctionBase as IRFunctionBase
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRImports.qll
new file mode 100644
index 00000000000..42d6e7db693
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRInternal.qll
new file mode 100644
index 00000000000..3a7a08accc0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRInternal.qll
@@ -0,0 +1,4 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import SSAConstruction as Construction
+import semmle.code.cpp.ir.implementation.IRConfiguration as IRConfiguration
+import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction::Raw as Raw
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRVariableImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRVariableImports.qll
new file mode 100644
index 00000000000..8c60565defc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/IRVariableImports.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.TempVariableTag as TempVariableTag
+import semmle.code.cpp.ir.internal.IRUtilities as IRUtilities
+import semmle.code.cpp.ir.internal.TempVariableTag as TTempVariableTag
+import semmle.code.cpp.ir.implementation.internal.TIRVariable as TIRVariable
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/InstructionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/InstructionImports.qll
new file mode 100644
index 00000000000..946fd770e94
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/InstructionImports.qll
@@ -0,0 +1,6 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
+import semmle.code.cpp.ir.implementation.Opcode as Opcode
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.internal.Overlap as Overlap
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/OperandImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/OperandImports.qll
new file mode 100644
index 00000000000..d0e013d1fba
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/OperandImports.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.internal.Overlap as Overlap
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.implementation.internal.TOperand as TOperand
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/OperandInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/OperandInternal.qll
new file mode 100644
index 00000000000..b47c20e97ef
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/OperandInternal.qll
@@ -0,0 +1,2 @@
+private import semmle.code.cpp.ir.implementation.internal.TOperand
+import AliasedSSAOperands
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasAnalysis.qll
new file mode 100644
index 00000000000..262088245e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasAnalysis.qll
@@ -0,0 +1,19 @@
+/**
+ * Include this module to annotate IR dumps with information computed by `AliasAnalysis.qll`.
+ */
+
+private import AliasAnalysisInternal
+private import InputIR
+private import AliasAnalysisImports
+private import AliasAnalysis
+private import semmle.code.cpp.ir.internal.IntegerConstant
+
+private class AliasPropertyProvider extends IRPropertyProvider { // forwards both hooks to `Print`
+ override string getInstructionProperty(Instruction instr, string key) {
+ Print::getInstructionProperty(instr, key) = result
+ }
+
+ override string getOperandProperty(Operand operand, string key) {
+ Print::getOperandProperty(operand, key) = result
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintIRImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintIRImports.qll
new file mode 100644
index 00000000000..46254a6e3f2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintIRImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.IRConfiguration as IRConfiguration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintSSA.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintSSA.qll
new file mode 100644
index 00000000000..72bb239c153
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintSSA.qll
@@ -0,0 +1,157 @@
+private import SSAConstructionInternal
+private import OldIR
+private import Alias
+private import SSAConstruction
+private import DebugSSA
+
+bindingset[offset]
+private string getKeySuffixForOffset(int offset) { // even offset: "", odd offset: "_Chi"
+ offset >= 0 and
+ (offset % 2 = 0 and result = "" or not offset % 2 = 0 and result = "_Chi")
+}
+
+bindingset[offset]
+private int getIndexForOffset(int offset) { result = offset / 2 and not offset < 0 }
+
+/**
+ * Property provider that dumps the memory access of each result. Useful for debugging SSA
+ * construction.
+ */
+class PropertyProvider extends IRPropertyProvider {
+ override string getInstructionProperty(Instruction instruction, string key) {
+ key = "ResultMemoryLocation" and // comma-joined memory locations of the instruction's result
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getResultMemoryLocation(instruction)
+ |
+ loc.toString(), ","
+ )
+ or
+ key = "ResultVirtualVariable" and // virtual variables of those same result locations
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getResultMemoryLocation(instruction)
+ |
+ loc.getVirtualVariable().toString(), ","
+ )
+ or
+ key = "OperandMemoryLocation" and // memory locations of any operand of the instruction
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getOperandMemoryLocation(instruction.getAnOperand())
+ |
+ loc.toString(), ","
+ )
+ or
+ key = "OperandVirtualVariable" and // virtual variables of those operand locations
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getOperandMemoryLocation(instruction.getAnOperand())
+ |
+ loc.getVirtualVariable().toString(), ","
+ )
+ or
+ exists(MemoryLocation useLocation, IRBlock defBlock, int defRank, int defOffset |
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, defOffset) and
+ defBlock.getInstruction(getIndexForOffset(defOffset)) = instruction and // index = offset / 2
+ key = "DefinitionRank" + getKeySuffixForOffset(defOffset) + "[" + useLocation.toString() + "]" and
+ result = defRank.toString()
+ )
+ or
+ exists(MemoryLocation useLocation, IRBlock useBlock, int useRank |
+ hasUseAtRank(useLocation, useBlock, useRank, instruction) and
+ key = "UseRank[" + useLocation.toString() + "]" and
+ result = useRank.toString()
+ )
+ or
+ exists(MemoryLocation useLocation, IRBlock defBlock, int defRank, int defOffset |
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, defOffset) and
+ defBlock.getInstruction(getIndexForOffset(defOffset)) = instruction and
+ key =
+ "DefinitionReachesUse" + getKeySuffixForOffset(defOffset) + "[" + useLocation.toString() +
+ "]" and
+ result = // each reached use is printed as "<block display index>_<instruction index>"
+ strictconcat(IRBlock useBlock, int useRank, int useIndex |
+ exists(Instruction useInstruction |
+ hasUseAtRank(useLocation, useBlock, useRank, useInstruction) and
+ useBlock.getInstruction(useIndex) = useInstruction and
+ definitionReachesUse(useLocation, defBlock, defRank, useBlock, useRank)
+ )
+ |
+ useBlock.getDisplayIndex().toString() + "_" + useIndex, ", "
+ order by
+ useBlock.getDisplayIndex(), useIndex
+ )
+ )
+ }
+
+ override string getBlockProperty(IRBlock block, string key) {
+ exists(MemoryLocation useLocation, int defRank, int defIndex |
+ hasDefinitionAtRank(useLocation, _, block, defRank, defIndex) and
+ defIndex = -1 and // only definitions at offset -1 are reported under the "(Phi)" key
+ key = "DefinitionRank(Phi)[" + useLocation.toString() + "]" and
+ result = defRank.toString()
+ )
+ or
+ exists(MemoryLocation useLocation, MemoryLocation defLocation, int defRank, int defIndex |
+ hasDefinitionAtRank(useLocation, defLocation, block, defRank, defIndex) and
+ defIndex = -1 and
+ key = "DefinitionReachesUse(Phi)[" + useLocation.toString() + "]" and
+ result =
+ strictconcat(IRBlock useBlock, int useRank, int useIndex |
+ exists(Instruction useInstruction |
+ hasUseAtRank(useLocation, useBlock, useRank, useInstruction) and
+ useBlock.getInstruction(useIndex) = useInstruction and
+ definitionReachesUse(useLocation, block, defRank, useBlock, useRank) and
+ exists(getOverlap(defLocation, useLocation)) // keep only uses that overlap the definition
+ )
+ |
+ useBlock.getDisplayIndex().toString() + "_" + useIndex, ", "
+ order by
+ useBlock.getDisplayIndex(), useIndex
+ )
+ )
+ or
+ exists( // NOTE(review): `overlap` is declared and printed but never constrained below - confirm
+ MemoryLocation useLocation, IRBlock predBlock, IRBlock defBlock, int defIndex, Overlap overlap
+ |
+ hasPhiOperandDefinition(_, useLocation, block, predBlock, defBlock, defIndex) and
+ key =
+ "PhiUse[" + useLocation.toString() + " from " + predBlock.getDisplayIndex().toString() + "]" and
+ result =
+ defBlock.getDisplayIndex().toString() + "_" + defIndex + " (" + overlap.toString() + ")"
+ )
+ or
+ key = "LiveOnEntry" and
+ result =
+ strictconcat(MemoryLocation useLocation |
+ locationLiveOnEntryToBlock(useLocation, block)
+ |
+ useLocation.toString(), ", " order by useLocation.toString()
+ )
+ or
+ key = "LiveOnExit" and
+ result =
+ strictconcat(MemoryLocation useLocation |
+ locationLiveOnExitFromBlock(useLocation, block)
+ |
+ useLocation.toString(), ", " order by useLocation.toString()
+ )
+ or
+ key = "DefsLiveOnEntry" and
+ result =
+ strictconcat(MemoryLocation defLocation |
+ definitionLiveOnEntryToBlock(defLocation, block)
+ |
+ defLocation.toString(), ", " order by defLocation.toString()
+ )
+ or
+ key = "DefsLiveOnExit" and
+ result =
+ strictconcat(MemoryLocation defLocation |
+ definitionLiveOnExitFromBlock(defLocation, block)
+ |
+ defLocation.toString(), ", " order by defLocation.toString()
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConsistency.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConsistency.ql
new file mode 100644
index 00000000000..3379f4530a1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConsistency.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Aliased SSA Consistency Check
+ * @description Performs consistency checks on the SSA construction. This query should have no results.
+ * @kind table
+ * @id cpp/aliased-ssa-consistency-check
+ */
+
+import SSAConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConsistency.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConsistency.qll
new file mode 100644
index 00000000000..5686bb439eb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConsistency.qll
@@ -0,0 +1,2 @@
+private import SSAConstruction as SSA
+import SSA::SSAConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll
new file mode 100644
index 00000000000..5092e921cb3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll
@@ -0,0 +1,1131 @@
+import SSAConstructionInternal
+private import SSAConstructionImports as Imports
+private import Imports::Opcode
+private import Imports::OperandTag
+private import Imports::Overlap
+private import Imports::TInstruction
+private import Imports::RawIR as RawIR
+private import SSAInstructions
+private import SSAOperands
+private import NewIR
+
+private class OldBlock = Reachability::ReachableBlock; // shorthand for reachable blocks
+
+private class OldInstruction = Reachability::ReachableInstruction; // shorthand for reachable instructions
+
+import Cached
+
+cached
+private module Cached {
+ cached // holds for the first instruction of every block that needs a phi for `defLocation`
+ predicate hasPhiInstructionCached(
+ OldInstruction blockStartInstr, Alias::MemoryLocation defLocation
+ ) {
+ exists(OldBlock phiBlock |
+ phiBlock.getFirstInstruction() = blockStartInstr and
+ definitionHasPhiNode(defLocation, phiBlock)
+ )
+ }
+
+ cached // cached wrapper around `hasChiNode` that ignores which virtual variable is involved
+ predicate hasChiInstructionCached(OldInstruction primaryInstruction) {
+ hasChiNode(_, primaryInstruction)
+ }
+
+ cached // holds for functions containing an instruction with an infeasible successor edge
+ predicate hasUnreachedInstructionCached(IRFunction irFunc) {
+ exists(OldInstruction source |
+ Reachability::isInfeasibleInstructionSuccessor(source, _) and
+ source.getEnclosingIRFunction() = irFunc
+ )
+ }
+
+ class TStageInstruction = // union of the four instruction branches filtered by `hasInstruction`
+ TRawInstruction or TPhiInstruction or TChiInstruction or TUnreachedInstruction;
+
+ /**
+ * Gets the sole `PhiInputOperand` of `oldInstruction` whose predecessor block is an `OldBlock`,
+ * when `oldInstruction` is a `Phi` instruction with exactly one such input operand. Has no
+ * result in every other case.
+ */
+ private OldIR::PhiInputOperand getDegeneratePhiOperand(OldInstruction oldInstruction) {
+ result =
+ unique(OldIR::PhiInputOperand input |
+ input.getPredecessorBlock() instanceof OldBlock and
+ input = oldInstruction.(OldIR::PhiInstruction).getAnInputOperand()
+ )
+ }
+
+ cached
+ predicate hasInstruction(TStageInstruction instr) {
+ instr instanceof TRawInstruction and instr instanceof OldInstruction // raw and reachable
+ or
+ instr = phiInstruction(_, _)
+ or
+ instr = reusedPhiInstruction(_) and
+ // Check that the phi instruction is *not* degenerate, but we can't use
+ // getDegeneratePhiOperand in the first stage with phi instructions
+ not exists(
+ unique(OldIR::PhiInputOperand operand |
+ operand = instr.(OldIR::PhiInstruction).getAnInputOperand() and
+ operand.getPredecessorBlock() instanceof OldBlock
+ )
+ )
+ or
+ instr instanceof TChiInstruction
+ or
+ instr instanceof TUnreachedInstruction
+ }
+
+ cached // blocks are matched on their last instruction, stepping back over a trailing Chi
+ IRBlock getNewBlock(OldBlock oldBlock) {
+ exists(Instruction newEnd, OldIR::Instruction oldEnd |
+ (
+ result.getLastInstruction() = newEnd and
+ not newEnd instanceof ChiInstruction
+ or
+ newEnd = result.getLastInstruction().(ChiInstruction).getAPredecessor() // does this work?
+ ) and
+ (
+ oldBlock.getLastInstruction() = oldEnd and
+ not oldEnd instanceof OldIR::ChiInstruction
+ or
+ oldEnd = oldBlock.getLastInstruction().(OldIR::ChiInstruction).getAPredecessor() // does this work?
+ ) and
+ oldEnd = getNewInstruction(newEnd) // the two end instructions must correspond to each other
+ )
+ }
+
+ /**
+ * Gets the old-IR block that `getNewBlock` maps to `newBlock`.
+ */
+ private OldBlock getOldBlock(IRBlock newBlock) { newBlock = getNewBlock(result) }
+
+ /**
+ * Holds if the def/use information for the result of `oldInstruction` can be modeled by this
+ * iteration of SSA. That is the case when alias analysis assigned the result a memory
+ * location, or when an earlier iteration of the IR already computed that information in
+ * full.
+ */
+ private predicate canModelResultForOldInstruction(OldInstruction oldInstruction) {
+ // An earlier SSA iteration already modeled this result.
+ Alias::canReuseSSAForOldResult(oldInstruction)
+ or
+ // This iteration models the result's memory location itself.
+ exists(Alias::getResultMemoryLocation(oldInstruction))
+ }
+
+ cached
+ predicate hasModeledMemoryResult(Instruction instruction) {
+ instruction instanceof ChiInstruction or // every Chi result counts as modeled
+ instruction instanceof PhiInstruction or // every Phi result counts as modeled
+ canModelResultForOldInstruction(getOldInstruction(instruction))
+ }
+
+ cached // four independent cases; any one of them marks the result as conflated
+ predicate hasConflatedMemoryResult(Instruction instruction) {
+ instruction instanceof AliasedDefinitionInstruction
+ or
+ instruction.getOpcode() instanceof Opcode::InitializeNonLocal
+ or
+ // Chi instructions track virtual variables, and therefore a chi instruction is
+ // conflated if it's associated with the aliased virtual variable.
+ exists(OldInstruction oldInstruction | instruction = getChi(oldInstruction) |
+ Alias::getResultMemoryLocation(oldInstruction).getVirtualVariable() instanceof
+ Alias::AliasedVirtualVariable
+ )
+ or
+ // Phi instructions track locations, and therefore a phi instruction is
+ // conflated if it's associated with a conflated location.
+ exists(Alias::MemoryLocation location |
+ instruction = getPhi(_, location) and
+ not exists(location.getAllocation()) // a location without an allocation is treated as conflated
+ )
+ }
+
+ cached // maps each register operand of the old instruction to its definition in the new IR
+ Instruction getRegisterOperandDefinition(Instruction instruction, RegisterOperandTag tag) {
+ exists(OldIR::RegisterOperand registerOperand, OldInstruction source |
+ source = getOldInstruction(instruction) and
+ registerOperand.getOperandTag() = tag and
+ source.getAnOperand() = registerOperand and
+ getNewInstruction(registerOperand.getAnyDef()) = result
+ )
+ }
+
+ pragma[noopt] // conjunct order below is the evaluation order; do not reorder casually
+ private predicate hasMemoryOperandDefinition(
+ OldInstruction oldInstruction, OldIR::NonPhiMemoryOperand oldOperand, Overlap overlap,
+ Instruction instr
+ ) {
+ oldOperand = oldInstruction.getAnOperand() and
+ oldOperand instanceof OldIR::NonPhiMemoryOperand and
+ exists(
+ OldBlock useBlock, int useRank, Alias::MemoryLocation useLocation,
+ Alias::MemoryLocation defLocation, OldBlock defBlock, int defRank, int defOffset,
+ Alias::MemoryLocation actualDefLocation
+ |
+ useLocation = Alias::getOperandMemoryLocation(oldOperand) and
+ hasUseAtRank(useLocation, useBlock, useRank, oldInstruction) and
+ definitionReachesUse(useLocation, defBlock, defRank, useBlock, useRank) and
+ hasDefinitionAtRank(useLocation, defLocation, defBlock, defRank, defOffset) and
+ instr = getDefinitionOrChiInstruction(defBlock, defOffset, defLocation, actualDefLocation) and
+ overlap = Alias::getOverlap(actualDefLocation, useLocation) // overlap uses the actual def location
+ )
+ }
+
+ /**
+ * Gets the new definition instruction for `oldOperand` based on `oldOperand`'s definition in the
+ * old IR. Usually, this will just get the old definition of `oldOperand` and map it to the
+ * corresponding new instruction. However, if the old definition of `oldOperand` is a `Phi`
+ * instruction that is now degenerate due to all but one of its predecessor branches being
+ * unreachable, this predicate will recurse through any degenerate `Phi` instructions to find the
+ * true definition.
+ */
+ private Instruction getNewDefinitionFromOldSSA(OldIR::MemoryOperand oldOperand, Overlap overlap) {
+ exists(Overlap originalOverlap |
+ originalOverlap = oldOperand.getDefinitionOverlap() and
+ (
+ result = getNewInstruction(oldOperand.getAnyDef()) and // direct case: keep the old overlap
+ overlap = originalOverlap
+ or
+ exists(OldIR::PhiInputOperand phiOperand, Overlap phiOperandOverlap |
+ phiOperand = getDegeneratePhiOperand(oldOperand.getAnyDef()) and
+ result = getNewDefinitionFromOldSSA(phiOperand, phiOperandOverlap) and // recursive step
+ overlap =
+ combineOverlap(pragma[only_bind_out](phiOperandOverlap),
+ pragma[only_bind_out](originalOverlap))
+ )
+ )
+ )
+ }
+
+ cached // three cases: ordinary memory operands, a Chi's partial operand, a Chi's total operand
+ private Instruction getMemoryOperandDefinition0(
+ Instruction instruction, MemoryOperandTag tag, Overlap overlap
+ ) {
+ exists(OldInstruction oldInstruction, OldIR::NonPhiMemoryOperand oldOperand |
+ oldInstruction = getOldInstruction(instruction) and
+ oldOperand = oldInstruction.getAnOperand() and
+ tag = oldOperand.getOperandTag() and
+ hasMemoryOperandDefinition(oldInstruction, oldOperand, overlap, result)
+ )
+ or
+ instruction = getChi(getOldInstruction(result)) and // partial operand: the Chi's primary instruction
+ tag instanceof ChiPartialOperandTag and
+ overlap instanceof MustExactlyOverlap
+ or
+ tag instanceof ChiTotalOperandTag and
+ result = getChiInstructionTotalOperand(instruction) and
+ overlap instanceof MustExactlyOverlap
+ }
+
+ cached // combines this stage's definitions with definitions carried over from the old SSA
+ Instruction getMemoryOperandDefinition(
+ Instruction instruction, MemoryOperandTag tag, Overlap overlap
+ ) {
+ // getMemoryOperandDefinition0 currently has a bug where it can match with multiple overlaps.
+ // This predicate ensures that the chosen overlap is the most conservative if there's any doubt.
+ result = getMemoryOperandDefinition0(instruction, tag, overlap) and
+ not ( // drop the exact-overlap answer whenever a total-overlap answer also exists
+ overlap instanceof MustExactlyOverlap and
+ exists(MustTotallyOverlap o | exists(getMemoryOperandDefinition0(instruction, tag, o)))
+ )
+ or
+ exists(OldIR::NonPhiMemoryOperand oldOperand |
+ result = getNewDefinitionFromOldSSA(oldOperand, overlap) and
+ oldOperand.getUse() = instruction and
+ tag = oldOperand.getOperandTag()
+ )
+ }
+
+ /**
+ * Holds if `[startBitOffset, endBitOffset)` is the bit range of the total operand that the
+ * partial operand of `chi` updates.
+ */
+ cached
+ predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBitOffset, int endBitOffset) {
+ exists(Alias::MemoryLocation partialLocation, OldInstruction partialDef |
+ partialDef = getOldInstruction(chi.getPartial()) and
+ partialLocation = Alias::getResultMemoryLocation(partialDef) and
+ Alias::getEndBitOffset(partialLocation) = endBitOffset and
+ Alias::getStartBitOffset(partialLocation) = startBitOffset
+ )
+ }
+
+ /**
+ * Holds if `operand` totally overlaps with its definition and consumes the bit range
+ * `[startBitOffset, endBitOffset)`.
+ */
+ cached
+ predicate getUsedInterval(NonPhiMemoryOperand operand, int startBitOffset, int endBitOffset) {
+ exists(Alias::MemoryLocation location, OldIR::NonPhiMemoryOperand oldOperand |
+ oldOperand = operand.getUse().(OldInstruction).getAnOperand() and // NOTE(review): any operand of the use, not tied to `operand` - confirm
+ location = Alias::getOperandMemoryLocation(oldOperand) and
+ startBitOffset = Alias::getStartBitOffset(location) and
+ endBitOffset = Alias::getEndBitOffset(location)
+ )
+ }
+
+ /**
+ * Holds if the `ChiPartialOperand` only partially overlaps with the `ChiTotalOperand`.
+ * This means that the `ChiPartialOperand` will not override the entire memory associated
+ * with the `ChiTotalOperand`.
+ */
+ cached
+ predicate chiOnlyPartiallyUpdatesLocation(ChiInstruction chi) {
+ exists(Alias::MemoryLocation location, OldInstruction oldInstruction |
+ oldInstruction = getOldInstruction(chi.getPartial()) and
+ location = Alias::getResultMemoryLocation(oldInstruction)
+ |
+ Alias::getStartBitOffset(location) != 0 or // does not start at the first bit
+ Alias::getEndBitOffset(location) != 8 * location.getType().getByteSize() // or does not end at the type's size in bits
+ )
+ }
+
+ /**
+ * Holds if `instr` is part of a cycle in the operand graph that doesn't go
+ * through a phi instruction and therefore should be impossible.
+ *
+ * For performance reasons, this predicate is not implemented (never holds)
+ * for the SSA stages of the IR.
+ */
+ cached
+ predicate isInCycle(Instruction instr) { none() } // deliberately empty, as explained above
+
+ cached // reads the operand's language type straight from the matching old-IR operand
+ Language::LanguageType getInstructionOperandType(Instruction instr, TypedOperandTag tag) {
+ exists(OldIR::TypedOperand typedOperand, OldInstruction source |
+ source = getOldInstruction(instr) and
+ typedOperand.getOperandTag() = tag and
+ source.getAnOperand() = typedOperand and
+ typedOperand.getLanguageType() = result
+ )
+ }
+
+ /**
+ * Gets the new definition of the input operand of `instr` associated with the predecessor block
+ * `newPredecessorBlock`, derived from the definition that operand has in the old IR.
+ */
+ private Instruction getNewPhiOperandDefinitionFromOldSSA(
+ Instruction instr, IRBlock newPredecessorBlock, Overlap overlap
+ ) {
+ exists(OldIR::PhiInputOperand input, OldIR::PhiInstruction phi |
+ getOldInstruction(instr) = phi and
+ phi.getInputOperand(getOldBlock(newPredecessorBlock)) = input and
+ getNewDefinitionFromOldSSA(input, overlap) = result
+ )
+ }
+
+ pragma[noopt] // conjunct order below is the evaluation order; do not reorder casually
+ cached
+ Instruction getPhiOperandDefinition(
+ Instruction instr, IRBlock newPredecessorBlock, Overlap overlap
+ ) {
+ exists(
+ Alias::MemoryLocation defLocation, Alias::MemoryLocation useLocation, OldBlock phiBlock,
+ OldBlock predBlock, OldBlock defBlock, int defOffset, Alias::MemoryLocation actualDefLocation
+ |
+ hasPhiOperandDefinition(defLocation, useLocation, phiBlock, predBlock, defBlock, defOffset) and
+ instr = getPhi(phiBlock, useLocation) and
+ newPredecessorBlock = getNewBlock(predBlock) and
+ result = getDefinitionOrChiInstruction(defBlock, defOffset, defLocation, actualDefLocation) and
+ overlap = Alias::getOverlap(actualDefLocation, useLocation)
+ )
+ or // phis reused from the old SSA take their operand definitions from the old IR
+ result = getNewPhiOperandDefinitionFromOldSSA(instr, newPredecessorBlock, overlap)
+ }
+
+ cached // gets the definition of the virtual variable that reaches the Chi's primary instruction
+ Instruction getChiInstructionTotalOperand(ChiInstruction chiInstr) {
+ exists(
+ Alias::VirtualVariable vvar, OldInstruction oldInstr, Alias::MemoryLocation defLocation,
+ OldBlock defBlock, int defRank, int defOffset, OldBlock useBlock, int useRank
+ |
+ chiInstr = getChi(oldInstr) and
+ vvar = Alias::getResultMemoryLocation(oldInstr).getVirtualVariable() and
+ hasDefinitionAtRank(vvar, defLocation, defBlock, defRank, defOffset) and
+ hasUseAtRank(vvar, useBlock, useRank, oldInstr) and // the primary instruction counts as the use
+ definitionReachesUse(vvar, defBlock, defRank, useBlock, useRank) and
+ result = getDefinitionOrChiInstruction(defBlock, defOffset, vvar, _)
+ )
+ }
+
+ cached // gets the new-IR counterpart of the first instruction of the block holding `instr`
+ Instruction getPhiInstructionBlockStart(PhiInstruction instr) {
+ exists(OldBlock oldBlock |
+ (
+ instr = getPhi(oldBlock, _)
+ or
+ // Any `Phi` that we propagated from the previous iteration stays in the same block.
+ getOldInstruction(instr).getBlock() = oldBlock
+ ) and
+ result = getNewInstruction(oldBlock.getFirstInstruction())
+ )
+ }
+
+ /*
+ * This adds Chi nodes to the instruction successor relation; if an instruction has a Chi node,
+ * that node is its successor in the new successor relation, and the Chi node's successors are
+ * the new instructions generated from the successors of the old instruction.
+ */
+
+ cached
+ Instruction getInstructionSuccessor(Instruction instruction, EdgeKind kind) {
+ if hasChiNode(_, getOldInstruction(instruction))
+ then
+ result = getChi(getOldInstruction(instruction)) and // the Chi is the only successor
+ kind instanceof GotoEdge
+ else (
+ exists(OldInstruction oldInstruction |
+ oldInstruction = getOldInstruction(instruction) and
+ (
+ if Reachability::isInfeasibleInstructionSuccessor(oldInstruction, kind)
+ then result = unreachedInstruction(instruction.getEnclosingIRFunction()) // infeasible edge goes to Unreached
+ else result = getNewInstruction(oldInstruction.getSuccessor(kind))
+ )
+ )
+ or
+ exists(OldInstruction oldInstruction |
+ instruction = getChi(oldInstruction) and // a Chi takes over its primary instruction's successors
+ result = getNewInstruction(oldInstruction.getSuccessor(kind))
+ )
+ )
+ }
+
+ cached // translates back edges of the old IR, leaving out infeasible ones
+ Instruction getInstructionBackEdgeSuccessor(Instruction instruction, EdgeKind kind) {
+ exists(OldInstruction oldInstruction |
+ not Reachability::isInfeasibleInstructionSuccessor(oldInstruction, kind) and
+ // There is only one case for the translation into `result` because the
+ // SSA construction never inserts extra instructions _before_ an existing
+ // instruction.
+ getOldInstruction(result) = oldInstruction.getBackEdgeSuccessor(kind) and
+ // There are two cases for the translation into `instruction` because the
+ // SSA construction might have inserted a chi node _after_
+ // `oldInstruction`, in which case the back edge should come out of the
+ // chi node instead.
+ if hasChiNode(_, oldInstruction)
+ then instruction = getChi(oldInstruction)
+ else instruction = getNewInstruction(oldInstruction)
+ )
+ }
+
+ cached // one case per kind of instruction: carried over, phi, chi, unreached
+ Language::AST getInstructionAST(Instruction instr) {
+ result = getOldInstruction(instr).getAST()
+ or
+ exists(RawIR::Instruction blockStartInstr |
+ instr = phiInstruction(blockStartInstr, _) and
+ result = blockStartInstr.getAST() // a phi borrows the AST of its block's first instruction
+ )
+ or
+ exists(RawIR::Instruction primaryInstr |
+ instr = chiInstruction(primaryInstr) and
+ result = primaryInstr.getAST() // a chi borrows the AST of its primary instruction
+ )
+ or
+ exists(IRFunctionBase irFunc |
+ instr = unreachedInstruction(irFunc) and result = irFunc.getFunction()
+ )
+ }
+
+ cached // one case per kind of instruction: raw, new phi, chi, reused phi, unreached
+ Language::LanguageType getInstructionResultType(Instruction instr) {
+ result = instr.(RawIR::Instruction).getResultLanguageType()
+ or
+ exists(Alias::MemoryLocation defLocation |
+ instr = phiInstruction(_, defLocation) and
+ result = defLocation.getType() // a phi has the type of the location it defines
+ )
+ or
+ exists(Instruction primaryInstr, Alias::VirtualVariable vvar |
+ instr = chiInstruction(primaryInstr) and
+ hasChiNode(vvar, primaryInstr) and
+ result = vvar.getType() // a chi has the type of the whole virtual variable
+ )
+ or
+ instr = reusedPhiInstruction(_) and
+ result = instr.(OldInstruction).getResultLanguageType()
+ or
+ instr = unreachedInstruction(_) and result = Language::getVoidType()
+ }
+
+ /**
+ * Holds if `opcode` is the opcode that specifies the operation performed by `instr`.
+ *
+ * The parameters are ordered such that they produce a clean join (with no need for reordering)
+ * in the characteristic predicates of the `Instruction` subclasses.
+ */
+ cached
+ predicate getInstructionOpcode(Opcode opcode, Instruction instr) {
+ opcode = getOldInstruction(instr).getOpcode() // carried-over instructions keep their opcode
+ or
+ instr = phiInstruction(_, _) and opcode instanceof Opcode::Phi
+ or
+ instr = chiInstruction(_) and opcode instanceof Opcode::Chi
+ or
+ instr = unreachedInstruction(_) and opcode instanceof Opcode::Unreached
+ }
+
+ cached // inserted instructions inherit the function of the instruction they are attached to
+ IRFunctionBase getInstructionEnclosingIRFunction(Instruction instr) {
+ result = getOldInstruction(instr).getEnclosingIRFunction()
+ or
+ exists(OldInstruction blockStartInstr |
+ instr = phiInstruction(blockStartInstr, _) and
+ result = blockStartInstr.getEnclosingIRFunction()
+ )
+ or
+ exists(OldInstruction primaryInstr |
+ instr = chiInstruction(primaryInstr) and result = primaryInstr.getEnclosingIRFunction()
+ )
+ or
+ instr = unreachedInstruction(result) // the function is the Unreached instruction's own argument
+ }
+
+ cached // covers old side-effect instructions and the Chi instructions added by this stage
+ Instruction getPrimaryInstructionForSideEffect(Instruction instruction) {
+ exists(OldIR::SideEffectInstruction oldInstruction |
+ oldInstruction = getOldInstruction(instruction) and
+ result = getNewInstruction(oldInstruction.getPrimaryInstruction())
+ )
+ or
+ exists(OldIR::Instruction oldInstruction |
+ instruction = getChi(oldInstruction) and
+ result = getNewInstruction(oldInstruction) // a Chi's primary is the instruction it follows
+ )
+ }
+}
+
+private Instruction getNewInstruction(OldInstruction instr) { instr = getOldInstruction(result) }
+
+private OldInstruction getOldInstruction(Instruction instr) { result = instr }
+
+private ChiInstruction getChi(OldInstruction primaryInstr) { chiInstruction(primaryInstr) = result }
+
+private PhiInstruction getPhi(OldBlock defBlock, Alias::MemoryLocation defLocation) {
+ phiInstruction(defBlock.getFirstInstruction(), defLocation) = result
+}
+
+/**
+ * Holds if a `Chi` instruction must be inserted after `def` because `def` only partially defines the virtual
+ * variable `vvar`. The inserted `Chi` instruction then acts as a definition of the whole virtual variable that
+ * contains the location written by `def`.
+ */
+private predicate hasChiNode(Alias::VirtualVariable vvar, OldInstruction def) {
+ exists(Alias::MemoryLocation written |
+ written = Alias::getResultMemoryLocation(def) and
+ vvar = written.getVirtualVariable() and
+ // A definition that totally (or exactly) overlaps the virtual variable needs no `Chi` instruction, so only
+ // a possible partial overlap qualifies.
+ Alias::getOverlap(written, vvar) instanceof MayPartiallyOverlap
+ )
+}
+
+private import PhiInsertion
+
+/**
+ * Module to handle insertion of `Phi` instructions at the correct blocks. We insert a `Phi` instruction at the
+ * beginning of a block for a given location when that block is on the dominance frontier of a definition of the
+ * location and there is a use of that location reachable from that block without an intervening definition of the
+ * location.
+ * Within the approach outlined above, we treat a location slightly differently depending on whether or not it is a
+ * virtual variable. For a virtual variable, we will insert a `Phi` instruction on the dominance frontier if there is
+ * a use of any member location of that virtual variable that is reachable from the `Phi` instruction. For a location
+ * that is not a virtual variable, we insert a `Phi` instruction only if there is an exactly-overlapping use of the
+ * location reachable from the `Phi` instruction. This ensures that we insert a `Phi` instruction for a non-virtual
+ * variable only if doing so would allow dataflow analysis to get a more precise result than if we just used a `Phi`
+ * instruction for the virtual variable as a whole.
+ */
+private module PhiInsertion {
+ /**
+ * Holds if some block that has a definition of the memory location `defLocation` has `phiBlock` in its
+ * dominance frontier.
+ */
+ pragma[noinline]
+ private predicate dominanceFrontierOfDefinition(
+ Alias::MemoryLocation defLocation, OldBlock phiBlock
+ ) {
+ exists(OldBlock definingBlock |
+ definitionHasDefinitionInBlock(defLocation, definingBlock) and
+ Dominance::getDominanceFrontier(definingBlock) = phiBlock
+ )
+ }
+
+ /**
+ * Holds if block `phiBlock` has to begin with a `Phi` instruction for the location `defLocation`.
+ */
+ predicate definitionHasPhiNode(Alias::MemoryLocation defLocation, OldBlock phiBlock) {
+ // Frontier blocks where the location is not live on entry are pruned: no `Phi` is inserted there.
+ definitionLiveOnEntryToBlock(defLocation, phiBlock) and
+ dominanceFrontierOfDefinition(defLocation, phiBlock)
+ }
+
+ /**
+ * Holds if block `block` contains a definition of the memory location `defLocation`, made by a `Phi` node, by
+ * an instruction that already exists in the block, or by the `Chi` node of such an instruction.
+ */
+ private predicate definitionHasDefinitionInBlock(Alias::MemoryLocation defLocation, OldBlock block) {
+ exists(OldInstruction writer, Alias::MemoryLocation written |
+ block = writer.getBlock() and
+ written = Alias::getResultMemoryLocation(writer)
+ |
+ // The instruction defines the location directly.
+ written = defLocation
+ or
+ // A definition of a member location also counts as a definition of its virtual variable: it either
+ // totally overlaps the virtual variable or generates a `Chi` node that defines it.
+ written.getVirtualVariable() = defLocation
+ )
+ or
+ // A `Phi` node for the location is a definition in its own right.
+ definitionHasPhiNode(defLocation, block)
+ }
+
+ /**
+ * Holds if the instruction at (`block`, `index`) has a use that the result of a `Phi` instruction for
+ * `defLocation` could flow to.
+ */
+ private predicate definitionHasUse(Alias::MemoryLocation defLocation, OldBlock block, int index) {
+ exists(OldInstruction consumer |
+ consumer = block.getInstruction(index)
+ |
+ defLocation instanceof Alias::VirtualVariable and
+ (
+ // The `Chi` instruction of `consumer` consumes the virtual variable that encloses its location.
+ hasChiNode(defLocation, consumer)
+ or
+ // A use of any member location of the virtual variable is a use of the virtual variable.
+ defLocation =
+ Alias::getOperandMemoryLocation(consumer.getAnOperand()).getVirtualVariable()
+ )
+ or
+ // A location that is not a virtual variable is used only by an exactly-overlapping use of itself.
+ not defLocation instanceof Alias::VirtualVariable and
+ Alias::getOperandMemoryLocation(consumer.getAnOperand()) = defLocation and
+ Alias::getOverlap(defLocation, defLocation) instanceof MustExactlyOverlap
+ )
+ }
+
+ /**
+ * Holds if the instruction at (`block`, `index`) redefines the location `defLocation`. Redefining means that the
+ * instruction writes memory in a way that keeps an earlier definition of `defLocation` from being consumed as
+ * the operand of a `Phi` node located after the instruction.
+ */
+ private predicate definitionHasRedefinition(
+ Alias::MemoryLocation defLocation, OldBlock block, int index
+ ) {
+ exists(OldInstruction writer, Alias::MemoryLocation written, Overlap overlap |
+ writer = block.getInstruction(index) and
+ written = Alias::getResultMemoryLocation(writer) and
+ // Both cases below require that the written location overlaps `defLocation` in some way.
+ overlap = Alias::getOverlap(written, defLocation)
+ |
+ // Any use of a member location can consume the definition of a virtual variable, so that definition
+ // stays live until the entire virtual variable is redefined. A write that only may-partially overlaps
+ // the virtual variable is therefore not a redefinition.
+ defLocation instanceof Alias::VirtualVariable and
+ not overlap instanceof MayPartiallyOverlap
+ or
+ // A definition of any other location is consumed only by exactly-overlapping uses of that same
+ // location. It is therefore live only until the next definition of any location that may overlap
+ // it, so every overlapping write is a redefinition.
+ not defLocation instanceof Alias::VirtualVariable
+ )
+ }
+
+
+ /**
+ * Holds if the location `defLocation` is live on entry to block `block`, that is, the location is used at least
+ * once before any intervening instruction redefines it.
+ */
+ predicate definitionLiveOnEntryToBlock(Alias::MemoryLocation defLocation, OldBlock block) {
+ not definitionHasRedefinition(defLocation, block, _) and
+ definitionLiveOnExitFromBlock(defLocation, block)
+ or
+ // Within `block`, the earliest use-or-redefinition of the location is a use.
+ exists(int earliest | definitionHasUse(defLocation, block, earliest) |
+ earliest =
+ min(int index |
+ definitionHasRedefinition(defLocation, block, index)
+ or
+ definitionHasUse(defLocation, block, index)
+ )
+ )
+ }
+
+ /**
+ * Holds if the location `defLocation` is live on exit from block `block`, which is the case when it is live on
+ * entry to at least one feasible successor of `block`.
+ */
+ pragma[noinline]
+ predicate definitionLiveOnExitFromBlock(Alias::MemoryLocation defLocation, OldBlock block) {
+ definitionLiveOnEntryToBlock(defLocation, block.getAFeasibleSuccessor())
+ }
+}
+
+private import DefUse
+
+/**
+ * Module containing the predicates that connect uses to their reaching definition. The reaching definitions are
+ * computed separately for each unique use `MemoryLocation`. An instruction is treated as a definition of a use location
+ * if the defined location overlaps the use location in any way. Thus, a single instruction may serve as a definition
+ * for multiple use locations, since a single definition location may overlap many use locations.
+ *
+ * Definitions and uses are identified by a block and an integer "offset". An offset of -1 indicates the definition
+ * from a `Phi` instruction at the beginning of the block. An offset of 2*i indicates a definition or use on the
+ * instruction at index `i` in the block. An offset of 2*i+1 indicates a definition or use on the `Chi` instruction that
+ * will be inserted immediately after the instruction at index `i` in the block.
+ *
+ * For a given use location, each definition and use is also assigned a "rank" within its block. The rank is simply the
+ * one-based index of that definition or use within the list of definitions and uses of that location within the block,
+ * ordered by offset. The rank allows the various reachability predicates to be computed more efficiently than they
+ * would if based solely on offset, since the set of possible ranks is dense while the set of possible offsets is
+ * potentially very sparse.
+ */
+module DefUse {
+ /**
+ * Gets the instruction that defines `defLocation` at offset `defOffset` in block `defBlock`: the `Phi` at
+ * offset -1, an instruction of the block at an even offset, or the `Chi` of that instruction at the following
+ * odd offset. `actualDefLocation` is `defLocation`, except for a `Chi`, where it is the virtual variable of
+ * `defLocation`.
+ */
+ Instruction getDefinitionOrChiInstruction(
+ OldBlock defBlock, int defOffset, Alias::MemoryLocation defLocation,
+ Alias::MemoryLocation actualDefLocation
+ ) {
+ // The `Phi` instruction at the start of the block.
+ defOffset = -1 and
+ actualDefLocation = defLocation and
+ getPhi(defBlock, defLocation) = result and
+ hasDefinition(_, defLocation, defBlock, defOffset)
+ or
+ // An instruction of the block, or the `Chi` instruction keyed on it.
+ exists(OldInstruction writer, int index, Alias::MemoryLocation written |
+ index >= 0 and
+ writer = defBlock.getInstruction(index) and
+ written = Alias::getResultMemoryLocation(writer) and
+ // The instruction counts as a definition of the location it writes and of its virtual variable.
+ (defLocation = written or defLocation = written.getVirtualVariable())
+ |
+ // An even offset corresponds to the instruction itself.
+ defOffset = 2 * index and
+ actualDefLocation = defLocation and
+ getNewInstruction(writer) = result
+ or
+ // An odd offset corresponds to the `Chi` instruction.
+ defOffset = 2 * index + 1 and
+ actualDefLocation = defLocation.getVirtualVariable() and
+ getChi(writer) = result
+ )
+ }
+
+ /**
+ * Gets the rank that a hypothetical use located one instruction past the end
+ * of `block` would have. Comparing against this rank tells whether a
+ * definition reaches the end of the block, even when that definition is the
+ * last instruction in the block.
+ */
+ private int exitRank(Alias::MemoryLocation useLocation, OldBlock block) {
+ result = 1 + max(int r | defUseRank(useLocation, block, r, _))
+ }
+
+ /**
+ * Holds if `defBlock` and `useBlock` are the same block and the definition overlapping `useLocation` at rank
+ * `defRank` in that block reaches the use of `useLocation` at rank `useRank`, with no other definition that
+ * overlaps `useLocation` in between.
+ */
+ private predicate definitionReachesUseWithinBlock(
+ Alias::MemoryLocation useLocation, OldBlock defBlock, int defRank, OldBlock useBlock,
+ int useRank
+ ) {
+ useBlock = defBlock and
+ definitionReachesRank(useLocation, useBlock, defRank, useRank) and
+ hasUseAtRank(useLocation, useBlock, useRank, _) and
+ hasDefinitionAtRank(useLocation, _, useBlock, defRank, _)
+ }
+
+ /**
+ * Holds if the definition overlapping `useLocation` at (`defBlock`, `defRank`) is the one that reaches the use
+ * of `useLocation` at (`useBlock`, `useRank`), i.e. no other overlapping definition lies between them.
+ */
+ predicate definitionReachesUse(
+ Alias::MemoryLocation useLocation, OldBlock defBlock, int defRank, OldBlock useBlock,
+ int useRank
+ ) {
+ hasUseAtRank(useLocation, useBlock, useRank, _) and
+ (
+ not definitionReachesUseWithinBlock(useLocation, useBlock, _, useBlock, useRank) and
+ definitionReachesEndOfBlock(useLocation, defBlock, defRank, useBlock.getAFeasiblePredecessor())
+ or
+ definitionReachesUseWithinBlock(useLocation, defBlock, defRank, useBlock, useRank)
+ )
+ }
+
+ /**
+ * Holds if the definition overlapping `useLocation` at (`block`, `defRank`) reaches rank `reachesRank` of the
+ * same block, i.e. no other overlapping definition sits at a rank in between.
+ */
+ private predicate definitionReachesRank(
+ Alias::MemoryLocation useLocation, OldBlock block, int defRank, int reachesRank
+ ) {
+ // Inductive step: a definition that reached the previous rank also reaches this one, unless the previous
+ // rank holds another definition (or is already the exit rank, which keeps `reachesRank` from growing
+ // past the exit rank).
+ exists(int prevRank | reachesRank = prevRank + 1 |
+ definitionReachesRank(useLocation, block, defRank, prevRank) and
+ not hasDefinitionAtRank(useLocation, _, block, prevRank, _) and
+ not prevRank = exitRank(useLocation, block)
+ )
+ or
+ // Base case: the definition always reaches the rank right after its own, even if the instruction at
+ // that rank is a definition as well.
+ reachesRank = defRank + 1 and
+ hasDefinitionAtRank(useLocation, _, block, defRank, _)
+ }
+
+ /**
+ * Holds if the definition overlapping `useLocation` at (`defBlock`, `defRank`) is still the reaching definition
+ * at the end of `block`, i.e. no other definition overlapping `useLocation` intervenes.
+ */
+ predicate definitionReachesEndOfBlock(
+ Alias::MemoryLocation useLocation, OldBlock defBlock, int defRank, OldBlock block
+ ) {
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, _) and
+ (
+ // The definition reaches the end of the immediate dominator of `block`, and `block` adds no definition.
+ exists(OldBlock idom | noDefinitionsSinceIDominator(useLocation, idom, block) |
+ definitionReachesEndOfBlock(useLocation, defBlock, defRank, idom)
+ )
+ or
+ // Within the defining block itself, the definition must reach the exit rank while the location is
+ // live on exit.
+ defBlock = block and
+ definitionReachesRank(useLocation, block, defRank, exitRank(useLocation, block)) and
+ locationLiveOnExitFromBlock(useLocation, block)
+ )
+ }
+
+ pragma[noinline]
+ private predicate noDefinitionsSinceIDominator(
+ Alias::MemoryLocation useLocation, OldBlock idom, OldBlock block
+ ) {
+ not hasDefinition(useLocation, _, block, _) and
+ locationLiveOnExitFromBlock(useLocation, block) and
+ Dominance::blockImmediatelyDominates(idom, block) // Traversing the dominator graph is sufficient.
+ }
+
+ /**
+ * Holds if `useLocation` is live on entry to `block`, meaning that a use of `useLocation` can be reached from
+ * the start of `block` without first crossing a definition that overlaps `useLocation`. A definition that only
+ * partially overlaps also ends liveness, since it leads to a `Chi` instruction whose result totally overlaps
+ * the location.
+ */
+ predicate locationLiveOnEntryToBlock(Alias::MemoryLocation useLocation, OldBlock block) {
+ not hasNonPhiDefinition(useLocation, _, block, _) and
+ locationLiveOnExitFromBlock(useLocation, block)
+ or
+ // Within `block`, the earliest use or non-`Phi` definition of the location is a use.
+ exists(int first | hasUse(useLocation, block, first, _) |
+ first =
+ min(int offset |
+ hasNonPhiDefinition(useLocation, _, block, offset)
+ or
+ hasUse(useLocation, block, offset, _)
+ )
+ )
+ or
+ definitionHasPhiNode(useLocation, block)
+ }
+
+ /**
+ * Holds if `useLocation` is live on exit from `block`, i.e. live on entry to a feasible successor of `block`.
+ */
+ pragma[noinline]
+ predicate locationLiveOnExitFromBlock(Alias::MemoryLocation useLocation, OldBlock block) {
+ locationLiveOnEntryToBlock(useLocation, block.getAFeasibleSuccessor())
+ }
+
+ /**
+ * Holds if an instruction in `block` defines `defLocation`, which overlaps the memory location `useLocation`,
+ * and `offset` is the offset of that definition. Definitions made by `Phi` nodes are not included.
+ */
+ private predicate hasNonPhiDefinition(
+ Alias::MemoryLocation useLocation, Alias::MemoryLocation defLocation, OldBlock block, int offset
+ ) {
+ exists(OldInstruction writer, int i, Overlap overlap |
+ writer = block.getInstruction(i) and
+ Alias::getResultMemoryLocation(writer) = defLocation and
+ Alias::getOverlap(defLocation, useLocation) = overlap and
+ if not overlap instanceof MayPartiallyOverlap
+ then offset = 2 * i // The use will be connected to the definition on the original instruction.
+ else offset = 2 * i + 1 // The use will be connected to the definition on the `Chi` instruction.
+ )
+ }
+
+ /**
+ * Holds if `block` contains, at offset `offset`, a definition of `defLocation` that overlaps the memory location
+ * `useLocation`. A definition by a `Phi` node is reported at offset -1.
+ */
+ private predicate hasDefinition(
+ Alias::MemoryLocation useLocation, Alias::MemoryLocation defLocation, OldBlock block, int offset
+ ) {
+ hasNonPhiDefinition(useLocation, defLocation, block, offset)
+ or
+ offset = -1 and
+ (
+ // A `Phi` node for the use location itself takes precedence.
+ definitionHasPhiNode(useLocation, block) and
+ defLocation = useLocation
+ or
+ // Otherwise fall back to a `Phi` node for the enclosing virtual variable.
+ not definitionHasPhiNode(useLocation, block) and
+ defLocation = useLocation.getVirtualVariable() and
+ definitionHasPhiNode(defLocation, block) and
+ // A virtual variable normally overlaps each of its members, but not always: a definition of the
+ // virtual variable for all aliased memory does not overlap a use of a string literal, whose contents
+ // can never be redefined. The literal can still be a member of the `AliasedVirtualVariable` due to
+ // something like:
+ // ```
+ // char s[10];
+ // strcpy(s, p);
+ // const char* p = b ? "SomeLiteral" : s;
+ // return p[3];
+ // ```
+ // Here `p[3]` may read either the string literal or the local variable `s`, so both locations must
+ // belong to the `AliasedVirtualVariable`. The check below handles that unusual case.
+ exists(Alias::getOverlap(defLocation, useLocation))
+ )
+ }
+
+ /**
+ * Holds if `block` has, at offset `offset`, a definition of `defLocation` that overlaps `useLocation`, and
+ * `rankIndex` is the rank that `defUseRank()` assigns to that offset.
+ */
+ predicate hasDefinitionAtRank(
+ Alias::MemoryLocation useLocation, Alias::MemoryLocation defLocation, OldBlock block,
+ int rankIndex, int offset
+ ) {
+ defUseRank(useLocation, block, rankIndex, offset) and
+ hasDefinition(useLocation, defLocation, block, offset)
+ }
+
+ /**
+ * Holds if `useLocation` is used at offset `offset` in `block`, by `use` itself or by the `Chi` keyed on it.
+ */
+ private predicate hasUse(
+ Alias::MemoryLocation useLocation, OldBlock block, int offset, OldInstruction use
+ ) {
+ exists(int i | use = block.getInstruction(i) |
+ // A `Chi` instruction for `use` includes a use of the virtual variable; that use is placed at the
+ // odd offset.
+ offset = 2 * i + 1 and
+ hasChiNode(useLocation, use)
+ or
+ // An operand of `use` uses the location directly; that use is placed at the even offset.
+ offset = 2 * i and
+ useLocation = Alias::getOperandMemoryLocation(use.getAnOperand())
+ )
+ }
+
+ /**
+ * Holds if instruction `use` in block `block` has a use of the memory location `useLocation`, and `rankIndex`
+ * is the rank of that use as computed by `defUseRank`.
+ */
+ predicate hasUseAtRank(
+ Alias::MemoryLocation useLocation, OldBlock block, int rankIndex, OldInstruction use
+ ) {
+ exists(int useOffset |
+ defUseRank(useLocation, block, rankIndex, useOffset) and
+ hasUse(useLocation, block, useOffset, use)
+ )
+ }
+
+ /**
+ * Holds if `offset` is the `rankIndex`-th smallest offset in `block` that carries either a use of `useLocation`
+ * or a definition overlapping `useLocation`. Ranks therefore number exactly those definitions and uses, in
+ * offset order.
+ */
+ private predicate defUseRank(
+ Alias::MemoryLocation useLocation, OldBlock block, int rankIndex, int offset
+ ) {
+ offset =
+ rank[rankIndex](int candidate |
+ hasUse(useLocation, block, candidate, _) or hasDefinition(useLocation, _, block, candidate)
+ )
+ }
+
+ /**
+ * Holds if the `Phi` instruction for location `useLocation` at the beginning of block `phiBlock` has an operand along
+ * the incoming edge from `predBlock`, defined by the definition of `defLocation` at offset `defOffset` in `defBlock`.
+ */
+ pragma[noopt] // NOTE(review): the conjunct order below is presumably deliberate under `noopt`; keep it when editing
+ predicate hasPhiOperandDefinition(
+ Alias::MemoryLocation defLocation, Alias::MemoryLocation useLocation, OldBlock phiBlock,
+ OldBlock predBlock, OldBlock defBlock, int defOffset
+ ) {
+ exists(int defRank |
+ definitionHasPhiNode(useLocation, phiBlock) and
+ predBlock = phiBlock.getAFeasiblePredecessor() and
+ definitionReachesEndOfBlock(useLocation, defBlock, defRank, predBlock) and // the definition that reaches the end of `predBlock`
+ hasDefinitionAtRank(useLocation, defLocation, defBlock, defRank, defOffset) and // relates the rank to `defLocation` and `defOffset`
+ exists(Alias::getOverlap(defLocation, useLocation)) // requires that `defLocation` overlaps `useLocation`
+ )
+ }
+}
+
+/**
+ * Holds if the SSA information for the memory result of `instruction` can be reused.
+ */
+predicate canReuseSSAForMemoryResult(Instruction instruction) {
+ // The result of a `Phi` for a reusable location is reusable too.
+ exists(Alias::MemoryLocation phiLocation | phiLocation.canReuseSSA() |
+ phiInstruction(_, phiLocation) = instruction
+ )
+ or
+ exists(OldInstruction reused | reused = getOldInstruction(instruction) |
+ // The current alias analysis reports the result location as reusable.
+ Alias::getResultMemoryLocation(reused).canReuseSSA()
+ or
+ // The previous iteration already reported it as reusable, so it stays reusable.
+ Alias::canReuseSSAForOldResult(reused)
+ )
+ // There is deliberately no case for `Chi` instructions: reusing SSA is not supported for any location
+ // that could create a `Chi` instruction.
+}
+
+/**
+ * Exposes some of the internal predicates to PrintSSA.qll. The modules that contain them are publicly imported
+ * into `DebugSSA`, and PrintSSA in turn imports `DebugSSA`.
+ */
+module DebugSSA {
+ import DefUse
+ import PhiInsertion
+}
+
+import CachedForDebugging
+
+cached
+private module CachedForDebugging {
+ /** Gets the unique ID of `var`, taken from the matching temporary variable of the old IR. */
+ cached
+ string getTempVariableUniqueId(IRTempVariable var) {
+ getOldTempVariable(var).getUniqueId() = result
+ }
+
+ /** Gets the old-IR temporary variable with the same enclosing function, AST and tag as `var`. */
+ private OldIR::IRTempVariable getOldTempVariable(IRTempVariable var) {
+ var.getTag() = result.getTag() and
+ var.getAST() = result.getAST() and
+ var.getEnclosingFunction() = result.getEnclosingFunction()
+ }
+
+ /** Gets the unique ID string of `instr`. */
+ cached
+ string getInstructionUniqueId(Instruction instr) {
+ instr = unreachedInstruction(_) and
+ result = "Unreached"
+ or
+ result = "NonSSA: " + getOldInstruction(instr).getUniqueId()
+ or
+ exists(Alias::MemoryLocation location, OldBlock phiBlock, string specificity |
+ getPhi(phiBlock, location) = instr and
+ // "g" orders before "s", so Phi nodes for virtual variables sort before those for member locations.
+ (
+ location instanceof Alias::VirtualVariable and specificity = "g"
+ or
+ not location instanceof Alias::VirtualVariable and specificity = "s"
+ ) and
+ result =
+ "Phi Block(" + phiBlock.getFirstInstruction().getUniqueId() + ")[" + specificity + "]: " +
+ location.getUniqueId()
+ )
+ }
+
+ /** Holds if `instr` has the sort keys `key1` and `key2`. */
+ cached
+ predicate instructionHasSortKeys(Instruction instr, int key1, int key2) {
+ // The `Unreached` instruction gets `maxValue()` for both keys.
+ instr instanceof TUnreachedInstruction and
+ key1 = maxValue() and
+ key2 = maxValue()
+ or
+ getOldInstruction(instr).hasSortKeys(key1, key2)
+ }
+
+ /**
+ * Gets the value of the maximum representable integer.
+ */
+ cached
+ int maxValue() { result = 2147483647 }
+}
+
+module SSAConsistency {
+ /**
+ * Holds if alias analysis assigned more than one `MemoryLocation` to the memory operand `operand`.
+ */
+ query predicate multipleOperandMemoryLocations(
+ OldIR::MemoryOperand operand, string message, OldIR::IRFunction func, string funcText
+ ) {
+ exists(int numLocations | numLocations = strictcount(Alias::getOperandMemoryLocation(operand)) |
+ numLocations > 1 and
+ message =
+ operand.getUse().toString() + " " + "Operand has " + numLocations.toString() +
+ " memory accesses in function '$@': " +
+ strictconcat(Alias::getOperandMemoryLocation(operand).toString(), ", ")
+ ) and
+ // `func` and `funcText` identify the function that encloses the operand.
+ func = operand.getEnclosingIRFunction() and
+ funcText = Language::getIdentityString(func.getFunction())
+ }
+
+ /**
+ * Holds if the memory location `location` has no `VirtualVariable` associated with it.
+ */
+ query predicate missingVirtualVariableForMemoryLocation(
+ Alias::MemoryLocation location, string message, OldIR::IRFunction func, string funcText
+ ) {
+ message = "Memory location has no virtual variable in function '$@'." and
+ funcText = Language::getIdentityString(func.getFunction()) and
+ func = location.getIRFunction() and
+ not exists(location.getVirtualVariable())
+ }
+
+ /**
+ * Holds if the memory location `location` belongs to more than one `VirtualVariable`.
+ */
+ query predicate multipleVirtualVariablesForMemoryLocation(
+ Alias::MemoryLocation location, string message, OldIR::IRFunction func, string funcText
+ ) {
+ func = location.getIRFunction() and
+ funcText = Language::getIdentityString(func.getFunction()) and
+ exists(int numVVars, string vvarList |
+ numVVars = strictcount(location.getVirtualVariable()) and
+ numVVars > 1 and
+ // The comma-separated names of all virtual variables of the location.
+ vvarList =
+ concat(Alias::VirtualVariable v | v = location.getVirtualVariable() | v.toString(), ", ")
+ |
+ message =
+ "Memory location has " + numVVars.toString() + " virtual variables in function '$@': (" +
+ vvarList + ")."
+ )
+ }
+}
+
+/**
+ * The part of the parameterized IR interface that is used to construct the SSA stages of the IR; the raw
+ * stage of the IR does not expose these predicates.
+ * The predicates here are nothing more than aliases for predicates defined in the `Cached` module, which
+ * ensures that all of SSA construction is evaluated in the same stage.
+ */
+module SSA {
+ predicate hasUnreachedInstruction = Cached::hasUnreachedInstructionCached/1;
+
+ predicate hasChiInstruction = Cached::hasChiInstructionCached/1;
+
+ predicate hasPhiInstruction = Cached::hasPhiInstructionCached/2;
+
+ class MemoryLocation = Alias::MemoryLocation;
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionImports.qll
new file mode 100644
index 00000000000..219180d9f4d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionImports.qll
@@ -0,0 +1,6 @@
+import semmle.code.cpp.ir.implementation.Opcode as Opcode
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.internal.Overlap as Overlap
+import semmle.code.cpp.ir.implementation.internal.TInstruction as TInstruction
+import semmle.code.cpp.ir.implementation.raw.IR as RawIR
+import semmle.code.cpp.ir.implementation.internal.TOperand as TOperand
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionInternal.qll
new file mode 100644
index 00000000000..a1ce2629cc2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionInternal.qll
@@ -0,0 +1,8 @@
+import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as OldIR
+import semmle.code.cpp.ir.implementation.unaliased_ssa.internal.reachability.ReachableBlock as Reachability
+import semmle.code.cpp.ir.implementation.unaliased_ssa.internal.reachability.Dominance as Dominance
+import semmle.code.cpp.ir.implementation.aliased_ssa.IR as NewIR
+import semmle.code.cpp.ir.implementation.internal.TInstruction::AliasedSSAInstructions as SSAInstructions
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import AliasedSSA as Alias
+import semmle.code.cpp.ir.implementation.internal.TOperand::AliasedSSAOperands as SSAOperands
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/EdgeKindInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/EdgeKindInternal.qll
new file mode 100644
index 00000000000..bd6c2f4c151
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/EdgeKindInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRConfigurationInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRConfigurationInternal.qll
new file mode 100644
index 00000000000..bd6c2f4c151
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRConfigurationInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRFunctionBase.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRFunctionBase.qll
new file mode 100644
index 00000000000..60895ce3d26
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRFunctionBase.qll
@@ -0,0 +1,27 @@
+/**
+ * Provides a base class, `IRFunctionBase`, for the stage-independent portions of `IRFunction`.
+ */
+
+private import IRFunctionBaseInternal
+
+private newtype TIRFunction =
+ MkIRFunction(Language::Function func) { IRConstruction::Raw::functionHasIR(func) } // one value per `func` for which `functionHasIR` holds
+
+/**
+ * The IR for a function. Only the predicates that are identical in every phase of the IR live in this
+ * base class; each instantiation of `IRFunction` extends it.
+ */
+class IRFunctionBase extends TIRFunction {
+ Language::Function func;
+
+ IRFunctionBase() { MkIRFunction(func) = this }
+
+ /** Gets the function whose IR is represented. */
+ final Language::Function getFunction() { func = result }
+
+ /** Gets the location of the function. */
+ final Language::Location getLocation() { func.getLocation() = result }
+
+ /** Gets a textual representation of this element. */
+ final string toString() { "IR: " + func.toString() = result }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRFunctionBaseInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRFunctionBaseInternal.qll
new file mode 100644
index 00000000000..cc1bdb6444b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRFunctionBaseInternal.qll
@@ -0,0 +1,2 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as IRConstruction
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRTypeInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRTypeInternal.qll
new file mode 100644
index 00000000000..bd6c2f4c151
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/IRTypeInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OpcodeImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OpcodeImports.qll
new file mode 100644
index 00000000000..809bdcc867c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OpcodeImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OperandTag.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OperandTag.qll
new file mode 100644
index 00000000000..21dfedd95cd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OperandTag.qll
@@ -0,0 +1,298 @@
+/**
+ * Defines the set of possible `OperandTag`s, which are used to identify the role each `Operand`
+ * plays in the evaluation of its `Instruction`.
+ */
+
+private import OperandTagInternal
+
+private newtype TOperandTag =
+ TAddressOperand() or
+ TBufferSizeOperand() or
+ TSideEffectOperand() or
+ TLoadOperand() or
+ TStoreValueOperand() or
+ TUnaryOperand() or
+ TLeftOperand() or
+ TRightOperand() or
+ TConditionOperand() or
+ TCallTargetOperand() or
+ TThisArgumentOperand() or
+ TPositionalArgumentOperand(int argIndex) { Language::hasPositionalArgIndex(argIndex) } or
+ TChiTotalOperand() or
+ TChiPartialOperand() or
+ TAsmOperand(int index) { Language::hasAsmOperandIndex(index) }
+
+/**
+ * Identifies the kind of operand on an instruction. Each `Instruction` has at
+ * most one operand of any single `OperandTag`. The set of `OperandTag`s used by
+ * an `Instruction` is determined by the instruction's opcode.
+ */
+abstract class OperandTag extends TOperandTag {
+ /** Gets a textual representation of this operand tag. */
+ abstract string toString();
+
+ /**
+ * Gets an integer that represents where this operand will appear in the operand list of an
+ * instruction when the IR is printed.
+ */
+ abstract int getSortOrder();
+
+ /**
+ * Gets the operand's printed label: its id plus `:` if `alwaysPrintLabel()` holds, else empty.
+ */
+ final string getLabel() { if alwaysPrintLabel() then result = getId() + ":" else result = "" }
+
+ /**
+ * Gets an identifier that uniquely identifies this operand within its instruction.
+ */
+ abstract string getId();
+
+ /**
+ * Holds if the operand should always be prefixed with its label in the dump of its instruction.
+ */
+ predicate alwaysPrintLabel() { none() }
+}
+
+/**
+ * An operand that consumes a memory result (e.g. the `LoadOperand` on a `Load` instruction).
+ */
+abstract class MemoryOperandTag extends OperandTag { }
+
+/**
+ * An operand that consumes a register (non-memory) result.
+ */
+abstract class RegisterOperandTag extends OperandTag { }
+
+/**
+ * A memory operand whose type may be different from the result type of its definition instruction.
+ */
+abstract class TypedOperandTag extends MemoryOperandTag { }
+
+// Note: individual subtypes are listed in the order that the operands should
+// appear in the operand list of the instruction when the IR is printed.
+/**
+ * The address operand of an instruction that loads or stores a value from
+ * memory (e.g. `Load`, `Store`, `InitializeParameter`, `IndirectReadSideEffect`).
+ */
+class AddressOperandTag extends RegisterOperandTag, TAddressOperand {
+ final override string toString() { result = "Address" }
+
+ final override int getSortOrder() { result = 0 }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final override string getId() { result = "&" }
+}
+
+AddressOperandTag addressOperand() { result = TAddressOperand() }
+
+/**
+ * The buffer size operand of an instruction that represents a read or write of
+ * a buffer.
+ */
+class BufferSizeOperandTag extends RegisterOperandTag, TBufferSizeOperand {
+ final override string toString() { result = "BufferSize" }
+
+ final override int getSortOrder() { result = 1 }
+
+ final override string getId() { result = "size" }
+}
+
+BufferSizeOperandTag bufferSizeOperand() { result = TBufferSizeOperand() }
+
+/**
+ * The operand representing the read side effect of a `SideEffectInstruction`.
+ */
+class SideEffectOperandTag extends TypedOperandTag, TSideEffectOperand {
+ final override string toString() { result = "SideEffect" }
+
+ final override int getSortOrder() { result = 2 }
+
+ final override string getId() { result = "side_effect" }
+}
+
+SideEffectOperandTag sideEffectOperand() { result = TSideEffectOperand() }
+
+/**
+ * The source value operand of an instruction that loads a value from memory (e.g. `Load`,
+ * `ReturnValue`, `ThrowValue`).
+ */
+class LoadOperandTag extends TypedOperandTag, TLoadOperand {
+ final override string toString() { result = "Load" }
+
+ final override int getSortOrder() { result = 3 }
+
+ final override string getId() { result = "load" }
+}
+
+LoadOperandTag loadOperand() { result = TLoadOperand() }
+
+/**
+ * The source value operand of a `Store` instruction.
+ */
+class StoreValueOperandTag extends RegisterOperandTag, TStoreValueOperand {
+ final override string toString() { result = "StoreValue" }
+
+ final override int getSortOrder() { result = 4 }
+
+ final override string getId() { result = "store" }
+}
+
+StoreValueOperandTag storeValueOperand() { result = TStoreValueOperand() }
+
+/**
+ * The sole operand of a unary instruction (e.g. `Convert`, `Negate`, `Copy`).
+ */
+class UnaryOperandTag extends RegisterOperandTag, TUnaryOperand {
+ final override string toString() { result = "Unary" }
+
+ final override int getSortOrder() { result = 5 }
+
+ final override string getId() { result = "unary" }
+}
+
+UnaryOperandTag unaryOperand() { result = TUnaryOperand() }
+
+/**
+ * The left operand of a binary instruction (e.g. `Add`, `CompareEQ`).
+ */
+class LeftOperandTag extends RegisterOperandTag, TLeftOperand {
+ final override string toString() { result = "Left" }
+
+ final override int getSortOrder() { result = 6 }
+
+ final override string getId() { result = "left" }
+}
+
+LeftOperandTag leftOperand() { result = TLeftOperand() }
+
+/**
+ * The right operand of a binary instruction (e.g. `Add`, `CompareEQ`).
+ */
+class RightOperandTag extends RegisterOperandTag, TRightOperand {
+ final override string toString() { result = "Right" }
+
+ final override int getSortOrder() { result = 7 }
+
+ final override string getId() { result = "right" }
+}
+
+RightOperandTag rightOperand() { result = TRightOperand() }
+
+/**
+ * The condition operand of a `ConditionalBranch` or `Switch` instruction.
+ */
+class ConditionOperandTag extends RegisterOperandTag, TConditionOperand {
+ final override string toString() { result = "Condition" }
+
+ final override int getSortOrder() { result = 8 }
+
+ final override string getId() { result = "cond" }
+}
+
+ConditionOperandTag conditionOperand() { result = TConditionOperand() }
+
+/**
+ * The operand representing the target function of a `Call` instruction.
+ */
+class CallTargetOperandTag extends RegisterOperandTag, TCallTargetOperand {
+ final override string toString() { result = "CallTarget" }
+
+ final override int getSortOrder() { result = 10 }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final override string getId() { result = "func" }
+}
+
+CallTargetOperandTag callTargetOperand() { result = TCallTargetOperand() }
+
+/**
+ * An operand representing an argument to a function call. This includes both
+ * positional arguments (represented by `PositionalArgumentOperand`) and the
+ * implicit `this` argument, if any (represented by `ThisArgumentOperand`).
+ */
+abstract class ArgumentOperandTag extends RegisterOperandTag { }
+
+/**
+ * An operand representing the implicit 'this' argument to a member function
+ * call.
+ */
+class ThisArgumentOperandTag extends ArgumentOperandTag, TThisArgumentOperand {
+ ThisArgumentOperandTag() { this = TThisArgumentOperand() }
+
+ final override string toString() { result = "Arg(this)" }
+
+ final override int getSortOrder() { result = 11 }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final override string getId() { result = "this" }
+}
+
+ThisArgumentOperandTag thisArgumentOperand() { result = TThisArgumentOperand() }
+
+/**
+ * An operand representing a positional argument to a function call.
+ */
+class PositionalArgumentOperandTag extends ArgumentOperandTag, TPositionalArgumentOperand {
+ int argIndex;
+
+ PositionalArgumentOperandTag() { this = TPositionalArgumentOperand(argIndex) }
+
+ final override string toString() { result = "Arg(" + argIndex + ")" }
+
+ final override int getSortOrder() { result = 12 + argIndex }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final int getArgIndex() { result = argIndex }
+
+ final override string getId() { result = argIndex.toString() }
+}
+
+PositionalArgumentOperandTag positionalArgumentOperand(int argIndex) {
+ result = TPositionalArgumentOperand(argIndex)
+}
+
+abstract class ChiOperandTag extends MemoryOperandTag { }
+
+class ChiTotalOperandTag extends ChiOperandTag, TChiTotalOperand {
+ final override string toString() { result = "ChiTotal" }
+
+ final override int getSortOrder() { result = 13 }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final override string getId() { result = "total" }
+}
+
+ChiTotalOperandTag chiTotalOperand() { result = TChiTotalOperand() }
+
+class ChiPartialOperandTag extends ChiOperandTag, TChiPartialOperand {
+ final override string toString() { result = "ChiPartial" }
+
+ final override int getSortOrder() { result = 14 }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final override string getId() { result = "partial" }
+}
+
+ChiPartialOperandTag chiPartialOperand() { result = TChiPartialOperand() }
+
+class AsmOperandTag extends RegisterOperandTag, TAsmOperand {
+ int index;
+
+ AsmOperandTag() { this = TAsmOperand(index) }
+
+ final override string toString() { result = "AsmOperand(" + index + ")" }
+
+ final override int getSortOrder() { result = 15 + index }
+
+ final override predicate alwaysPrintLabel() { any() }
+
+ final override string getId() { result = index.toString() }
+}
+
+AsmOperandTag asmOperand(int index) { result = TAsmOperand(index) }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OperandTagInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OperandTagInternal.qll
new file mode 100644
index 00000000000..bd6c2f4c151
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/OperandTagInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TIRVariable.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TIRVariable.qll
new file mode 100644
index 00000000000..12a0c6e7898
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TIRVariable.qll
@@ -0,0 +1,23 @@
+private import TIRVariableInternal
+private import Imports::TempVariableTag
+
+newtype TIRVariable =
+ TIRUserVariable(Language::Variable var, Language::LanguageType type, Language::Function func) {
+ Construction::hasUserVariable(func, var, type)
+ } or
+ TIRTempVariable(
+ Language::Function func, Language::AST ast, TempVariableTag tag, Language::LanguageType type
+ ) {
+ Construction::hasTempVariable(func, ast, tag, type)
+ } or
+ TIRDynamicInitializationFlag(
+ Language::Function func, Language::Variable var, Language::LanguageType type
+ ) {
+ Construction::hasDynamicInitializationFlag(func, var, type)
+ } or
+ TIRStringLiteral(
+ Language::Function func, Language::AST ast, Language::LanguageType type,
+ Language::StringLiteral literal
+ ) {
+ Construction::hasStringLiteral(func, ast, type, literal)
+ }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TIRVariableInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TIRVariableInternal.qll
new file mode 100644
index 00000000000..7984c4883fd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TIRVariableInternal.qll
@@ -0,0 +1,7 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction::Raw as Construction
+private import semmle.code.cpp.ir.implementation.TempVariableTag as TempVariableTag_
+
+module Imports {
+ module TempVariableTag = TempVariableTag_;
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstruction.qll
new file mode 100644
index 00000000000..4b3f19cbdde
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstruction.qll
@@ -0,0 +1,103 @@
+private import TInstructionInternal
+private import IRFunctionBase
+private import TInstructionImports as Imports
+private import Imports::IRType
+private import Imports::Opcode
+
+/**
+ * An IR instruction. `TInstruction` is shared across all phases of the IR. There are individual
+ * branches of this type for instructions created directly from the AST (`TRawInstruction`) and for
+ * instructions added by each stage of SSA construction (`T*PhiInstruction`, `T*ChiInstruction`,
+ * `T*UnreachedInstruction`). Each stage then defines a `TStageInstruction` type that is a union of
+ * all of the branches that can appear in that particular stage. The public `Instruction` class for
+ * each phase extends the `TStageInstruction` type for that stage.
+ */
+cached
+newtype TInstruction =
+ TRawInstruction(
+ IRConstruction::Raw::InstructionTag1 tag1, IRConstruction::Raw::InstructionTag2 tag2
+ ) {
+ IRConstruction::Raw::hasInstruction(tag1, tag2)
+ } or
+ TUnaliasedSSAPhiInstruction(
+ TRawInstruction blockStartInstr, UnaliasedSSA::SSA::MemoryLocation memoryLocation
+ ) {
+ UnaliasedSSA::SSA::hasPhiInstruction(blockStartInstr, memoryLocation)
+ } or
+ TUnaliasedSSAChiInstruction(TRawInstruction primaryInstruction) { none() } or
+ TUnaliasedSSAUnreachedInstruction(IRFunctionBase irFunc) {
+ UnaliasedSSA::SSA::hasUnreachedInstruction(irFunc)
+ } or
+ TAliasedSSAPhiInstruction(
+ TRawInstruction blockStartInstr, AliasedSSA::SSA::MemoryLocation memoryLocation
+ ) {
+ AliasedSSA::SSA::hasPhiInstruction(blockStartInstr, memoryLocation)
+ } or
+ TAliasedSSAChiInstruction(TRawInstruction primaryInstruction) {
+ AliasedSSA::SSA::hasChiInstruction(primaryInstruction)
+ } or
+ TAliasedSSAUnreachedInstruction(IRFunctionBase irFunc) {
+ AliasedSSA::SSA::hasUnreachedInstruction(irFunc)
+ }
+
+/**
+ * Provides wrappers for the constructors of each branch of `TInstruction` that is used by the
+ * unaliased SSA stage.
+ * These wrappers are not parameterized because it is not possible to invoke an IPA constructor via
+ * a class alias.
+ */
+module UnaliasedSSAInstructions {
+ class TPhiInstruction = TUnaliasedSSAPhiInstruction;
+
+ TPhiInstruction phiInstruction(
+ TRawInstruction blockStartInstr, UnaliasedSSA::SSA::MemoryLocation memoryLocation
+ ) {
+ result = TUnaliasedSSAPhiInstruction(blockStartInstr, memoryLocation)
+ }
+
+ TRawInstruction reusedPhiInstruction(TRawInstruction blockStartInstr) { none() }
+
+ class TChiInstruction = TUnaliasedSSAChiInstruction;
+
+ TChiInstruction chiInstruction(TRawInstruction primaryInstruction) {
+ result = TUnaliasedSSAChiInstruction(primaryInstruction)
+ }
+
+ class TUnreachedInstruction = TUnaliasedSSAUnreachedInstruction;
+
+ TUnreachedInstruction unreachedInstruction(IRFunctionBase irFunc) {
+ result = TUnaliasedSSAUnreachedInstruction(irFunc)
+ }
+}
+
+/**
+ * Provides wrappers for the constructors of each branch of `TInstruction` that is used by the
+ * aliased SSA stage.
+ * These wrappers are not parameterized because it is not possible to invoke an IPA constructor via
+ * a class alias.
+ */
+module AliasedSSAInstructions {
+ class TPhiInstruction = TAliasedSSAPhiInstruction or TUnaliasedSSAPhiInstruction;
+
+ TPhiInstruction phiInstruction(
+ TRawInstruction blockStartInstr, AliasedSSA::SSA::MemoryLocation memoryLocation
+ ) {
+ result = TAliasedSSAPhiInstruction(blockStartInstr, memoryLocation)
+ }
+
+ TPhiInstruction reusedPhiInstruction(TRawInstruction blockStartInstr) {
+ result = TUnaliasedSSAPhiInstruction(blockStartInstr, _)
+ }
+
+ class TChiInstruction = TAliasedSSAChiInstruction;
+
+ TChiInstruction chiInstruction(TRawInstruction primaryInstruction) {
+ result = TAliasedSSAChiInstruction(primaryInstruction)
+ }
+
+ class TUnreachedInstruction = TAliasedSSAUnreachedInstruction;
+
+ TUnreachedInstruction unreachedInstruction(IRFunctionBase irFunc) {
+ result = TAliasedSSAUnreachedInstruction(irFunc)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstructionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstructionImports.qll
new file mode 100644
index 00000000000..e008ce7d8d3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstructionImports.qll
@@ -0,0 +1,2 @@
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.Opcode as Opcode
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstructionInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstructionInternal.qll
new file mode 100644
index 00000000000..adaaaca9cd8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TInstructionInternal.qll
@@ -0,0 +1,4 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as IRConstruction
+import semmle.code.cpp.ir.implementation.unaliased_ssa.internal.SSAConstruction as UnaliasedSSA
+import semmle.code.cpp.ir.implementation.aliased_ssa.internal.SSAConstruction as AliasedSSA
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TOperand.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TOperand.qll
new file mode 100644
index 00000000000..e86494af03a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TOperand.qll
@@ -0,0 +1,208 @@
+private import TInstruction
+private import OperandTag
+private import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as RawConstruction
+private import semmle.code.cpp.ir.implementation.unaliased_ssa.internal.SSAConstruction as UnaliasedConstruction
+private import semmle.code.cpp.ir.implementation.aliased_ssa.internal.SSAConstruction as AliasedConstruction
+private import semmle.code.cpp.ir.implementation.raw.IR as Raw
+private import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as Unaliased
+private import semmle.code.cpp.ir.implementation.aliased_ssa.IR as Aliased
+private import semmle.code.cpp.ir.internal.Overlap
+
+/**
+ * Provides the newtype used to represent operands across all phases of the IR.
+ */
+private module Internal {
+ /**
+ * An IR operand. `TOperand` is shared across all phases of the IR. There are branches of this
+ * type for operands created directly from the AST (`TRegisterOperand` and `TNonSSAMemoryOperand`),
+ * for operands computed by each stage of SSA construction (`T*PhiOperand` and
+ * `TAliasedChiOperand`), and a placeholder branch for operands that do not exist in a given
+ * stage of IR construction (`TNoOperand`).
+ */
+ cached
+ newtype TOperand =
+ // RAW
+ TRegisterOperand(TRawInstruction useInstr, RegisterOperandTag tag, TRawInstruction defInstr) {
+ defInstr = RawConstruction::getRegisterOperandDefinition(useInstr, tag) and
+ not RawConstruction::isInCycle(useInstr) and
+ strictcount(RawConstruction::getRegisterOperandDefinition(useInstr, tag)) = 1
+ } or
+ // Placeholder for Phi and Chi operands in stages that don't have the corresponding instructions
+ TNoOperand() { none() } or
+ // Can be "removed" later when there's unreachable code
+ // These operands can be reused across all three stages. They just get different defs.
+ TNonSSAMemoryOperand(Raw::Instruction useInstr, MemoryOperandTag tag) {
+ // Has no definition in raw but will get definitions later
+ useInstr.getOpcode().hasOperand(tag)
+ } or
+ TUnaliasedPhiOperand(
+ Unaliased::PhiInstruction useInstr, Unaliased::IRBlock predecessorBlock, Overlap overlap
+ ) {
+ exists(UnaliasedConstruction::getPhiOperandDefinition(useInstr, predecessorBlock, overlap))
+ } or
+ //// ALIASED
+ ////
+ // Until we share SSA, these will be all the phis there are. With SSA
+ // sharing, these will add to the ones that are already there.
+ // If we share SSA, be careful with the case where we remove all possible
+ // indirect writes to a variable because they're dead code. In that case it's
+ // important that we use the same definition of "is variable aliased" across
+ // the phases.
+ TAliasedPhiOperand(
+ TAliasedSSAPhiInstruction useInstr, Aliased::IRBlock predecessorBlock, Overlap overlap
+ ) {
+ exists(AliasedConstruction::getPhiOperandDefinition(useInstr, predecessorBlock, overlap))
+ } or
+ TAliasedChiOperand(TAliasedSSAChiInstruction useInstr, ChiOperandTag tag) { any() }
+}
+
+/**
+ * Reexports some branches from `TOperand` so they can be used in stage modules without importing
+ * `TOperand` itself.
+ */
+private module Shared {
+ class TRegisterOperand = Internal::TRegisterOperand;
+
+ /**
+ * Returns the register operand with the specified parameters.
+ */
+ TRegisterOperand registerOperand(
+ TRawInstruction useInstr, RegisterOperandTag tag, TRawInstruction defInstr
+ ) {
+ result = Internal::TRegisterOperand(useInstr, tag, defInstr)
+ }
+
+ class TNonSSAMemoryOperand = Internal::TNonSSAMemoryOperand;
+
+ /**
+ * Returns the non-Phi memory operand with the specified parameters.
+ */
+ TNonSSAMemoryOperand nonSSAMemoryOperand(TRawInstruction useInstr, MemoryOperandTag tag) {
+ result = Internal::TNonSSAMemoryOperand(useInstr, tag)
+ }
+}
+
+/**
+ * Provides wrappers for the constructors of each branch of `TOperand` that is used by the
+ * raw IR stage.
+ * These wrappers are not parameterized because it is not possible to invoke an IPA constructor via
+ * a class alias.
+ */
+module RawOperands {
+ import Shared
+
+ class TPhiOperand = Internal::TNoOperand;
+
+ class TChiOperand = Internal::TNoOperand;
+
+ class TNonPhiMemoryOperand = TNonSSAMemoryOperand or TChiOperand;
+
+ /**
+ * Returns the Phi operand with the specified parameters.
+ */
+ TPhiOperand phiOperand(
+ Raw::PhiInstruction useInstr, Raw::Instruction defInstr, Raw::IRBlock predecessorBlock,
+ Overlap overlap
+ ) {
+ none()
+ }
+
+ TPhiOperand reusedPhiOperand(
+ Raw::PhiInstruction useInstr, Raw::Instruction defInstr, Raw::IRBlock predecessorBlock,
+ Overlap overlap
+ ) {
+ none()
+ }
+
+ /**
+ * Returns the Chi operand with the specified parameters.
+ */
+ TChiOperand chiOperand(Raw::Instruction useInstr, ChiOperandTag tag) { none() }
+}
+
+/**
+ * Provides wrappers for the constructors of each branch of `TOperand` that is used by the
+ * unaliased SSA stage.
+ * These wrappers are not parameterized because it is not possible to invoke an IPA constructor via
+ * a class alias.
+ */
+module UnaliasedSSAOperands {
+ import Shared
+
+ class TPhiOperand = Internal::TUnaliasedPhiOperand;
+
+ class TChiOperand = Internal::TNoOperand;
+
+ class TNonPhiMemoryOperand = TNonSSAMemoryOperand or TChiOperand;
+
+ /**
+ * Returns the Phi operand with the specified parameters.
+ */
+ TPhiOperand phiOperand(
+ Unaliased::PhiInstruction useInstr, Unaliased::Instruction defInstr,
+ Unaliased::IRBlock predecessorBlock, Overlap overlap
+ ) {
+ defInstr = UnaliasedConstruction::getPhiOperandDefinition(useInstr, predecessorBlock, overlap) and
+ result = Internal::TUnaliasedPhiOperand(useInstr, predecessorBlock, overlap)
+ }
+
+ TPhiOperand reusedPhiOperand(
+ Unaliased::PhiInstruction useInstr, Unaliased::Instruction defInstr,
+ Unaliased::IRBlock predecessorBlock, Overlap overlap
+ ) {
+ none()
+ }
+
+ /**
+ * Returns the Chi operand with the specified parameters.
+ */
+ TChiOperand chiOperand(Unaliased::Instruction useInstr, ChiOperandTag tag) { none() }
+}
+
+/**
+ * Provides wrappers for the constructors of each branch of `TOperand` that is used by the
+ * aliased SSA stage.
+ * These wrappers are not parameterized because it is not possible to invoke an IPA constructor via
+ * a class alias.
+ */
+module AliasedSSAOperands {
+ import Shared
+
+ class TPhiOperand = Internal::TAliasedPhiOperand or Internal::TUnaliasedPhiOperand;
+
+ class TChiOperand = Internal::TAliasedChiOperand;
+
+ class TNonPhiMemoryOperand = TNonSSAMemoryOperand or TChiOperand;
+
+ /**
+ * Returns the Phi operand with the specified parameters.
+ */
+ TPhiOperand phiOperand(
+ Aliased::PhiInstruction useInstr, Aliased::Instruction defInstr,
+ Aliased::IRBlock predecessorBlock, Overlap overlap
+ ) {
+ defInstr = AliasedConstruction::getPhiOperandDefinition(useInstr, predecessorBlock, overlap) and
+ result = Internal::TAliasedPhiOperand(useInstr, predecessorBlock, overlap)
+ }
+
+ /**
+ * Returns the Phi operand reused from the unaliased SSA stage with the specified parameters.
+ */
+ TPhiOperand reusedPhiOperand(
+ Aliased::PhiInstruction useInstr, Aliased::Instruction defInstr,
+ Aliased::IRBlock predecessorBlock, Overlap overlap
+ ) {
+ exists(Unaliased::IRBlock oldBlock |
+ predecessorBlock = AliasedConstruction::getNewBlock(oldBlock) and
+ result = Internal::TUnaliasedPhiOperand(useInstr, oldBlock, _) and
+ defInstr = AliasedConstruction::getPhiOperandDefinition(useInstr, predecessorBlock, overlap)
+ )
+ }
+
+ /**
+ * Returns the Chi operand with the specified parameters.
+ */
+ TChiOperand chiOperand(TAliasedSSAChiInstruction useInstr, ChiOperandTag tag) {
+ result = Internal::TAliasedChiOperand(useInstr, tag)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TempVariableTagInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TempVariableTagInternal.qll
new file mode 100644
index 00000000000..c5e07a351ab
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/internal/TempVariableTagInternal.qll
@@ -0,0 +1,6 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+private import semmle.code.cpp.ir.internal.TempVariableTag as TempVariableTag_
+
+module Imports {
+ module TempVariableTag = TempVariableTag_;
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IR.qll
new file mode 100644
index 00000000000..c96783fe6e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IR.qll
@@ -0,0 +1,80 @@
+/**
+ * Provides classes that describe the Intermediate Representation (IR) of the program.
+ *
+ * The IR is a representation of the semantics of the program, with very little dependence on the
+ * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`,
+ * and `++i` all have the same semantic effect, but appear in the AST as three different types of
+ * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental
+ * operations similar to:
+ *
+ * ```
+ * r1(int*) = VariableAddress[i] // Compute the address of variable `i`
+ * r2(int) = Load &:r1, m0 // Load the value of `i`
+ * r3(int) = Constant[1] // An integer constant with the value `1`
+ * r4(int) = Add r2, r3 // Add `1` to the value of `i`
+ * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i`
+ * ```
+ *
+ * This allows IR-based analysis to focus on the fundamental operations, rather than having to be
+ * concerned with the various ways of expressing those operations in source code.
+ *
+ * The key classes in the IR are:
+ *
+ * - `IRFunction` - Contains the IR for an entire function definition, including all of that
+ * function's `Instruction`s, `IRBlock`s, and `IRVariable`s.
+ * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be
+ * performed, the operands that produce the inputs to that operation, and the type of the result
+ * of the operation. Control flows from an `Instruction` to one of a set of successor
+ * `Instruction`s.
+ * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly
+ * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has
+ * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction`
+ * that produces its value (its "definition").
+ * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is
+ * created for each variable directly accessed by the function. In addition, `IRVariable`s are
+ * created to represent certain temporary storage locations that do not have explicitly declared
+ * variables in the source code, such as the return value of the function.
+ * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a
+ * sequence of instructions such that control flow can only enter the block at the first
+ * instruction, and can only leave the block from the last instruction.
+ * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType`
+ * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all
+ * be represented as the `IRType` `uint4`, a four-byte unsigned integer.
+ */
+
+import IRFunction
+import Instruction
+import IRBlock
+import IRVariable
+import Operand
+private import internal.IRImports as Imports
+import Imports::EdgeKind
+import Imports::IRType
+import Imports::MemoryAccessKind
+
+private newtype TIRPropertyProvider = MkIRPropertyProvider()
+
+/**
+ * A class that provides additional properties to be dumped for IR instructions and blocks when using
+ * the PrintIR module. Libraries that compute additional facts about IR elements can extend the
+ * single instance of this class to specify the additional properties computed by the library.
+ */
+class IRPropertyProvider extends TIRPropertyProvider {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "IRPropertyProvider" }
+
+ /**
+ * Gets the value of the property named `key` for the specified instruction.
+ */
+ string getInstructionProperty(Instruction instruction, string key) { none() }
+
+ /**
+ * Gets the value of the property named `key` for the specified block.
+ */
+ string getBlockProperty(IRBlock block, string key) { none() }
+
+ /**
+ * Gets the value of the property named `key` for the specified operand.
+ */
+ string getOperandProperty(Operand operand, string key) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRBlock.qll
new file mode 100644
index 00000000000..4b86f9a7cec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRBlock.qll
@@ -0,0 +1,331 @@
+/**
+ * Provides classes describing basic blocks in the IR of a function.
+ */
+
+private import internal.IRInternal
+import Instruction
+private import internal.IRBlockImports as Imports
+import Imports::EdgeKind
+private import Cached
+
+/**
+ * A basic block in the IR. A basic block consists of a sequence of `Instructions` with the only
+ * incoming edges at the beginning of the sequence and the only outgoing edges at the end of the
+ * sequence.
+ *
+ * This class does not contain any members that query the predecessor or successor edges of the
+ * block. This allows different classes that extend `IRBlockBase` to expose different subsets of
+ * edges (e.g. ignoring unreachable edges).
+ *
+ * Most consumers should use the class `IRBlock`.
+ */
+class IRBlockBase extends TIRBlock {
+ /** Gets a textual representation of this block. */
+ final string toString() { result = getFirstInstruction(this).toString() }
+
+ /** Gets the source location of the first non-`Phi` instruction in this block. */
+ final Language::Location getLocation() { result = getFirstInstruction().getLocation() }
+
+ /**
+ * INTERNAL: Do not use.
+ *
+ * Gets the zero-based index of the block within its function.
+ *
+ * This predicate is used by debugging and printing code only.
+ */
+ int getDisplayIndex() {
+ exists(IRConfiguration::IRConfiguration config |
+ config.shouldEvaluateDebugStringsForFunction(this.getEnclosingFunction())
+ ) and
+ this =
+ rank[result + 1](IRBlock funcBlock, int sortOverride, int sortKey1, int sortKey2 |
+ funcBlock.getEnclosingFunction() = getEnclosingFunction() and
+ funcBlock.getFirstInstruction().hasSortKeys(sortKey1, sortKey2) and
+ // Ensure that the block containing `EnterFunction` always comes first.
+ if funcBlock.getFirstInstruction() instanceof EnterFunctionInstruction
+ then sortOverride = 0
+ else sortOverride = 1
+ |
+ funcBlock order by sortOverride, sortKey1, sortKey2
+ )
+ }
+
+ /**
+ * Gets the `index`th non-`Phi` instruction in this block.
+ */
+ final Instruction getInstruction(int index) { result = getInstruction(this, index) }
+
+ /**
+ * Gets a `Phi` instruction that appears at the start of this block.
+ */
+ final PhiInstruction getAPhiInstruction() {
+ Construction::getPhiInstructionBlockStart(result) = getFirstInstruction()
+ }
+
+ /**
+ * Gets an instruction in this block. This includes `Phi` instructions.
+ */
+ final Instruction getAnInstruction() {
+ result = getInstruction(_) or
+ result = getAPhiInstruction()
+ }
+
+ /**
+ * Gets the first non-`Phi` instruction in this block.
+ */
+ final Instruction getFirstInstruction() { result = getFirstInstruction(this) }
+
+ /**
+ * Gets the last instruction in this block.
+ */
+ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) }
+
+ /**
+ * Gets the number of non-`Phi` instructions in this block.
+ */
+ final int getInstructionCount() { result = getInstructionCount(this) }
+
+ /**
+ * Gets the `IRFunction` that contains this block.
+ */
+ final IRFunction getEnclosingIRFunction() {
+ result = getFirstInstruction(this).getEnclosingIRFunction()
+ }
+
+ /**
+ * Gets the `Function` that contains this block.
+ */
+ final Language::Function getEnclosingFunction() {
+ result = getFirstInstruction(this).getEnclosingFunction()
+ }
+}
+
+/**
+ * A basic block with additional information about its predecessor and successor edges. Each edge
+ * corresponds to the control flow between the last instruction of one block and the first
+ * instruction of another block.
+ */
+class IRBlock extends IRBlockBase {
+ /**
+ * Gets a block to which control flows directly from this block.
+ */
+ final IRBlock getASuccessor() { blockSuccessor(this, result) }
+
+ /**
+ * Gets a block from which control flows directly to this block.
+ */
+ final IRBlock getAPredecessor() { blockSuccessor(result, this) }
+
+ /**
+ * Gets the block to which control flows directly from this block along an edge of kind `kind`.
+ */
+ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) }
+
+ /**
+ * Gets the block to which control flows directly from this block along a back edge of kind
+ * `kind`.
+ */
+ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) }
+
+ /**
+ * Holds if this block immediately dominates `block`.
+ *
+ * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and every
+ * other block that strictly dominates block `B` also dominates block `A`.
+ */
+ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) }
+
+ /**
+ * Holds if this block strictly dominates `block`.
+ *
+ * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B`
+ * are not the same block.
+ */
+ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) }
+
+ /**
+ * Holds if this block dominates `block`.
+ *
+ * Block `A` dominates block `B` if any control flow path from the entry block of the function to
+ * block `B` must pass through block `A`. A block always dominates itself.
+ */
+ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block }
+
+ /**
+ * Gets a block on the dominance frontier of this block.
+ *
+ * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not
+ * dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`.
+ */
+ pragma[noinline]
+ final IRBlock dominanceFrontier() {
+ dominates(result.getAPredecessor()) and
+ not strictlyDominates(result)
+ }
+
+ /**
+ * Holds if this block immediately post-dominates `block`.
+ *
+ * Block `A` immediately post-dominates block `B` if block `A` strictly post-dominates block `B` and
+ * every other block that strictly post-dominates block `B` also post-dominates block `A`.
+ */
+ final predicate immediatelyPostDominates(IRBlock block) {
+ blockImmediatelyPostDominates(this, block)
+ }
+
+ /**
+ * Holds if this block strictly post-dominates `block`.
+ *
+ * Block `A` strictly post-dominates block `B` if block `A` post-dominates block `B` and blocks `A`
+ * and `B` are not the same block.
+ */
+ final predicate strictlyPostDominates(IRBlock block) {
+ blockImmediatelyPostDominates+(this, block)
+ }
+
+ /**
+ * Holds if this block is a post-dominator of `block`.
+ *
+ * Block `A` post-dominates block `B` if any control flow path from `B` to the exit block of the
+ * function must pass through block `A`. A block always post-dominates itself.
+ */
+ final predicate postDominates(IRBlock block) { strictlyPostDominates(block) or this = block }
+
+ /**
+ * Gets a block on the post-dominance frontier of this block.
+ *
+ * The post-dominance frontier of block `A` is the set of blocks `B` such that block `A` does not
+ * post-dominate block `B`, but block `A` does post-dominate an immediate successor of block `B`.
+ */
+ pragma[noinline]
+ final IRBlock postPominanceFrontier() {
+ postDominates(result.getASuccessor()) and
+ not strictlyPostDominates(result)
+ }
+
+ /**
+ * Holds if this block is reachable from the entry block of its function.
+ */
+ final predicate isReachableFromFunctionEntry() {
+ this = getEnclosingIRFunction().getEntryBlock() or
+ getAPredecessor().isReachableFromFunctionEntry()
+ }
+}
+
+private predicate startsBasicBlock(Instruction instr) {
+ not instr instanceof PhiInstruction and
+ not adjacentInBlock(_, instr)
+}
+
+/** Holds if `i2` follows `i1` in an `IRBlock`. */
+private predicate adjacentInBlock(Instruction i1, Instruction i2) {
+ // - i2 must be the only successor of i1
+ i2 = unique(Instruction i | i = i1.getASuccessor()) and
+ // - i1 must be the only predecessor of i2
+ i1 = unique(Instruction i | i.getASuccessor() = i2) and
+ // - The edge between the two must be a GotoEdge. We just check that one
+ // exists since we've already checked that it's unique.
+ exists(GotoEdge edgeKind | exists(i1.getSuccessor(edgeKind))) and
+ // - The edge must not be a back edge. This means we get the same back edges
+ // in the basic-block graph as we do in the raw CFG.
+ not exists(Construction::getInstructionBackEdgeSuccessor(i1, _))
+ // This predicate could be simplified to remove one of the `unique`s if we
+ // were willing to rely on the CFG being well-formed and thus never having
+ // more than one successor to an instruction that has a `GotoEdge` out of it.
+}
+
+private predicate isEntryBlock(TIRBlock block) {
+ block = MkIRBlock(any(EnterFunctionInstruction enter))
+}
+
+cached
+private module Cached {
+ cached
+ newtype TIRBlock = MkIRBlock(Instruction firstInstr) { startsBasicBlock(firstInstr) }
+
+ /** Gets the `index`th instruction in the block starting with `first`. */
+ private Instruction getInstructionFromFirst(Instruction first, int index) =
+ shortestDistances(startsBasicBlock/1, adjacentInBlock/2)(first, result, index)
+
+ /** Gets the `index`th instruction in `block`. */
+ cached
+ Instruction getInstruction(TIRBlock block, int index) {
+ result = getInstructionFromFirst(getFirstInstruction(block), index)
+ }
+
+ cached
+ int getInstructionCount(TIRBlock block) { result = strictcount(getInstruction(block, _)) }
+
+ cached
+ predicate blockSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ exists(Instruction predLast, Instruction succFirst |
+ predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
+ succFirst = predLast.getSuccessor(kind) and
+ succ = MkIRBlock(succFirst)
+ )
+ }
+
+ pragma[noinline]
+ private predicate blockIdentity(TIRBlock b1, TIRBlock b2) { b1 = b2 }
+
+ pragma[noopt]
+ cached
+ predicate backEdgeSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ backEdgeSuccessorRaw(pred, succ, kind)
+ or
+ // See the QLDoc on `backEdgeSuccessorRaw`.
+ exists(TIRBlock pred2 |
+ // Joining with `blockIdentity` is a performance trick to get
+ // `forwardEdgeRaw` on the RHS of a join, where it's fast.
+ blockIdentity(pred, pred2) and
+ forwardEdgeRaw+(pred, pred2)
+ ) and
+ blockSuccessor(pred, succ, kind)
+ }
+
+ /**
+ * Holds if there is an edge from `pred` to `succ` that is not a back edge.
+ */
+ private predicate forwardEdgeRaw(TIRBlock pred, TIRBlock succ) {
+ exists(EdgeKind kind |
+ blockSuccessor(pred, succ, kind) and
+ not backEdgeSuccessorRaw(pred, succ, kind)
+ )
+ }
+
+ /**
+ * Holds if the `kind`-edge from `pred` to `succ` is a back edge according to
+ * `Construction`.
+ *
+ * There could be loops of non-back-edges if there is a flaw in the IR
+ * construction or back-edge detection, and this could cause non-termination
+ * of subsequent analysis. To prevent that, a subsequent predicate further
+ * classifies all edges as back edges if they are involved in a loop of
+ * non-back-edges.
+ */
+ private predicate backEdgeSuccessorRaw(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ exists(Instruction predLast, Instruction succFirst |
+ predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
+ succFirst = Construction::getInstructionBackEdgeSuccessor(predLast, kind) and
+ succ = MkIRBlock(succFirst)
+ )
+ }
+
+ cached
+ predicate blockSuccessor(TIRBlock pred, TIRBlock succ) { blockSuccessor(pred, succ, _) }
+
+ cached
+ predicate blockImmediatelyDominates(TIRBlock dominator, TIRBlock block) =
+ idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block)
+}
+
+private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) }
+
+private predicate blockFunctionExit(IRBlock exit) {
+ exit.getLastInstruction() instanceof ExitFunctionInstruction
+}
+
+private predicate blockPredecessor(IRBlock src, IRBlock pred) { src.getAPredecessor() = pred }
+
+private predicate blockImmediatelyPostDominates(IRBlock postDominator, IRBlock block) =
+ idominance(blockFunctionExit/1, blockPredecessor/2)(_, postDominator, block)
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRBlockImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRBlockImports.qll
new file mode 100644
index 00000000000..d1b46ed35c8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRBlockImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRConsistency.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRConsistency.ql
new file mode 100644
index 00000000000..0d8dd13543b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRConsistency.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Raw IR Consistency Check
+ * @description Performs consistency checks on the Intermediate Representation. This query should have no results.
+ * @kind table
+ * @id cpp/raw-ir-consistency-check
+ */
+
+import IRConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRConsistency.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRConsistency.qll
new file mode 100644
index 00000000000..31983d34247
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRConsistency.qll
@@ -0,0 +1,527 @@
+private import IR
+import InstructionConsistency // module is below
+import IRTypeConsistency // module is in IRType.qll
+
+module InstructionConsistency {
+ private import internal.InstructionImports as Imports
+ private import Imports::OperandTag
+ private import Imports::Overlap
+ private import internal.IRInternal
+
+ private newtype TOptionalIRFunction =
+ TPresentIRFunction(IRFunction irFunc) or
+ TMissingIRFunction()
+
+ /**
+ * An `IRFunction` that might not exist. This is used so that we can produce consistency failures
+ * for IR that also incorrectly lacks a `getEnclosingIRFunction()`.
+ */
+ abstract private class OptionalIRFunction extends TOptionalIRFunction {
+ abstract string toString();
+
+ abstract Language::Location getLocation();
+ }
+
+ private class PresentIRFunction extends OptionalIRFunction, TPresentIRFunction {
+ private IRFunction irFunc;
+
+ PresentIRFunction() { this = TPresentIRFunction(irFunc) }
+
+ override string toString() {
+ result = concat(Language::getIdentityString(irFunc.getFunction()), "; ")
+ }
+
+ override Language::Location getLocation() {
+ // To avoid an overwhelming number of results when the extractor merges functions with the
+ // same name, just pick a single location.
+ result =
+ min(Language::Location loc | loc = irFunc.getLocation() | loc order by loc.toString())
+ }
+ }
+
+ private class MissingIRFunction extends OptionalIRFunction, TMissingIRFunction {
+ override string toString() { result = "<Missing IRFunction>" } // non-empty so `$@` links render
+
+ override Language::Location getLocation() { result instanceof Language::UnknownDefaultLocation }
+ }
+
+ private OptionalIRFunction getInstructionIRFunction(Instruction instr) {
+ result = TPresentIRFunction(instr.getEnclosingIRFunction())
+ or
+ not exists(instr.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ pragma[inline]
+ private OptionalIRFunction getInstructionIRFunction(Instruction instr, string irFuncText) {
+ result = getInstructionIRFunction(instr) and
+ irFuncText = result.toString()
+ }
+
+ private OptionalIRFunction getOperandIRFunction(Operand operand) {
+ result = TPresentIRFunction(operand.getEnclosingIRFunction())
+ or
+ not exists(operand.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ pragma[inline]
+ private OptionalIRFunction getOperandIRFunction(Operand operand, string irFuncText) {
+ result = getOperandIRFunction(operand) and
+ irFuncText = result.toString()
+ }
+
+ private OptionalIRFunction getBlockIRFunction(IRBlock block) {
+ result = TPresentIRFunction(block.getEnclosingIRFunction())
+ or
+ not exists(block.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ /**
+ * Holds if instruction `instr` is missing an expected operand with tag `tag`.
+ */
+ query predicate missingOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag |
+ instr.getOpcode().hasOperand(tag) and
+ not exists(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ message =
+ "Instruction '" + instr.getOpcode().toString() +
+ "' is missing an expected operand with tag '" + tag.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` has an unexpected operand with tag `tag`.
+ */
+ query predicate unexpectedOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag |
+ exists(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ not instr.getOpcode().hasOperand(tag) and
+ not (instr instanceof CallInstruction and tag instanceof ArgumentOperandTag) and
+ not (
+ instr instanceof BuiltInOperationInstruction and tag instanceof PositionalArgumentOperandTag
+ ) and
+ not (instr instanceof InlineAsmInstruction and tag instanceof AsmOperandTag) and
+ message =
+ "Instruction '" + instr.toString() + "' has unexpected operand '" + tag.toString() +
+ "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` has multiple operands with tag `tag`.
+ */
+ query predicate duplicateOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag, int operandCount |
+ operandCount =
+ strictcount(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ operandCount > 1 and
+ message =
+ "Instruction has " + operandCount + " operands with tag '" + tag.toString() + "'" +
+ " in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if `Phi` instruction `instr` is missing an operand corresponding to
+ * the predecessor block `pred`.
+ */
+ query predicate missingPhiOperand(
+ PhiInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRBlock pred |
+ pred = instr.getBlock().getAPredecessor() and
+ not exists(PhiInputOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getPredecessorBlock() = pred
+ ) and
+ message =
+ "Instruction '" + instr.toString() + "' is missing an operand for predecessor block '" +
+ pred.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ query predicate missingOperandType(
+ Operand operand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Instruction use |
+ not exists(operand.getType()) and
+ use = operand.getUse() and
+ message =
+ "Operand '" + operand.toString() + "' of instruction '" + use.getOpcode().toString() +
+ "' is missing a type in function '$@'." and
+ irFunc = getOperandIRFunction(operand, irFuncText)
+ )
+ }
+
+ query predicate duplicateChiOperand(
+ ChiInstruction chi, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ chi.getTotal() = chi.getPartial() and
+ message =
+ "Chi instruction for " + chi.getPartial().toString() +
+ " has duplicate operands in function '$@'." and
+ irFunc = getInstructionIRFunction(chi, irFuncText)
+ }
+
+ query predicate sideEffectWithoutPrimary(
+ SideEffectInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(instr.getPrimaryInstruction()) and
+ message =
+ "Side effect instruction '" + instr + "' is missing a primary instruction in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if an instruction, other than `ExitFunction`, has no successors.
+ */
+ query predicate instructionWithoutSuccessor(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(instr.getASuccessor()) and
+ not instr instanceof ExitFunctionInstruction and
+ // Phi instructions aren't linked into the instruction-level flow graph.
+ not instr instanceof PhiInstruction and
+ not instr instanceof UnreachedInstruction and
+ message = "Instruction '" + instr.toString() + "' has no successors in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if there are multiple edges of the same kind from `source`.
+ */
+ query predicate ambiguousSuccessors(
+ Instruction source, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(EdgeKind kind, int n |
+ n = strictcount(Instruction t | source.getSuccessor(kind) = t) and
+ n > 1 and
+ message =
+ "Instruction '" + source.toString() + "' has " + n.toString() + " successors of kind '" +
+ kind.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(source, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if `instr` is part of a loop even though the AST of `instr`'s enclosing function
+ * contains no element that can cause loops.
+ */
+ query predicate unexplainedLoop(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Language::Function f |
+ exists(IRBlock block |
+ instr.getBlock() = block and
+ block.getEnclosingFunction() = f and
+ block.getASuccessor+() = block
+ ) and
+ not Language::hasPotentialLoop(f) and
+ message =
+ "Instruction '" + instr.toString() + "' is part of an unexplained loop in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if a `Phi` instruction is present in a block with fewer than two
+ * predecessors.
+ */
+ query predicate unnecessaryPhiInstruction(
+ PhiInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int n |
+ n = count(instr.getBlock().getAPredecessor()) and
+ n < 2 and
+ message =
+ "Instruction '" + instr.toString() + "' is in a block with only " + n.toString() +
+ " predecessors in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if a memory operand is connected to a definition with an unmodeled result.
+ */
+ query predicate memoryOperandDefinitionIsUnmodeled(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(MemoryOperand operand, Instruction def |
+ operand = instr.getAnOperand() and
+ def = operand.getAnyDef() and
+ not def.isResultModeled() and
+ message =
+ "Memory operand definition on instruction '" + instr.toString() +
+ "' has unmodeled result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if operand `operand` consumes a value that was defined in
+ * a different function.
+ */
+ query predicate operandAcrossFunctions(
+ Operand operand, string message, OptionalIRFunction useIRFunc, string useIRFuncText,
+ OptionalIRFunction defIRFunc, string defIRFuncText
+ ) {
+ exists(Instruction useInstr, Instruction defInstr |
+ operand.getUse() = useInstr and
+ operand.getAnyDef() = defInstr and
+ useIRFunc = getInstructionIRFunction(useInstr, useIRFuncText) and
+ defIRFunc = getInstructionIRFunction(defInstr, defIRFuncText) and
+ useIRFunc != defIRFunc and
+ message =
+ "Operand '" + operand.toString() + "' is used on instruction '" + useInstr.toString() +
+ "' in function '$@', but is defined on instruction '" + defInstr.toString() +
+ "' in function '$@'."
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` is not in exactly one block.
+ */
+ query predicate instructionWithoutUniqueBlock(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int blockCount |
+ blockCount = count(instr.getBlock()) and
+ blockCount != 1 and
+ message =
+ "Instruction '" + instr.toString() + "' is a member of " + blockCount.toString() +
+ " blocks in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ private predicate forwardEdge(IRBlock b1, IRBlock b2) {
+ b1.getASuccessor() = b2 and
+ not b1.getBackEdgeSuccessor(_) = b2
+ }
+
+ /**
+ * Holds if `f` contains a loop in which no edge is a back edge.
+ *
+ * This check ensures we don't have too _few_ back edges.
+ */
+ query predicate containsLoopOfForwardEdges(IRFunction f, string message) {
+ exists(IRBlock block |
+ forwardEdge+(block, block) and
+ block.getEnclosingIRFunction() = f and
+ message = "Function contains a loop consisting of only forward edges."
+ )
+ }
+
+ /**
+ * Holds if `block` is reachable from its function entry point but would not
+ * be reachable by traversing only forward edges. This check is skipped for
+ * functions containing `goto` statements as the property does not generally
+ * hold there.
+ *
+ * This check ensures we don't have too _many_ back edges.
+ */
+ query predicate lostReachability(
+ IRBlock block, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRFunction f, IRBlock entry |
+ entry = f.getEntryBlock() and
+ entry.getASuccessor+() = block and
+ not forwardEdge+(entry, block) and
+ not Language::hasGoto(f.getFunction()) and
+ message =
+ "Block '" + block.toString() +
+ "' is not reachable by traversing only forward edges in function '$@'." and
+ irFunc = TPresentIRFunction(f) and
+ irFuncText = irFunc.toString()
+ )
+ }
+
+ /**
+ * Holds if the number of back edges differs between the `Instruction` graph
+ * and the `IRBlock` graph.
+ */
+ query predicate backEdgeCountMismatch(OptionalIRFunction irFunc, string message) {
+ exists(int fromInstr, int fromBlock |
+ fromInstr =
+ count(Instruction i1, Instruction i2 |
+ getInstructionIRFunction(i1) = irFunc and i1.getBackEdgeSuccessor(_) = i2
+ ) and
+ fromBlock =
+ count(IRBlock b1, IRBlock b2 |
+ getBlockIRFunction(b1) = irFunc and b1.getBackEdgeSuccessor(_) = b2
+ ) and
+ fromInstr != fromBlock and
+ message =
+ "The instruction graph for function '" + irFunc.toString() + "' contains " +
+ fromInstr.toString() + " back edges, but the block graph contains " + fromBlock.toString()
+ + " back edges."
+ )
+ }
+
+ /**
+ * Holds if `operand` is evaluated at position `index` in `block`. For most operands, this is the
+ * position of the instruction that consumes the use. For a `PhiInputOperand`, the effective point
+ * of evaluation is at the end of the corresponding predecessor block.
+ */
+ private predicate pointOfEvaluation(Operand operand, IRBlock block, int index) {
+ block = operand.(PhiInputOperand).getPredecessorBlock() and
+ index = block.getInstructionCount()
+ or
+ exists(Instruction use |
+ use = operand.(NonPhiOperand).getUse() and
+ block.getInstruction(index) = use
+ )
+ }
+
+ /**
+ * Holds if `useOperand` has a definition that does not dominate the use.
+ */
+ query predicate useNotDominatedByDefinition(
+ Operand useOperand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRBlock useBlock, int useIndex, Instruction defInstr, IRBlock defBlock, int defIndex |
+ pointOfEvaluation(useOperand, useBlock, useIndex) and
+ defInstr = useOperand.getAnyDef() and
+ (
+ defInstr instanceof PhiInstruction and
+ defBlock = defInstr.getBlock() and
+ defIndex = -1
+ or
+ defBlock.getInstruction(defIndex) = defInstr
+ ) and
+ not (
+ defBlock.strictlyDominates(useBlock)
+ or
+ defBlock = useBlock and
+ defIndex < useIndex
+ ) and
+ message =
+ "Operand '" + useOperand.toString() +
+ "' is not dominated by its definition in function '$@'." and
+ irFunc = getOperandIRFunction(useOperand, irFuncText)
+ )
+ }
+
+ query predicate switchInstructionWithoutDefaultEdge(
+ SwitchInstruction switchInstr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(switchInstr.getDefaultSuccessor()) and
+ message =
+ "SwitchInstruction " + switchInstr.toString() + " without a DefaultEdge in function '$@'." and
+ irFunc = getInstructionIRFunction(switchInstr, irFuncText)
+ }
+
+ /**
+ * Holds if `instr` is on the chain of chi/phi instructions for all aliased
+ * memory.
+ */
+ private predicate isOnAliasedDefinitionChain(Instruction instr) {
+ instr instanceof AliasedDefinitionInstruction
+ or
+ isOnAliasedDefinitionChain(instr.(ChiInstruction).getTotal())
+ or
+ isOnAliasedDefinitionChain(instr.(PhiInstruction).getAnInputOperand().getAnyDef())
+ }
+
+ private predicate shouldBeConflated(Instruction instr) {
+ isOnAliasedDefinitionChain(instr)
+ or
+ instr.getOpcode() instanceof Opcode::InitializeNonLocal
+ }
+
+ query predicate notMarkedAsConflated(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ shouldBeConflated(instr) and
+ not instr.isResultConflated() and
+ message =
+ "Instruction '" + instr.toString() +
+ "' should be marked as having a conflated result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ query predicate wronglyMarkedAsConflated(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ instr.isResultConflated() and
+ not shouldBeConflated(instr) and
+ message =
+ "Instruction '" + instr.toString() +
+ "' should not be marked as having a conflated result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ query predicate invalidOverlap(
+ MemoryOperand useOperand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Overlap overlap |
+ overlap = useOperand.getDefinitionOverlap() and
+ overlap instanceof MayPartiallyOverlap and
+ message =
+ "MemoryOperand '" + useOperand.toString() + "' has a `getDefinitionOverlap()` of '" +
+ overlap.toString() + "'." and
+ irFunc = getOperandIRFunction(useOperand, irFuncText)
+ )
+ }
+
+ query predicate nonUniqueEnclosingIRFunction(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int irFuncCount |
+ irFuncCount = count(instr.getEnclosingIRFunction()) and
+ irFuncCount != 1 and
+ message =
+ "Instruction '" + instr.toString() + "' has " + irFuncCount.toString() +
+ " results for `getEnclosingIRFunction()` in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if the object address operand for the given `FieldAddress` instruction does not have an
+ * address type.
+ */
+ query predicate fieldAddressOnNonPointer(
+ FieldAddressInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not instr.getObjectAddressOperand().getIRType() instanceof IRAddressType and
+ message =
+ "FieldAddress instruction '" + instr.toString() +
+ "' has an object address operand that is not an address, in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if the `this` argument operand for the given `Call` instruction does not have an address
+ * type.
+ */
+ query predicate thisArgumentIsNonPointer(
+ CallInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(ThisArgumentOperand thisOperand | thisOperand = instr.getThisArgumentOperand() |
+ not thisOperand.getIRType() instanceof IRAddressType
+ ) and
+ message =
+ "Call instruction '" + instr.toString() +
+ "' has a `this` argument operand that is not an address, in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRFunction.qll
new file mode 100644
index 00000000000..5968e58f90b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRFunction.qll
@@ -0,0 +1,59 @@
+/**
+ * Provides the class `IRFunction`, which represents the Intermediate Representation for the
+ * definition of a function.
+ */
+
+private import internal.IRInternal
+private import internal.IRFunctionImports as Imports
+import Imports::IRFunctionBase
+import Instruction
+
+/**
+ * The IR for a function.
+ */
+class IRFunction extends IRFunctionBase {
+ /**
+ * Gets the entry point for this function.
+ */
+ pragma[noinline]
+ final EnterFunctionInstruction getEnterFunctionInstruction() {
+ result.getEnclosingIRFunction() = this
+ }
+
+ /**
+ * Gets the exit point for this function.
+ */
+ pragma[noinline]
+ final ExitFunctionInstruction getExitFunctionInstruction() {
+ result.getEnclosingIRFunction() = this
+ }
+
+ /**
+ * Gets the single return instruction for this function.
+ */
+ pragma[noinline]
+ final ReturnInstruction getReturnInstruction() { result.getEnclosingIRFunction() = this }
+
+ /**
+ * Gets the variable used to hold the return value of this function. If this
+ * function does not return a value, this predicate does not hold.
+ */
+ pragma[noinline]
+ final IRReturnVariable getReturnVariable() { result.getEnclosingIRFunction() = this }
+
+ /**
+ * Gets the block containing the entry point of this function.
+ */
+ pragma[noinline]
+ final IRBlock getEntryBlock() { result.getFirstInstruction() = getEnterFunctionInstruction() }
+
+ /**
+ * Gets all instructions in this function.
+ */
+ final Instruction getAnInstruction() { result.getEnclosingIRFunction() = this }
+
+ /**
+ * Gets all blocks in this function.
+ */
+ final IRBlock getABlock() { result.getEnclosingIRFunction() = this }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRVariable.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRVariable.qll
new file mode 100644
index 00000000000..146fc270738
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/IRVariable.qll
@@ -0,0 +1,327 @@
+/**
+ * Provides classes that represent variables accessed by the IR.
+ */
+
+private import internal.IRInternal
+import IRFunction
+private import internal.IRVariableImports as Imports
+import Imports::TempVariableTag
+private import Imports::IRUtilities
+private import Imports::TTempVariableTag
+private import Imports::TIRVariable
+private import Imports::IRType
+
+/**
+ * A variable referenced by the IR for a function.
+ *
+ * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated
+ * by the AST-to-IR translation (`IRTempVariable`).
+ */
+class IRVariable extends TIRVariable {
+ Language::Function func;
+
+ IRVariable() {
+ this = TIRUserVariable(_, _, func) or
+ this = TIRTempVariable(func, _, _, _) or
+ this = TIRStringLiteral(func, _, _, _) or
+ this = TIRDynamicInitializationFlag(func, _, _)
+ }
+
+ /** Gets a textual representation of this element. */
+ string toString() { none() }
+
+ /**
+ * Holds if this variable's value cannot be changed within a function. Currently used for string
+ * literals, but could also apply to `const` global and static variables.
+ */
+ predicate isReadOnly() { none() }
+
+ /**
+ * Gets the type of the variable.
+ */
+ final Language::Type getType() { getLanguageType().hasType(result, false) }
+
+ /**
+ * Gets the language-neutral type of the variable.
+ */
+ final IRType getIRType() { result = getLanguageType().getIRType() }
+
+ /**
+ * Gets the language-specific type of the variable.
+ */
+ Language::LanguageType getLanguageType() { none() }
+
+ /**
+ * Gets the AST node that declared this variable, or that introduced this
+ * variable as part of the AST-to-IR translation.
+ */
+ Language::AST getAST() { none() }
+
+ /**
+ * Gets an identifier string for the variable. This identifier is unique
+ * within the function.
+ */
+ string getUniqueId() { none() }
+
+ /**
+ * Gets the source location of this variable.
+ */
+ final Language::Location getLocation() { result = getAST().getLocation() }
+
+ /**
+ * Gets the IR for the function that references this variable.
+ */
+ final IRFunction getEnclosingIRFunction() { result.getFunction() = func }
+
+ /**
+ * Gets the function that references this variable.
+ */
+ final Language::Function getEnclosingFunction() { result = func }
+}
+
+/**
+ * A user-declared variable referenced by the IR for a function.
+ */
+class IRUserVariable extends IRVariable, TIRUserVariable {
+ Language::Variable var;
+ Language::LanguageType type;
+
+ IRUserVariable() { this = TIRUserVariable(var, type, func) }
+
+ final override string toString() { result = getVariable().toString() }
+
+ final override Language::AST getAST() { result = var }
+
+ final override string getUniqueId() {
+ result = getVariable().toString() + " " + getVariable().getLocation().toString()
+ }
+
+ final override Language::LanguageType getLanguageType() { result = type }
+
+ /**
+ * Gets the original user-declared variable.
+ */
+ Language::Variable getVariable() { result = var }
+}
+
+/**
+ * A variable (user-declared or temporary) that is allocated on the stack. This includes all
+ * parameters, non-static local variables, and temporary variables.
+ */
+class IRAutomaticVariable extends IRVariable {
+ IRAutomaticVariable() {
+ exists(Language::Variable var |
+ this = TIRUserVariable(var, _, func) and
+ Language::isVariableAutomatic(var)
+ )
+ or
+ this = TIRTempVariable(func, _, _, _)
+ }
+}
+
+/**
+ * A user-declared variable that is allocated on the stack. This includes all parameters and
+ * non-static local variables.
+ */
+class IRAutomaticUserVariable extends IRUserVariable, IRAutomaticVariable {
+ override Language::AutomaticVariable var;
+
+ final override Language::AutomaticVariable getVariable() { result = var }
+}
+
+/**
+ * A user-declared variable that is not allocated on the stack. This includes all global variables,
+ * namespace-scope variables, static fields, and static local variables.
+ */
+class IRStaticUserVariable extends IRUserVariable {
+ override Language::StaticVariable var;
+
+ IRStaticUserVariable() { not Language::isVariableAutomatic(var) }
+
+ final override Language::StaticVariable getVariable() { result = var }
+}
+
+/**
+ * A variable that is not user-declared. This includes temporary variables generated as part of IR
+ * construction, string literals, and dynamic initialization flags.
+ */
+class IRGeneratedVariable extends IRVariable {
+ Language::AST ast;
+ Language::LanguageType type;
+
+ IRGeneratedVariable() {
+ this = TIRTempVariable(func, ast, _, type) or
+ this = TIRStringLiteral(func, ast, type, _) or
+ this = TIRDynamicInitializationFlag(func, ast, type)
+ }
+
+ final override Language::LanguageType getLanguageType() { result = type }
+
+ final override Language::AST getAST() { result = ast }
+
+ override string toString() { result = getBaseString() + getLocationString() }
+
+ override string getUniqueId() { none() }
+
+ /**
+ * INTERNAL: Do not use.
+ *
+ * Gets a string containing the source code location of the AST that generated this variable.
+ *
+ * This is used by debugging and printing code only.
+ */
+ final string getLocationString() {
+ result =
+ ast.getLocation().getStartLine().toString() + ":" +
+ ast.getLocation().getStartColumn().toString()
+ }
+
+ /**
+ * INTERNAL: Do not use.
+ *
+ * Gets the string that is combined with the location of the variable to generate the string
+ * representation of this variable.
+ *
+ * This is used by debugging and printing code only.
+ */
+ string getBaseString() { none() }
+}
+
+/**
+ * A temporary variable introduced by IR construction. The most common examples are the variable
+ * generated to hold the return value of a function, or the variable generated to hold the result of
+ * a conditional operator (`a ? b : c`).
+ */
+class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVariable {
+ TempVariableTag tag;
+
+ IRTempVariable() { this = TIRTempVariable(func, ast, tag, type) }
+
+ final override string getUniqueId() {
+ result = "Temp: " + Construction::getTempVariableUniqueId(this)
+ }
+
+ /**
+ * Gets the "tag" object that differentiates this temporary variable from other temporary
+ * variables generated for the same AST.
+ */
+ final TempVariableTag getTag() { result = tag }
+
+ override string getBaseString() { result = "#temp" }
+}
+
+/**
+ * A temporary variable generated to hold the return value of a function.
+ */
+class IRReturnVariable extends IRTempVariable {
+ IRReturnVariable() { tag = ReturnValueTempVar() }
+
+ final override string toString() { result = "#return" }
+}
+
+/**
+ * A temporary variable generated to hold the exception thrown by a `ThrowValue` instruction.
+ */
+class IRThrowVariable extends IRTempVariable {
+ IRThrowVariable() { tag = ThrowTempVar() }
+
+ final override string getBaseString() { result = "#throw" }
+}
+
+/**
+ * A temporary variable generated to hold the contents of all arguments passed to the `...` of a
+ * function that accepts a variable number of arguments.
+ */
+class IREllipsisVariable extends IRTempVariable, IRParameter {
+ IREllipsisVariable() { tag = EllipsisTempVar() }
+
+ final override string toString() { result = "#ellipsis" }
+
+ final override int getIndex() { result = func.getNumberOfParameters() }
+}
+
+/**
+ * A temporary variable generated to hold the `this` pointer.
+ */
+class IRThisVariable extends IRTempVariable, IRParameter {
+ IRThisVariable() { tag = ThisTempVar() }
+
+ final override string toString() { result = "#this" }
+
+ final override int getIndex() { result = -1 }
+}
+
+/**
+ * A variable generated to represent the contents of a string literal. This variable acts much like
+ * a read-only global variable.
+ */
+class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral {
+ Language::StringLiteral literal;
+
+ IRStringLiteral() { this = TIRStringLiteral(func, ast, type, literal) }
+
+ final override predicate isReadOnly() { any() }
+
+ final override string getUniqueId() {
+ result = "String: " + getLocationString() + "=" + Language::getStringLiteralText(literal)
+ }
+
+ final override string getBaseString() { result = "#string" }
+
+ /**
+ * Gets the AST of the string literal represented by this `IRStringLiteral`.
+ */
+ final Language::StringLiteral getLiteral() { result = literal }
+}
+
+/**
+ * A variable generated to track whether a specific non-stack variable has been initialized. This is
+ * used to model the runtime initialization of static local variables in C++, as well as static
+ * fields in C#.
+ */
+class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitializationFlag {
+ Language::Variable var;
+
+ IRDynamicInitializationFlag() {
+ this = TIRDynamicInitializationFlag(func, var, type) and ast = var
+ }
+
+ final override string toString() { result = var.toString() + "#init" }
+
+ /**
+ * Gets the variable whose initialization is guarded by this flag.
+ */
+ final Language::Variable getVariable() { result = var }
+
+ final override string getUniqueId() {
+ result = "Init: " + getVariable().toString() + " " + getVariable().getLocation().toString()
+ }
+
+ final override string getBaseString() { result = "#init:" + var.toString() + ":" }
+}
+
+/**
+ * An IR variable which acts like a function parameter, including positional parameters and the
+ * temporary variables generated for `this` and ellipsis parameters.
+ */
+class IRParameter extends IRAutomaticVariable {
+ IRParameter() {
+ this.(IRAutomaticUserVariable).getVariable() instanceof Language::Parameter
+ or
+ this = TIRTempVariable(_, _, ThisTempVar(), _)
+ or
+ this = TIRTempVariable(_, _, EllipsisTempVar(), _)
+ }
+
+ /**
+ * Gets the zero-based index of this parameter. The `this` parameter has index -1.
+ */
+ int getIndex() { none() }
+}
+
+/**
+ * An IR variable representing a positional parameter.
+ */
+class IRPositionalParameter extends IRParameter, IRAutomaticUserVariable {
+ final override int getIndex() { result = getVariable().(Language::Parameter).getIndex() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/Instruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/Instruction.qll
new file mode 100644
index 00000000000..6f471d8a7e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/Instruction.qll
@@ -0,0 +1,2184 @@
+/**
+ * Provides classes that represent the individual instructions in the IR for a function.
+ */
+
+private import internal.IRInternal
+import IRFunction
+import IRBlock
+import IRVariable
+import Operand
+private import internal.InstructionImports as Imports
+import Imports::EdgeKind
+import Imports::IRType
+import Imports::MemoryAccessKind
+import Imports::Opcode
+private import Imports::OperandTag
+
+/**
+ * Gets an `Instruction` that is contained in `IRFunction`, and has a location with the specified
+ * `File` and line number. Used for assigning register names when printing IR.
+ */
+private Instruction getAnInstructionAtLine(IRFunction irFunc, Language::File file, int line) {
+ exists(IRConfiguration::IRConfiguration config |
+ config.shouldEvaluateDebugStringsForFunction(irFunc.getFunction())
+ ) and
+ exists(Language::Location location |
+ irFunc = result.getEnclosingIRFunction() and
+ location = result.getLocation() and
+ file = location.getFile() and
+ line = location.getStartLine()
+ )
+}
+
+/**
+ * A single instruction in the IR.
+ */
+class Instruction extends Construction::TStageInstruction {
+ Instruction() {
+ // The base `TStageInstruction` type is a superset of the actual instructions appearing in this
+ // stage. This call lets the stage filter out the ones that are not reused from raw IR.
+ Construction::hasInstruction(this)
+ }
+
+ /** Gets a textual representation of this element. */
+ final string toString() { result = getOpcode().toString() + ": " + getAST().toString() }
+
+ /**
+ * Gets a string showing the result, opcode, and operands of the instruction, equivalent to what
+ * would be printed by PrintIR.ql. For example:
+ *
+ * `mu0_28(int) = Store r0_26, r0_27`
+ */
+ final string getDumpString() {
+ result = getResultString() + " = " + getOperationString() + " " + getOperandsString()
+ }
+
+ private predicate shouldGenerateDumpStrings() {
+ exists(IRConfiguration::IRConfiguration config |
+ config.shouldEvaluateDebugStringsForFunction(this.getEnclosingFunction())
+ )
+ }
+
+ /**
+ * Gets a string describing the operation of this instruction. This includes
+ * the opcode and the immediate value, if any. For example:
+ *
+ * VariableAddress[x]
+ */
+ final string getOperationString() {
+ shouldGenerateDumpStrings() and
+ if exists(getImmediateString())
+ then result = getOperationPrefix() + getOpcode().toString() + "[" + getImmediateString() + "]"
+ else result = getOperationPrefix() + getOpcode().toString()
+ }
+
+ /**
+ * Gets a string describing the immediate value of this instruction, if any.
+ */
+ string getImmediateString() { none() }
+
+ private string getOperationPrefix() {
+ shouldGenerateDumpStrings() and
+ if this instanceof SideEffectInstruction then result = "^" else result = ""
+ }
+
+ private string getResultPrefix() {
+ shouldGenerateDumpStrings() and
+ if getResultIRType() instanceof IRVoidType
+ then result = "v"
+ else
+ if hasMemoryResult()
+ then if isResultModeled() then result = "m" else result = "mu"
+ else result = "r"
+ }
+
+ /**
+ * Gets the zero-based index of this instruction within its block. This is
+ * used by debugging and printing code only.
+ */
+ int getDisplayIndexInBlock() {
+ shouldGenerateDumpStrings() and
+ exists(IRBlock block |
+ this = block.getInstruction(result)
+ or
+ this =
+ rank[-result - 1](PhiInstruction phiInstr |
+ phiInstr = block.getAPhiInstruction()
+ |
+ phiInstr order by phiInstr.getUniqueId()
+ )
+ )
+ }
+
+ private int getLineRank() {
+ shouldGenerateDumpStrings() and
+ this =
+ rank[result](Instruction instr |
+ instr =
+ getAnInstructionAtLine(getEnclosingIRFunction(), getLocation().getFile(),
+ getLocation().getStartLine())
+ |
+ instr order by instr.getBlock().getDisplayIndex(), instr.getDisplayIndexInBlock()
+ )
+ }
+
+ /**
+ * Gets a human-readable string that uniquely identifies this instruction
+ * within the function. This string is used to refer to this instruction when
+ * printing IR dumps.
+ *
+ * Example: `r1_1`
+ */
+ string getResultId() {
+ shouldGenerateDumpStrings() and
+ result = getResultPrefix() + getAST().getLocation().getStartLine() + "_" + getLineRank()
+ }
+
+ /**
+ * Gets a string describing the result of this instruction, suitable for
+ * display in IR dumps. This consists of the result ID plus the type of the
+ * result.
+ *
+ * Example: `r1_1(int*)`
+ */
+ final string getResultString() {
+ shouldGenerateDumpStrings() and
+ result = getResultId() + "(" + getResultLanguageType().getDumpString() + ")"
+ }
+
+ /**
+ * Gets a string describing the operands of this instruction, suitable for
+ * display in IR dumps.
+ *
+ * Example: `func:r3_4, this:r3_5`
+ */
+ string getOperandsString() {
+ shouldGenerateDumpStrings() and
+ result =
+ concat(Operand operand |
+ operand = getAnOperand()
+ |
+ operand.getDumpString(), ", " order by operand.getDumpSortOrder()
+ )
+ }
+
+ /**
+ * Gets a string identifier for this instruction that is unique among all
+ * instructions in the same function.
+ *
+ * This is used for sorting IR output for tests, and is likely to be
+ * inefficient for any other use.
+ */
+ final string getUniqueId() { result = Construction::getInstructionUniqueId(this) }
+
+ /**
+ * INTERNAL: Do not use.
+ *
+ * Gets two sort keys for this instruction - used to order instructions for printing
+ * in test outputs.
+ */
+ final predicate hasSortKeys(int key1, int key2) {
+ Construction::instructionHasSortKeys(this, key1, key2)
+ }
+
+ /**
+ * Gets the basic block that contains this instruction.
+ */
+ final IRBlock getBlock() { result.getAnInstruction() = this }
+
+ /**
+ * Gets the function that contains this instruction.
+ */
+ final Language::Function getEnclosingFunction() {
+ result = getEnclosingIRFunction().getFunction()
+ }
+
+ /**
+ * Gets the IRFunction object that contains the IR for this instruction.
+ */
+ final IRFunction getEnclosingIRFunction() {
+ result = Construction::getInstructionEnclosingIRFunction(this)
+ }
+
+ /**
+ * Gets the AST that caused this instruction to be generated.
+ */
+ final Language::AST getAST() { result = Construction::getInstructionAST(this) }
+
+ /**
+ * Gets the location of the source code for this instruction.
+ */
+ final Language::Location getLocation() { result = getAST().getLocation() }
+
+ /**
+ * Gets the `Expr` whose result is computed by this instruction, if any. The `Expr` may be a
+ * conversion.
+ */
+ final Language::Expr getConvertedResultExpression() {
+ result = Raw::getInstructionConvertedResultExpression(this)
+ }
+
+ /**
+ * Gets the unconverted form of the `Expr` whose result is computed by this instruction, if any.
+ */
+ final Language::Expr getUnconvertedResultExpression() {
+ result = Raw::getInstructionUnconvertedResultExpression(this)
+ }
+
+ /**
+ * Gets the language-specific type of the result produced by this instruction.
+ *
+ * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a
+ * less complex, language-neutral type system in which all semantically equivalent types share the
+ * same `IRType` instance. For example, in C++, four different `Instruction`s might have three
+ * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`,
+ * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`.
+ */
+ final Language::LanguageType getResultLanguageType() {
+ result = Construction::getInstructionResultType(this)
+ }
+
+ /**
+ * Gets the type of the result produced by this instruction. If the instruction does not produce
+ * a result, its result type will be `IRVoidType`.
+ */
+ cached
+ final IRType getResultIRType() { result = getResultLanguageType().getIRType() }
+
+ /**
+ * Gets the type of the result produced by this instruction. If the
+ * instruction does not produce a result, its result type will be `VoidType`.
+ *
+ * If `isGLValue()` holds, then the result type of this instruction should be
+ * thought of as "pointer to `getResultType()`".
+ */
+ final Language::Type getResultType() {
+ exists(Language::LanguageType resultType |
+ resultType = getResultLanguageType() and
+ (
+ resultType.hasUnspecifiedType(result, _)
+ or
+ not resultType.hasUnspecifiedType(_, _) and result instanceof Language::UnknownType
+ )
+ )
+ }
+
+ /**
+ * Holds if the result produced by this instruction is a glvalue. If this
+ * holds, the result of the instruction represents the address of a location,
+ * and the type of the location is given by `getResultType()`. If this does
+ * not hold, the result of the instruction represents a value whose type is
+ * given by `getResultType()`.
+ *
+ * For example, the statement `y = x;` generates the following IR:
+ * ```
+ * r1_0(glval: int) = VariableAddress[x]
+ * r1_1(int) = Load r1_0, mu0_1
+ * r1_2(glval: int) = VariableAddress[y]
+ * mu1_3(int) = Store r1_2, r1_1
+ * ```
+ *
+ * The result of each `VariableAddress` instruction is a glvalue of type
+ * `int`, representing the address of the corresponding integer variable. The
+ * result of the `Load` instruction is a prvalue of type `int`, representing
+ * the integer value loaded from variable `x`.
+ */
+ final predicate isGLValue() { getResultLanguageType().hasType(_, true) }
+
+ /**
+ * Gets the size of the result produced by this instruction, in bytes. If the
+ * result does not have a known constant size, this predicate does not hold.
+ *
+ * If `this.isGLValue()` holds for this instruction, the value of
+ * `getResultSize()` will always be the size of a pointer.
+ */
+ final int getResultSize() { result = getResultLanguageType().getByteSize() }
+
+ /**
+ * Gets the opcode that specifies the operation performed by this instruction.
+ */
+ pragma[inline]
+ final Opcode getOpcode() { Construction::getInstructionOpcode(result, this) }
+
+ /**
+ * Gets all direct uses of the result of this instruction. The result can be
+ * an `Operand` for which `isDefinitionInexact` holds.
+ */
+ final Operand getAUse() { result.getAnyDef() = this }
+
+ /**
+ * Gets all of this instruction's operands.
+ */
+ final Operand getAnOperand() { result.getUse() = this }
+
+ /**
+ * Holds if this instruction produces a memory result.
+ */
+ final predicate hasMemoryResult() { exists(getResultMemoryAccess()) }
+
+ /**
+ * Gets the kind of memory access performed by this instruction's result.
+ * Holds only for instructions with a memory result.
+ */
+ pragma[inline]
+ final MemoryAccessKind getResultMemoryAccess() { result = getOpcode().getWriteMemoryAccess() }
+
+ /**
+ * Holds if the memory access performed by this instruction's result will not always write to
+ * every bit in the memory location. This is most commonly used for memory accesses that may or
+ * may not actually occur depending on runtime state (for example, the write side effect of an
+ * output parameter that is not written to on all paths), or for accesses where the memory
+ * location is a conservative estimate of the memory that might actually be accessed at runtime
+ * (for example, the global side effects of a function call).
+ */
+ pragma[inline]
+ final predicate hasResultMayMemoryAccess() { getOpcode().hasMayWriteMemoryAccess() }
+
+ /**
+ * Gets the operand that holds the memory address to which this instruction stores its
+ * result, if any. For example, in `m3 = Store r1, r2`, the result of `getResultAddressOperand()`
+ * is `r1`.
+ */
+ final AddressOperand getResultAddressOperand() {
+ getResultMemoryAccess().usesAddressOperand() and
+ result.getUse() = this
+ }
+
+ /**
+ * Gets the instruction that holds the exact memory address to which this instruction stores its
+ * result, if any. For example, in `m3 = Store r1, r2`, the result of `getResultAddress()`
+ * is the instruction that defines `r1`.
+ */
+ final Instruction getResultAddress() { result = getResultAddressOperand().getDef() }
+
+ /**
+ * Holds if the result of this instruction is precisely modeled in SSA. Always
+ * holds for a register result. For a memory result, a modeled result is
+ * connected to its actual uses. An unmodeled result has no uses.
+ *
+ * For example:
+ * ```
+ * int x = 1;
+ * int *p = &x;
+ * int y = *p;
+ * ```
+ * In non-aliased SSA, `x` will not be modeled because it has its address
+ * taken. In that case, `isResultModeled()` would not hold for the result of
+ * the `Store` to `x`.
+ */
+ final predicate isResultModeled() {
+ // Register results are always in SSA form.
+ not hasMemoryResult() or
+ Construction::hasModeledMemoryResult(this)
+ }
+
+ /**
+ * Holds if this is an instruction with a memory result that represents a
+ * conflation of more than one memory allocation.
+ *
+ * This happens in practice when dereferencing a pointer that cannot be
+ * tracked back to a single local allocation. Such memory is instead modeled
+ * as originating on the `AliasedDefinitionInstruction` at the entry of the
+ * function.
+ */
+ final predicate isResultConflated() { Construction::hasConflatedMemoryResult(this) }
+
+ /**
+ * Gets the successor of this instruction along the control flow edge
+ * specified by `kind`.
+ */
+ final Instruction getSuccessor(EdgeKind kind) {
+ result = Construction::getInstructionSuccessor(this, kind)
+ }
+
+ /**
+ * Gets a _back-edge successor_ of this instruction along the control
+ * flow edge specified by `kind`. A back edge in the control-flow graph is
+ * intuitively the edge that goes back around a loop. If all back edges are
+ * removed from the control-flow graph, it becomes acyclic.
+ */
+ final Instruction getBackEdgeSuccessor(EdgeKind kind) {
+ // We don't take these edges from
+ // `Construction::getInstructionBackEdgeSuccessor` since that relation has
+ // not been treated to remove any loops that might be left over due to
+ // flaws in the IR construction or back-edge detection.
+ exists(IRBlock block |
+ block = this.getBlock() and
+ this = block.getLastInstruction() and
+ result = block.getBackEdgeSuccessor(kind).getFirstInstruction()
+ )
+ }
+
+ /**
+ * Gets all direct successors of this instruction.
+ */
+ final Instruction getASuccessor() { result = getSuccessor(_) }
+
+ /**
+ * Gets a predecessor of this instruction such that the predecessor reaches
+ * this instruction along the control flow edge specified by `kind`.
+ */
+ final Instruction getPredecessor(EdgeKind kind) { result.getSuccessor(kind) = this }
+
+ /**
+ * Gets all direct predecessors of this instruction.
+ */
+ final Instruction getAPredecessor() { result = getPredecessor(_) }
+}
+
+/**
+ * An instruction that refers to a variable.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * variable. For example, it is used for `VariableAddress`, which returns the address of a specific
+ * variable, and `InitializeParameter`, which returns the value that was passed to the specified
+ * parameter by the caller. `VariableInstruction` is not used for `Load` or `Store` instructions
+ * that happen to load from or store to a particular variable; in those cases, the memory location
+ * being accessed is specified by the `AddressOperand` on the instruction, which may or may not be
+ * defined by the result of a `VariableAddress` instruction.
+ */
+class VariableInstruction extends Instruction {
+ IRVariable var;
+
+ VariableInstruction() { var = Raw::getInstructionVariable(this) }
+
+ override string getImmediateString() { result = var.toString() }
+
+ /**
+ * Gets the variable that this instruction references.
+ */
+ final IRVariable getIRVariable() { result = var }
+
+ /**
+ * Gets the AST variable that this instruction's IR variable refers to, if one exists.
+ */
+ final Language::Variable getASTVariable() { result = var.(IRUserVariable).getVariable() }
+}
+
+/**
+ * An instruction that refers to a field of a class, struct, or union.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * field. For example, it is used for `FieldAddress`, which computes the address of a specific
+ * field on an object. `FieldInstruction` is not used for `Load` or `Store` instructions that happen
+ * to load from or store to a particular field; in those cases, the memory location being accessed
+ * is specified by the `AddressOperand` on the instruction, which may or may not be defined by the
+ * result of a `FieldAddress` instruction.
+ */
+class FieldInstruction extends Instruction {
+ Language::Field field;
+
+ FieldInstruction() { field = Raw::getInstructionField(this) }
+
+ final override string getImmediateString() { result = field.toString() }
+
+ /**
+ * Gets the field that this instruction references.
+ */
+ final Language::Field getField() { result = field }
+}
+
+/**
+ * An instruction that refers to a function.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * function. For example, it is used for `FunctionAddress`, which returns the address of a specific
+ * function. `FunctionInstruction` is not used for `Call` instructions that happen to call a
+ * particular function; in that case, the function being called is specified by the
+ * `CallTargetOperand` on the instruction, which may or may not be defined by the result of a
+ * `FunctionAddress` instruction.
+ */
+class FunctionInstruction extends Instruction {
+ Language::Function funcSymbol;
+
+ FunctionInstruction() { funcSymbol = Raw::getInstructionFunction(this) }
+
+ final override string getImmediateString() { result = funcSymbol.toString() }
+
+ /**
+ * Gets the function that this instruction references.
+ */
+ final Language::Function getFunctionSymbol() { result = funcSymbol }
+}
+
+/**
+ * An instruction whose result is a compile-time constant value.
+ */
+class ConstantValueInstruction extends Instruction {
+ string value;
+
+ ConstantValueInstruction() { value = Raw::getInstructionConstantValue(this) }
+
+ final override string getImmediateString() { result = value }
+
+ /**
+ * Gets the constant value of this instruction's result.
+ */
+ final string getValue() { result = value }
+}
+
+/**
+ * An instruction that refers to an argument of a `Call` instruction.
+ *
+ * This instruction is used for side effects of a `Call` instruction that read or write memory
+ * pointed to by one of the arguments of the call.
+ */
+class IndexedInstruction extends Instruction {
+ int index;
+
+ IndexedInstruction() { index = Raw::getInstructionIndex(this) }
+
+ final override string getImmediateString() { result = index.toString() }
+
+ /**
+ * Gets the zero-based index of the argument that this instruction references.
+ */
+ final int getIndex() { result = index }
+}
+
+/**
+ * An instruction representing the entry point to a function.
+ *
+ * Each `IRFunction` has exactly one `EnterFunction` instruction. Execution of the function begins
+ * at this instruction. This instruction has no predecessors.
+ */
+class EnterFunctionInstruction extends Instruction {
+ EnterFunctionInstruction() { getOpcode() instanceof Opcode::EnterFunction }
+}
+
+/**
+ * An instruction that returns the address of a variable.
+ *
+ * This instruction returns the address of a local variable, parameter, static field,
+ * namespace-scope variable, or global variable. For the address of a non-static field of a class,
+ * struct, or union, see `FieldAddressInstruction`.
+ */
+class VariableAddressInstruction extends VariableInstruction {
+ VariableAddressInstruction() { getOpcode() instanceof Opcode::VariableAddress }
+}
+
+/**
+ * An instruction whose result is the address of a function.
+ *
+ * The function may be a non-member function, a static member function, or a non-static member
+ * function.
+ *
+ * The type of the result is `IRFunctionAddress`.
+ */
+class FunctionAddressInstruction extends FunctionInstruction {
+ FunctionAddressInstruction() { this.getOpcode() instanceof Opcode::FunctionAddress }
+}
+
+/**
+ * An instruction that gives a parameter of the enclosing function its initial value, namely the
+ * value of the corresponding argument supplied by the caller.
+ *
+ * For every parameter of a function there is exactly one `InitializeParameter` instruction
+ * that initializes it.
+ */
+class InitializeParameterInstruction extends VariableInstruction {
+ InitializeParameterInstruction() { this.getOpcode() instanceof Opcode::InitializeParameter }
+
+ /**
+ * Gets the parameter that this instruction initializes.
+ */
+ final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() }
+
+ /**
+ * Holds if `index` is the index of the parameter initialized by this instruction, or if
+ * `index` is `-1` and this instruction initializes `this`.
+ */
+ pragma[noinline]
+ final predicate hasIndex(int index) {
+ index = -1 and this.getIRVariable() instanceof IRThisVariable
+ or
+ index = this.getParameter().getIndex() and index >= 0
+ }
+}
+
+/**
+ * An instruction that initializes all memory that already existed when this function was called.
+ *
+ * Such memory was allocated and initialized elsewhere, so without this instruction it would
+ * have no definition inside this function.
+ */
+class InitializeNonLocalInstruction extends Instruction {
+ InitializeNonLocalInstruction() { this.getOpcode() instanceof Opcode::InitializeNonLocal }
+}
+
+/**
+ * An instruction that initializes the memory pointed to by a parameter of the enclosing
+ * function, using the contents of that memory at the time the function is entered.
+ */
+class InitializeIndirectionInstruction extends VariableInstruction {
+ InitializeIndirectionInstruction() { this.getOpcode() instanceof Opcode::InitializeIndirection }
+
+ /**
+ * Gets the parameter whose pointed-to memory is initialized by this instruction.
+ */
+ final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() }
+
+ /**
+ * Holds if `index` is the index of the parameter whose pointed-to memory this instruction
+ * initializes, or if `index` is `-1` and this instruction initializes the memory pointed
+ * to by `this`.
+ */
+ pragma[noinline]
+ final predicate hasIndex(int index) {
+ index = -1 and this.getIRVariable() instanceof IRThisVariable
+ or
+ index = this.getParameter().getIndex() and index >= 0
+ }
+}
+
+/**
+ * An instruction that performs the initialization of the enclosing function's `this` pointer.
+ */
+class InitializeThisInstruction extends Instruction {
+ InitializeThisInstruction() { this.getOpcode() instanceof Opcode::InitializeThis }
+}
+
+/**
+ * An instruction whose result is the address of a non-static field within an object.
+ */
+class FieldAddressInstruction extends FieldInstruction {
+ FieldAddressInstruction() { this.getOpcode() instanceof Opcode::FieldAddress }
+
+ /**
+ * Gets the operand holding the address of the object that contains the field.
+ */
+ final UnaryOperand getObjectAddressOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the address of the object that contains the field.
+ */
+ final Instruction getObjectAddress() { result = this.getObjectAddressOperand().getDef() }
+}
+
+/**
+ * An instruction whose result is the address of the first element of a managed array.
+ *
+ * C# array element accesses are represented with this instruction.
+ */
+class ElementsAddressInstruction extends UnaryInstruction {
+ ElementsAddressInstruction() { this.getOpcode() instanceof Opcode::ElementsAddress }
+
+ /**
+ * Gets the operand holding the address of the array object.
+ */
+ final UnaryOperand getArrayObjectAddressOperand() { result = this.getAnOperand() }
+
+ /** Gets the instruction that defines the address of the array object. */
+ final Instruction getArrayObjectAddress() {
+ result = this.getArrayObjectAddressOperand().getDef()
+ }
+}
+
+/**
+ * An instruction whose result is well-defined but unknown, and whose side effects are
+ * also unknown, including side effects that the SSA graph does not model
+ * conservatively.
+ *
+ * An instruction of this type is generated when the AST contains an `ErrorExpr`, which
+ * means the extractor could not understand the expression and so produced only a
+ * partial AST. A query that reports an alert when some action is _not_ taken may want
+ * to ignore every function that contains an `ErrorInstruction`.
+ */
+class ErrorInstruction extends Instruction {
+ ErrorInstruction() { this.getOpcode() instanceof Opcode::Error }
+}
+
+/**
+ * An instruction whose result is an uninitialized value.
+ *
+ * It supplies the initial definition of a stack variable that has no initializer, or whose
+ * initializer initializes the variable only partially.
+ */
+class UninitializedInstruction extends VariableInstruction {
+ UninitializedInstruction() { this.getOpcode() instanceof Opcode::Uninitialized }
+
+ /**
+ * Gets the variable that this instruction leaves uninitialized.
+ */
+ final Language::Variable getLocalVariable() { result = var.(IRUserVariable).getVariable() }
+}
+
+/**
+ * An instruction that does nothing.
+ *
+ * It is typically inserted so that a particular AST is associated with at least one
+ * instruction, even if that AST has no semantic effect.
+ */
+class NoOpInstruction extends Instruction {
+ NoOpInstruction() { this.getOpcode() instanceof Opcode::NoOp }
+}
+
+/**
+ * An instruction that hands control back to the caller of the function.
+ *
+ * This models the normal (non-exception) return from a function, whether it results from an
+ * explicit `return` statement or from control flow reaching the end of the function's body.
+ *
+ * A function has exactly one `ReturnInstruction`. Each `return` statement in the function is
+ * represented as an initialization of the temporary variable that holds the return value, after
+ * which control flows to the function's common `ReturnInstruction`. Exception: a function that
+ * never returns has no `ReturnInstruction`.
+ *
+ * The `ReturnInstruction` of a function has a control-flow successor edge to a block that
+ * contains the function's `ExitFunction` instruction.
+ *
+ * There are two different return instructions: `ReturnValueInstruction`, which returns a value
+ * from a non-`void`-returning function, and `ReturnVoidInstruction`, which returns from a
+ * `void`-returning function.
+ */
+class ReturnInstruction extends Instruction {
+ ReturnInstruction() { this.getOpcode() instanceof ReturnOpcode }
+}
+
+/**
+ * A return instruction that gives control back to the caller without returning a value.
+ */
+class ReturnVoidInstruction extends ReturnInstruction {
+ ReturnVoidInstruction() { this.getOpcode() instanceof Opcode::ReturnVoid }
+}
+
+/**
+ * A return instruction that gives control back to the caller together with a return value.
+ */
+class ReturnValueInstruction extends ReturnInstruction {
+ ReturnValueInstruction() { this.getOpcode() instanceof Opcode::ReturnValue }
+
+ /**
+ * Gets the operand holding the value that the function returns.
+ */
+ final LoadOperand getReturnValueOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the value returned by the function, provided that an
+ * exact definition is available.
+ */
+ final Instruction getReturnValue() { result = this.getReturnValueOperand().getDef() }
+}
+
+/**
+ * An instruction that models the use, after the function has returned to its caller, of the
+ * value pointed to by one of the function's parameters.
+ *
+ * The instruction itself does not return control to the caller. It only represents the
+ * possibility that a caller uses the memory pointed to by the parameter at some point after the
+ * call returns. It is the counterpart of the `InitializeIndirection` instruction, which
+ * represents the possibility that the caller initialized that memory before the call.
+ */
+class ReturnIndirectionInstruction extends VariableInstruction {
+ ReturnIndirectionInstruction() { this.getOpcode() instanceof Opcode::ReturnIndirection }
+
+ /**
+ * Gets the operand holding the value of the pointed-to memory.
+ */
+ final SideEffectOperand getSideEffectOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the value of the pointed-to memory, provided that an
+ * exact definition is available.
+ */
+ final Instruction getSideEffect() { result = this.getSideEffectOperand().getDef() }
+
+ /**
+ * Gets the operand holding the address of the pointed-to memory.
+ */
+ final AddressOperand getSourceAddressOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the address of the pointed-to memory.
+ */
+ final Instruction getSourceAddress() { result = this.getSourceAddressOperand().getDef() }
+
+ /**
+ * Gets the parameter whose final pointed-to value within the function is read by this
+ * instruction.
+ */
+ final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() }
+
+ /**
+ * Holds if this instruction is the return indirection for `this`.
+ */
+ final predicate isThisIndirection() { var instanceof IRThisVariable }
+
+ /**
+ * Holds if `index` is the index of the parameter that this instruction is the return
+ * indirection for, or if `index` is `-1` and this is the return indirection for `this`.
+ */
+ pragma[noinline]
+ final predicate hasIndex(int index) {
+ index = -1 and this.isThisIndirection()
+ or
+ index = this.getParameter().getIndex() and index >= 0
+ }
+}
+
+/**
+ * An instruction whose result is a copy of its operand.
+ *
+ * Which copy instruction is used depends on where the copied value comes from and where it
+ * goes:
+ * - `CopyValueInstruction` - Copies a register operand to a register result.
+ * - `LoadInstruction` - Copies a memory operand to a register result.
+ * - `StoreInstruction` - Copies a register operand to a memory result.
+ */
+class CopyInstruction extends Instruction {
+ CopyInstruction() { this.getOpcode() instanceof CopyOpcode }
+
+ /**
+ * Gets the operand holding the value to be copied. Has no result unless overridden.
+ */
+ Operand getSourceValueOperand() { none() }
+
+ /**
+ * Gets the instruction that defines the value to be copied, provided that an exact
+ * definition is available.
+ */
+ final Instruction getSourceValue() { result = this.getSourceValueOperand().getDef() }
+}
+
+/**
+ * A copy instruction that copies its register operand into a register result.
+ */
+class CopyValueInstruction extends CopyInstruction, UnaryInstruction {
+ CopyValueInstruction() { this.getOpcode() instanceof Opcode::CopyValue }
+
+ final override UnaryOperand getSourceValueOperand() { result = this.getAnOperand() }
+}
+
+/**
+ * Gets a string describing the location that `operand` points to. This is the string form
+ * of the variable if the address comes from a `VariableAddressInstruction`, and `"?"` otherwise.
+ */
+private string getAddressOperandDescription(AddressOperand operand) {
+ if operand.getDef() instanceof VariableAddressInstruction
+ then result = operand.getDef().(VariableAddressInstruction).getIRVariable().toString()
+ else result = "?"
+}
+
+/**
+ * A copy instruction that copies its memory operand into a register result.
+ */
+class LoadInstruction extends CopyInstruction {
+ LoadInstruction() { this.getOpcode() instanceof Opcode::Load }
+
+ /**
+ * Gets the operand holding the address from which the value is loaded.
+ */
+ final AddressOperand getSourceAddressOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the address from which the value is loaded.
+ */
+ final Instruction getSourceAddress() { result = this.getSourceAddressOperand().getDef() }
+
+ final override LoadOperand getSourceValueOperand() { result = this.getAnOperand() }
+
+ final override string getImmediateString() {
+ result = getAddressOperandDescription(this.getSourceAddressOperand())
+ }
+}
+
+/**
+ * A copy instruction that copies its register operand into a memory result.
+ */
+class StoreInstruction extends CopyInstruction {
+ StoreInstruction() { this.getOpcode() instanceof Opcode::Store }
+
+ /**
+ * Gets the operand holding the address of the location that the value is stored to.
+ */
+ final AddressOperand getDestinationAddressOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the address of the location that the value is stored
+ * to, provided that an exact definition is available.
+ */
+ final Instruction getDestinationAddress() { result = this.getDestinationAddressOperand().getDef() }
+
+ final override StoreValueOperand getSourceValueOperand() { result = this.getAnOperand() }
+
+ final override string getImmediateString() {
+ result = getAddressOperandDescription(this.getDestinationAddressOperand())
+ }
+}
+
+/**
+ * An instruction that transfers control to one of two successor instructions, selected by the
+ * value of a Boolean operand.
+ */
+class ConditionalBranchInstruction extends Instruction {
+ ConditionalBranchInstruction() { this.getOpcode() instanceof Opcode::ConditionalBranch }
+
+ /**
+ * Gets the operand holding the Boolean condition that controls the branch.
+ */
+ final ConditionOperand getConditionOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the Boolean condition that controls the branch.
+ */
+ final Instruction getCondition() { result = this.getConditionOperand().getDef() }
+
+ /**
+ * Gets the instruction that control flows to when the condition is true.
+ */
+ final Instruction getTrueSuccessor() { result = this.getSuccessor(EdgeKind::trueEdge()) }
+
+ /**
+ * Gets the instruction that control flows to when the condition is false.
+ */
+ final Instruction getFalseSuccessor() { result = this.getSuccessor(EdgeKind::falseEdge()) }
+}
+
+/**
+ * The instruction at which a function exits.
+ *
+ * Every `IRFunction` contains exactly one `ExitFunction` instruction, except when the function
+ * neither returns nor throws an exception. Control reaches the `ExitFunction` instruction both
+ * from normal returns (`ReturnVoid`, `ReturnValue`) and from propagated exceptions (`Unwind`).
+ * This instruction has no successors.
+ */
+class ExitFunctionInstruction extends Instruction {
+ ExitFunctionInstruction() { this.getOpcode() instanceof Opcode::ExitFunction }
+}
+
+/**
+ * An instruction that yields a constant value as its result.
+ */
+class ConstantInstruction extends ConstantValueInstruction {
+ ConstantInstruction() { this.getOpcode() instanceof Opcode::Constant }
+}
+
+/**
+ * A constant instruction whose result has an integer type or a Boolean type.
+ */
+class IntegerConstantInstruction extends ConstantInstruction {
+ IntegerConstantInstruction() {
+ // Either kind of result type qualifies.
+ this.getResultIRType() instanceof IRIntegerType
+ or
+ this.getResultIRType() instanceof IRBooleanType
+ }
+}
+
+/**
+ * A constant instruction whose result has a floating-point type.
+ */
+class FloatConstantInstruction extends ConstantInstruction {
+ FloatConstantInstruction() { this.getResultIRType() instanceof IRFloatingPointType }
+}
+
+/**
+ * An instruction that yields the address of a string literal as its result.
+ */
+class StringConstantInstruction extends VariableInstruction {
+ override IRStringLiteral var;
+
+ /**
+ * Gets the string literal whose address this instruction returns.
+ */
+ final Language::StringLiteral getValue() { result = var.getLiteral() }
+
+ final override string getImmediateString() { result = Language::getStringLiteralText(this.getValue()) }
+}
+
+/**
+ * An instruction that computes its result from two operands.
+ */
+class BinaryInstruction extends Instruction {
+ BinaryInstruction() { this.getOpcode() instanceof BinaryOpcode }
+
+ /**
+ * Gets the left-hand operand of this binary instruction.
+ */
+ final LeftOperand getLeftOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the right-hand operand of this binary instruction.
+ */
+ final RightOperand getRightOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the value of the left operand of this binary
+ * instruction.
+ */
+ final Instruction getLeft() { result = this.getLeftOperand().getDef() }
+
+ /**
+ * Gets the instruction that defines the value of the right operand of this binary
+ * instruction.
+ */
+ final Instruction getRight() { result = this.getRightOperand().getDef() }
+
+ /**
+ * Holds if `op1` and `op2` are the two operands of this instruction, in either order.
+ */
+ final predicate hasOperands(Operand op1, Operand op2) {
+ op1 = this.getRightOperand() and op2 = this.getLeftOperand()
+ or
+ op1 = this.getLeftOperand() and op2 = this.getRightOperand()
+ }
+}
+
+/**
+ * An instruction whose result is produced by an arithmetic operation.
+ */
+class ArithmeticInstruction extends Instruction {
+ ArithmeticInstruction() { this.getOpcode() instanceof ArithmeticOpcode }
+}
+
+/**
+ * An `ArithmeticInstruction` with two numeric operands, that is, one that is also a `BinaryInstruction`.
+ */
+class BinaryArithmeticInstruction extends ArithmeticInstruction, BinaryInstruction { }
+
+/**
+ * An instruction that computes its result by applying an arithmetic operation to a single
+ * numeric operand; it is both an `ArithmeticInstruction` and a `UnaryInstruction`.
+ */
+class UnaryArithmeticInstruction extends ArithmeticInstruction, UnaryInstruction { }
+
+/**
+ * An instruction whose result is the sum of two numeric operands.
+ *
+ * The two operands must share the same numeric type, and the result has that type as well. When
+ * an integer addition overflows, the result is the infinite-precision sum modulo 2^n.
+ * Floating-point addition is performed according to IEEE-754.
+ */
+class AddInstruction extends BinaryArithmeticInstruction {
+ AddInstruction() { this.getOpcode() instanceof Opcode::Add }
+}
+
+/**
+ * An instruction whose result is the difference of two numeric operands.
+ *
+ * The two operands must share the same numeric type, and the result has that type as well. When
+ * an integer subtraction overflows, the result is the infinite-precision difference modulo 2^n.
+ * Floating-point subtraction is performed according to IEEE-754.
+ */
+class SubInstruction extends BinaryArithmeticInstruction {
+ SubInstruction() { this.getOpcode() instanceof Opcode::Sub }
+}
+
+/**
+ * An instruction whose result is the product of two numeric operands.
+ *
+ * The two operands must share the same numeric type, and the result has that type as well. When
+ * an integer multiplication overflows, the result is the infinite-precision product modulo 2^n.
+ * Floating-point multiplication is performed according to IEEE-754.
+ */
+class MulInstruction extends BinaryArithmeticInstruction {
+ MulInstruction() { this.getOpcode() instanceof Opcode::Mul }
+}
+
+/**
+ * An instruction whose result is the quotient of two numeric operands.
+ *
+ * The two operands must share the same numeric type, and the result has that type as well.
+ * Division by zero and integer overflow both give an undefined result. Floating-point division
+ * is performed according to IEEE-754.
+ */
+class DivInstruction extends BinaryArithmeticInstruction {
+ DivInstruction() { this.getOpcode() instanceof Opcode::Div }
+}
+
+/**
+ * An instruction whose result is the remainder of two integer operands.
+ *
+ * The two operands must share the same integer type, and the result has that type as well.
+ * Division by zero and integer overflow both give an undefined result.
+ */
+class RemInstruction extends BinaryArithmeticInstruction {
+ RemInstruction() { this.getOpcode() instanceof Opcode::Rem }
+}
+
+/**
+ * An instruction whose result is the negation of its single numeric operand.
+ *
+ * The operand must have a numeric type, and the result has that type as well. Integer negation
+ * uses two's complement and is computed modulo 2^n. Floating-point negation is performed
+ * according to IEEE-754.
+ */
+class NegateInstruction extends UnaryArithmeticInstruction {
+ NegateInstruction() { this.getOpcode() instanceof Opcode::Negate }
+}
+
+/**
+ * An instruction whose result is produced by a bitwise operation.
+ */
+class BitwiseInstruction extends Instruction {
+ BitwiseInstruction() { this.getOpcode() instanceof BitwiseOpcode }
+}
+
+/**
+ * A `BitwiseInstruction` with two integer operands, that is, one that is also a `BinaryInstruction`.
+ */
+class BinaryBitwiseInstruction extends BitwiseInstruction, BinaryInstruction { }
+
+/**
+ * A `BitwiseInstruction` with a single integer operand, that is, one that is also a `UnaryInstruction`.
+ */
+class UnaryBitwiseInstruction extends BitwiseInstruction, UnaryInstruction { }
+
+/**
+ * An instruction whose result is the bitwise "and" of two integer operands.
+ *
+ * The two operands must share the same integer type, and the result has that type as well.
+ */
+class BitAndInstruction extends BinaryBitwiseInstruction {
+ BitAndInstruction() { this.getOpcode() instanceof Opcode::BitAnd }
+}
+
+/**
+ * An instruction whose result is the bitwise "or" of two integer operands.
+ *
+ * The two operands must share the same integer type, and the result has that type as well.
+ */
+class BitOrInstruction extends BinaryBitwiseInstruction {
+ BitOrInstruction() { this.getOpcode() instanceof Opcode::BitOr }
+}
+
+/**
+ * An instruction whose result is the bitwise "xor" of two integer operands.
+ *
+ * The two operands must share the same integer type, and the result has that type as well.
+ */
+class BitXorInstruction extends BinaryBitwiseInstruction {
+ BitXorInstruction() { this.getOpcode() instanceof Opcode::BitXor }
+}
+
+/**
+ * An instruction that shifts the bits of its left operand to the left. The number of bit
+ * positions to shift by is given by its right operand.
+ *
+ * Each operand must have an integer type, and the result has the type of the left operand. The
+ * bits vacated on the right are filled with zeros.
+ */
+class ShiftLeftInstruction extends BinaryBitwiseInstruction {
+ ShiftLeftInstruction() { this.getOpcode() instanceof Opcode::ShiftLeft }
+}
+
+/**
+ * An instruction that shifts the bits of its left operand to the right. The number of bit
+ * positions to shift by is given by its right operand.
+ *
+ * Each operand must have an integer type, and the result has the type of the left operand. For
+ * a left operand of unsigned integer type, the leftmost bits are filled with zeros. For a left
+ * operand of signed integer type, the leftmost bits are filled with copies of the most
+ * significant bit of the left operand.
+ */
+class ShiftRightInstruction extends BinaryBitwiseInstruction {
+ ShiftRightInstruction() { this.getOpcode() instanceof Opcode::ShiftRight }
+}
+
+/**
+ * A binary instruction that performs arithmetic in which at least one operand is a
+ * pointer.
+ */
+class PointerArithmeticInstruction extends BinaryInstruction {
+ int elementSize;
+
+ PointerArithmeticInstruction() {
+ elementSize = Raw::getInstructionElementSize(this) and
+ this.getOpcode() instanceof PointerArithmeticOpcode
+ }
+
+ /**
+ * Gets the size, in bytes, of the elements that the pointer operands point to.
+ *
+ * For an addition of an integer offset to a pointer (`PointerAddInstruction`) or a subtraction
+ * of an integer offset from a pointer (`PointerSubInstruction`), the offset is multiplied by
+ * the element size to obtain the number of bytes actually added to or subtracted from the
+ * pointer address. For the integer difference between two pointers
+ * (`PointerDiffInstruction`), the difference between the two pointer byte addresses is
+ * computed first, and that byte count is then divided by the element size.
+ */
+ final int getElementSize() { result = elementSize }
+
+ final override string getImmediateString() { result = this.getElementSize().toString() }
+}
+
+/**
+ * A pointer arithmetic instruction that adds an integer offset to, or subtracts one from, a pointer.
+ */
+class PointerOffsetInstruction extends PointerArithmeticInstruction {
+ PointerOffsetInstruction() { this.getOpcode() instanceof PointerOffsetOpcode }
+}
+
+/**
+ * A pointer arithmetic instruction that adds an integer offset to a pointer.
+ *
+ * The result is a byte address: the value of the left (pointer) operand plus the value of the
+ * right (integer) operand multiplied by the element size. Pointer overflow gives an undefined
+ * result.
+ */
+class PointerAddInstruction extends PointerOffsetInstruction {
+ PointerAddInstruction() { this.getOpcode() instanceof Opcode::PointerAdd }
+}
+
+/**
+ * A pointer arithmetic instruction that subtracts an integer offset from a pointer.
+ *
+ * The result is a byte address: the value of the left (pointer) operand minus the value of the
+ * right (integer) operand multiplied by the element size. Pointer underflow gives an undefined
+ * result.
+ */
+class PointerSubInstruction extends PointerOffsetInstruction {
+ PointerSubInstruction() { this.getOpcode() instanceof Opcode::PointerSub }
+}
+
+/**
+ * A pointer arithmetic instruction whose result is the difference between two pointers.
+ *
+ * The two operands must share the same pointer type, and the result must have an integer type of
+ * the same size as the pointer operands. To compute the result, the byte address in the right
+ * operand is subtracted from the byte address in the left operand, and the difference is divided
+ * by the element size. The result is undefined if the difference in byte addresses is not
+ * divisible by the element size.
+ */
+class PointerDiffInstruction extends PointerArithmeticInstruction {
+ PointerDiffInstruction() { this.getOpcode() instanceof Opcode::PointerDiff }
+}
+
+/**
+ * An instruction that computes its result from a single operand.
+ */
+class UnaryInstruction extends Instruction {
+ UnaryInstruction() { this.getOpcode() instanceof UnaryOpcode }
+
+ /**
+ * Gets the one and only operand of this instruction.
+ */
+ final UnaryOperand getUnaryOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets the instruction that defines the value of this instruction's sole operand.
+ */
+ final Instruction getUnary() { result = this.getUnaryOperand().getDef() }
+}
+
+/**
+ * A unary instruction whose result is the value of its operand converted to a different type.
+ */
+class ConvertInstruction extends UnaryInstruction {
+ ConvertInstruction() { this.getOpcode() instanceof Opcode::Convert }
+}
+
+/**
+ * An instruction that takes the address of a polymorphic object and converts it to the address
+ * of a different subobject of that same object. The result is a null address when the dynamic
+ * type of the object is not compatible with the result type.
+ *
+ * A null address in the operand yields a null address as the result.
+ *
+ * C++ `dynamic_cast<>` to a pointer type is represented by this instruction, as are the C# `is`
+ * and `as` expressions.
+ */
+class CheckedConvertOrNullInstruction extends UnaryInstruction {
+ CheckedConvertOrNullInstruction() { this.getOpcode() instanceof Opcode::CheckedConvertOrNull }
+}
+
+/**
+ * An instruction that takes the address of a polymorphic object and converts it to the address
+ * of a different subobject of that same object. An exception is thrown when the dynamic type of
+ * the object is not compatible with the result type.
+ *
+ * A null address in the operand yields a null address as the result.
+ *
+ * C++ `dynamic_cast<>` to a reference type is represented by this instruction, as is a C# cast
+ * expression.
+ */
+class CheckedConvertOrThrowInstruction extends UnaryInstruction {
+ CheckedConvertOrThrowInstruction() { this.getOpcode() instanceof Opcode::CheckedConvertOrThrow }
+}
+
+/**
+ * An instruction whose result is the address of the complete object containing the subobject
+ * that its operand points to.
+ *
+ * A null address in the operand yields a null address as the result.
+ *
+ * This instruction is used to represent `dynamic_cast<void*>` in C++, which returns a pointer
+ * to the most-derived object.
+ */
+class CompleteObjectAddressInstruction extends UnaryInstruction {
+ CompleteObjectAddressInstruction() { this.getOpcode() instanceof Opcode::CompleteObjectAddress }
+}
+
+/**
+ * An instruction that converts the address of an object into the address of a different
+ * subobject of that same object, with no type check performed at runtime.
+ */
+class InheritanceConversionInstruction extends UnaryInstruction {
+ Language::Class baseClass;
+ Language::Class derivedClass;
+
+ InheritanceConversionInstruction() {
+ Raw::getInstructionInheritance(this, baseClass, derivedClass)
+ }
+
+ /**
+ * Gets the base class of the conversion. It is either a direct base
+ * class of the derived class or a virtual base class of the derived
+ * class.
+ */
+ final Language::Class getBaseClass() { result = baseClass }
+
+ /**
+ * Gets the derived class of the conversion.
+ */
+ final Language::Class getDerivedClass() { result = derivedClass }
+
+ /**
+ * Gets the `ClassDerivation` that relates the base class and the
+ * derived class of this conversion. This predicate has no result if
+ * the conversion is to an indirect virtual base class.
+ */
+ final Language::ClassDerivation getDerivation() {
+ result.getDerivedClass() = derivedClass and result.getBaseClass() = baseClass
+ }
+
+ final override string getImmediateString() {
+ result = derivedClass.toString() + " : " + baseClass.toString()
+ }
+}
+
+/**
+ * An inheritance conversion from the address of a derived class to the address of a base class.
+ */
+class ConvertToBaseInstruction extends InheritanceConversionInstruction {
+ ConvertToBaseInstruction() { this.getOpcode() instanceof ConvertToBaseOpcode }
+}
+
+/**
+ * An instruction that converts the address of a derived class into the address of one of its
+ * direct non-virtual base classes.
+ *
+ * A null address in the operand yields a null address as the result.
+ */
+class ConvertToNonVirtualBaseInstruction extends ConvertToBaseInstruction {
+ ConvertToNonVirtualBaseInstruction() { this.getOpcode() instanceof Opcode::ConvertToNonVirtualBase }
+}
+
+/**
+ * An instruction that converts the address of a derived class into the address of one of its
+ * virtual base classes.
+ *
+ * A null address in the operand yields a null address as the result.
+ */
+class ConvertToVirtualBaseInstruction extends ConvertToBaseInstruction {
+ ConvertToVirtualBaseInstruction() { this.getOpcode() instanceof Opcode::ConvertToVirtualBase }
+}
+
+/**
+ * An instruction that converts the address of a base class into the address of a direct
+ * non-virtual derived class.
+ *
+ * A null address in the operand yields a null address as the result.
+ */
+class ConvertToDerivedInstruction extends InheritanceConversionInstruction {
+ ConvertToDerivedInstruction() { this.getOpcode() instanceof Opcode::ConvertToDerived }
+}
+
+/**
+ * An instruction whose result is the bitwise complement of its operand.
+ *
+ * The operand must have an integer type, and the result has that type as well.
+ */
+class BitComplementInstruction extends UnaryBitwiseInstruction {
+ BitComplementInstruction() { this.getOpcode() instanceof Opcode::BitComplement }
+}
+
+/**
+ * An instruction whose result is the logical complement of its operand.
+ *
+ * The operand must have a Boolean type, and the result has that type as well.
+ */
+class LogicalNotInstruction extends UnaryInstruction {
+ LogicalNotInstruction() { this.getOpcode() instanceof Opcode::LogicalNot }
+}
+
+/**
+ * A binary instruction that compares its two numeric operands.
+ */
+class CompareInstruction extends BinaryInstruction {
+ CompareInstruction() { this.getOpcode() instanceof CompareOpcode }
+}
+
+/**
+ * A comparison whose result is `true` when its two operands are equal.
+ *
+ * The two operands must share the same numeric or address type, and the result must have a
+ * Boolean type. The result is `true` if `left == right`. It is `false` if `left != right` or if
+ * the two operands are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareEQInstruction extends CompareInstruction {
+ CompareEQInstruction() { this.getOpcode() instanceof Opcode::CompareEQ }
+}
+
+/**
+ * A comparison whose result is `true` when its two operands are not equal.
+ *
+ * The two operands must share the same numeric or address type, and the result must have a
+ * Boolean type. The result is `true` if `left != right` or if the two operands are unordered. It
+ * is `false` if `left == right`. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareNEInstruction extends CompareInstruction {
+ CompareNEInstruction() { this.getOpcode() instanceof Opcode::CompareNE }
+}
+
+/**
+ * An instruction that performs a relative comparison of two values, such as `<` or `>=`.
+ */
+class RelationalInstruction extends CompareInstruction {
+  RelationalInstruction() { this.getOpcode() instanceof RelationalOpcode }
+
+  /**
+   * Gets the operand on the "greater" (or "greater-or-equal") side of this
+   * relational instruction, that is, the side that is larger when the overall
+   * instruction evaluates to `true`. For example, for `x <= 20` this is the
+   * `20`, and for `y > 0` it is `y`.
+   */
+  Instruction getGreater() { none() }
+
+  /**
+   * Gets the operand on the "lesser" (or "lesser-or-equal") side of this
+   * relational instruction, that is, the side that is smaller when the overall
+   * instruction evaluates to `true`. For example, for `x <= 20` this is `x`,
+   * and for `y > 0` it is the `0`.
+   */
+  Instruction getLesser() { none() }
+
+  /**
+   * Holds if this relational instruction is strict, that is, not an "or-equal" instruction.
+   */
+  predicate isStrict() { none() }
+}
+
+/**
+ * An instruction whose result is `true` when its left operand is less than its right operand.
+ *
+ * Both operands must have the same numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left < right`, and `false` if `left >= right` or if the two
+ * operands are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareLTInstruction extends RelationalInstruction {
+  CompareLTInstruction() { this.getOpcode() instanceof Opcode::CompareLT }
+
+  override Instruction getLesser() { result = this.getLeft() }
+
+  override Instruction getGreater() { result = this.getRight() }
+
+  override predicate isStrict() { any() }
+}
+
+/**
+ * An instruction whose result is `true` when its left operand is greater than its right operand.
+ *
+ * Both operands must have the same numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left > right`, and `false` if `left <= right` or if the two
+ * operands are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareGTInstruction extends RelationalInstruction {
+  CompareGTInstruction() { this.getOpcode() instanceof Opcode::CompareGT }
+
+  override Instruction getLesser() { result = this.getRight() }
+
+  override Instruction getGreater() { result = this.getLeft() }
+
+  override predicate isStrict() { any() }
+}
+
+/**
+ * An instruction whose result is `true` when its left operand is less than or equal to its
+ * right operand.
+ *
+ * Both operands must have the same numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left <= right`, and `false` if `left > right` or if the two
+ * operands are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareLEInstruction extends RelationalInstruction {
+  CompareLEInstruction() { this.getOpcode() instanceof Opcode::CompareLE }
+
+  override Instruction getLesser() { result = this.getLeft() }
+
+  override Instruction getGreater() { result = this.getRight() }
+
+  override predicate isStrict() { none() }
+}
+
+/**
+ * An instruction whose result is `true` when its left operand is greater than or equal to its
+ * right operand.
+ *
+ * Both operands must have the same numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left >= right`, and `false` if `left < right` or if the two
+ * operands are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareGEInstruction extends RelationalInstruction {
+  CompareGEInstruction() { this.getOpcode() instanceof Opcode::CompareGE }
+
+  override Instruction getLesser() { result = this.getRight() }
+
+  override Instruction getGreater() { result = this.getLeft() }
+
+  override predicate isStrict() { none() }
+}
+
+/**
+ * An instruction that transfers control to one of several successor instructions, selected by
+ * the value of an integer operand.
+ *
+ * The instruction has zero or more successors along edges of kind `CaseEdge`. Each such edge
+ * represents the branch taken when the controlling expression falls within the range specified
+ * for that case edge. The ranges of any two case edges of the same switch must be disjoint from
+ * each other.
+ *
+ * Optionally, the instruction also has one successor along an edge of kind `DefaultEdge`,
+ * representing the branch taken when the controlling expression is outside the range of every
+ * case edge.
+ */
+class SwitchInstruction extends Instruction {
+  SwitchInstruction() { this.getOpcode() instanceof Opcode::Switch }
+
+  /** Gets the operand supplying the integer value on which the switch branches. */
+  final ConditionOperand getExpressionOperand() { result = this.getAnOperand() }
+
+  /** Gets the instruction whose result is the integer value on which the switch branches. */
+  final Instruction getExpression() { result = this.getExpressionOperand().getDef() }
+
+  /** Gets a successor instruction reached along one of the case edges of the switch. */
+  final Instruction getACaseSuccessor() { result = this.getSuccessor(any(CaseEdge edge)) }
+
+  /** Gets the successor instruction reached along the default edge of the switch, if any. */
+  final Instruction getDefaultSuccessor() { result = this.getSuccessor(EdgeKind::defaultEdge()) }
+}
+
+/**
+ * An instruction that performs a call to a function.
+ */
+class CallInstruction extends Instruction {
+  CallInstruction() { this.getOpcode() instanceof Opcode::Call }
+
+  final override string getImmediateString() {
+    if exists(this.getStaticCallTarget())
+    then result = this.getStaticCallTarget().toString()
+    else result = "?"
+  }
+
+  /**
+   * Gets the operand that specifies the target function of the call.
+   */
+  final CallTargetOperand getCallTargetOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the `Instruction` that computes the target function of the call. This is usually a
+   * `FunctionAddress` instruction, but can also be an arbitrary instruction that produces a
+   * function pointer.
+   */
+  final Instruction getCallTarget() { result = this.getCallTargetOperand().getDef() }
+
+  /**
+   * Gets an argument operand of the call, including the `this` pointer operand, if any.
+   */
+  final ArgumentOperand getAnArgumentOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the `Function` that the call targets, if this is statically known.
+   */
+  final Language::Function getStaticCallTarget() {
+    result = this.getCallTarget().(FunctionAddressInstruction).getFunctionSymbol()
+  }
+
+  /**
+   * Gets an argument of the call, including the `this` pointer, if any.
+   */
+  final Instruction getAnArgument() { result = this.getAnArgumentOperand().getDef() }
+
+  /**
+   * Gets the `this` pointer argument operand of the call, if any.
+   */
+  final ThisArgumentOperand getThisArgumentOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the `this` pointer argument of the call, if any.
+   */
+  final Instruction getThisArgument() { result = this.getThisArgumentOperand().getDef() }
+
+  /**
+   * Gets the positional argument operand at the specified index.
+   */
+  pragma[noinline]
+  final PositionalArgumentOperand getPositionalArgumentOperand(int index) {
+    result = this.getAnOperand() and
+    result.getIndex() = index
+  }
+
+  /**
+   * Gets the positional argument at the specified index.
+   */
+  pragma[noinline]
+  final Instruction getPositionalArgument(int index) {
+    result = this.getPositionalArgumentOperand(index).getDef()
+  }
+
+  /**
+   * Gets the argument operand at the specified index, or `this` if `index` is `-1`.
+   */
+  pragma[noinline]
+  final ArgumentOperand getArgumentOperand(int index) {
+    index >= 0 and result = this.getPositionalArgumentOperand(index)
+    or
+    index = -1 and result = this.getThisArgumentOperand()
+  }
+
+  /**
+   * Gets the argument at the specified index, or `this` if `index` is `-1`.
+   */
+  pragma[noinline]
+  final Instruction getArgument(int index) { result = this.getArgumentOperand(index).getDef() }
+
+  /**
+   * Gets the number of arguments of the call, including the `this` pointer, if any.
+   */
+  final int getNumberOfArguments() { result = count(this.getAnArgumentOperand()) }
+
+  /**
+   * Gets a side effect instruction of this call for the argument at the specified index, or for
+   * `this` if `index` is `-1`.
+   *
+   * This helper predicate makes it easy to join on both of these columns at once, avoiding
+   * pathological join orders in case the argument index should get joined first.
+   */
+  pragma[noinline]
+  final SideEffectInstruction getAParameterSideEffect(int index) {
+    this = result.getPrimaryInstruction() and
+    index = result.(IndexedInstruction).getIndex()
+  }
+}
+
+/**
+ * An instruction that represents a side effect of a function call.
+ */
+class SideEffectInstruction extends Instruction {
+  SideEffectInstruction() { this.getOpcode() instanceof SideEffectOpcode }
+
+  /**
+   * Gets the instruction whose execution gives rise to this side effect.
+   */
+  final Instruction getPrimaryInstruction() {
+    Construction::getPrimaryInstructionForSideEffect(this) = result
+  }
+}
+
+/**
+ * An instruction that represents the side effect of a function call on any memory that might be
+ * accessed by that call.
+ */
+class CallSideEffectInstruction extends SideEffectInstruction {
+  CallSideEffectInstruction() { this.getOpcode() instanceof Opcode::CallSideEffect }
+}
+
+/**
+ * An instruction that represents the side effect of a function call on any memory
+ * that might be read by that call.
+ *
+ * This instruction is emitted in place of `CallSideEffectInstruction` when it is certain that
+ * the call target cannot write to escaped memory.
+ */
+class CallReadSideEffectInstruction extends SideEffectInstruction {
+  CallReadSideEffectInstruction() { this.getOpcode() instanceof Opcode::CallReadSideEffect }
+}
+
+/**
+ * An instruction that represents a read side effect of a function call on one
+ * specific parameter.
+ */
+class ReadSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
+  ReadSideEffectInstruction() { this.getOpcode() instanceof ReadSideEffectOpcode }
+
+  /** Gets the operand for the value that this instruction will read, if known. */
+  final SideEffectOperand getSideEffectOperand() { result = this.getAnOperand() }
+
+  /** Gets the value that this instruction will read, if known. */
+  final Instruction getSideEffect() { result = this.getSideEffectOperand().getDef() }
+
+  /** Gets the operand holding the address from which this instruction may read. */
+  final AddressOperand getArgumentOperand() { result = this.getAnOperand() }
+
+  /** Gets the address from which this instruction may read. */
+  final Instruction getArgumentDef() { result = this.getArgumentOperand().getDef() }
+}
+
+/**
+ * An instruction that represents the read of an indirect parameter within a function call.
+ */
+class IndirectReadSideEffectInstruction extends ReadSideEffectInstruction {
+  IndirectReadSideEffectInstruction() { this.getOpcode() instanceof Opcode::IndirectReadSideEffect }
+}
+
+/**
+ * An instruction that represents the read of an indirect buffer parameter within a function call.
+ */
+class BufferReadSideEffectInstruction extends ReadSideEffectInstruction {
+  BufferReadSideEffectInstruction() { this.getOpcode() instanceof Opcode::BufferReadSideEffect }
+}
+
+/**
+ * An instruction representing the read of a sized indirect buffer parameter within a function call.
+ */
+class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction {
+  SizedBufferReadSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::SizedBufferReadSideEffect
+  }
+
+  /**
+   * Gets the operand that holds the number of bytes read from the buffer.
+   */
+  final BufferSizeOperand getBufferSizeOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the number of bytes read from the buffer.
+   */
+  final Instruction getBufferSize() { result = this.getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction representing a write side effect of a function call on a
+ * specific parameter.
+ */
+class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
+  WriteSideEffectInstruction() { this.getOpcode() instanceof WriteSideEffectOpcode }
+
+  /**
+   * Gets the operand that holds the address of the memory to be written.
+   */
+  final AddressOperand getDestinationAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the address of the memory to be written.
+   */
+  Instruction getDestinationAddress() { result = this.getDestinationAddressOperand().getDef() }
+}
+
+/**
+ * An instruction that represents the write of an indirect parameter within a function call.
+ */
+class IndirectMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  IndirectMustWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::IndirectMustWriteSideEffect
+  }
+}
+
+/**
+ * An instruction that represents the write of an indirect buffer parameter within a function
+ * call. The entire buffer is overwritten.
+ */
+class BufferMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  BufferMustWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::BufferMustWriteSideEffect
+  }
+}
+
+/**
+ * An instruction representing the write of a sized indirect buffer parameter within a function
+ * call. The entire buffer is overwritten.
+ */
+class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  SizedBufferMustWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect
+  }
+
+  /**
+   * Gets the operand that holds the number of bytes written to the buffer.
+   */
+  final BufferSizeOperand getBufferSizeOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the number of bytes written to the buffer.
+   */
+  final Instruction getBufferSize() { result = this.getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction representing the potential write of an indirect parameter within a function call.
+ *
+ * Unlike `IndirectMustWriteSideEffectInstruction`, the location might not be completely
+ * overwritten.
+ */
+class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  IndirectMayWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::IndirectMayWriteSideEffect
+  }
+}
+
+/**
+ * An instruction representing the write of an indirect buffer parameter within a function call.
+ *
+ * Unlike `BufferMustWriteSideEffectInstruction`, the buffer might not be completely overwritten.
+ */
+class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ BufferMayWriteSideEffectInstruction() { getOpcode() instanceof Opcode::BufferMayWriteSideEffect }
+}
+
+/**
+ * An instruction representing the write of a sized indirect buffer parameter within a function call.
+ *
+ * Unlike `SizedBufferMustWriteSideEffectInstruction`, the buffer might not be completely overwritten.
+ */
+class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+  SizedBufferMayWriteSideEffectInstruction() {
+    this.getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect
+  }
+
+  /**
+   * Gets the operand that holds the number of bytes written to the buffer.
+   */
+  final BufferSizeOperand getBufferSizeOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result provides the number of bytes written to the buffer.
+   */
+  final Instruction getBufferSize() { result = this.getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction that represents the initial value of newly allocated memory, for example the
+ * result of a call to `malloc`.
+ */
+class InitializeDynamicAllocationInstruction extends SideEffectInstruction {
+  InitializeDynamicAllocationInstruction() {
+    this.getOpcode() instanceof Opcode::InitializeDynamicAllocation
+  }
+
+  /**
+   * Gets the operand representing the address of the allocation that this instruction initializes.
+   */
+  final AddressOperand getAllocationAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the address of the allocation that this instruction initializes.
+   */
+  final Instruction getAllocationAddress() { result = this.getAllocationAddressOperand().getDef() }
+}
+
+/**
+ * An instruction that represents a GNU or MSVC inline assembly statement.
+ */
+class InlineAsmInstruction extends Instruction {
+  InlineAsmInstruction() { this.getOpcode() instanceof Opcode::InlineAsm }
+}
+
+/**
+ * An instruction by which an exception is thrown.
+ */
+class ThrowInstruction extends Instruction {
+  ThrowInstruction() { this.getOpcode() instanceof ThrowOpcode }
+}
+
+/**
+ * An instruction by which a new exception is thrown.
+ */
+class ThrowValueInstruction extends ThrowInstruction {
+  ThrowValueInstruction() { this.getOpcode() instanceof Opcode::ThrowValue }
+
+  /**
+   * Gets the operand holding the address of the exception that this instruction throws.
+   */
+  final AddressOperand getExceptionAddressOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the address of the exception that this instruction throws.
+   */
+  final Instruction getExceptionAddress() { result = this.getExceptionAddressOperand().getDef() }
+
+  /**
+   * Gets the operand for the exception that this instruction throws.
+   */
+  final LoadOperand getExceptionOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the exception that this instruction throws.
+   */
+  final Instruction getException() { result = this.getExceptionOperand().getDef() }
+}
+
+/**
+ * An instruction by which the current exception is re-thrown.
+ */
+class ReThrowInstruction extends ThrowInstruction {
+  ReThrowInstruction() { this.getOpcode() instanceof Opcode::ReThrow }
+}
+
+/**
+ * An instruction that leaves the current function by propagating an exception.
+ */
+class UnwindInstruction extends Instruction {
+  UnwindInstruction() { this.getOpcode() instanceof Opcode::Unwind }
+}
+
+/**
+ * An instruction that marks the start of a `catch` handler.
+ */
+class CatchInstruction extends Instruction {
+  CatchInstruction() { this.getOpcode() instanceof CatchOpcode }
+}
+
+/**
+ * An instruction that catches exceptions of one specific type.
+ */
+class CatchByTypeInstruction extends CatchInstruction {
+  Language::LanguageType exceptionType;
+
+  CatchByTypeInstruction() {
+    this.getOpcode() instanceof Opcode::CatchByType and
+    Raw::getInstructionExceptionType(this) = exceptionType
+  }
+
+  final override string getImmediateString() { result = exceptionType.toString() }
+
+  /**
+   * Gets the type of exception that this instruction catches.
+   */
+  final Language::LanguageType getExceptionType() { result = exceptionType }
+}
+
+/**
+ * An instruction that catches an exception of any kind.
+ */
+class CatchAnyInstruction extends CatchInstruction {
+  CatchAnyInstruction() { this.getOpcode() instanceof Opcode::CatchAny }
+}
+
+/**
+ * An instruction by which all escaped memory is initialized.
+ */
+class AliasedDefinitionInstruction extends Instruction {
+  AliasedDefinitionInstruction() { this.getOpcode() instanceof Opcode::AliasedDefinition }
+}
+
+/**
+ * An instruction by which all escaped memory is consumed on exit from the function.
+ */
+class AliasedUseInstruction extends Instruction {
+  AliasedUseInstruction() { this.getOpcode() instanceof Opcode::AliasedUse }
+}
+
+/**
+ * An instruction representing the choice of one of multiple input values based on control flow.
+ *
+ * A `PhiInstruction` is inserted at the beginning of a block whenever two different definitions of
+ * the same variable reach that block. The `PhiInstruction` will have one operand corresponding to
+ * each control flow predecessor of the block, with that operand representing the version of the
+ * variable that flows from that predecessor. The result value of the `PhiInstruction` will be
+ * a copy of whichever operand corresponds to the actual predecessor that entered the block at
+ * runtime.
+ */
+class PhiInstruction extends Instruction {
+  PhiInstruction() { this.getOpcode() instanceof Opcode::Phi }
+
+  /**
+   * Gets one of the instruction's `PhiInputOperand`s, each representing the value that flows from one predecessor block.
+   */
+  final PhiInputOperand getAnInputOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets an instruction that defines the input to one of the operands of this
+   * instruction. It's possible for more than one operand to have the same
+   * defining instruction, so this predicate will have the same number of
+   * results as `getAnInputOperand()` or fewer.
+   */
+  pragma[noinline]
+  final Instruction getAnInput() { result = this.getAnInputOperand().getDef() }
+
+  /**
+   * Gets the input operand representing the value that flows from the specified predecessor block.
+   */
+  final PhiInputOperand getInputOperand(IRBlock predecessorBlock) {
+    result = this.getAnOperand() and
+    result.getPredecessorBlock() = predecessorBlock
+  }
+}
+
+/**
+ * An instruction representing the effect that a write to a memory may have on potential aliases of
+ * that memory.
+ *
+ * A `ChiInstruction` is inserted immediately after an instruction that writes to memory. The
+ * `ChiInstruction` has two operands. The first operand, given by `getTotalOperand()`, represents
+ * the previous state of all of the memory that might be aliased by the memory write. The second
+ * operand, given by `getPartialOperand()`, represents the memory that was actually modified by the
+ * memory write. The result of the `ChiInstruction` represents the same memory as
+ * `getTotalOperand()`, updated to include the changes due to the value that was actually stored by
+ * the memory write.
+ *
+ * As an example, suppose that variable `p` and `q` are pointers that may or may not point to the
+ * same memory:
+ * ```
+ * *p = 5;
+ * x = *q;
+ * ```
+ *
+ * The IR would look like:
+ * ```
+ * r1_1 = VariableAddress[p]
+ * r1_2 = Load r1_1, m0_0 // Load the value of `p`
+ * r1_3 = Constant[5]
+ * m1_4 = Store r1_2, r1_3 // Store to `*p`
+ * m1_5 = ^Chi m0_1, m1_4 // Side effect of the previous Store on aliased memory
+ * r1_6 = VariableAddress[x]
+ * r1_7 = VariableAddress[q]
+ * r1_8 = Load r1_7, m0_2 // Load the value of `q`
+ * r1_9 = Load r1_8, m1_5 // Load the value of `*q`
+ * m1_10 = Store r1_6, r1_9 // Store to x
+ * ```
+ *
+ * Note the `Chi` instruction after the store to `*p`. This indicates that the previous contents of
+ * aliased memory (`m0_1`) are merged with the new value written by the store (`m1_4`), producing a
+ * new version of aliased memory (`m1_5`). On the subsequent load from `*q`, the source operand of
+ * `*q` is `m1_5`, indicating that the store to `*p` may (or may not) have updated the memory
+ * pointed to by `q`.
+ *
+ * For more information about how `Chi` instructions are used to model memory side effects, see
+ * https://link.springer.com/content/pdf/10.1007%2F3-540-61053-7_66.pdf.
+ */
+class ChiInstruction extends Instruction {
+  ChiInstruction() { this.getOpcode() instanceof Opcode::Chi }
+
+  /**
+   * Gets the operand that represents the previous state of all memory that might be aliased by the
+   * memory write.
+   */
+  final ChiTotalOperand getTotalOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result represents the previous state of all memory that might be
+   * aliased by the memory write.
+   */
+  final Instruction getTotal() { result = this.getTotalOperand().getDef() }
+
+  /**
+   * Gets the operand that represents the new value written by the memory write.
+   */
+  final ChiPartialOperand getPartialOperand() { result = this.getAnOperand() }
+
+  /**
+   * Gets the instruction whose result represents the new value written by the memory write.
+   */
+  final Instruction getPartial() { result = this.getPartialOperand().getDef() }
+
+  /**
+   * Holds if `[startBit, endBit)` is the bit range updated by the partial operand of this `ChiInstruction`, relative to the start address of the total operand.
+   */
+  final predicate getUpdatedInterval(int startBit, int endBit) {
+    Construction::getIntervalUpdatedByChi(this, startBit, endBit)
+  }
+
+  /**
+   * Holds if the `ChiPartialOperand` totally, but not exactly, overlaps with the `ChiTotalOperand`.
+   * This means that the `ChiPartialOperand` will not overwrite the entire memory associated with the
+   * `ChiTotalOperand`.
+   */
+  final predicate isPartialUpdate() { Construction::chiOnlyPartiallyUpdatesLocation(this) }
+}
+
+/**
+ * An instruction that stands for unreachable code.
+ *
+ * It is inserted in place of the original target instruction of a `ConditionalBranch` or
+ * `Switch` instruction where that particular edge is infeasible.
+ */
+class UnreachedInstruction extends Instruction {
+  UnreachedInstruction() { this.getOpcode() instanceof Opcode::Unreached }
+}
+
+/**
+ * An instruction that represents a built-in operation.
+ *
+ * It is used for a variety of intrinsic operations provided by the compiler implementation,
+ * such as vector arithmetic.
+ */
+class BuiltInOperationInstruction extends Instruction {
+  Language::BuiltInOperation operation;
+
+  BuiltInOperationInstruction() {
+    this.getOpcode() instanceof BuiltInOperationOpcode and
+    Raw::getInstructionBuiltInOperation(this) = operation
+  }
+
+  /**
+   * Gets the language-specific `BuiltInOperation` object specifying the operation that this
+   * instruction performs.
+   */
+  final Language::BuiltInOperation getBuiltInOperation() { result = operation }
+}
+
+/**
+ * An instruction that represents a built-in operation for which no specific opcode exists. The
+ * actual operation is given by the `getBuiltInOperation()` predicate.
+ */
+class BuiltInInstruction extends BuiltInOperationInstruction {
+  BuiltInInstruction() { this.getOpcode() instanceof Opcode::BuiltIn }
+
+  final override string getImmediateString() { result = this.getBuiltInOperation().toString() }
+}
+
+/**
+ * An instruction whose result is a `va_list` for accessing the arguments passed to the `...` parameter.
+ *
+ * The operand gives the address of the `IREllipsisVariable` that represents the `...`
+ * parameter. The result is a `va_list` that initially refers to the first argument passed to
+ * the `...` parameter.
+ */
+class VarArgsStartInstruction extends UnaryInstruction {
+  VarArgsStartInstruction() { this.getOpcode() instanceof Opcode::VarArgsStart }
+}
+
+/**
+ * An instruction that cleans up a `va_list` once it is no longer in use.
+ *
+ * The operand gives the address of the `va_list` to clean up. This instruction does not
+ * return a result.
+ */
+class VarArgsEndInstruction extends UnaryInstruction {
+  VarArgsEndInstruction() { this.getOpcode() instanceof Opcode::VarArgsEnd }
+}
+
+/**
+ * An instruction whose result is the address of the argument a `va_list` currently points to.
+ *
+ * The operand is the `va_list` pointing to the argument. The result is the address of that
+ * argument.
+ */
+class VarArgInstruction extends UnaryInstruction {
+  VarArgInstruction() { this.getOpcode() instanceof Opcode::VarArg }
+}
+
+/**
+ * An instruction that advances a `va_list` to point to the next argument that was passed to
+ * the `...` parameter.
+ *
+ * The operand is the current `va_list`. The result is an updated `va_list` pointing to the
+ * next argument of the `...` parameter.
+ */
+class NextVarArgInstruction extends UnaryInstruction {
+  NextVarArgInstruction() { this.getOpcode() instanceof Opcode::NextVarArg }
+}
+
+/**
+ * An instruction that allocates a new object on the managed heap.
+ *
+ * This instruction is used to represent the allocation of a new object in C# using the `new`
+ * expression. This instruction does not invoke a constructor for the object. Instead, there will be
+ * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the
+ * result of the `NewObj` as the `this` argument.
+ *
+ * The result is the address of the newly allocated object.
+ */
+class NewObjInstruction extends Instruction {
+  NewObjInstruction() { this.getOpcode() instanceof Opcode::NewObj }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/Operand.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/Operand.qll
new file mode 100644
index 00000000000..d7cf89ca9aa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/Operand.qll
@@ -0,0 +1,484 @@
+/**
+ * Provides classes that represent the input values of IR instructions.
+ */
+
+private import internal.IRInternal
+private import Instruction
+private import IRBlock
+private import internal.OperandImports as Imports
+private import Imports::MemoryAccessKind
+private import Imports::IRType
+private import Imports::Overlap
+private import Imports::OperandTag
+private import Imports::TOperand
+private import internal.OperandInternal
+
+/**
+ * An operand of an `Instruction` in this stage of the IR. Implemented as a union of the branches
+ * of `TOperand` that are used in this stage: register, non-SSA memory, phi, and chi operands.
+ */
+private class TStageOperand =
+ TRegisterOperand or TNonSSAMemoryOperand or TPhiOperand or TChiOperand;
+
+/**
+ * An operand of an `Instruction`. An operand represents a use of the result of one instruction
+ * (the defining instruction) by another instruction (the use instruction).
+ */
+class Operand extends TStageOperand {
+ cached
+ Operand() {
+ // Exclude operands that refer to instructions from earlier stages that are unreachable in this stage
+ exists(Instruction user, Instruction definer | this = registerOperand(user, _, definer))
+ or
+ exists(Instruction user | this = nonSSAMemoryOperand(user, _))
+ or
+ exists(Instruction user, Instruction definer, IRBlock pred |
+ this = phiOperand(user, definer, pred, _) or
+ this = reusedPhiOperand(user, definer, pred, _)
+ )
+ or
+ exists(Instruction user | this = chiOperand(user, _))
+ }
+
+ /** Gets a textual representation of this operand. */
+ string toString() { result = "Operand" }
+
+ /**
+ * Gets the source location of this operand, which is the location of its use instruction.
+ */
+ final Language::Location getLocation() { result = this.getUse().getLocation() }
+
+ /**
+ * Gets the IR function that contains this operand.
+ */
+ final IRFunction getEnclosingIRFunction() { result = this.getUse().getEnclosingIRFunction() }
+
+ /**
+ * Gets the `Instruction` that consumes this operand.
+ */
+ Instruction getUse() { none() }
+
+ /**
+ * Gets the `Instruction` whose result is the value of the operand. Unlike
+ * `getDef`, this also has a result when `isDefinitionInexact` holds, in
+ * which case the resulting instruction may only _partially_ or
+ * _potentially_ be the value of this operand.
+ */
+ Instruction getAnyDef() { none() }
+
+ /**
+ * Gets the `Instruction` whose result is the value of the operand. Unlike
+ * `getAnyDef`, this has no result when `isDefinitionInexact` holds, so
+ * the resulting instruction is always exactly the value of this
+ * operand.
+ */
+ final Instruction getDef() {
+ this.getDefinitionOverlap() instanceof MustExactlyOverlap and
+ result = this.getAnyDef()
+ }
+
+ /**
+ * DEPRECATED: renamed to `getUse`.
+ *
+ * Gets the `Instruction` that consumes this operand.
+ */
+ deprecated final Instruction getUseInstruction() { result = this.getUse() }
+
+ /**
+ * DEPRECATED: use `getAnyDef` or `getDef` instead. `getAnyDef` is the exact
+ * replacement for this predicate, but most callers should probably switch
+ * to `getDef`.
+ *
+ * Gets the `Instruction` whose result is the value of the operand.
+ */
+ deprecated final Instruction getDefinitionInstruction() { result = this.getAnyDef() }
+
+ /**
+ * Gets the overlap relationship between this operand's definition and its use.
+ */
+ Overlap getDefinitionOverlap() { none() }
+
+ /**
+ * Holds if the result of the definition instruction does not exactly overlap this use.
+ */
+ final predicate isDefinitionInexact() { not this.getDefinitionOverlap() instanceof MustExactlyOverlap }
+
+ /**
+ * Gets the prefix to use for this operand when it is dumped as part of an operand list.
+ */
+ string getDumpLabel() { result = "" }
+
+ /**
+ * Gets a string that uniquely identifies this operand on its use instruction.
+ */
+ string getDumpId() { result = "" }
+
+ /**
+ * Gets a string describing this operand, suitable for display in IR dumps. It is made up of a
+ * label identifying the operand kind, a marker for inexact definitions, and the result ID of
+ * the instruction consumed by the operand.
+ *
+ * For example: `this:r3_5`
+ */
+ final string getDumpString() {
+ result = this.getDumpLabel() + this.getInexactSpecifier() + this.getDefinitionId()
+ }
+
+ /**
+ * Gets the identifier of the definition of this use, or `m?` if the definition is not modeled
+ * in SSA.
+ */
+ private string getDefinitionId() {
+ if exists(this.getAnyDef())
+ then result = this.getAnyDef().getResultId()
+ else result = "m?"
+ }
+
+ /**
+ * Gets the prefix to prepend to the operand's definition ID in an IR dump, indicating whether the operand is
+ * an exact or inexact use of its definition: "~" for an inexact use, and the empty string for an exact
+ * use.
+ */
+ private string getInexactSpecifier() {
+ if this.isDefinitionInexact() then result = "~" else result = ""
+ }
+
+ /**
+ * Gets the order in which the operand should be sorted in the operand list.
+ */
+ int getDumpSortOrder() { result = -1 }
+
+ /**
+ * Gets the type of the value consumed by this operand. This is usually the result type of
+ * the definition instruction consumed by this operand, and always so for register operands.
+ * For some memory operands, the operand type may differ from the definition type, such as
+ * in the case of a partial read or a read from a pointer that has been cast to a different
+ * type.
+ */
+ Language::LanguageType getLanguageType() { result = this.getAnyDef().getResultLanguageType() }
+
+ /**
+ * Gets the language-neutral type of the value consumed by this operand. This is usually the
+ * result type of the definition instruction consumed by this operand, and always so for
+ * register operands. For some memory operands, the operand type may differ from the
+ * definition type, such as in the case of a partial read or a read from a pointer that has
+ * been cast to a different type.
+ */
+ final IRType getIRType() { result = this.getLanguageType().getIRType() }
+
+ /**
+ * Gets the type of the value consumed by this operand. This is usually the result type of
+ * the definition instruction consumed by this operand, and always so for register operands.
+ * For some memory operands, the operand type may differ from the definition type, such as
+ * in the case of a partial read or a read from a pointer that has been cast to a different
+ * type.
+ */
+ final Language::Type getType() { this.getLanguageType().hasType(result, _) }
+
+ /**
+ * Holds if the value consumed by this operand is a glvalue. If this
+ * holds, the operand's value is the address of a location whose type
+ * is given by `getType()`. If this does not hold, the operand's value
+ * is a value whose type is given by
+ * `getType()`.
+ */
+ final predicate isGLValue() { this.getLanguageType().hasType(_, true) }
+
+ /**
+ * Gets the size, in bytes, of the value consumed by this operand. This predicate does not
+ * hold if the operand does not have a known constant size.
+ */
+ final int getSize() { result = this.getLanguageType().getByteSize() }
+}
+
+/**
+ * An operand that consumes a memory result (for example, the `LoadOperand` of a `Load` instruction).
+ */
+class MemoryOperand extends Operand {
+ cached
+ MemoryOperand() {
+ this instanceof TChiOperand or
+ this instanceof TPhiOperand or
+ this instanceof TNonSSAMemoryOperand
+ }
+
+ /**
+ * Gets the kind of memory access that this operand performs.
+ */
+ MemoryAccessKind getMemoryAccess() { result = this.getUse().getOpcode().getReadMemoryAccess() }
+
+ /**
+ * Holds if the memory access performed by this operand will not always read from every bit in
+ * the memory location. This is most commonly the case for memory accesses that may or may not
+ * actually occur depending on runtime state (for example, the write side effect of an output
+ * parameter that is not written to on all paths), or for accesses where the memory location is
+ * a conservative estimate of the memory that might actually be accessed at runtime (for
+ * example, the global side effects of a function call).
+ */
+ predicate hasMayReadMemoryAccess() { this.getUse().getOpcode().hasMayReadMemoryAccess() }
+
+ /**
+ * Gets the operand that holds the memory address from which this operand loads its value, if
+ * any. For example, in `r3 = Load r1, m2`, the result of `getAddressOperand()` for `m2`
+ * is `r1`.
+ */
+ final AddressOperand getAddressOperand() {
+ result.getUse() = this.getUse() and
+ this.getMemoryAccess().usesAddressOperand()
+ }
+}
+
+/**
+ * An operand that is not an operand of a `PhiInstruction`: a register, non-SSA memory, or chi operand.
+ */
+class NonPhiOperand extends Operand {
+ Instruction useInstr;
+ OperandTag tag;
+
+ NonPhiOperand() {
+ this = registerOperand(useInstr, tag, _) or
+ this = nonSSAMemoryOperand(useInstr, tag) or
+ this = chiOperand(useInstr, tag)
+ }
+
+ final override Instruction getUse() { result = useInstr }
+
+ final override string getDumpLabel() { result = tag.getLabel() }
+
+ final override string getDumpId() { result = tag.getId() }
+
+ final override int getDumpSortOrder() { result = tag.getSortOrder() }
+
+ /**
+ * Gets the `OperandTag` that specifies how this operand is used by its use `Instruction`.
+ */
+ final OperandTag getOperandTag() { result = tag }
+}
+
+/**
+ * An operand that consumes a register (non-memory) result; its definition always overlaps exactly.
+ */
+class RegisterOperand extends NonPhiOperand, TRegisterOperand {
+ override RegisterOperandTag tag;
+ Instruction defInstr;
+
+ cached
+ RegisterOperand() { this = registerOperand(useInstr, tag, defInstr) }
+
+ final override string toString() { result = tag.toString() }
+
+ final override Instruction getAnyDef() { result = defInstr }
+
+ final override Overlap getDefinitionOverlap() {
+ // All register results overlap exactly with their uses.
+ result instanceof MustExactlyOverlap
+ }
+}
+
+/**
+ * A memory operand that is not the operand of a `Phi` instruction.
+ */
+class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, TNonPhiMemoryOperand {
+ override MemoryOperandTag tag;
+
+ cached
+ NonPhiMemoryOperand() {
+ this = chiOperand(useInstr, tag)
+ or
+ this = nonSSAMemoryOperand(useInstr, tag)
+ }
+
+ final override string toString() { result = tag.toString() }
+
+ final override Instruction getAnyDef() {
+ result = unique(Instruction definer | this.hasDefinition(definer, _))
+ }
+
+ final override Overlap getDefinitionOverlap() { this.hasDefinition(_, result) }
+
+ pragma[noinline]
+ private predicate hasDefinition(Instruction definer, Overlap relation) {
+ not Construction::isInCycle(useInstr) and
+ strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1 and
+ definer = Construction::getMemoryOperandDefinition(useInstr, tag, relation)
+ }
+
+ /**
+ * Holds if this operand totally overlaps with its definition and consumes the bit range
+ * `[startBitOffset, endBitOffset)`, measured relative to the start address of the definition.
+ */
+ predicate getUsedInterval(int startBitOffset, int endBitOffset) {
+ Construction::getUsedInterval(this, startBitOffset, endBitOffset)
+ }
+}
+
+/**
+ * A memory operand whose type, taken from `Construction`, may differ from its definition's result type.
+ */
+class TypedOperand extends NonPhiMemoryOperand {
+ override TypedOperandTag tag;
+
+ final override Language::LanguageType getLanguageType() {
+ result = Construction::getInstructionOperandType(useInstr, tag)
+ }
+}
+
+/**
+ * The address operand of an instruction that loads or stores a value from
+ * memory (e.g. `Load`, `Store`). Its tag is an `AddressOperandTag`.
+ */
+class AddressOperand extends RegisterOperand {
+ override AddressOperandTag tag;
+}
+
+/**
+ * The buffer size operand of an instruction that represents a read or write of
+ * a buffer. Its tag is a `BufferSizeOperandTag`.
+ */
+class BufferSizeOperand extends RegisterOperand {
+ override BufferSizeOperandTag tag;
+}
+
+/**
+ * The source value operand of an instruction that loads a value from memory (e.g. `Load`,
+ * `ReturnValue`, `ThrowValue`). Its tag is a `LoadOperandTag`.
+ */
+class LoadOperand extends TypedOperand {
+ override LoadOperandTag tag;
+}
+
+/**
+ * The source value operand of a `Store` instruction. Its tag is a `StoreValueOperandTag`.
+ */
+class StoreValueOperand extends RegisterOperand {
+ override StoreValueOperandTag tag;
+}
+
+/**
+ * The sole operand of a unary instruction (e.g. `Convert`, `Negate`, `Copy`). Its tag is a `UnaryOperandTag`.
+ */
+class UnaryOperand extends RegisterOperand {
+ override UnaryOperandTag tag;
+}
+
+/**
+ * The left operand of a binary instruction (e.g. `Add`, `CompareEQ`). Its tag is a `LeftOperandTag`.
+ */
+class LeftOperand extends RegisterOperand {
+ override LeftOperandTag tag;
+}
+
+/**
+ * The right operand of a binary instruction (e.g. `Add`, `CompareEQ`). Its tag is a `RightOperandTag`.
+ */
+class RightOperand extends RegisterOperand {
+ override RightOperandTag tag;
+}
+
+/**
+ * The condition operand of a `ConditionalBranch` or `Switch` instruction. Its tag is a `ConditionOperandTag`.
+ */
+class ConditionOperand extends RegisterOperand {
+ override ConditionOperandTag tag;
+}
+
+/**
+ * The operand representing the target function of a `Call` instruction.
+ */
+class CallTargetOperand extends RegisterOperand {
+ override CallTargetOperandTag tag;
+}
+
+/**
+ * An operand representing an argument to a function call. This includes both
+ * positional arguments (represented by `PositionalArgumentOperand`) and the
+ * implicit `this` argument, if any (represented by `ThisArgumentOperand`). Its tag is an `ArgumentOperandTag`.
+ */
+class ArgumentOperand extends RegisterOperand {
+ override ArgumentOperandTag tag;
+}
+
+/**
+ * An operand representing the implicit `this` argument to a member function
+ * call. Its tag is a `ThisArgumentOperandTag`.
+ */
+class ThisArgumentOperand extends ArgumentOperand {
+ override ThisArgumentOperandTag tag;
+}
+
+/**
+ * An operand representing a positional argument to a function call.
+ */
+class PositionalArgumentOperand extends ArgumentOperand {
+ override PositionalArgumentOperandTag tag;
+
+ /**
+ * Gets the zero-based index of the argument, as recorded by this operand's tag.
+ */
+ final int getIndex() { result = tag.getArgIndex() }
+}
+
+/**
+ * An operand representing memory read as a side effect of evaluating another instruction. Its tag is a `SideEffectOperandTag`.
+ */
+class SideEffectOperand extends TypedOperand {
+ override SideEffectOperandTag tag;
+}
+
+/**
+ * An input operand of a `PhiInstruction`.
+ */
+class PhiInputOperand extends MemoryOperand, TPhiOperand {
+ PhiInstruction useInstr;
+ Instruction defInstr;
+ IRBlock predecessorBlock;
+ Overlap overlap;
+
+ cached
+ PhiInputOperand() {
+ this = reusedPhiOperand(useInstr, defInstr, predecessorBlock, overlap)
+ or
+ this = phiOperand(useInstr, defInstr, predecessorBlock, overlap)
+ }
+
+ override string toString() { result = "Phi" }
+
+ final override PhiInstruction getUse() { result = useInstr }
+
+ final override Instruction getAnyDef() { result = defInstr }
+
+ final override Overlap getDefinitionOverlap() { result = overlap }
+
+ final override int getDumpSortOrder() { result = 11 + predecessorBlock.getDisplayIndex() }
+
+ final override string getDumpLabel() {
+ result = "from " + predecessorBlock.getDisplayIndex().toString() + ":"
+ }
+
+ final override string getDumpId() { result = predecessorBlock.getDisplayIndex().toString() }
+
+ /**
+ * Gets the predecessor block that supplies this input value.
+ */
+ final IRBlock getPredecessorBlock() { result = predecessorBlock }
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof PhiMemoryAccess }
+}
+
+/**
+ * The total operand of a Chi node, representing the previous value of the memory. Its access kind is `ChiTotalMemoryAccess`.
+ */
+class ChiTotalOperand extends NonPhiMemoryOperand {
+ override ChiTotalOperandTag tag;
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof ChiTotalMemoryAccess }
+}
+
+/**
+ * The partial operand of a Chi node, representing the value being written to part of the memory. Its access kind is `ChiPartialMemoryAccess`.
+ */
+class ChiPartialOperand extends NonPhiMemoryOperand {
+ override ChiPartialOperandTag tag;
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof ChiPartialMemoryAccess }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/PrintIR.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/PrintIR.ql
new file mode 100644
index 00000000000..dcef74901e6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/PrintIR.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Print Raw IR
+ * @description Outputs a representation of the Raw IR graph
+ * @id cpp/print-raw-ir
+ * @kind graph
+ */
+
+import PrintIR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/PrintIR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/PrintIR.qll
new file mode 100644
index 00000000000..59dadee7154
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/PrintIR.qll
@@ -0,0 +1,329 @@
+/**
+ * Outputs a representation of the IR as a control flow graph.
+ *
+ * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small
+ * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most
+ * uses, however, it is better to write a query that imports `PrintIR.qll`, extends
+ * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to
+ * dump.
+ */
+
+private import internal.IRInternal
+private import IR
+private import internal.PrintIRImports as Imports
+import Imports::IRConfiguration
+
+private newtype TPrintIRConfiguration = MkPrintIRConfiguration()
+
+/**
+ * A configuration that a query can extend, overriding `shouldPrintFunction`, to control which functions are printed.
+ */
+class PrintIRConfiguration extends TPrintIRConfiguration {
+ /** Gets a textual representation of this configuration. */
+ string toString() { result = "PrintIRConfiguration" }
+
+ /**
+ * Holds if the IR for `func` should be printed. By default, holds for all
+ * functions; override this predicate to select a subset of functions.
+ */
+ predicate shouldPrintFunction(Language::Function func) { any() }
+}
+
+/**
+ * An override of `IRConfiguration` that evaluates debug strings only for the functions that are to be dumped.
+ */
+private class FilteredIRConfiguration extends IRConfiguration {
+ override predicate shouldEvaluateDebugStringsForFunction(Language::Function func) {
+ shouldPrintFunction(func)
+ }
+}
+
+private predicate shouldPrintFunction(Language::Function func) {
+ any(PrintIRConfiguration config).shouldPrintFunction(func)
+}
+
+private string getAdditionalInstructionProperty(Instruction instr, string key) {
+ result = any(IRPropertyProvider provider).getInstructionProperty(instr, key)
+}
+
+private string getAdditionalBlockProperty(IRBlock block, string key) {
+ result = any(IRPropertyProvider provider).getBlockProperty(block, key)
+}
+
+/**
+ * Gets the value of property `key` of `operand`, as reported by any active property provider.
+ */
+private string getAdditionalOperandProperty(Operand operand, string key) {
+ result = any(IRPropertyProvider provider).getOperandProperty(operand, key)
+}
+
+/**
+ * Gets the operand's properties rendered as a comma-separated list of `key:value` pairs. This
+ * predicate has no result if the operand has no properties.
+ */
+private string getOperandPropertyListString(Operand operand) {
+ result =
+ strictconcat(string name, string val |
+ val = getAdditionalOperandProperty(operand, name)
+ |
+ name + ":" + val, ", "
+ )
+}
+
+/**
+ * Gets the operand's property list (see `getOperandPropertyListString`) wrapped in curly
+ * braces. If the operand has no properties, this predicate returns the empty string rather
+ * than an empty pair of braces.
+ */
+private string getOperandPropertyString(Operand operand) {
+ not exists(getOperandPropertyListString(operand)) and result = ""
+ or
+ result = "{" + getOperandPropertyListString(operand) + "}"
+}
+
+private newtype TPrintableIRNode =
+ TPrintableIRFunction(IRFunction func) { shouldPrintFunction(func.getFunction()) } or
+ TPrintableIRBlock(IRBlock b) { shouldPrintFunction(b.getEnclosingFunction()) } or
+ TPrintableInstruction(Instruction i) { shouldPrintFunction(i.getEnclosingFunction()) }
+
+/**
+ * A node that is emitted in the IR graph.
+ */
+abstract private class PrintableIRNode extends TPrintableIRNode {
+ abstract string toString();
+
+ /**
+ * Gets the location that is emitted for this node.
+ */
+ abstract Language::Location getLocation();
+
+ /**
+ * Gets the label that is emitted for this node.
+ */
+ abstract string getLabel();
+
+ /**
+ * Gets the position of this node among the children of its parent node.
+ */
+ abstract int getOrder();
+
+ /**
+ * Gets the parent node of this node.
+ */
+ abstract PrintableIRNode getParent();
+
+ /**
+ * Gets the kind of graph that this node represents ("graph" or "tree").
+ */
+ string getGraphKind() { none() }
+
+ /**
+ * Holds if this node should always be rendered as text, even when shown in a
+ * graphical viewer.
+ */
+ predicate forceText() { none() }
+
+ /**
+ * Gets the value of this node's property named `key`.
+ */
+ string getProperty(string key) {
+ result = this.getLabel() and key = "semmle.label"
+ or
+ result = this.getOrder().toString() and key = "semmle.order"
+ or
+ result = this.getGraphKind() and key = "semmle.graphKind"
+ or
+ result = "true" and key = "semmle.forceText" and this.forceText()
+ }
+}
+
+/**
+ * An IR graph node representing an `IRFunction` object.
+ */
+private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction {
+ IRFunction irFunc;
+
+ PrintableIRFunction() { this = TPrintableIRFunction(irFunc) }
+
+ override string toString() { result = irFunc.toString() }
+
+ override Language::Location getLocation() { result = irFunc.getLocation() }
+
+ override string getLabel() { result = Language::getIdentityString(irFunc.getFunction()) }
+
+ override int getOrder() {
+ this =
+ rank[result + 1](PrintableIRFunction candidate, Language::Location loc |
+ loc = candidate.getIRFunction().getLocation()
+ |
+ candidate
+ order by
+ loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(),
+ candidate.getLabel()
+ )
+ }
+
+ final override PrintableIRNode getParent() { none() }
+
+ final IRFunction getIRFunction() { result = irFunc }
+}
+
+/**
+ * An IR graph node that represents an `IRBlock` object.
+ */
+private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock {
+ IRBlock block;
+
+ PrintableIRBlock() { this = TPrintableIRBlock(block) }
+
+ override string toString() { result = this.getLabel() }
+
+ override Language::Location getLocation() { result = block.getLocation() }
+
+ override string getLabel() { result = "Block " + block.getDisplayIndex().toString() }
+
+ override int getOrder() { result = block.getDisplayIndex() }
+
+ final override string getGraphKind() { result = "tree" }
+
+ final override predicate forceText() { any() }
+
+ final override PrintableIRFunction getParent() {
+ block.getEnclosingIRFunction() = result.getIRFunction()
+ }
+
+ override string getProperty(string key) {
+ result = getAdditionalBlockProperty(block, key) or
+ result = PrintableIRNode.super.getProperty(key)
+ }
+
+ final IRBlock getBlock() { result = block }
+}
+
+/**
+ * An IR graph node that represents an `Instruction`.
+ */
+private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction {
+ Instruction instr;
+
+ PrintableInstruction() { this = TPrintableInstruction(instr) }
+
+ override string toString() { result = instr.toString() }
+
+ override Language::Location getLocation() { result = instr.getLocation() }
+
+ override string getLabel() {
+ exists(
+ IRBlock owner, string resultText, string operationText, string operandsText,
+ int resultColumn, int operationColumn
+ |
+ instr = owner.getAnInstruction() and
+ resultText = instr.getResultString() and
+ operationText = instr.getOperationString() and
+ operandsText = this.getOperandsString() and
+ columnWidths(owner, resultColumn, operationColumn) and
+ // Pad the result and operation columns to the widths computed for the block so that
+ // the instructions of a block line up.
+ result =
+ resultText + getPaddingString(resultColumn - resultText.length()) + " = " +
+ operationText + getPaddingString(operationColumn - operationText.length()) +
+ " : " + operandsText
+ )
+ }
+
+ override int getOrder() { result = instr.getDisplayIndexInBlock() }
+
+ final override PrintableIRBlock getParent() { instr.getBlock() = result.getBlock() }
+
+ final Instruction getInstruction() { result = instr }
+
+ override string getProperty(string key) {
+ result = getAdditionalInstructionProperty(instr, key) or
+ result = PrintableIRNode.super.getProperty(key)
+ }
+
+ /**
+ * Gets the string representation of the operand list. This matches
+ * `Instruction::getOperandsString()`, except that each operand is followed by any properties
+ * that active `IRPropertyProvider` instances report for it.
+ */
+ private string getOperandsString() {
+ result =
+ concat(Operand op |
+ op = instr.getAnOperand()
+ |
+ op.getDumpString() + getOperandPropertyString(op), ", "
+ order by
+ op.getDumpSortOrder()
+ )
+ }
+}
+
+private predicate columnWidths(IRBlock block, int resultWidth, int operationWidth) {
+ operationWidth =
+ max(Instruction i | block = i.getBlock() | i.getOperationString().length()) and
+ resultWidth = max(Instruction i | block = i.getBlock() | i.getResultString().length())
+}
+
+private int maxColumnWidth() {
+ result =
+ max(Instruction i, int len |
+ len = i.getOperandsString().length() or
+ len = i.getOperationString().length() or
+ len = i.getResultString().length()
+ |
+ len
+ )
+}
+
+private string getPaddingString(int n) {
+ result = "" and n = 0
+ or
+ n in [1 .. maxColumnWidth()] and result = " " + getPaddingString(n - 1)
+}
+
+/**
+ * Holds if `node` is part of the output graph and `value` is the value of its property `key`.
+ */
+query predicate nodes(PrintableIRNode node, string key, string value) {
+ node.getProperty(key) = value
+}
+
+private int getSuccessorIndex(IRBlock pred, IRBlock succ) {
+ succ =
+ rank[result + 1](IRBlock candidate, EdgeKind edge |
+ pred.getSuccessor(edge) = candidate
+ |
+ candidate order by edge.toString()
+ )
+}
+
+/**
+ * Holds if the output graph has an edge from `pred` to `succ` whose property `key` has the
+ * value `value`.
+ */
+query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) {
+ exists(EdgeKind edge, IRBlock source, IRBlock target |
+ source = pred.getBlock() and
+ target = succ.getBlock() and
+ source.getSuccessor(edge) = target and
+ (
+ key = "semmle.order" and
+ value = getSuccessorIndex(source, target).toString()
+ or
+ (
+ key = "semmle.label" and
+ if source.getBackEdgeSuccessor(edge) = target
+ then value = edge.toString() + " (back edge)"
+ else value = edge.toString()
+ )
+ )
+ )
+}
+
+/**
+ * Holds if `child` has `parent` as its parent node in the output graph.
+ */
+query predicate parents(PrintableIRNode child, PrintableIRNode parent) {
+ child.getParent() = parent
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/ConstantAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/ConstantAnalysis.qll
new file mode 100644
index 00000000000..76f52f8334a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/ConstantAnalysis.qll
@@ -0,0 +1,53 @@
+private import internal.ConstantAnalysisInternal
+private import semmle.code.cpp.ir.internal.IntegerPartial
+private import IR
+
+language[monotonicAggregates]
+int getConstantValue(Instruction instr) {
+ result = getBinaryInstructionValue(instr)
+ or
+ result = instr.(IntegerConstantInstruction).getValue().toInt()
+ or
+ result = getConstantValue(instr.(CopyInstruction).getSourceValue())
+ or
+ result = neg(getConstantValue(instr.(NegateInstruction).getUnary()))
+ or
+ result =
+ unique(Operand input | input = instr.(PhiInstruction).getAnInputOperand() |
+ getConstantValue(input.getDef())
+ )
+}
+
+pragma[noinline]
+private predicate binaryInstructionOperands(BinaryInstruction instr, int left, int right) {
+ getConstantValue(instr.getRight()) = right and
+ getConstantValue(instr.getLeft()) = left
+}
+
+pragma[noinline]
+private int getBinaryInstructionValue(BinaryInstruction instr) {
+ exists(int lhs, int rhs |
+ binaryInstructionOperands(instr, lhs, rhs) and
+ (
+ result = add(lhs, rhs) and instr instanceof AddInstruction
+ or
+ result = sub(lhs, rhs) and instr instanceof SubInstruction
+ or
+ result = mul(lhs, rhs) and instr instanceof MulInstruction
+ or
+ result = div(lhs, rhs) and instr instanceof DivInstruction
+ or
+ result = compareEQ(lhs, rhs) and instr instanceof CompareEQInstruction
+ or
+ result = compareNE(lhs, rhs) and instr instanceof CompareNEInstruction
+ or
+ result = compareLT(lhs, rhs) and instr instanceof CompareLTInstruction
+ or
+ result = compareGT(lhs, rhs) and instr instanceof CompareGTInstruction
+ or
+ result = compareLE(lhs, rhs) and instr instanceof CompareLEInstruction
+ or
+ result = compareGE(lhs, rhs) and instr instanceof CompareGEInstruction
+ )
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/PrintConstantAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/PrintConstantAnalysis.qll
new file mode 100644
index 00000000000..57a7cf594ca
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/PrintConstantAnalysis.qll
@@ -0,0 +1,11 @@
+private import internal.ConstantAnalysisInternal
+private import semmle.code.cpp.ir.internal.IntegerConstant
+private import ConstantAnalysis
+import IR
+
+private class ConstantAnalysisPropertyProvider extends IRPropertyProvider {
+ override string getInstructionProperty(Instruction instr, string key) {
+ result = getValue(getConstantValue(instr)).toString() and
+ key = "ConstantValue"
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/internal/ConstantAnalysisInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/internal/ConstantAnalysisInternal.qll
new file mode 100644
index 00000000000..3b28a05290c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/constant/internal/ConstantAnalysisInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.raw.IR as IR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/PrintValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/PrintValueNumbering.qll
new file mode 100644
index 00000000000..a7fb1b3c07e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/PrintValueNumbering.qll
@@ -0,0 +1,17 @@
+private import internal.ValueNumberingImports
+private import ValueNumbering
+
+/**
+ * Adds a `valnum` property describing each instruction's value number to IR dumps.
+ */
+class ValueNumberPropertyProvider extends IRPropertyProvider {
+ override string getInstructionProperty(Instruction instr, string key) {
+ exists(ValueNumber num |
+ key = "valnum" and
+ num = valueNumber(instr) and
+ if not strictcount(num.getAnInstruction()) > 1
+ then result = "unique"
+ else result = num.getDebugString()
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll
new file mode 100644
index 00000000000..796fb792366
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll
@@ -0,0 +1,88 @@
+private import internal.ValueNumberingInternal
+private import internal.ValueNumberingImports
+
+/**
+ * The value number assigned to a particular set of instructions that produce equivalent results.
+ */
+class ValueNumber extends TValueNumber {
+ final string toString() { result = "GVN" }
+
+ final string getDebugString() { result = strictconcat(getAnInstruction().getResultId(), ", ") }
+
+ final Language::Location getLocation() {
+ if
+ exists(Instruction i |
+ i = getAnInstruction() and not i.getLocation() instanceof Language::UnknownLocation
+ )
+ then
+ result =
+ min(Language::Location l |
+ l = getAnInstruction().getLocation() and not l instanceof Language::UnknownLocation
+ |
+ l
+ order by
+ l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
+ l.getEndColumn()
+ )
+ else result instanceof Language::UnknownDefaultLocation
+ }
+
+ /**
+ * Gets an instruction that has been assigned this value number. This will always produce at
+ * least one result.
+ */
+ final Instruction getAnInstruction() { this = valueNumber(result) }
+
+ /**
+ * Gets one of the instructions that was assigned this value number. The chosen instruction is
+ * deterministic but arbitrary. Intended for use only in debugging.
+ */
+ final Instruction getExampleInstruction() {
+ result =
+ min(Instruction instr |
+ instr = getAnInstruction()
+ |
+ instr order by instr.getBlock().getDisplayIndex(), instr.getDisplayIndexInBlock()
+ )
+ }
+
+ /**
+ * Gets an `Operand` whose definition is exact and has this value number.
+ */
+ final Operand getAUse() { this = valueNumber(result.getDef()) }
+
+ final string getKind() {
+ this instanceof TVariableAddressValueNumber and result = "VariableAddress"
+ or
+ this instanceof TInitializeParameterValueNumber and result = "InitializeParameter"
+ or
+ this instanceof TConstantValueNumber and result = "Constant"
+ or
+ this instanceof TStringConstantValueNumber and result = "StringConstant"
+ or
+ this instanceof TFieldAddressValueNumber and result = "FieldAddress"
+ or
+ this instanceof TBinaryValueNumber and result = "Binary"
+ or
+ this instanceof TPointerArithmeticValueNumber and result = "PointerArithmetic"
+ or
+ this instanceof TUnaryValueNumber and result = "Unary"
+ or
+ this instanceof TInheritanceConversionValueNumber and result = "InheritanceConversion"
+ or
+ this instanceof TLoadTotalOverlapValueNumber and result = "LoadTotalOverlap"
+ or
+ this instanceof TUniqueValueNumber and result = "Unique"
+ }
+}
+
+/**
+ * Gets the value number assigned to `instr`, if any. Returns at most one result.
+ */
+ValueNumber valueNumber(Instruction instr) { result = tvalueNumber(instr) }
+
+/**
+ * Gets the value number assigned to the exact definition of `op`, if any.
+ * Returns at most one result.
+ */
+ValueNumber valueNumberOfOperand(Operand op) { result = tvalueNumberOfOperand(op) }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingImports.qll
new file mode 100644
index 00000000000..8482a5e4b14
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.aliased_ssa.IR
+import semmle.code.cpp.ir.internal.Overlap
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingInternal.qll
new file mode 100644
index 00000000000..2467d961892
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingInternal.qll
@@ -0,0 +1,311 @@
+private import ValueNumberingImports
+
+newtype TValueNumber =
+ TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
+ variableAddressValueNumber(_, irFunc, ast)
+ } or
+ TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
+ initializeParameterValueNumber(_, irFunc, var)
+ } or
+ TConstantValueNumber(IRFunction irFunc, IRType type, string value) {
+ constantValueNumber(_, irFunc, type, value)
+ } or
+ TStringConstantValueNumber(IRFunction irFunc, IRType type, string value) {
+ stringConstantValueNumber(_, irFunc, type, value)
+ } or
+ TFieldAddressValueNumber(IRFunction irFunc, Language::Field field, TValueNumber objectAddress) {
+ fieldAddressValueNumber(_, irFunc, field, objectAddress)
+ } or
+ TBinaryValueNumber(
+ IRFunction irFunc, Opcode opcode, TValueNumber leftOperand, TValueNumber rightOperand
+ ) {
+ binaryValueNumber(_, irFunc, opcode, leftOperand, rightOperand)
+ } or
+ TPointerArithmeticValueNumber(
+ IRFunction irFunc, Opcode opcode, int elementSize, TValueNumber leftOperand,
+ TValueNumber rightOperand
+ ) {
+ pointerArithmeticValueNumber(_, irFunc, opcode, elementSize, leftOperand, rightOperand)
+ } or
+ TUnaryValueNumber(IRFunction irFunc, Opcode opcode, TValueNumber operand) {
+ unaryValueNumber(_, irFunc, opcode, operand)
+ } or
+ TInheritanceConversionValueNumber(
+ IRFunction irFunc, Opcode opcode, Language::Class baseClass, Language::Class derivedClass,
+ TValueNumber operand
+ ) {
+ inheritanceConversionValueNumber(_, irFunc, opcode, baseClass, derivedClass, operand)
+ } or
+ TLoadTotalOverlapValueNumber(
+ IRFunction irFunc, IRType type, TValueNumber memOperand, TValueNumber operand
+ ) {
+ loadTotalOverlapValueNumber(_, irFunc, type, memOperand, operand)
+ } or
+ TUniqueValueNumber(IRFunction irFunc, Instruction instr) { uniqueValueNumber(instr, irFunc) }
+
+/**
+ * A `CopyInstruction` whose source operand's value is congruent to the definition of that source
+ * operand.
+ * For example:
+ * ```
+ * Point p = { 1, 2 };
+ * Point q = p;
+ * int a = p.x;
+ * ```
+ * The use of `p` on line 2 is linked to the definition of `p` on line 1, and is congruent to that
+ * definition because it accesses the exact same memory.
+ * The use of `p.x` on line 3 is linked to the definition of `p` on line 1 as well, but is not
+ * congruent to that definition because `p.x` accesses only a subset of the memory defined by `p`.
+ */
+class CongruentCopyInstruction extends CopyInstruction {
+ CongruentCopyInstruction() {
+ this.getSourceValueOperand().getDefinitionOverlap() instanceof MustExactlyOverlap
+ }
+}
+
+class LoadTotalOverlapInstruction extends LoadInstruction {
+ LoadTotalOverlapInstruction() {
+ this.getSourceValueOperand().getDefinitionOverlap() instanceof MustTotallyOverlap
+ }
+}
+
+/**
+ * Holds if this library knows how to assign a value number to the specified instruction, other than
+ * a `unique` value number that is never shared by multiple instructions.
+ */
+private predicate numberableInstruction(Instruction instr) {
+ instr instanceof VariableAddressInstruction
+ or
+ instr instanceof InitializeParameterInstruction
+ or
+ instr instanceof ConstantInstruction
+ or
+ instr instanceof StringConstantInstruction
+ or
+ instr instanceof FieldAddressInstruction
+ or
+ instr instanceof BinaryInstruction
+ or
+ instr instanceof UnaryInstruction and not instr instanceof CopyInstruction
+ or
+ instr instanceof PointerArithmeticInstruction
+ or
+ instr instanceof CongruentCopyInstruction
+ or
+ instr instanceof LoadTotalOverlapInstruction
+}
+
+private predicate filteredNumberableInstruction(Instruction instr) {
+ // count rather than strictcount to handle missing AST elements
+ // separate instanceof and inline casts to avoid failed casts with a count of 0
+ instr instanceof VariableAddressInstruction and
+ count(instr.(VariableAddressInstruction).getIRVariable().getAST()) != 1
+ or
+ instr instanceof ConstantInstruction and
+ count(instr.getResultIRType()) != 1
+ or
+ instr instanceof FieldAddressInstruction and
+ count(instr.(FieldAddressInstruction).getField()) != 1
+}
+
+private predicate variableAddressValueNumber(
+ VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ // The underlying AST element is used as value-numbering key instead of the
+ // `IRVariable` to work around a problem where a variable or expression with
+ // multiple types gives rise to multiple `IRVariable`s.
+ instr.getIRVariable().getAST() = ast and
+ strictcount(instr.getIRVariable().getAST()) = 1
+}
+
+private predicate initializeParameterValueNumber(
+ InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ // The underlying AST element is used as value-numbering key instead of the
+ // `IRVariable` to work around a problem where a variable or expression with
+ // multiple types gives rise to multiple `IRVariable`s.
+ instr.getIRVariable().getAST() = var
+}
+
+private predicate constantValueNumber(
+ ConstantInstruction instr, IRFunction irFunc, IRType type, string value
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ strictcount(instr.getResultIRType()) = 1 and
+ instr.getResultIRType() = type and
+ instr.getValue() = value
+}
+
+private predicate stringConstantValueNumber(
+ StringConstantInstruction instr, IRFunction irFunc, IRType type, string value
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getResultIRType() = type and
+ instr.getValue().getValue() = value
+}
+
+private predicate fieldAddressValueNumber(
+ FieldAddressInstruction instr, IRFunction irFunc, Language::Field field,
+ TValueNumber objectAddress
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getField() = field and
+ strictcount(instr.getField()) = 1 and
+ tvalueNumber(instr.getObjectAddress()) = objectAddress
+}
+
+private predicate binaryValueNumber(
+ BinaryInstruction instr, IRFunction irFunc, Opcode opcode, TValueNumber leftOperand,
+ TValueNumber rightOperand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ not instr instanceof PointerArithmeticInstruction and
+ instr.getOpcode() = opcode and
+ tvalueNumber(instr.getLeft()) = leftOperand and
+ tvalueNumber(instr.getRight()) = rightOperand
+}
+
+private predicate pointerArithmeticValueNumber(
+ PointerArithmeticInstruction instr, IRFunction irFunc, Opcode opcode, int elementSize,
+ TValueNumber leftOperand, TValueNumber rightOperand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getOpcode() = opcode and
+ instr.getElementSize() = elementSize and
+ tvalueNumber(instr.getLeft()) = leftOperand and
+ tvalueNumber(instr.getRight()) = rightOperand
+}
+
+private predicate unaryValueNumber(
+ UnaryInstruction instr, IRFunction irFunc, Opcode opcode, TValueNumber operand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ not instr instanceof InheritanceConversionInstruction and
+ not instr instanceof CopyInstruction and
+ not instr instanceof FieldAddressInstruction and
+ instr.getOpcode() = opcode and
+ tvalueNumber(instr.getUnary()) = operand
+}
+
+private predicate inheritanceConversionValueNumber(
+ InheritanceConversionInstruction instr, IRFunction irFunc, Opcode opcode,
+ Language::Class baseClass, Language::Class derivedClass, TValueNumber operand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ instr.getOpcode() = opcode and
+ instr.getBaseClass() = baseClass and
+ instr.getDerivedClass() = derivedClass and
+ tvalueNumber(instr.getUnary()) = operand
+}
+
+private predicate loadTotalOverlapValueNumber(
+ LoadTotalOverlapInstruction instr, IRFunction irFunc, IRType type, TValueNumber memOperand,
+ TValueNumber operand
+) {
+ instr.getEnclosingIRFunction() = irFunc and
+ tvalueNumber(instr.getAnOperand().(MemoryOperand).getAnyDef()) = memOperand and
+ tvalueNumberOfOperand(instr.getAnOperand().(AddressOperand)) = operand and
+ instr.getResultIRType() = type
+}
+
+/**
+ * Holds if `instr` should be assigned a unique value number because this library does not know how
+ * to determine if two instances of that instruction are equivalent.
+ */
+private predicate uniqueValueNumber(Instruction instr, IRFunction irFunc) {
+ instr.getEnclosingIRFunction() = irFunc and
+ not instr.getResultIRType() instanceof IRVoidType and
+ (
+ not numberableInstruction(instr)
+ or
+ filteredNumberableInstruction(instr)
+ )
+}
+
+/**
+ * Gets the value number assigned to `instr`, if any. Returns at most one result.
+ */
+cached
+TValueNumber tvalueNumber(Instruction instr) {
+ result = nonUniqueValueNumber(instr)
+ or
+ exists(IRFunction irFunc |
+ uniqueValueNumber(instr, irFunc) and
+ result = TUniqueValueNumber(irFunc, instr)
+ )
+}
+
+/**
+ * Gets the value number assigned to the exact definition of `op`, if any.
+ * Returns at most one result.
+ */
+TValueNumber tvalueNumberOfOperand(Operand op) { result = tvalueNumber(op.getDef()) }
+
+/**
+ * Gets the value number assigned to `instr`, if any, unless that instruction is assigned a unique
+ * value number.
+ */
+private TValueNumber nonUniqueValueNumber(Instruction instr) {
+ exists(IRFunction irFunc |
+ irFunc = instr.getEnclosingIRFunction() and
+ (
+ exists(Language::AST ast |
+ variableAddressValueNumber(instr, irFunc, ast) and
+ result = TVariableAddressValueNumber(irFunc, ast)
+ )
+ or
+ exists(Language::AST var |
+ initializeParameterValueNumber(instr, irFunc, var) and
+ result = TInitializeParameterValueNumber(irFunc, var)
+ )
+ or
+ exists(string value, IRType type |
+ constantValueNumber(instr, irFunc, type, value) and
+ result = TConstantValueNumber(irFunc, type, value)
+ )
+ or
+ exists(IRType type, string value |
+ stringConstantValueNumber(instr, irFunc, type, value) and
+ result = TStringConstantValueNumber(irFunc, type, value)
+ )
+ or
+ exists(Language::Field field, TValueNumber objectAddress |
+ fieldAddressValueNumber(instr, irFunc, field, objectAddress) and
+ result = TFieldAddressValueNumber(irFunc, field, objectAddress)
+ )
+ or
+ exists(Opcode opcode, TValueNumber leftOperand, TValueNumber rightOperand |
+ binaryValueNumber(instr, irFunc, opcode, leftOperand, rightOperand) and
+ result = TBinaryValueNumber(irFunc, opcode, leftOperand, rightOperand)
+ )
+ or
+ exists(Opcode opcode, TValueNumber operand |
+ unaryValueNumber(instr, irFunc, opcode, operand) and
+ result = TUnaryValueNumber(irFunc, opcode, operand)
+ )
+ or
+ exists(
+ Opcode opcode, Language::Class baseClass, Language::Class derivedClass, TValueNumber operand
+ |
+ inheritanceConversionValueNumber(instr, irFunc, opcode, baseClass, derivedClass, operand) and
+ result = TInheritanceConversionValueNumber(irFunc, opcode, baseClass, derivedClass, operand)
+ )
+ or
+ exists(Opcode opcode, int elementSize, TValueNumber leftOperand, TValueNumber rightOperand |
+ pointerArithmeticValueNumber(instr, irFunc, opcode, elementSize, leftOperand, rightOperand) and
+ result =
+ TPointerArithmeticValueNumber(irFunc, opcode, elementSize, leftOperand, rightOperand)
+ )
+ or
+ exists(IRType type, TValueNumber memOperand, TValueNumber operand |
+ loadTotalOverlapValueNumber(instr, irFunc, type, memOperand, operand) and
+ result = TLoadTotalOverlapValueNumber(irFunc, type, memOperand, operand)
+ )
+ or
+ // The value number of a copy is just the value number of its source value.
+ result = tvalueNumber(instr.(CongruentCopyInstruction).getSourceValue())
+ )
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRBlockImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRBlockImports.qll
new file mode 100644
index 00000000000..d1b46ed35c8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRBlockImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRConstruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRConstruction.qll
new file mode 100644
index 00000000000..e8fcf3fcdf3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRConstruction.qll
@@ -0,0 +1,406 @@
+private import cpp
+import semmle.code.cpp.ir.implementation.raw.IR
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.implementation.internal.IRFunctionBase
+private import semmle.code.cpp.ir.implementation.internal.TInstruction
+private import semmle.code.cpp.ir.implementation.internal.TIRVariable
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.ir.internal.Overlap
+private import semmle.code.cpp.ir.internal.TempVariableTag
+private import InstructionTag
+private import TranslatedCondition
+private import TranslatedElement
+private import TranslatedExpr
+private import TranslatedStmt
+private import TranslatedFunction
+
+TranslatedElement getInstructionTranslatedElement(Instruction instruction) {
+ instruction = TRawInstruction(result, _)
+}
+
+InstructionTag getInstructionTag(Instruction instruction) {
+ instruction = TRawInstruction(_, result)
+}
+
+/**
+ * Provides the portion of the parameterized IR interface that is used to construct the initial
+ * "raw" stage of the IR. The other stages of the IR do not expose these predicates.
+ */
+cached
+module Raw {
+ class InstructionTag1 = TranslatedElement;
+
+ class InstructionTag2 = InstructionTag;
+
+ cached
+ predicate functionHasIR(Function func) { exists(getTranslatedFunction(func)) }
+
+ cached
+ predicate hasInstruction(TranslatedElement element, InstructionTag tag) {
+ element.hasInstruction(_, tag, _)
+ }
+
+ cached
+ predicate hasUserVariable(Function func, Variable var, CppType type) {
+ getTranslatedFunction(func).hasUserVariable(var, type)
+ }
+
+ cached
+ predicate hasTempVariable(Function func, Locatable ast, TempVariableTag tag, CppType type) {
+ exists(TranslatedElement element |
+ element.getAST() = ast and
+ func = element.getFunction() and
+ element.hasTempVariable(tag, type)
+ )
+ }
+
+ cached
+ predicate hasStringLiteral(Function func, Locatable ast, CppType type, StringLiteral literal) {
+ literal = ast and
+ literal.getEnclosingFunction() = func and
+ getTypeForPRValue(literal.getType()) = type
+ }
+
+ cached
+ predicate hasDynamicInitializationFlag(Function func, StaticLocalVariable var, CppType type) {
+ var.getFunction() = func and
+ var.hasDynamicInitialization() and
+ type = getBoolType()
+ }
+
+ cached
+ TIRVariable getInstructionVariable(Instruction instruction) {
+ exists(TranslatedElement element, InstructionTag tag |
+ element = getInstructionTranslatedElement(instruction) and
+ tag = getInstructionTag(instruction) and
+ (
+ result = element.getInstructionVariable(tag) or
+ result.(IRStringLiteral).getAST() = element.getInstructionStringLiteral(tag)
+ )
+ )
+ }
+
+ cached
+ Field getInstructionField(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionField(getInstructionTag(instruction))
+ }
+
+ cached
+ Function getInstructionFunction(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionFunction(getInstructionTag(instruction))
+ }
+
+ cached
+ string getInstructionConstantValue(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionConstantValue(getInstructionTag(instruction))
+ }
+
+ cached
+ int getInstructionIndex(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionIndex(getInstructionTag(instruction))
+ }
+
+ cached
+ BuiltInOperation getInstructionBuiltInOperation(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionBuiltInOperation(getInstructionTag(instruction))
+ }
+
+ cached
+ CppType getInstructionExceptionType(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionExceptionType(getInstructionTag(instruction))
+ }
+
+ cached
+ predicate getInstructionInheritance(Instruction instruction, Class baseClass, Class derivedClass) {
+ getInstructionTranslatedElement(instruction)
+ .getInstructionInheritance(getInstructionTag(instruction), baseClass, derivedClass)
+ }
+
+ cached
+ int getInstructionElementSize(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionElementSize(getInstructionTag(instruction))
+ }
+
+ cached
+ predicate needsUnknownOpaqueType(int byteSize) {
+ exists(TranslatedElement element | element.needsUnknownOpaqueType(byteSize))
+ }
+
+ cached
+ Expr getInstructionConvertedResultExpression(Instruction instruction) {
+ exists(TranslatedExpr translatedExpr |
+ translatedExpr = getTranslatedExpr(result) and
+ instruction = translatedExpr.getResult() and
+ // Only associate `instruction` with this expression if the translated
+ // expression actually produced the instruction; not if it merely
+ // forwarded the result of another translated expression.
+ instruction = translatedExpr.getInstruction(_)
+ )
+ }
+
+ cached
+ Expr getInstructionUnconvertedResultExpression(Instruction instruction) {
+ result = getInstructionConvertedResultExpression(instruction).getUnconverted()
+ }
+}
+
+class TStageInstruction = TRawInstruction;
+
+predicate hasInstruction(TRawInstruction instr) { any() }
+
+predicate hasModeledMemoryResult(Instruction instruction) { none() }
+
+predicate hasConflatedMemoryResult(Instruction instruction) {
+ instruction instanceof AliasedDefinitionInstruction
+ or
+ instruction.getOpcode() instanceof Opcode::InitializeNonLocal
+}
+
+Instruction getRegisterOperandDefinition(Instruction instruction, RegisterOperandTag tag) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionRegisterOperand(getInstructionTag(instruction), tag)
+}
+
+Instruction getMemoryOperandDefinition(
+ Instruction instruction, MemoryOperandTag tag, Overlap overlap
+) {
+ none()
+}
+
+/**
+ * Holds if the partial operand of this `ChiInstruction` updates the bit range
+ * `[startBit, endBit)` of the total operand.
+ */
+predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBit, int endBit) { none() }
+
+/**
+ * Holds if the operand totally overlaps with its definition and consumes the
+ * bit range `[startBit, endBit)`.
+ */
+predicate getUsedInterval(Operand operand, int startBit, int endBit) { none() }
+
+predicate chiOnlyPartiallyUpdatesLocation(ChiInstruction chi) { none() }
+
+/** Gets a non-phi instruction that defines an operand of `instr`. */
+private Instruction getNonPhiOperandDef(Instruction instr) {
+ result = getRegisterOperandDefinition(instr, _)
+ or
+ result = getMemoryOperandDefinition(instr, _, _)
+}
+
+/**
+ * Gets a non-phi instruction that defines an operand of `instr` but only if
+ * both `instr` and the result have a neighbor on the other side of the edge
+ * between them. This is a necessary condition for being in a cycle, and it
+ * removes about two thirds of the tuples that would otherwise be in this
+ * predicate.
+ */
+private Instruction getNonPhiOperandDefOfIntermediate(Instruction instr) {
+ result = getNonPhiOperandDef(instr) and
+ exists(getNonPhiOperandDef(result)) and
+ instr = getNonPhiOperandDef(_)
+}
+
+/**
+ * Holds if `instr` is part of a cycle in the operand graph that doesn't go
+ * through a phi instruction and therefore should be impossible.
+ *
+ * If such cycles are present, either due to a programming error in the IR
+ * generation or due to a malformed database, it can cause infinite loops in
+ * analyses that assume a cycle-free graph of non-phi operands. Therefore it's
+ * better to remove these operands than to leave cycles in the operand graph.
+ */
+pragma[noopt]
+predicate isInCycle(Instruction instr) {
+ instr instanceof Instruction and
+ getNonPhiOperandDefOfIntermediate+(instr) = instr
+}
+
+CppType getInstructionOperandType(Instruction instruction, TypedOperandTag tag) {
+ // For all `LoadInstruction`s, the operand type of the `LoadOperand` is the same as
+ // the result type of the load.
+ tag instanceof LoadOperandTag and
+ result = instruction.(LoadInstruction).getResultLanguageType()
+ or
+ not instruction instanceof LoadInstruction and
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionMemoryOperandType(getInstructionTag(instruction), tag)
+}
+
+Instruction getPhiOperandDefinition(
+ PhiInstruction instruction, IRBlock predecessorBlock, Overlap overlap
+) {
+ none()
+}
+
+Instruction getPhiInstructionBlockStart(PhiInstruction instr) { none() }
+
+Instruction getInstructionSuccessor(Instruction instruction, EdgeKind kind) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getInstructionSuccessor(getInstructionTag(instruction), kind)
+}
+
+/**
+ * Holds if the CFG edge (`sourceElement`, `sourceTag`) ---`kind`-->
+ * `targetInstruction` is a back edge under the condition that
+ * `requiredAncestor` is an ancestor of `sourceElement`.
+ */
+private predicate backEdgeCandidate(
+ TranslatedElement sourceElement, InstructionTag sourceTag, TranslatedElement requiredAncestor,
+ Instruction targetInstruction, EdgeKind kind
+) {
+ // While loop:
+ // Any edge from within the body of the loop to the condition of the loop
+ // is a back edge. This includes edges from `continue` and the fall-through
+ // edge(s) after the last instruction(s) in the body.
+ exists(TranslatedWhileStmt s |
+ targetInstruction = s.getFirstConditionInstruction() and
+ targetInstruction = sourceElement.getInstructionSuccessor(sourceTag, kind) and
+ requiredAncestor = s.getBody()
+ )
+ or
+ // Do-while loop:
+ // The back edge should be the edge(s) from the condition to the
+ // body. This ensures that it's the back edge that will be pruned in a `do
+ // { ... } while (0)` statement. Note that all `continue` statements in a
+ // do-while loop produce forward edges.
+ exists(TranslatedDoStmt s |
+ targetInstruction = s.getBody().getFirstInstruction() and
+ targetInstruction = sourceElement.getInstructionSuccessor(sourceTag, kind) and
+ requiredAncestor = s.getCondition()
+ )
+ or
+ // For loop:
+ // Any edge from within the body or update of the loop to the condition of
+ // the loop is a back edge. When there is no loop update expression, this
+ // includes edges from `continue` and the fall-through edge(s) after the
+ // last instruction(s) in the body. A for loop may not have a condition, in
+ // which case `getFirstConditionInstruction` returns the body instead.
+ exists(TranslatedForStmt s |
+ targetInstruction = s.getFirstConditionInstruction() and
+ targetInstruction = sourceElement.getInstructionSuccessor(sourceTag, kind) and
+ (
+ requiredAncestor = s.getUpdate()
+ or
+ not exists(s.getUpdate()) and
+ requiredAncestor = s.getBody()
+ )
+ )
+ or
+ // Range-based for loop:
+ // Any edge from within the update of the loop to the condition of
+ // the loop is a back edge.
+ exists(TranslatedRangeBasedForStmt s |
+ targetInstruction = s.getCondition().getFirstInstruction() and
+ targetInstruction = sourceElement.getInstructionSuccessor(sourceTag, kind) and
+ requiredAncestor = s.getUpdate()
+ )
+}
+
+private predicate jumpSourceHasAncestor(TranslatedElement jumpSource, TranslatedElement ancestor) {
+ backEdgeCandidate(jumpSource, _, _, _, _) and
+ ancestor = jumpSource
+ or
+ // For performance, we don't want a fastTC here
+ jumpSourceHasAncestor(jumpSource, ancestor.getAChild())
+}
+
+Instruction getInstructionBackEdgeSuccessor(Instruction instruction, EdgeKind kind) {
+ exists(
+ TranslatedElement sourceElement, InstructionTag sourceTag, TranslatedElement requiredAncestor
+ |
+ backEdgeCandidate(sourceElement, sourceTag, requiredAncestor, result, kind) and
+ jumpSourceHasAncestor(sourceElement, requiredAncestor) and
+ instruction = sourceElement.getInstruction(sourceTag)
+ )
+ or
+ // Goto statement:
+ // As a conservative approximation, any edge out of `goto` is a back edge
+ // unless it goes strictly forward in the program text. A `goto` whose
+ // source and target are both inside a macro will be seen as having the
+ // same location for source and target, so we conservatively assume that
+ // such a `goto` creates a back edge.
+ exists(TranslatedElement s, GotoStmt goto |
+ not isStrictlyForwardGoto(goto) and
+ goto = s.getAST() and
+ exists(InstructionTag tag |
+ result = s.getInstructionSuccessor(tag, kind) and
+ instruction = s.getInstruction(tag)
+ )
+ )
+}
+
+/** Holds if `goto` jumps strictly forward in the program text. */
+private predicate isStrictlyForwardGoto(GotoStmt goto) {
+ goto.getLocation().isBefore(goto.getTarget().getLocation())
+}
+
+Locatable getInstructionAST(TStageInstruction instr) {
+ result = getInstructionTranslatedElement(instr).getAST()
+}
+
+CppType getInstructionResultType(TStageInstruction instr) {
+ getInstructionTranslatedElement(instr).hasInstruction(_, getInstructionTag(instr), result)
+}
+
+predicate getInstructionOpcode(Opcode opcode, TStageInstruction instr) {
+ getInstructionTranslatedElement(instr).hasInstruction(opcode, getInstructionTag(instr), _)
+}
+
+IRFunctionBase getInstructionEnclosingIRFunction(TStageInstruction instr) {
+ result.getFunction() = getInstructionTranslatedElement(instr).getFunction()
+}
+
+Instruction getPrimaryInstructionForSideEffect(SideEffectInstruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction)
+ .getPrimaryInstructionForSideEffect(getInstructionTag(instruction))
+}
+
+import CachedForDebugging
+
+cached
+private module CachedForDebugging {
+ cached
+ string getTempVariableUniqueId(IRTempVariable var) {
+ exists(TranslatedElement element |
+ var = element.getTempVariable(_) and
+ result = element.getId().toString() + ":" + getTempVariableTagId(var.getTag())
+ )
+ }
+
+ cached
+ predicate instructionHasSortKeys(Instruction instruction, int key1, int key2) {
+ key1 = getInstructionTranslatedElement(instruction).getId() and
+ getInstructionTag(instruction) =
+ rank[key2](InstructionTag tag, string tagId |
+ tagId = getInstructionTagId(tag)
+ |
+ tag order by tagId
+ )
+ }
+
+ cached
+ string getInstructionUniqueId(Instruction instruction) {
+ result =
+ getInstructionTranslatedElement(instruction).getId() + ":" +
+ getInstructionTagId(getInstructionTag(instruction))
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRFunctionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRFunctionImports.qll
new file mode 100644
index 00000000000..8ec63b7c1cb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRFunctionImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.internal.IRFunctionBase as IRFunctionBase
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRImports.qll
new file mode 100644
index 00000000000..42d6e7db693
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRInternal.qll
new file mode 100644
index 00000000000..82cc38ac092
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRInternal.qll
@@ -0,0 +1,4 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import IRConstruction as Construction
+import semmle.code.cpp.ir.implementation.IRConfiguration as IRConfiguration
+import IRConstruction::Raw as Raw
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRVariableImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRVariableImports.qll
new file mode 100644
index 00000000000..8c60565defc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRVariableImports.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.TempVariableTag as TempVariableTag
+import semmle.code.cpp.ir.internal.IRUtilities as IRUtilities
+import semmle.code.cpp.ir.internal.TempVariableTag as TTempVariableTag
+import semmle.code.cpp.ir.implementation.internal.TIRVariable as TIRVariable
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/InstructionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/InstructionImports.qll
new file mode 100644
index 00000000000..946fd770e94
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/InstructionImports.qll
@@ -0,0 +1,6 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
+import semmle.code.cpp.ir.implementation.Opcode as Opcode
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.internal.Overlap as Overlap
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/InstructionTag.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/InstructionTag.qll
new file mode 100644
index 00000000000..aed1ae21129
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/InstructionTag.qll
@@ -0,0 +1,224 @@
+private import cpp
+
+newtype TInstructionTag = // each branch names one instruction that a translated element can emit
+ OnlyInstructionTag() or // Single instruction (not including implicit Load)
+ InitializerVariableAddressTag() or
+ InitializerLoadStringTag() or
+ InitializerStoreTag() or
+ InitializerIndirectAddressTag() or
+ InitializerIndirectStoreTag() or
+ DynamicInitializationFlagAddressTag() or
+ DynamicInitializationFlagLoadTag() or
+ DynamicInitializationConditionalBranchTag() or
+ DynamicInitializationFlagConstantTag() or
+ DynamicInitializationFlagStoreTag() or
+ ZeroPadStringConstantTag() or
+ ZeroPadStringElementIndexTag() or
+ ZeroPadStringElementAddressTag() or
+ ZeroPadStringStoreTag() or
+ AssignOperationConvertLeftTag() or
+ AssignOperationOpTag() or
+ AssignOperationConvertResultTag() or
+ AssignmentStoreTag() or
+ CrementConstantTag() or
+ CrementOpTag() or
+ CrementStoreTag() or
+ EnterFunctionTag() or
+ ReturnValueAddressTag() or
+ ReturnTag() or
+ ExitFunctionTag() or
+ AliasedDefinitionTag() or
+ InitializeNonLocalTag() or
+ AliasedUseTag() or
+ SwitchBranchTag() or
+ CallTargetTag() or // `FunctionAddress` instruction holding the target of a direct call
+ CallTag() or // the `Call` instruction itself
+ CallSideEffectTag() or // `CallSideEffect` / `CallReadSideEffect` instruction attached to a call
+ AllocationSizeTag() or
+ AllocationElementSizeTag() or
+ AllocationExtentConvertTag() or
+ ValueConditionConditionalBranchTag() or
+ ConditionValueTrueTempAddressTag() or
+ ConditionValueTrueConstantTag() or
+ ConditionValueTrueStoreTag() or
+ ConditionValueFalseTempAddressTag() or
+ ConditionValueFalseConstantTag() or
+ ConditionValueFalseStoreTag() or
+ ConditionValueResultTempAddressTag() or
+ ConditionValueResultLoadTag() or
+ BoolConversionConstantTag() or
+ BoolConversionCompareTag() or
+ ResultCopyTag() or
+ LoadTag() or // Implicit load due to lvalue-to-rvalue conversion
+ CatchTag() or
+ ThrowTag() or
+ UnwindTag() or
+ InitializerUninitializedTag() or
+ InitializerFieldAddressTag() or
+ InitializerFieldDefaultValueTag() or
+ InitializerFieldDefaultValueStoreTag() or
+ InitializerElementIndexTag() or
+ InitializerElementAddressTag() or
+ InitializerElementDefaultValueTag() or
+ InitializerElementDefaultValueStoreTag() or
+ VarArgsStartEllipsisAddressTag() or
+ VarArgsStartTag() or
+ VarArgsVAListLoadTag() or
+ VarArgsArgAddressTag() or
+ VarArgsArgLoadTag() or
+ VarArgsMoveNextTag() or
+ VarArgsVAListStoreTag() or
+ AsmTag() or
+ AsmInputTag(int elementIndex) { exists(AsmStmt asm | exists(asm.getChild(elementIndex))) } or // one tag per index at which some `AsmStmt` has a child
+ ThisAddressTag() or
+ ThisLoadTag()
+
+class InstructionTag extends TInstructionTag { // class view over the `TInstructionTag` newtype
+ final string toString() { result = "Tag" } // every tag prints as the same string, "Tag"
+}
+
+/** Gets a unique string for `tag`. Primarily used to generate instruction IDs for stable IR dumps. */
+string getInstructionTagId(TInstructionTag tag) {
+  // Every branch of `TInstructionTag` needs a case here; a tag without one has no ID string.
+  tag = OnlyInstructionTag() and result = "Only" // Single instruction (not including implicit Load)
+  or
+  tag = InitializerVariableAddressTag() and result = "InitVarAddr"
+  or
+  tag = InitializerLoadStringTag() and result = "InitLoadStr"
+  or
+  tag = InitializerStoreTag() and result = "InitStore"
+  or
+  tag = InitializerUninitializedTag() and result = "InitUninit"
+  or
+  tag = InitializerIndirectAddressTag() and result = "InitIndirectAddr"
+  or
+  tag = InitializerIndirectStoreTag() and result = "InitIndirectStore"
+  or
+  tag = ZeroPadStringConstantTag() and result = "ZeroPadConst"
+  or
+  tag = ZeroPadStringElementIndexTag() and result = "ZeroPadElemIndex"
+  or
+  tag = ZeroPadStringElementAddressTag() and result = "ZeroPadElemAddr"
+  or
+  tag = ZeroPadStringStoreTag() and result = "ZeroPadStore"
+  or
+  tag = AssignOperationConvertLeftTag() and result = "AssignOpConvLeft"
+  or
+  tag = AssignOperationOpTag() and result = "AssignOpOp"
+  or
+  tag = AssignOperationConvertResultTag() and result = "AssignOpConvRes"
+  or
+  tag = AssignmentStoreTag() and result = "AssignStore"
+  or
+  tag = CrementConstantTag() and result = "CrementConst"
+  or
+  tag = CrementOpTag() and result = "CrementOp"
+  or
+  tag = CrementStoreTag() and result = "CrementStore"
+  or
+  tag = EnterFunctionTag() and result = "EnterFunc"
+  or
+  tag = ReturnValueAddressTag() and result = "RetValAddr"
+  or
+  tag = ReturnTag() and result = "Ret"
+  or
+  tag = ExitFunctionTag() and result = "ExitFunc"
+  or
+  tag = AliasedDefinitionTag() and result = "AliasedDef"
+  or
+  tag = InitializeNonLocalTag() and result = "InitNonLocal"
+  or
+  tag = AliasedUseTag() and result = "AliasedUse"
+  or
+  tag = SwitchBranchTag() and result = "SwitchBranch"
+  or
+  tag = CallTargetTag() and result = "CallTarget"
+  or
+  tag = CallTag() and result = "Call"
+  or
+  tag = CallSideEffectTag() and result = "CallSideEffect"
+  or
+  tag = AllocationSizeTag() and result = "AllocSize"
+  or
+  tag = AllocationElementSizeTag() and result = "AllocElemSize"
+  or
+  tag = AllocationExtentConvertTag() and result = "AllocExtConv"
+  or
+  tag = ValueConditionConditionalBranchTag() and result = "ValCondCondBranch"
+  or
+  tag = ConditionValueTrueTempAddressTag() and result = "CondValTrueTempAddr"
+  or
+  tag = ConditionValueTrueConstantTag() and result = "CondValTrueConst"
+  or
+  tag = ConditionValueTrueStoreTag() and result = "CondValTrueStore"
+  or
+  tag = ConditionValueFalseTempAddressTag() and result = "CondValFalseTempAddr"
+  or
+  tag = ConditionValueFalseConstantTag() and result = "CondValFalseConst"
+  or
+  tag = ConditionValueFalseStoreTag() and result = "CondValFalseStore"
+  or
+  tag = ConditionValueResultTempAddressTag() and result = "CondValResTempAddr"
+  or
+  tag = ConditionValueResultLoadTag() and result = "CondValResLoad"
+  or
+  tag = BoolConversionConstantTag() and result = "BoolConvConst"
+  or
+  tag = BoolConversionCompareTag() and result = "BoolConvComp"
+  or
+  tag = ResultCopyTag() and result = "ResultCopy" // previously missing: this tag had no ID
+  or
+  tag = LoadTag() and result = "Load" // Implicit load due to lvalue-to-rvalue conversion
+  or
+  tag = CatchTag() and result = "Catch"
+  or
+  tag = ThrowTag() and result = "Throw"
+  or
+  tag = UnwindTag() and result = "Unwind"
+  or
+  tag = InitializerFieldAddressTag() and result = "InitFieldAddr"
+  or
+  tag = InitializerFieldDefaultValueTag() and result = "InitFieldDefVal"
+  or
+  tag = InitializerFieldDefaultValueStoreTag() and result = "InitFieldDefValStore"
+  or
+  tag = InitializerElementIndexTag() and result = "InitElemIndex"
+  or
+  tag = InitializerElementAddressTag() and result = "InitElemAddr"
+  or
+  tag = InitializerElementDefaultValueTag() and result = "InitElemDefVal"
+  or
+  tag = InitializerElementDefaultValueStoreTag() and result = "InitElemDefValStore"
+  or
+  tag = VarArgsStartEllipsisAddressTag() and result = "VarArgsStartEllipsisAddr"
+  or
+  tag = VarArgsStartTag() and result = "VarArgsStart"
+  or
+  tag = VarArgsVAListLoadTag() and result = "VarArgsVAListLoad"
+  or
+  tag = VarArgsArgAddressTag() and result = "VarArgsArgAddr"
+  or
+  tag = VarArgsArgLoadTag() and result = "VaArgsArgLoad" // NOTE(review): spelled "VaArgs", not "VarArgs"; kept so existing IDs do not change
+  or
+  tag = VarArgsMoveNextTag() and result = "VarArgsMoveNext"
+  or
+  tag = VarArgsVAListStoreTag() and result = "VarArgsVAListStore"
+  or
+  tag = AsmTag() and result = "Asm"
+  or
+  exists(int index | tag = AsmInputTag(index) and result = "AsmInputTag(" + index + ")")
+  or
+  tag = DynamicInitializationFlagAddressTag() and result = "DynInitFlagAddr"
+  or
+  tag = DynamicInitializationFlagLoadTag() and result = "DynInitFlagLoad"
+  or
+  tag = DynamicInitializationConditionalBranchTag() and result = "DynInitCondBranch"
+  or
+  tag = DynamicInitializationFlagConstantTag() and result = "DynInitFlagConst"
+  or
+  tag = DynamicInitializationFlagStoreTag() and result = "DynInitFlagStore"
+  or
+  tag = ThisAddressTag() and result = "ThisAddress"
+  or
+  tag = ThisLoadTag() and result = "ThisLoad"
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/OperandImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/OperandImports.qll
new file mode 100644
index 00000000000..d0e013d1fba
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/OperandImports.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.internal.Overlap as Overlap
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.implementation.internal.TOperand as TOperand
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/OperandInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/OperandInternal.qll
new file mode 100644
index 00000000000..194e21e0d93
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/OperandInternal.qll
@@ -0,0 +1,2 @@
+private import semmle.code.cpp.ir.implementation.internal.TOperand
+import RawOperands
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/PrintIRImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/PrintIRImports.qll
new file mode 100644
index 00000000000..46254a6e3f2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/PrintIRImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.IRConfiguration as IRConfiguration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll
new file mode 100644
index 00000000000..50245fafde2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll
@@ -0,0 +1,169 @@
+/**
+ * Predicates to compute the modeled side effects of calls during IR construction.
+ *
+ * These are used in `TranslatedElement.qll` to generate the `TTranslatedSideEffect` instances, and
+ * also in `TranslatedCall.qll` to inject the actual side effect instructions.
+ */
+
+private import cpp
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.models.interfaces.PointerWrapper
+private import semmle.code.cpp.models.interfaces.SideEffect
+
+private predicate isDeeplyConst(Type t) {
+  // `t` is `const` itself and `isDeeplyConstBelow` also holds for it.
+  isDeeplyConstBelow(t) and t.isConst()
+  or
+  // Look through types for which the same question is asked of the base type.
+  exists(Type base | isDeeplyConst(base) |
+    base = t.(Decltype).getBaseType() or
+    base = t.(ReferenceType).getBaseType() or
+    base = t.(ArrayType).getBaseType()
+  )
+  or
+  // A specified type with an explicit `const` specifier; its base type is checked "below".
+  t.(SpecifiedType).getASpecifier().getName() = "const" and
+  isDeeplyConstBelow(t.(SpecifiedType).getBaseType())
+}
+
+private predicate isDeeplyConstBelow(Type t) {
+  // Base cases, decided without recursing. A `PointerWrapper` class is excluded here and is
+  // handled through its first template argument further down.
+  t instanceof BuiltInType
+  or
+  t instanceof Enum
+  or
+  t instanceof Class and not t instanceof PointerWrapper
+  or
+  // Types for which the same "below" question is simply asked of the base type.
+  exists(Type base | isDeeplyConstBelow(base) |
+    base = t.(Decltype).getBaseType() or
+    base = t.(SpecifiedType).getBaseType() or
+    base = t.(TypedefType).getBaseType()
+  )
+  or
+  // Types built on another type: the check switches to `isDeeplyConst` for that other type,
+  // whether it is a base type or the wrapper's first template argument.
+  exists(Type target | isDeeplyConst(target) |
+    target = t.(PointerType).getBaseType() or
+    target = t.(ReferenceType).getBaseType() or
+    target = t.(ArrayType).getBaseType() or
+    target = t.(GNUVectorType).getBaseType() or
+    target = t.(FunctionPointerIshType).getBaseType() or
+    target = t.(PointerToMemberType).getBaseType() or
+    target = t.(PointerWrapper).getTemplateArgument(0)
+  )
+}
+
+private predicate isConstPointerLike(Type t) {
+  isDeeplyConstBelow(t) and // in addition, `t` must be one of the pointer-like kinds below
+  (
+    t instanceof PointerType
+    or
+    t instanceof ReferenceType
+    or
+    t instanceof ArrayType
+    or
+    t instanceof PointerWrapper
+  )
+}
+
+/**
+ * Holds if `call` has a side effect on parameter index `i` that is assumed by default instead
+ * of coming from a `SideEffectFunction` model.
+ */
+private predicate hasDefaultSideEffect(Call call, ParameterIndex i, boolean buffer, boolean isWrite) {
+  // Targets that are a `SideEffectFunction` never get default side effects.
+  not call.getTarget() instanceof SideEffectFunction and
+  (
+    // Index -1 (`*this`): a non-static member function, including a constructor or destructor,
+    // may write to `*this`, and may also read from `*this` if it is not a constructor.
+    exists(MemberFunction member | member = call.getTarget() and not member.isStatic() |
+      i = -1 and
+      buffer = false and
+      (
+        isWrite = true and not member instanceof ConstMemberFunction
+        or
+        isWrite = false and not member instanceof Constructor
+      )
+    )
+    or
+    // A pointer-like argument is assumed to read from the pointed-to buffer, and may write to
+    // the buffer as well unless the pointer points to a `const` value.
+    exists(Type argType |
+      argType = call.getArgument(i).getFullyConverted().getUnspecifiedType() and
+      i >= 0 and
+      buffer = true
+    |
+      (
+        argType instanceof PointerType or
+        argType instanceof ReferenceType or
+        argType instanceof ArrayType or
+        argType instanceof PointerWrapper
+      ) and
+      (
+        isWrite = false
+        or
+        isWrite = true and
+        not isConstPointerLike(call.getTarget().getParameter(i).getUnderlyingType())
+      )
+    )
+  )
+}
+
+/**
+ * Gets a side effect opcode for parameter index `i` of `call`, based on the target's
+ * `SideEffectFunction` model when it has one and on `hasDefaultSideEffect` otherwise.
+ * This predicate will return at most two results: one read side effect, and one write side effect.
+ */
+Opcode getASideEffectOpcode(Call call, ParameterIndex i) {
+ exists(boolean buffer | // first disjunct: read side effects
+ (
+ call.getTarget().(SideEffectFunction).hasSpecificReadSideEffect(i, buffer)
+ or
+ not call.getTarget() instanceof SideEffectFunction and
+ hasDefaultSideEffect(call, i, buffer, false)
+ ) and
+ if exists(call.getTarget().(SideEffectFunction).getParameterSizeIndex(i)) // a modeled size parameter selects the sized-buffer opcode
+ then (
+ buffer = true and
+ result instanceof Opcode::SizedBufferReadSideEffect
+ ) else (
+ buffer = false and result instanceof Opcode::IndirectReadSideEffect
+ or
+ buffer = true and result instanceof Opcode::BufferReadSideEffect
+ )
+ )
+ or
+ exists(boolean buffer, boolean mustWrite | // second disjunct: write side effects
+ (
+ call.getTarget().(SideEffectFunction).hasSpecificWriteSideEffect(i, buffer, mustWrite)
+ or
+ not call.getTarget() instanceof SideEffectFunction and
+ hasDefaultSideEffect(call, i, buffer, true) and
+ mustWrite = false // default write side effects are always "may write"
+ ) and
+ if exists(call.getTarget().(SideEffectFunction).getParameterSizeIndex(i)) // a modeled size parameter selects the sized-buffer opcodes
+ then (
+ buffer = true and
+ mustWrite = false and
+ result instanceof Opcode::SizedBufferMayWriteSideEffect
+ or
+ buffer = true and
+ mustWrite = true and
+ result instanceof Opcode::SizedBufferMustWriteSideEffect
+ ) else (
+ buffer = false and
+ mustWrite = false and
+ result instanceof Opcode::IndirectMayWriteSideEffect
+ or
+ buffer = false and
+ mustWrite = true and
+ result instanceof Opcode::IndirectMustWriteSideEffect
+ or
+ buffer = true and mustWrite = false and result instanceof Opcode::BufferMayWriteSideEffect
+ or
+ buffer = true and mustWrite = true and result instanceof Opcode::BufferMustWriteSideEffect
+ )
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll
new file mode 100644
index 00000000000..56d4c807ac8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll
@@ -0,0 +1,587 @@
+private import cpp
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.models.interfaces.SideEffect
+private import InstructionTag
+private import SideEffects
+private import TranslatedElement
+private import TranslatedExpr
+private import TranslatedFunction
+
+/**
+ * Gets the `CallInstruction` generated for `call`: the instruction tagged `CallTag()` on the
+ * `TranslatedCallExpr` whose expression is `call`.
+ */
+private CallInstruction getTranslatedCallInstruction(Call call) {
+  exists(TranslatedCallExpr callTranslation | call = callTranslation.getExpr() |
+    result = callTranslation.getInstruction(CallTag())
+  )
+}
+
+/**
+ * The IR translation of a function call. The call may be an actual call in the
+ * source code, or a call generated as part of the translation of a higher-level
+ * construct (e.g. the allocator call in a `NewExpr`).
+ */
+abstract class TranslatedCall extends TranslatedExpr {
+  final override TranslatedElement getChild(int id) {
+    // Child ids follow the order of evaluation. The qualifier comes before the
+    // call target because, for a virtual call, the value of the call target may
+    // depend on the value of the qualifier. Arguments keep their own index, and
+    // the side effects take the id just past the arguments.
+    result = this.getQualifier() and id = -2
+    or
+    result = this.getCallTarget() and id = -1
+    or
+    result = this.getArgument(id)
+    or
+    result = this.getSideEffects() and id = this.getNumberOfArguments()
+  }
+
+  final override Instruction getFirstInstruction() {
+    if exists(this.getQualifier())
+    then result = this.getQualifier().getFirstInstruction()
+    else result = this.getFirstCallTargetInstruction()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::Call and
+    tag = CallTag() and
+    resultType = getTypeForPRValue(this.getCallResultType())
+    or
+    tag = CallSideEffectTag() and
+    this.hasSideEffect() and
+    (
+      if this.hasWriteSideEffect()
+      then (
+        resultType = getUnknownType() and
+        opcode instanceof Opcode::CallSideEffect
+      ) else (
+        resultType = getVoidType() and
+        opcode instanceof Opcode::CallReadSideEffect
+      )
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getQualifier() and
+    result = this.getFirstCallTargetInstruction()
+    or
+    child = this.getCallTarget() and
+    result = this.getFirstArgumentOrCallInstruction()
+    or
+    exists(int n |
+      child = this.getArgument(n) and
+      if exists(this.getArgument(n + 1))
+      then result = this.getArgument(n + 1).getFirstInstruction()
+      else result = this.getInstruction(CallTag())
+    )
+    or
+    child = this.getSideEffects() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    (
+      (
+        tag = CallTag() and
+        if this.hasSideEffect()
+        then result = this.getInstruction(CallSideEffectTag())
+        else
+          if this.hasPreciseSideEffect()
+          then result = this.getSideEffects().getFirstInstruction()
+          else result = this.getParent().getChildSuccessor(this)
+      )
+      or
+      (
+        tag = CallSideEffectTag() and
+        this.hasSideEffect() and
+        if this.hasPreciseSideEffect()
+        then result = this.getSideEffects().getFirstInstruction()
+        else result = this.getParent().getChildSuccessor(this)
+      )
+    )
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = CallTag() and
+    (
+      result = this.getCallTargetResult() and
+      operandTag instanceof CallTargetOperandTag
+      or
+      result = this.getQualifierResult() and
+      operandTag instanceof ThisArgumentOperandTag
+      or
+      exists(PositionalArgumentOperandTag positional |
+        positional = operandTag and
+        result = this.getArgument(positional.getArgIndex()).getResult()
+      )
+    )
+  }
+
+  final override CppType getInstructionMemoryOperandType(
+    InstructionTag tag, TypedOperandTag operandTag
+  ) {
+    this.hasSideEffect() and
+    tag = CallSideEffectTag() and
+    operandTag instanceof SideEffectOperandTag and
+    result = getUnknownType()
+  }
+
+  final override Instruction getResult() { result = this.getInstruction(CallTag()) }
+
+  /**
+   * Gets the type of the value produced by the call.
+   */
+  abstract Type getCallResultType();
+
+  /**
+   * Holds if the call has a `this` argument.
+   */
+  predicate hasQualifier() { exists(this.getQualifier()) }
+
+  /**
+   * Gets the `TranslatedExpr` that computes the target of an indirect call, if any.
+   */
+  TranslatedExpr getCallTarget() { none() }
+
+  /**
+   * Gets the first instruction of the sequence that evaluates the call target.
+   * By default, this is the first instruction of `getCallTarget()`; a subclass
+   * can override it when the call target is not computed from an expression
+   * (e.g. a direct call).
+   */
+  Instruction getFirstCallTargetInstruction() { result = this.getCallTarget().getFirstInstruction() }
+
+  /**
+   * Gets the instruction whose result value is the target of the call. By
+   * default, this is the result of `getCallTarget()`; a subclass can override
+   * it when the call target is not computed from an expression (e.g. a direct
+   * call).
+   */
+  Instruction getCallTargetResult() { result = this.getCallTarget().getResult() }
+
+  /**
+   * Gets the `TranslatedExpr` for the qualifier of the call (i.e. the value
+   * that is passed as the `this` argument).
+   */
+  abstract TranslatedExpr getQualifier();
+
+  /**
+   * Gets the instruction whose result value is the `this` argument of the call.
+   * By default, this is the result of `getQualifier()`; a subclass can override
+   * it when the `this` argument is not computed from a child expression (e.g. a
+   * constructor call).
+   */
+  Instruction getQualifierResult() { result = this.getQualifier().getResult() }
+
+  /**
+   * Gets the translation of the argument at position `index`. The `this`
+   * argument is not included.
+   */
+  abstract TranslatedExpr getArgument(int index);
+
+  abstract int getNumberOfArguments(); // also the child id given to the side effects
+
+  /**
+   * Gets the first instruction of the first argument if the call has any
+   * arguments, and the call instruction otherwise.
+   */
+  final Instruction getFirstArgumentOrCallInstruction() {
+    if this.hasArguments()
+    then result = this.getArgument(0).getFirstInstruction()
+    else result = this.getInstruction(CallTag())
+  }
+
+  /**
+   * Holds if the call has any arguments, not counting the `this` argument.
+   */
+  abstract predicate hasArguments();
+
+  predicate hasReadSideEffect() { any() } // holds by default; a subclass may narrow it
+
+  predicate hasWriteSideEffect() { any() } // holds by default; a subclass may narrow it
+
+  private predicate hasSideEffect() { this.hasReadSideEffect() or this.hasWriteSideEffect() }
+
+  override Instruction getPrimaryInstructionForSideEffect(InstructionTag tag) {
+    tag = CallSideEffectTag() and
+    this.hasSideEffect() and
+    result = this.getResult()
+  }
+
+  predicate hasPreciseSideEffect() { exists(this.getSideEffects()) }
+
+  final TranslatedSideEffects getSideEffects() { result.getExpr() = expr }
+}
+
+abstract class TranslatedSideEffects extends TranslatedElement {
+  abstract Expr getExpr(); // the expression whose side effects are grouped under this element
+
+  final override Locatable getAST() { result = this.getExpr() }
+
+  final override Function getFunction() { result = this.getExpr().getEnclosingFunction() }
+
+  override TranslatedElement getChild(int i) {
+    result =
+      rank[i + 1](TranslatedSideEffect effect, int writeRank, int argIndex |
+        (
+          effect.getCall() = this.getExpr() and
+          effect.getArgumentIndex() = argIndex and
+          if effect.isWrite() then writeRank = 1 else writeRank = 0
+        )
+      |
+        effect order by writeRank, argIndex // reads before writes, then by argument index
+      )
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement te) {
+    exists(int pos |
+      te = this.getChild(pos) and
+      if exists(this.getChild(pos + 1))
+      then result = this.getChild(pos + 1).getFirstInstruction()
+      else result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  /**
+   * Gets the `TranslatedFunction` for the function that encloses `getExpr()`.
+   */
+  final TranslatedFunction getEnclosingFunction() {
+    result = getTranslatedFunction(this.getExpr().getEnclosingFunction())
+  }
+}
+
+/**
+ * IR translation of a direct call to a specific function. Shared by explicit
+ * calls (`TranslatedFunctionCall`) and implicit calls
+ * (`TranslatedAllocatorCall`).
+ */
+abstract class TranslatedDirectCall extends TranslatedCall {
+  final override Instruction getFirstCallTargetInstruction() {
+    result = this.getInstruction(CallTargetTag())
+  }
+
+  final override Instruction getCallTargetResult() { result = this.getInstruction(CallTargetTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    TranslatedCall.super.hasInstruction(opcode, tag, resultType)
+    or
+    opcode instanceof Opcode::FunctionAddress and // the call target is a function address
+    tag = CallTargetTag() and
+    resultType = getFunctionGLValueType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    result = TranslatedCall.super.getInstructionSuccessor(tag, kind)
+    or
+    kind instanceof GotoEdge and // after the target: first argument, or the call if there is none
+    tag = CallTargetTag() and
+    result = this.getFirstArgumentOrCallInstruction()
+  }
+}
+
+/**
+ * The IR translation of a `Call` expression.
+ */
+abstract class TranslatedCallExpr extends TranslatedNonConstantExpr, TranslatedCall {
+  override Call expr;
+
+  final override Type getCallResultType() { result = expr.getType() } // the type of the call expression
+
+  final override predicate hasArguments() { exists(expr.getArgument(0)) } // i.e. argument 0 exists
+
+  final override TranslatedExpr getQualifier() {
+    result = getTranslatedExpr(expr.getQualifier().getFullyConverted()) // fully converted qualifier
+  }
+
+  final override TranslatedExpr getArgument(int index) {
+    result = getTranslatedExpr(expr.getArgument(index).getFullyConverted()) // fully converted argument
+  }
+
+  final override int getNumberOfArguments() { result = expr.getNumberOfArguments() }
+}
+
+/**
+ * The IR translation of a call through a function pointer (an `ExprCall`).
+ */
+class TranslatedExprCall extends TranslatedCallExpr {
+  override ExprCall expr;
+
+  override TranslatedExpr getCallTarget() {
+    result = getTranslatedExpr(expr.getExpr().getFullyConverted()) // the expression being called
+  }
+}
+
+/**
+ * The IR translation of a direct function call (a `FunctionCall`).
+ */
+class TranslatedFunctionCall extends TranslatedCallExpr, TranslatedDirectCall {
+  override FunctionCall expr;
+
+  override Function getInstructionFunction(InstructionTag tag) {
+    result = expr.getTarget() and tag = CallTargetTag()
+  }
+
+  override predicate hasReadSideEffect() {
+    not expr.getTarget().(SideEffectFunction).hasOnlySpecificReadSideEffects() // dropped only when the model says so
+  }
+
+  override predicate hasWriteSideEffect() {
+    not expr.getTarget().(SideEffectFunction).hasOnlySpecificWriteSideEffects() // dropped only when the model says so
+  }
+
+  override Instruction getQualifierResult() {
+    result = this.getQualifier().getResult() and
+    this.hasQualifier()
+  }
+
+  override predicate hasQualifier() {
+    not expr.getTarget().(MemberFunction).isStatic() and // a static member function has no `this`
+    exists(this.getQualifier())
+  }
+}
+
+/**
+ * The IR translation of a call to a constructor or a destructor.
+ */
+class TranslatedStructorCall extends TranslatedFunctionCall {
+  TranslatedStructorCall() {
+    expr instanceof DestructorCall or
+    expr instanceof ConstructorCall
+  }
+
+  override Instruction getQualifierResult() {
+    exists(StructorCallContext structorContext |
+      structorContext = this.getParent() and
+      result = structorContext.getReceiver()
+    )
+  }
+
+  override predicate hasQualifier() { any() } // the receiver comes from the parent context
+}
+
+class TranslatedAllocationSideEffects extends TranslatedSideEffects,
+  TTranslatedAllocationSideEffects {
+  AllocationExpr expr;
+
+  TranslatedAllocationSideEffects() { this = TTranslatedAllocationSideEffects(expr) }
+
+  final override AllocationExpr getExpr() { result = expr }
+
+  override string toString() { result = "(allocation side effects for " + expr.toString() + ")" }
+
+  override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType type) {
+    tag = OnlyInstructionTag() and
+    opcode instanceof Opcode::InitializeDynamicAllocation and
+    type = getUnknownType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind = EdgeKind::gotoEdge() and
+    tag = OnlyInstructionTag() and
+    if exists(this.getChild(0))
+    then result = this.getChild(0).getFirstInstruction()
+    else result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    operandTag = addressOperand() and // the address operand is the primary (call) instruction
+    tag = OnlyInstructionTag() and
+    result = this.getPrimaryInstructionForSideEffect(OnlyInstructionTag())
+  }
+
+  override Instruction getPrimaryInstructionForSideEffect(InstructionTag tag) {
+    tag = OnlyInstructionTag() and
+    if expr instanceof NewOrNewArrayExpr
+    then result = getTranslatedAllocatorCall(expr).getInstruction(CallTag())
+    else result = getTranslatedCallInstruction(expr)
+  }
+}
+
+class TranslatedCallSideEffects extends TranslatedSideEffects, TTranslatedCallSideEffects {
+  Call expr;
+
+  TranslatedCallSideEffects() { this = TTranslatedCallSideEffects(expr) }
+
+  override string toString() { result = "(side effects for " + expr.toString() + ")" }
+
+  override Call getExpr() { result = expr }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType type) { none() } // no instruction of its own
+
+  override Instruction getFirstInstruction() { result = this.getChild(0).getFirstInstruction() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getPrimaryInstructionForSideEffect(InstructionTag tag) {
+    result = getTranslatedCallInstruction(expr) and
+    tag = OnlyInstructionTag()
+  }
+}
+
+class TranslatedStructorCallSideEffects extends TranslatedCallSideEffects {
+  TranslatedStructorCallSideEffects() {
+    getASideEffectOpcode(expr, -1) instanceof WriteSideEffectOpcode and
+    this.getParent().(TranslatedStructorCall).hasQualifier()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType t) {
+    opcode = getASideEffectOpcode(expr, -1).(WriteSideEffectOpcode) and
+    tag instanceof OnlyInstructionTag and
+    t = getTypeForPRValue(expr.getTarget().getDeclaringType())
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = OnlyInstructionTag() and
+    kind instanceof GotoEdge and
+    (
+      if exists(this.getChild(0))
+      then result = this.getChild(0).getFirstInstruction()
+      else result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    operandTag instanceof AddressOperandTag and // the address written is the call's receiver
+    tag instanceof OnlyInstructionTag and
+    result = this.getParent().(TranslatedStructorCall).getQualifierResult()
+  }
+
+  final override int getInstructionIndex(InstructionTag tag) {
+    result = -1 and
+    tag = OnlyInstructionTag()
+  }
+}
+
+class TranslatedSideEffect extends TranslatedElement, TTranslatedArgumentSideEffect {
+ Call call; // the call this side effect belongs to
+ Expr arg; // expression whose translated result is used as the address operand
+ int index; // argument index; also reported as the instruction index
+ SideEffectOpcode sideEffectOpcode; // opcode of the single instruction generated here
+
+ TranslatedSideEffect() {
+ this = TTranslatedArgumentSideEffect(call, arg, index, sideEffectOpcode)
+ }
+
+ override Locatable getAST() { result = arg }
+
+ Expr getExpr() { result = arg }
+
+ Call getCall() { result = call }
+
+ int getArgumentIndex() { result = index }
+
+ predicate isWrite() { sideEffectOpcode instanceof WriteSideEffectOpcode } // otherwise treated as a read
+
+ override string toString() {
+ isWrite() and
+ result = "(write side effect for " + arg.toString() + ")"
+ or
+ not isWrite() and
+ result = "(read side effect for " + arg.toString() + ")"
+ }
+
+ override TranslatedElement getChild(int n) { none() } // leaf element: no children
+
+ override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+ override Instruction getFirstInstruction() { result = getInstruction(OnlyInstructionTag()) }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType type) {
+ tag = OnlyInstructionTag() and
+ opcode = sideEffectOpcode and
+ (
+ isWrite() and // write side effects: the result type depends on what is written
+ (
+ opcode instanceof BufferAccessOpcode and // buffer writes get the unknown type
+ type = getUnknownType()
+ or
+ not opcode instanceof BufferAccessOpcode and // otherwise use the base type of `arg`'s derived type
+ exists(Type baseType | baseType = arg.getUnspecifiedType().(DerivedType).getBaseType() |
+ if baseType instanceof VoidType
+ then type = getUnknownType()
+ else type = getTypeForPRValueOrUnknown(baseType)
+ )
+ or
+ index = -1 and // index -1 with a non-derived `arg` type: use that type directly
+ not arg.getUnspecifiedType() instanceof DerivedType and
+ type = getTypeForPRValueOrUnknown(arg.getUnspecifiedType())
+ )
+ or
+ not isWrite() and // read side effects have a void result type
+ type = getVoidType()
+ )
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ result = getParent().getChildSuccessor(this) and
+ tag = OnlyInstructionTag() and
+ kind instanceof GotoEdge
+ }
+
+ override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ tag instanceof OnlyInstructionTag and
+ operandTag instanceof AddressOperandTag and
+ result = getTranslatedExpr(arg).getResult()
+ or
+ tag instanceof OnlyInstructionTag and
+ operandTag instanceof BufferSizeOperandTag and // size is the argument at the modeled `getParameterSizeIndex(index)`
+ result =
+ getTranslatedExpr(call.getArgument(call.getTarget()
+ .(SideEffectFunction)
+ .getParameterSizeIndex(index)).getFullyConverted()).getResult()
+ }
+
+ override CppType getInstructionMemoryOperandType(InstructionTag tag, TypedOperandTag operandTag) {
+ not isWrite() and // only read side effects get a memory operand type here
+ if sideEffectOpcode instanceof BufferAccessOpcode
+ then
+ result = getUnknownType() and
+ tag instanceof OnlyInstructionTag and
+ operandTag instanceof SideEffectOperandTag
+ else
+ exists(Type operandType |
+ tag instanceof OnlyInstructionTag and
+ operandType = arg.getType().getUnspecifiedType().(DerivedType).getBaseType() and
+ operandTag instanceof SideEffectOperandTag
+ or
+ tag instanceof OnlyInstructionTag and
+ operandType = arg.getType().getUnspecifiedType() and
+ not operandType instanceof DerivedType and
+ operandTag instanceof SideEffectOperandTag
+ |
+ // If the type we select is an incomplete type (e.g. a forward-declared `struct`), there will
+ // not be a `CppType` that represents that type. In that case, fall back to `UnknownCppType`.
+ result = getTypeForPRValueOrUnknown(operandType)
+ )
+ }
+
+ override Instruction getPrimaryInstructionForSideEffect(InstructionTag tag) {
+ tag = OnlyInstructionTag() and
+ result = getTranslatedCallInstruction(call)
+ }
+
+ final override int getInstructionIndex(InstructionTag tag) {
+ tag = OnlyInstructionTag() and
+ result = index
+ }
+
+ /**
+ * Gets the `TranslatedFunction` for the function that encloses `arg`.
+ */
+ final TranslatedFunction getEnclosingFunction() {
+ result = getTranslatedFunction(arg.getEnclosingFunction())
+ }
+
+ /**
+ * Gets the `Function` that encloses `arg`.
+ */
+ override Function getFunction() { result = arg.getEnclosingFunction() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCondition.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCondition.qll
new file mode 100644
index 00000000000..0779d6fbda5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCondition.qll
@@ -0,0 +1,192 @@
+private import cpp
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import InstructionTag
+private import TranslatedElement
+private import TranslatedExpr
+
+abstract class ConditionContext extends TranslatedElement {
+ abstract Instruction getChildTrueSuccessor(TranslatedCondition child);
+
+ abstract Instruction getChildFalseSuccessor(TranslatedCondition child);
+}
+
+TranslatedCondition getTranslatedCondition(Expr expr) { result.getExpr() = expr }
+
+abstract class TranslatedCondition extends TranslatedElement {
+ Expr expr;
+
+ final override string toString() { result = expr.toString() }
+
+ final override Locatable getAST() { result = expr }
+
+ final ConditionContext getConditionContext() { result = getParent() }
+
+ final Expr getExpr() { result = expr }
+
+ final override Function getFunction() { result = expr.getEnclosingFunction() }
+
+ final Type getResultType() { result = expr.getUnspecifiedType() }
+}
+
+abstract class TranslatedFlexibleCondition extends TranslatedCondition, ConditionContext,
+ TTranslatedFlexibleCondition {
+ TranslatedFlexibleCondition() { this = TTranslatedFlexibleCondition(expr) }
+
+ final override TranslatedElement getChild(int id) { id = 0 and result = getOperand() }
+
+ final override Instruction getFirstInstruction() { result = getOperand().getFirstInstruction() }
+
+ final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none()
+ }
+
+ final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+ final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+ abstract TranslatedCondition getOperand();
+}
+
+class TranslatedParenthesisCondition extends TranslatedFlexibleCondition {
+ override ParenthesisExpr expr;
+
+ final override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+ child = getOperand() and
+ result = getConditionContext().getChildTrueSuccessor(this)
+ }
+
+ final override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+ child = getOperand() and
+ result = getConditionContext().getChildFalseSuccessor(this)
+ }
+
+ final override TranslatedCondition getOperand() {
+ result = getTranslatedCondition(expr.getExpr())
+ }
+}
+
+class TranslatedNotCondition extends TranslatedFlexibleCondition {
+ override NotExpr expr;
+
+ override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+ child = getOperand() and
+ result = getConditionContext().getChildFalseSuccessor(this)
+ }
+
+ override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+ child = getOperand() and
+ result = getConditionContext().getChildTrueSuccessor(this)
+ }
+
+ override TranslatedCondition getOperand() {
+ result = getTranslatedCondition(expr.getOperand().getFullyConverted())
+ }
+}
+
+abstract class TranslatedNativeCondition extends TranslatedCondition, TTranslatedNativeCondition {
+ TranslatedNativeCondition() { this = TTranslatedNativeCondition(expr) }
+
+ final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+}
+
+abstract class TranslatedBinaryLogicalOperation extends TranslatedNativeCondition, ConditionContext {
+ override BinaryLogicalOperation expr;
+
+ final override TranslatedElement getChild(int id) {
+ id = 0 and result = getLeftOperand()
+ or
+ id = 1 and result = getRightOperand()
+ }
+
+ final override Instruction getFirstInstruction() {
+ result = getLeftOperand().getFirstInstruction()
+ }
+
+ final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none()
+ }
+
+ final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+ final TranslatedCondition getLeftOperand() {
+ result = getTranslatedCondition(expr.getLeftOperand().getFullyConverted())
+ }
+
+ final TranslatedCondition getRightOperand() {
+ result = getTranslatedCondition(expr.getRightOperand().getFullyConverted())
+ }
+}
+
+class TranslatedLogicalAndExpr extends TranslatedBinaryLogicalOperation {
+ TranslatedLogicalAndExpr() { expr instanceof LogicalAndExpr }
+
+ override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+ child = getLeftOperand() and
+ result = getRightOperand().getFirstInstruction()
+ or
+ child = getRightOperand() and
+ result = getConditionContext().getChildTrueSuccessor(this)
+ }
+
+ override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+ (child = getLeftOperand() or child = getRightOperand()) and
+ result = getConditionContext().getChildFalseSuccessor(this)
+ }
+}
+
+class TranslatedLogicalOrExpr extends TranslatedBinaryLogicalOperation {
+ override LogicalOrExpr expr;
+
+ override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+ (child = getLeftOperand() or child = getRightOperand()) and
+ result = getConditionContext().getChildTrueSuccessor(this)
+ }
+
+ override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+ child = getLeftOperand() and
+ result = getRightOperand().getFirstInstruction()
+ or
+ child = getRightOperand() and
+ result = getConditionContext().getChildFalseSuccessor(this)
+ }
+}
+
+class TranslatedValueCondition extends TranslatedCondition, TTranslatedValueCondition {
+ TranslatedValueCondition() { this = TTranslatedValueCondition(expr) }
+
+ override TranslatedElement getChild(int id) { id = 0 and result = getValueExpr() }
+
+ override Instruction getFirstInstruction() { result = getValueExpr().getFirstInstruction() }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ tag = ValueConditionConditionalBranchTag() and
+ opcode instanceof Opcode::ConditionalBranch and
+ resultType = getVoidType()
+ }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ child = getValueExpr() and
+ result = getInstruction(ValueConditionConditionalBranchTag())
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ tag = ValueConditionConditionalBranchTag() and
+ (
+ kind instanceof TrueEdge and
+ result = getConditionContext().getChildTrueSuccessor(this)
+ or
+ kind instanceof FalseEdge and
+ result = getConditionContext().getChildFalseSuccessor(this)
+ )
+ }
+
+ override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ tag = ValueConditionConditionalBranchTag() and
+ operandTag instanceof ConditionOperandTag and
+ result = getValueExpr().getResult()
+ }
+
+ private TranslatedExpr getValueExpr() { result = getTranslatedExpr(expr) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll
new file mode 100644
index 00000000000..de63b81c876
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll
@@ -0,0 +1,288 @@
+private import cpp
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.ir.internal.IRUtilities
+private import InstructionTag
+private import TranslatedElement
+private import TranslatedExpr
+private import TranslatedFunction
+private import TranslatedInitialization
+
+/**
+ * Gets the `TranslatedDeclarationEntry` that represents the declaration
+ * `entry`.
+ */
+TranslatedDeclarationEntry getTranslatedDeclarationEntry(DeclarationEntry entry) {
+ result.getAST() = entry
+}
+
+/**
+ * Represents the IR translation of a declaration within the body of a function.
+ * Most often, this is the declaration of an automatic local variable, although
+ * it can also be the declaration of a static local variable. Declarations of extern variables and
+ * functions do not have a `TranslatedDeclarationEntry`.
+ */
+abstract class TranslatedDeclarationEntry extends TranslatedElement, TTranslatedDeclarationEntry {
+ DeclarationEntry entry;
+
+ TranslatedDeclarationEntry() { this = TTranslatedDeclarationEntry(entry) }
+
+ final override Function getFunction() {
+ exists(DeclStmt stmt |
+ stmt.getADeclarationEntry() = entry and
+ result = stmt.getEnclosingFunction()
+ )
+ }
+
+ final override string toString() { result = entry.toString() }
+
+ final override Locatable getAST() { result = entry }
+}
+
+/**
+ * Represents the IR translation of the declaration of a local variable,
+ * including its initialization, if any.
+ */
+abstract class TranslatedLocalVariableDeclaration extends TranslatedVariableInitialization {
+ /**
+ * Gets the local variable being declared.
+ */
+ abstract LocalVariable getVariable();
+
+ final override Type getTargetType() { result = getVariableType(getVariable()) }
+
+ final override TranslatedInitialization getInitialization() {
+ result =
+ getTranslatedInitialization(getVariable().getInitializer().getExpr().getFullyConverted())
+ }
+
+ final override Instruction getInitializationSuccessor() {
+ result = getParent().getChildSuccessor(this)
+ }
+
+ final override IRVariable getIRVariable() {
+ result = getIRUserVariable(getFunction(), getVariable())
+ }
+}
+
+/**
+ * The IR translation of a local variable declaration within a declaration statement.
+ */
+class TranslatedAutoVariableDeclarationEntry extends TranslatedLocalVariableDeclaration,
+ TranslatedDeclarationEntry {
+ StackVariable var;
+
+ TranslatedAutoVariableDeclarationEntry() { var = entry.getDeclaration() }
+
+ override LocalVariable getVariable() { result = var }
+}
+
+/**
+ * The IR translation of the declaration of a static local variable.
+ * This element generates the logic that determines whether or not the variable has already been
+ * initialized, and if not, invokes the initializer and sets the dynamic initialization flag for the
+ * variable. The actual initialization code is handled in
+ * `TranslatedStaticLocalVariableInitialization`, which is a child of this element.
+ *
+ * The generated code to do the initialization only once is:
+ * ```
+ * Block 1
+ * r1225_1(glval) = VariableAddress[c#init] :
+ * r1225_2(bool) = Load : &:r1225_1, ~mu1222_4
+ * v1225_3(void) = ConditionalBranch : r1225_2
+ * False -> Block 2
+ * True -> Block 3
+ *
+ * Block 2
+ * r1225_4(glval) = VariableAddress[c] :
+ *
+ * r1225_8(bool) = Constant[1] :
+ * mu1225_9(bool) = Store : &:r1225_1, r1225_8
+ * Goto -> Block 3
+ *
+ * Block 3
+ * ```
+ *
+ * Note that the flag variable, `c#init`, is assumed to be zero-initialized at program startup, just
+ * like any other variable with static storage duration.
+ */
+class TranslatedStaticLocalVariableDeclarationEntry extends TranslatedDeclarationEntry {
+ StaticLocalVariable var;
+
+ TranslatedStaticLocalVariableDeclarationEntry() { var = entry.getDeclaration() }
+
+ final override TranslatedElement getChild(int id) { id = 0 and result = getInitialization() }
+
+ final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType type) {
+ tag = DynamicInitializationFlagAddressTag() and
+ opcode instanceof Opcode::VariableAddress and
+ type = getBoolGLValueType()
+ or
+ tag = DynamicInitializationFlagLoadTag() and
+ opcode instanceof Opcode::Load and
+ type = getBoolType()
+ or
+ tag = DynamicInitializationConditionalBranchTag() and
+ opcode instanceof Opcode::ConditionalBranch and
+ type = getVoidType()
+ or
+ tag = DynamicInitializationFlagConstantTag() and
+ opcode instanceof Opcode::Constant and
+ type = getBoolType()
+ or
+ tag = DynamicInitializationFlagStoreTag() and
+ opcode instanceof Opcode::Store and
+ type = getBoolType()
+ }
+
+ final override Instruction getFirstInstruction() {
+ result = getInstruction(DynamicInitializationFlagAddressTag())
+ }
+
+ final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ tag = DynamicInitializationFlagAddressTag() and
+ kind instanceof GotoEdge and
+ result = getInstruction(DynamicInitializationFlagLoadTag())
+ or
+ tag = DynamicInitializationFlagLoadTag() and
+ kind instanceof GotoEdge and
+ result = getInstruction(DynamicInitializationConditionalBranchTag())
+ or
+ tag = DynamicInitializationConditionalBranchTag() and
+ (
+ kind instanceof TrueEdge and
+ result = getParent().getChildSuccessor(this)
+ or
+ kind instanceof FalseEdge and
+ result = getInitialization().getFirstInstruction()
+ )
+ or
+ tag = DynamicInitializationFlagConstantTag() and
+ kind instanceof GotoEdge and
+ result = getInstruction(DynamicInitializationFlagStoreTag())
+ or
+ tag = DynamicInitializationFlagStoreTag() and
+ kind instanceof GotoEdge and
+ result = getParent().getChildSuccessor(this)
+ }
+
+ final override Instruction getChildSuccessor(TranslatedElement child) {
+ child = getInitialization() and
+ result = getInstruction(DynamicInitializationFlagConstantTag())
+ }
+
+ final override IRDynamicInitializationFlag getInstructionVariable(InstructionTag tag) {
+ tag = DynamicInitializationFlagAddressTag() and
+ result.getVariable() = var
+ }
+
+ final override string getInstructionConstantValue(InstructionTag tag) {
+ tag = DynamicInitializationFlagConstantTag() and result = "1"
+ }
+
+ final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ tag = DynamicInitializationFlagLoadTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(DynamicInitializationFlagAddressTag())
+ )
+ or
+ tag = DynamicInitializationConditionalBranchTag() and
+ operandTag instanceof ConditionOperandTag and
+ result = getInstruction(DynamicInitializationFlagLoadTag())
+ or
+ tag = DynamicInitializationFlagStoreTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(DynamicInitializationFlagAddressTag())
+ or
+ operandTag instanceof StoreValueOperandTag and
+ result = getInstruction(DynamicInitializationFlagConstantTag())
+ )
+ }
+
+ private TranslatedStaticLocalVariableInitialization getInitialization() {
+ result.getVariable() = var
+ }
+}
+
+/**
+ * The initialization of a static local variable. This element will only exist for a static variable
+ * with a dynamic initializer.
+ */
+class TranslatedStaticLocalVariableInitialization extends TranslatedElement,
+ TranslatedLocalVariableDeclaration, TTranslatedStaticLocalVariableInitialization {
+ VariableDeclarationEntry entry;
+ StaticLocalVariable var;
+
+ TranslatedStaticLocalVariableInitialization() {
+ this = TTranslatedStaticLocalVariableInitialization(entry) and
+ var = entry.getDeclaration()
+ }
+
+ final override string toString() { result = "init: " + entry.toString() }
+
+ final override Locatable getAST() { result = entry }
+
+ final override LocalVariable getVariable() { result = var }
+
+ final override Function getFunction() { result = var.getFunction() }
+}
+
+/**
+ * Gets the `TranslatedRangeBasedForVariableDeclaration` that represents the declaration of
+ * `var`.
+ */
+TranslatedRangeBasedForVariableDeclaration getTranslatedRangeBasedForVariableDeclaration(
+ LocalVariable var
+) {
+ result.getVariable() = var
+}
+
+/**
+ * Represents the IR translation of a compiler-generated variable in a range-based `for` loop.
+ */
+class TranslatedRangeBasedForVariableDeclaration extends TranslatedLocalVariableDeclaration,
+ TTranslatedRangeBasedForVariableDeclaration {
+ RangeBasedForStmt forStmt;
+ LocalVariable var;
+
+ TranslatedRangeBasedForVariableDeclaration() {
+ this = TTranslatedRangeBasedForVariableDeclaration(forStmt, var)
+ }
+
+ override string toString() { result = var.toString() }
+
+ override Locatable getAST() { result = var }
+
+ override Function getFunction() { result = forStmt.getEnclosingFunction() }
+
+ override LocalVariable getVariable() { result = var }
+}
+
+TranslatedConditionDecl getTranslatedConditionDecl(ConditionDeclExpr expr) {
+ result.getAST() = expr
+}
+
+/**
+ * Represents the IR translation of the declaration portion of a `ConditionDeclExpr`, which
+ * represents the variable declared in code such as:
+ * ```
+ * if (int* p = &x) {
+ * }
+ * ```
+ */
+class TranslatedConditionDecl extends TranslatedLocalVariableDeclaration, TTranslatedConditionDecl {
+ ConditionDeclExpr conditionDeclExpr;
+
+ TranslatedConditionDecl() { this = TTranslatedConditionDecl(conditionDeclExpr) }
+
+ override string toString() { result = "decl: " + conditionDeclExpr.toString() }
+
+ override Locatable getAST() { result = conditionDeclExpr }
+
+ override Function getFunction() { result = conditionDeclExpr.getEnclosingFunction() }
+
+ override LocalVariable getVariable() { result = conditionDeclExpr.getVariable() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll
new file mode 100644
index 00000000000..81c69cf0ea2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll
@@ -0,0 +1,925 @@
+private import cpp
+import semmle.code.cpp.ir.implementation.raw.IR
+private import semmle.code.cpp.ir.IRConfiguration
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.ir.internal.TempVariableTag
+private import InstructionTag
+private import TranslatedCondition
+private import TranslatedFunction
+private import TranslatedStmt
+private import TranslatedExpr
+private import IRConstruction
+private import semmle.code.cpp.models.interfaces.SideEffect
+private import SideEffects
+
+/**
+ * Gets the "real" parent of `expr`. This predicate treats conversions as if
+ * they were explicit nodes in the expression tree, rather than as implicit
+ * nodes as in the regular AST representation.
+ */
+private Element getRealParent(Expr expr) {
+ result = expr.getParentWithConversions()
+ or
+ result.(Destructor).getADestruction() = expr
+}
+
+IRUserVariable getIRUserVariable(Function func, Variable var) {
+ result.getVariable() = var and
+ result.getEnclosingFunction() = func
+}
+
+IRTempVariable getIRTempVariable(Locatable ast, TempVariableTag tag) {
+ result.getAST() = ast and
+ result.getTag() = tag
+}
+
+/**
+ * Holds if `expr` is a constant of a type that can be replaced directly with
+ * its value in the IR. This does not include address constants as we have no
+ * means to express those as QL values.
+ */
+predicate isIRConstant(Expr expr) { exists(expr.getValue()) }
+
+// Pulled out for performance. See
+// https://github.com/github/codeql-coreql-team/issues/1044.
+private predicate isOrphan(Expr expr) { not exists(getRealParent(expr)) }
+
+/**
+ * Holds if `expr` should be ignored for the purposes of IR generation due to
+ * some property of `expr` or one of its ancestors.
+ */
+private predicate ignoreExprAndDescendants(Expr expr) {
+ // Ignore parentless expressions
+ isOrphan(expr)
+ or
+ // Ignore the constants in SwitchCase, since their values are embedded in the
+ // CaseEdge.
+ getRealParent(expr) instanceof SwitchCase
+ or
+ // Ignore descendants of constant expressions, since we'll just substitute the
+ // constant value.
+ isIRConstant(getRealParent(expr))
+ or
+ // Only translate the initializer of a static local if it uses run-time data.
+ // Otherwise the initializer does not run in function scope.
+ exists(Initializer init, StaticStorageDurationVariable var |
+ init = var.getInitializer() and
+ not var.hasDynamicInitialization() and
+ expr = init.getExpr().getFullyConverted()
+ )
+ or
+ // Ignore descendants of `__assume` expressions, since we translate these to `NoOp`.
+ getRealParent(expr) instanceof AssumeExpr
+ or
+ // The `DestructorCall` node for a `DestructorFieldDestruction` has a `FieldAccess`
+ // node as its qualifier, but that `FieldAccess` does not have a child of its own.
+ // We'll ignore that `FieldAccess`, and supply the receiver as part of the calling
+ // context, much like we do with constructor calls.
+ expr.getParent().(DestructorCall).getParent() instanceof DestructorFieldDestruction
+ or
+ exists(NewArrayExpr newExpr |
+ // REVIEW: Ignore initializers for `NewArrayExpr` until we determine how to
+ // represent them.
+ newExpr.getInitializer().getFullyConverted() = expr
+ )
+ or
+ // Disabled: do not translate input/output variables in GNU asm statements
+ // getRealParent(expr) instanceof AsmStmt
+ // or
+ ignoreExprAndDescendants(getRealParent(expr)) // recursive case
+ or
+ // We do not yet translate destructors properly, so for now we ignore any
+ // custom deallocator call, if present.
+ exists(DeleteExpr deleteExpr | deleteExpr.getAllocatorCall() = expr)
+ or
+ exists(DeleteArrayExpr deleteArrayExpr | deleteArrayExpr.getAllocatorCall() = expr)
+ or
+ exists(BuiltInVarArgsStart vaStartExpr |
+ vaStartExpr.getLastNamedParameter().getFullyConverted() = expr
+ )
+}
+
+/**
+ * Holds if `expr` (not including its descendants) should be ignored for the
+ * purposes of IR generation.
+ */
+private predicate ignoreExprOnly(Expr expr) {
+ exists(NewOrNewArrayExpr newExpr |
+ // Ignore the allocator call, because we always synthesize it. Don't ignore
+ // its arguments, though, because we use them as part of the synthesis.
+ newExpr.getAllocatorCall() = expr
+ )
+ or
+ // The extractor deliberately emits an `ErrorExpr` as the first argument to
+ // the allocator call, if any, of a `NewOrNewArrayExpr`. That `ErrorExpr`
+ // should not be translated.
+ exists(NewOrNewArrayExpr new | expr = new.getAllocatorCall().getArgument(0))
+ or
+ not translateFunction(expr.getEnclosingFunction())
+ or
+ // We do not yet translate destructors properly, so for now we ignore the
+ // destructor call. We do, however, translate the expression being
+ // destructed, and that expression can be a child of the destructor call.
+ exists(DeleteExpr deleteExpr | deleteExpr.getDestructorCall() = expr)
+ or
+ exists(DeleteArrayExpr deleteArrayExpr | deleteArrayExpr.getDestructorCall() = expr)
+}
+
+/**
+ * Holds if `expr` should be ignored for the purposes of IR generation.
+ */
+private predicate ignoreExpr(Expr expr) {
+ ignoreExprOnly(expr) or
+ ignoreExprAndDescendants(expr)
+}
+
+/**
+ * Holds if `func` contains an AST that cannot be translated into IR. This is mostly used to work
+ * around extractor bugs. Once the relevant extractor bugs are fixed, this predicate can be removed.
+ */
+private predicate isInvalidFunction(Function func) {
+ exists(Literal literal |
+ // Constructor field inits within a compiler-generated copy constructor have a source expression
+ // that is a `Literal` with no value.
+ literal = func.(Constructor).getAnInitializer().(ConstructorFieldInit).getExpr() and
+ not exists(literal.getValue())
+ )
+ or
+ exists(ThisExpr thisExpr |
+ // An instantiation of a member function template is not treated as a `MemberFunction` if it has
+ // only non-type template arguments.
+ thisExpr.getEnclosingFunction() = func and
+ not func instanceof MemberFunction
+ )
+ or
+ exists(Expr expr |
+ // Expression missing a type.
+ expr.getEnclosingFunction() = func and
+ not exists(expr.getType())
+ )
+}
+
+/**
+ * Holds if `func` should be translated to IR.
+ */
+private predicate translateFunction(Function func) {
+ not func.isFromUninstantiatedTemplate(_) and
+ func.hasEntryPoint() and
+ not isInvalidFunction(func) and
+ exists(IRConfiguration config | config.shouldCreateIRForFunction(func))
+}
+
+/**
+ * Holds if `stmt` should be translated to IR.
+ */
+private predicate translateStmt(Stmt stmt) { translateFunction(stmt.getEnclosingFunction()) }
+
+/**
+ * Holds if `expr` is most naturally evaluated as control flow, rather than as
+ * a value.
+ */
+private predicate isNativeCondition(Expr expr) {
+ expr instanceof BinaryLogicalOperation and
+ not isIRConstant(expr)
+}
+
+/**
+ * Holds if `expr` can be evaluated as either a condition or a value expression,
+ * depending on context.
+ */
+private predicate isFlexibleCondition(Expr expr) {
+ (
+ expr instanceof ParenthesisExpr or
+ expr instanceof NotExpr
+ ) and
+ usedAsCondition(expr) and
+ not isIRConstant(expr)
+}
+
+/**
+ * Holds if `expr` is used in a condition context, i.e. the Boolean result of
+ * the expression is directly used to determine control flow.
+ */
+private predicate usedAsCondition(Expr expr) {
+ exists(BinaryLogicalOperation op |
+ op.getLeftOperand().getFullyConverted() = expr or
+ op.getRightOperand().getFullyConverted() = expr
+ )
+ or
+ exists(Loop loop | loop.getCondition().getFullyConverted() = expr)
+ or
+ exists(IfStmt ifStmt | ifStmt.getCondition().getFullyConverted() = expr)
+ or
+ exists(ConditionalExpr condExpr |
+ // The two-operand form of `ConditionalExpr` treats its condition as a value, since it needs to
+ // be reused as a value if the condition is true.
+ condExpr.getCondition().getFullyConverted() = expr and not condExpr.isTwoOperand()
+ )
+ or
+ exists(NotExpr notExpr |
+ notExpr.getOperand().getFullyConverted() = expr and
+ usedAsCondition(notExpr)
+ )
+ or
+ exists(ParenthesisExpr paren |
+ paren.getExpr() = expr and
+ usedAsCondition(paren)
+ )
+}
+
+/**
+ * Holds if `conv` is an `InheritanceConversion` that requires a `TranslatedLoad`, despite not being
+ * marked as having an lvalue-to-rvalue conversion.
+ *
+ * This is necessary for an `InheritanceConversion` that is originally modeled as a
+ * prvalue-to-prvalue conversion, since we transform it into a glvalue-to-glvalue conversion. If it
+ * is actually consumed as a prvalue, such as on the right hand side of an assignment, we need to
+ * load the resulting glvalue.
+ */
+private predicate isInheritanceConversionWithImplicitLoad(InheritanceConversion conv) {
+ // Must have originally been a prvalue-to-prvalue conversion.
+ isClassPRValue(conv.getExpr()) and
+ not conv.hasLValueToRValueConversion() and
+ // Exclude the case where this will be consumed as a glvalue, such as when used as the qualifier
+ // of a field access.
+ not isPRValueConversionOnGLValue(conv)
+}
+
+/**
+ * Holds if `expr` is the result of a field access whose qualifier was a prvalue and whose result is
+ * a prvalue. These accesses are not marked as having loads, but we do need a load in the IR.
+ */
+private predicate isPRValueFieldAccessWithImplicitLoad(Expr expr) {
+ expr instanceof ValueFieldAccess and
+ expr.isPRValueCategory() and
+ // No need to do a load if we're replacing the result with a constant anyway.
+ not isIRConstant(expr) and
+ // Model an array prvalue as the address of the array, just like an array glvalue.
+ not expr.getUnspecifiedType() instanceof ArrayType
+}
+
+/**
+ * Holds if `expr` is a prvalue of class type.
+ *
+ * This same test is used in several places.
+ */
+pragma[inline]
+private predicate isClassPRValue(Expr expr) {
+ expr.isPRValueCategory() and
+ expr.getUnspecifiedType() instanceof Class
+}
+
+/**
+ * Holds if `expr` is consumed as a glvalue by its parent. If `expr` is actually a prvalue, it will
+ * have any lvalue-to-rvalue conversion ignored. If it does not have an lvalue-to-rvalue conversion,
+ * it will be materialized into a temporary object.
+ */
+private predicate consumedAsGLValue(Expr expr) {
+ isClassPRValue(expr) and
+ (
+ // Qualifier of a field access.
+ expr = any(FieldAccess a).getQualifier().getFullyConverted()
+ or
+ // Qualifier of a member function call.
+ expr = any(Call c).getQualifier().getFullyConverted()
+ or
+ // The operand of an inheritance conversion.
+ expr = any(InheritanceConversion c).getExpr()
+ )
+}
+
+/**
+ * Holds if `conv` is a conversion that is originally a prvalue-to-prvalue conversion, but which is
+ * applied to a prvalue that will actually be consumed as a glvalue.
+ */
+predicate isPRValueConversionOnGLValue(Conversion conv) {
+ exists(Expr consumed |
+ consumedAsGLValue(consumed) and
+ isClassPRValue(conv.getExpr()) and
+ (
+ // Example: The conversion of `std::string` to `const std::string` when evaluating
+ // `std::string("foo").c_str()`.
+ conv instanceof PrvalueAdjustmentConversion
+ or
+ // Parentheses are transparent.
+ conv instanceof ParenthesisExpr
+ or
+ // Example: The base class conversion in `f().m()`, when `m` is a member function of a base
+ // class of the return type of `f()`.
+ conv instanceof InheritanceConversion
+ ) and
+ (
+ // Base case: The conversion is consumed directly.
+ conv = consumed
+ or
+ // Recursive case: The conversion is the operand of another prvalue conversion.
+ isPRValueConversionOnGLValue(conv.getConversion())
+ )
+ )
+}
+
+/**
+ * Holds if `expr` is a prvalue of class type that is used in a context that requires a glvalue.
+ *
+ * Any conversions between `expr` and the ancestor that consumes the glvalue will also be treated
+ * as glvalues, but are not part of this relation.
+ *
+ * For example:
+ * ```c++
+ * std::string("s").c_str();
+ * ```
+ * The object for the qualifier is a prvalue(load) of type `std::string`, but the actual
+ * fully-converted qualifier of the call to `c_str()` is a prvalue adjustment conversion that
+ * converts the type to `const std::string` to match the type of the `this` pointer of the
+ * member function. In this case, `mustTransformToGLValue()` will hold for the temporary
+ * `std::string` object, but not the prvalue adjustment on top of it.
+ * `isPRValueConversionOnGLValue()` would hold for the prvalue adjustment.
+ */
+private predicate mustTransformToGLValue(Expr expr) {
+ not isPRValueConversionOnGLValue(expr) and
+ (
+ // The expression is the fully converted qualifier, with no prvalue adjustments on top.
+ consumedAsGLValue(expr)
+ or
+ // The expression has conversions on top, but they are all prvalue adjustments.
+ isPRValueConversionOnGLValue(expr.getConversion())
+ )
+}
+
+/**
+ * Holds if `expr` has an lvalue-to-rvalue conversion that should be ignored
+ * when generating IR. This occurs for `this`, for conversions from an lvalue of
+ * function type to an rvalue of function pointer type (which the IR represents
+ * identically), and for class prvalues that must be consumed as glvalues, such
+ * as the qualifier of a field access or member function call.
+ */
+predicate ignoreLoad(Expr expr) {
+ expr.hasLValueToRValueConversion() and
+ (
+ expr instanceof ThisExpr
+ or
+ expr instanceof FunctionAccess
+ or
+ expr.(PointerDereferenceExpr).getOperand().getFullyConverted().getType().getUnspecifiedType()
+ instanceof FunctionPointerType
+ or
+ expr.(ReferenceDereferenceExpr).getExpr().getType().getUnspecifiedType() instanceof
+ FunctionReferenceType
+ or
+ // The extractor represents the qualifier of a field access or member function call as a load of
+ // the temporary object if the original qualifier was a prvalue. For IR purposes, we always want
+ // to use the address of the temporary object as the qualifier of a field access or the `this`
+ // argument to a member function call.
+ mustTransformToGLValue(expr)
+ )
+}
+
+/**
+ * Holds if `expr` needs a load of its own even though the load is performed
+ * as part of translating its parent. The load is attached to `expr` rather
+ * than to the parent because clients such as data flow rely on the value of
+ * the `x` in `x++` being exactly what is loaded from `x`. Covers `CrementOperation`
+ * operands, `AssignOperation` lvalues and glvalue `TemporaryObjectExpr` call arguments.
+ */
+private predicate needsLoadForParentExpr(Expr expr) {
+  expr = any(AssignOperation assignOp).getLValue().getFullyConverted()
+  or
+  expr = any(CrementOperation crement).getOperand().getFullyConverted()
+  or
+  // An argument that is passed by value but needs a constructor call is extracted as a
+  // `TemporaryObjectExpr` marked as a glvalue, much as a code-generating compiler would pass the
+  // address of the temporary so that caller and callee share one memory location. That is not
+  // fully modeled yet, so a load is synthesized to make it look as if the temporary object is
+  // passed via a bitwise copy.
+  exists(TemporaryObjectExpr tempObject |
+    tempObject.isGLValueCategory() and
+    tempObject = any(Call call).getAnArgument().getFullyConverted() and
+    expr = tempObject
+  )
+}
+
+/**
+ * Holds if a `TranslatedLoad` element should be created for `expr`.
+ */
+predicate hasTranslatedLoad(Expr expr) {
+  not ignoreLoad(expr) and
+  not isFlexibleCondition(expr) and
+  not isNativeCondition(expr) and
+  not ignoreExpr(expr) and
+  (
+    isInheritanceConversionWithImplicitLoad(expr)
+    or
+    isPRValueFieldAccessWithImplicitLoad(expr)
+    or
+    needsLoadForParentExpr(expr)
+    or
+    expr.hasLValueToRValueConversion()
+  )
+}
+
+/**
+ * Holds if a `TranslatedSyntheticTemporaryObject` element should be created for `expr`.
+ */
+predicate hasTranslatedSyntheticTemporaryObject(Expr expr) {
+  // When `expr` carries a load, that load is simply ignored by `ignoreLoad()` instead.
+  not expr.hasLValueToRValueConversion() and
+  mustTransformToGLValue(expr) and
+  not ignoreExpr(expr)
+}
+
+/**
+ * Holds if `entry` needs an IR translation. Only entries that declare a local variable inside
+ * a translated `DeclStmt` qualify, and an entry for a static local variable qualifies only when
+ * that variable has dynamic initialization.
+ */
+private predicate translateDeclarationEntry(DeclarationEntry entry) {
+  exists(LocalVariable local, DeclStmt stmt |
+    // Declarations of anything other than a local variable are not translated to IR.
+    local = entry.getDeclaration() and
+    entry = stmt.getADeclarationEntry() and
+    translateStmt(stmt) and
+    (
+      // A static variable is only translated if it has a dynamic initializer.
+      local.(StaticLocalVariable).hasDynamicInitialization()
+      or
+      not local.isStatic()
+    )
+  )
+}
+
+newtype TTranslatedElement =
+ // An expression that is not being consumed as a condition
+ TTranslatedValueExpr(Expr expr) {
+ not ignoreExpr(expr) and
+ not isNativeCondition(expr) and
+ not isFlexibleCondition(expr)
+ } or
+ // A separate element to handle the lvalue-to-rvalue conversion step of an
+ // expression.
+ TTranslatedLoad(Expr expr) { hasTranslatedLoad(expr) } or
+ // A temporary object that we had to synthesize ourselves, so that we could do a field access or
+ // method call on a prvalue.
+ TTranslatedSyntheticTemporaryObject(Expr expr) { hasTranslatedSyntheticTemporaryObject(expr) } or
+ // For expressions that would not otherwise generate an instruction.
+ TTranslatedResultCopy(Expr expr) {
+ not ignoreExpr(expr) and
+ exprNeedsCopyIfNotLoaded(expr) and
+ not hasTranslatedLoad(expr)
+ } or
+ // An expression most naturally translated as control flow.
+ TTranslatedNativeCondition(Expr expr) {
+ not ignoreExpr(expr) and
+ isNativeCondition(expr)
+ } or
+ // An expression that can best be translated as control flow given the context
+ // in which it is used.
+ TTranslatedFlexibleCondition(Expr expr) {
+ not ignoreExpr(expr) and
+ isFlexibleCondition(expr)
+ } or
+ // An expression that is not naturally translated as control flow, but is
+ // consumed in a condition context. This element adapts the original element
+ // to the condition context.
+ TTranslatedValueCondition(Expr expr) {
+ not ignoreExpr(expr) and
+ not isNativeCondition(expr) and
+ not isFlexibleCondition(expr) and
+ usedAsCondition(expr)
+ } or
+ // An expression that is naturally translated as control flow, but is used in
+ // a context where a simple value is expected. This element adapts the
+ // original condition to the value context.
+ TTranslatedConditionValue(Expr expr) {
+ not ignoreExpr(expr) and
+ isNativeCondition(expr) and
+ not usedAsCondition(expr)
+ } or
+ // An expression used as an initializer.
+ TTranslatedInitialization(Expr expr) {
+ not ignoreExpr(expr) and
+ (
+ exists(Initializer init | init.getExpr().getFullyConverted() = expr) or
+ exists(ClassAggregateLiteral initList | initList.getFieldExpr(_).getFullyConverted() = expr) or
+ exists(ArrayOrVectorAggregateLiteral initList |
+ initList.getElementExpr(_).getFullyConverted() = expr
+ ) or
+ exists(ReturnStmt returnStmt | returnStmt.getExpr().getFullyConverted() = expr) or
+ exists(ConstructorFieldInit fieldInit | fieldInit.getExpr().getFullyConverted() = expr) or
+ exists(NewExpr newExpr | newExpr.getInitializer().getFullyConverted() = expr) or
+ exists(ThrowExpr throw | throw.getExpr().getFullyConverted() = expr) or
+ exists(TemporaryObjectExpr temp | temp.getExpr() = expr) or
+ exists(LambdaExpression lambda | lambda.getInitializer().getFullyConverted() = expr)
+ )
+ } or
+ // The initialization of a field via a member of an initializer list or a `ConstructorFieldInit`.
+ TTranslatedExplicitFieldInitialization(Expr ast, Field field, Expr expr) {
+ exists(ClassAggregateLiteral initList |
+ not ignoreExpr(initList) and
+ ast = initList and
+ expr = initList.getFieldExpr(field).getFullyConverted()
+ )
+ or
+ exists(ConstructorFieldInit init |
+ not ignoreExpr(init) and
+ ast = init and
+ field = init.getTarget() and
+ expr = init.getExpr().getFullyConverted()
+ )
+ } or
+ // The value initialization of a field due to an omitted member of an
+ // initializer list.
+ TTranslatedFieldValueInitialization(Expr ast, Field field) {
+ exists(ClassAggregateLiteral initList |
+ not ignoreExpr(initList) and
+ ast = initList and
+ initList.isValueInitialized(field)
+ )
+ } or
+ // The initialization of an array element via a member of an initializer list.
+ TTranslatedExplicitElementInitialization(ArrayOrVectorAggregateLiteral initList, int elementIndex) {
+ not ignoreExpr(initList) and
+ exists(initList.getElementExpr(elementIndex))
+ } or
+ // The value initialization of a range of array elements that were omitted
+ // from an initializer list.
+ TTranslatedElementValueInitialization(
+ ArrayOrVectorAggregateLiteral initList, int elementIndex, int elementCount
+ ) {
+ not ignoreExpr(initList) and
+ isFirstValueInitializedElementInRange(initList, elementIndex) and
+ elementCount = getEndOfValueInitializedRange(initList, elementIndex) - elementIndex
+ } or
+ // The initialization of a base class from within a constructor.
+ TTranslatedConstructorBaseInit(ConstructorBaseInit init) { not ignoreExpr(init) } or
+ // The destruction of a base class from within a destructor.
+ TTranslatedDestructorBaseDestruction(DestructorBaseDestruction destruction) {
+ not ignoreExpr(destruction)
+ } or
+ // The destruction of a field from within a destructor.
+ TTranslatedDestructorFieldDestruction(DestructorFieldDestruction destruction) {
+ not ignoreExpr(destruction)
+ } or
+ // A statement
+ TTranslatedStmt(Stmt stmt) { translateStmt(stmt) } or
+ // A function
+ TTranslatedFunction(Function func) { translateFunction(func) } or
+ // A constructor init list
+ TTranslatedConstructorInitList(Function func) { translateFunction(func) } or
+ // A destructor destruction list
+ TTranslatedDestructorDestructionList(Function func) { translateFunction(func) } or
+ TTranslatedThisParameter(Function func) { // Exists only for non-static member functions.
+ translateFunction(func) and func.isMember() and not func.isStatic()
+ } or
+ // A function parameter
+ TTranslatedParameter(Parameter param) {
+ exists(Function func |
+ (
+ func = param.getFunction() or
+ func = param.getCatchBlock().getEnclosingFunction()
+ ) and
+ translateFunction(func)
+ )
+ } or
+ TTranslatedEllipsisParameter(Function func) { translateFunction(func) and func.isVarargs() } or
+ TTranslatedReadEffects(Function func) { translateFunction(func) } or
+ TTranslatedThisReadEffect(Function func) { // Exists only for non-static member functions.
+ translateFunction(func) and func.isMember() and not func.isStatic()
+ } or
+ // The read side effect in a function's return block for an array, pointer or reference parameter
+ TTranslatedParameterReadEffect(Parameter param) {
+ translateFunction(param.getFunction()) and
+ exists(Type t | t = param.getUnspecifiedType() |
+ t instanceof ArrayType or
+ t instanceof PointerType or
+ t instanceof ReferenceType
+ )
+ } or
+ // A local declaration
+ TTranslatedDeclarationEntry(DeclarationEntry entry) { translateDeclarationEntry(entry) } or
+ // The dynamic initialization of a static local variable. This is a separate object from the
+ // declaration entry.
+ TTranslatedStaticLocalVariableInitialization(DeclarationEntry entry) {
+ translateDeclarationEntry(entry) and
+ entry.getDeclaration() instanceof StaticLocalVariable
+ } or
+ // A compiler-generated variable to implement a range-based for loop. These don't have a
+ // `DeclarationEntry` in the database, so we have to go by the `Variable` itself.
+ TTranslatedRangeBasedForVariableDeclaration(RangeBasedForStmt forStmt, LocalVariable var) {
+ translateStmt(forStmt) and
+ (
+ var = forStmt.getRangeVariable() or
+ var = forStmt.getBeginEndDeclaration().getADeclaration() or
+ var = forStmt.getVariable()
+ )
+ } or
+ // An allocator call in a `new` or `new[]` expression
+ TTranslatedAllocatorCall(NewOrNewArrayExpr newExpr) { not ignoreExpr(newExpr) } or
+ // An allocation size for a `new` or `new[]` expression
+ TTranslatedAllocationSize(NewOrNewArrayExpr newExpr) { not ignoreExpr(newExpr) } or
+ // The declaration/initialization part of a `ConditionDeclExpr`
+ TTranslatedConditionDecl(ConditionDeclExpr expr) { not ignoreExpr(expr) } or
+ // The side effects of a `Call`
+ TTranslatedCallSideEffects(Call expr) {
+ // Exclude allocations such as `malloc` (which happen to also be function calls).
+ // Both `TranslatedCallSideEffects` and `TranslatedAllocationSideEffects` generate
+ // the same side effects for its children as they both extend the `TranslatedSideEffects`
+ // class.
+ // Note: We can separate allocation side effects and call side effects into two
+ // translated elements as no call can be both a `ConstructorCall` and an `AllocationExpr`.
+ not expr instanceof AllocationExpr and
+ (
+ exists(TTranslatedArgumentSideEffect(expr, _, _, _)) or
+ expr instanceof ConstructorCall
+ )
+ } or
+ // The side effects of an allocation, i.e. `new`, `new[]` or `malloc`
+ TTranslatedAllocationSideEffects(AllocationExpr expr) { not ignoreExpr(expr) } or
+ // A precise side effect of an argument to a `Call`
+ TTranslatedArgumentSideEffect(Call call, Expr expr, int n, SideEffectOpcode opcode) {
+ not ignoreExpr(expr) and
+ not ignoreExpr(call) and
+ (
+ n >= 0 and expr = call.getArgument(n).getFullyConverted()
+ or
+ n = -1 and expr = call.getQualifier().getFullyConverted()
+ ) and
+ opcode = getASideEffectOpcode(call, n)
+ }
+
+/**
+ * Gets the end (exclusive) of the value-initialized range in `initList` that
+ * begins at `afterElementIndex`, where `afterElementIndex` is the first
+ * value-initialized element of that range. This is the index of the first
+ * explicitly initialized element whose index is greater than
+ * `afterElementIndex` or, if no explicitly initialized elements remain
+ * after that index, the total number of elements in the array being
+ * initialized.
+ */
+private int getEndOfValueInitializedRange(
+  ArrayOrVectorAggregateLiteral initList, int afterElementIndex
+) {
+  isFirstValueInitializedElementInRange(initList, afterElementIndex) and
+  if exists(getNextExplicitlyInitializedElementAfter(initList, afterElementIndex))
+  then result = getNextExplicitlyInitializedElementAfter(initList, afterElementIndex)
+  else result = initList.getArraySize()
+}
+
+/**
+ * Gets the smallest index greater than `afterElementIndex` at which `initList`
+ * has an explicit element initializer. Only has a result when
+ * `afterElementIndex` is the first value-initialized element of a
+ * value-initialized range in `initList`.
+ */
+private int getNextExplicitlyInitializedElementAfter(
+  ArrayOrVectorAggregateLiteral initList, int afterElementIndex
+) {
+  result = min(int idx | idx > afterElementIndex and exists(initList.getElementExpr(idx))) and
+  isFirstValueInitializedElementInRange(initList, afterElementIndex)
+}
+
+/**
+ * Holds if element `elementIndex` of `initList` is value-initialized and is either element 0
+ * or directly follows an explicitly initialized element, i.e. it starts a value-initialized run.
+ */
+private predicate isFirstValueInitializedElementInRange(
+  ArrayOrVectorAggregateLiteral initList, int elementIndex
+) {
+  exists(int previousIndex | previousIndex = elementIndex - 1 |
+    previousIndex = -1 or
+    exists(initList.getElementExpr(previousIndex))
+  ) and
+  initList.isValueInitialized(elementIndex)
+}
+
+/**
+ * Represents an AST node for which IR needs to be generated.
+ *
+ * In most cases, there is a single `TranslatedElement` for each AST node.
+ * However, when a single AST node performs two separable operations (e.g.
+ * a `VariableAccess` that is also a load), there may be multiple
+ * `TranslatedElement` nodes for a single AST node.
+ */
+abstract class TranslatedElement extends TTranslatedElement {
+ abstract string toString();
+
+ /**
+ * Gets the AST node being translated.
+ */
+ abstract Locatable getAST();
+
+ /**
+ * Gets the first instruction to be executed in the evaluation of this element.
+ */
+ abstract Instruction getFirstInstruction();
+
+ /**
+ * Gets an immediate child element of this element.
+ */
+ final TranslatedElement getAChild() { result = getChild(_) }
+
+ /**
+ * Gets the immediate child element of this element. The `id` is unique
+ * among all children of this element, but the values are not necessarily
+ * consecutive.
+ */
+ abstract TranslatedElement getChild(int id);
+
+ /**
+ * Gets an integer identifier for the element. This id is unique within
+ * the scope of the element's function.
+ */
+ final int getId() { result = getUniqueId() }
+
+ private TranslatedElement getChildByRank(int rankIndex) { // Children by `id`; 0-based rank.
+ result =
+ rank[rankIndex + 1](TranslatedElement child, int id | child = getChild(id) | child order by id)
+ }
+
+ language[monotonicAggregates]
+ private int getDescendantCount() { // Counts this element plus all of its transitive children.
+ result =
+ 1 + sum(TranslatedElement child | child = getChildByRank(_) | child.getDescendantCount())
+ }
+
+ private int getUniqueId() { // Pre-order numbering; an element without a parent gets 0.
+ if not exists(getParent())
+ then result = 0
+ else
+ exists(TranslatedElement parent |
+ parent = getParent() and
+ if this = parent.getChildByRank(0)
+ then result = 1 + parent.getUniqueId()
+ else
+ exists(int childIndex, TranslatedElement previousChild |
+ this = parent.getChildByRank(childIndex) and
+ previousChild = parent.getChildByRank(childIndex - 1) and
+ result = previousChild.getUniqueId() + previousChild.getDescendantCount()
+ )
+ )
+ }
+
+ /**
+ * Holds if this element generates an instruction with opcode `opcode` and
+ * result type `resultType`. `tag` must be unique for each instruction
+ * generated from the same AST node (not just from the same
+ * `TranslatedElement`).
+ * If the instruction does not return a result, `resultType` should be
+ * `VoidType`.
+ */
+ abstract predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType);
+
+ /**
+ * Gets the `Function` that contains this element.
+ */
+ abstract Function getFunction();
+
+ /**
+ * Gets the successor instruction of the instruction that was generated by
+ * this element for tag `tag`. The successor edge kind is specified by `kind`.
+ */
+ abstract Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind);
+
+ /**
+ * Gets the successor instruction to which control should flow after the
+ * child element specified by `child` has finished execution.
+ */
+ abstract Instruction getChildSuccessor(TranslatedElement child);
+
+ /**
+ * Gets the instruction to which control should flow if an exception is thrown
+ * within this element. This will generally return the first `catch` block of
+ * the nearest enclosing `try`, or the `Unwind` instruction for the function if
+ * there is no enclosing `try`.
+ */
+ Instruction getExceptionSuccessorInstruction() {
+ result = getParent().getExceptionSuccessorInstruction()
+ }
+
+ /**
+ * Gets the primary instruction for the side effect instruction that was
+ * generated by this element for tag `tag`.
+ */
+ Instruction getPrimaryInstructionForSideEffect(InstructionTag tag) { none() }
+
+ /**
+ * Holds if this element generates a temporary variable with type `type`.
+ * `tag` must be unique for each variable generated from the same AST node
+ * (not just from the same `TranslatedElement`).
+ */
+ predicate hasTempVariable(TempVariableTag tag, CppType type) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `FunctionInstruction`, gets the
+ * `Function` for that instruction.
+ */
+ Function getInstructionFunction(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `VariableInstruction`, gets the
+ * `IRVariable` for that instruction.
+ */
+ IRVariable getInstructionVariable(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `FieldInstruction`, gets the
+ * `Field` for that instruction.
+ */
+ Field getInstructionField(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `ConstantValueInstruction`, gets
+ * the constant value for that instruction.
+ */
+ string getInstructionConstantValue(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is an `IndexedInstruction`, gets the
+ * index for that instruction.
+ */
+ int getInstructionIndex(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `PointerArithmeticInstruction`,
+ * gets the size of the type pointed to by the pointer.
+ */
+ int getInstructionElementSize(InstructionTag tag) { none() }
+
+ /**
+ * Holds if the generated IR refers to an opaque type with size `byteSize`.
+ */
+ predicate needsUnknownOpaqueType(int byteSize) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `StringConstantInstruction`,
+ * gets the `StringLiteral` for that instruction.
+ */
+ StringLiteral getInstructionStringLiteral(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `BuiltInInstruction`, gets the built-in operation.
+ */
+ BuiltInOperation getInstructionBuiltInOperation(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is a `CatchByTypeInstruction`,
+ * gets the type of the exception to be caught.
+ */
+ CppType getInstructionExceptionType(InstructionTag tag) { none() }
+
+ /**
+ * If the instruction specified by `tag` is an `InheritanceConversionInstruction`,
+ * gets the inheritance relationship for that instruction.
+ */
+ predicate getInstructionInheritance(InstructionTag tag, Class baseClass, Class derivedClass) {
+ none()
+ }
+
+ /**
+ * Gets the instruction whose result is consumed as an operand of the
+ * instruction specified by `tag`, with the operand specified by `operandTag`.
+ */
+ Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) { none() }
+
+ /**
+ * Gets the type of the memory operand specified by `operandTag` on the instruction specified by `tag`.
+ */
+ CppType getInstructionMemoryOperandType(InstructionTag tag, TypedOperandTag operandTag) { none() }
+
+ /**
+ * Gets the size of the memory operand specified by `operandTag` on the instruction specified by `tag`.
+ * Only holds for operands whose type is `UnknownType`.
+ */
+ int getInstructionOperandSize(InstructionTag tag, SideEffectOperandTag operandTag) { none() }
+
+ /**
+ * Gets the instruction generated by this element with tag `tag`.
+ */
+ final Instruction getInstruction(InstructionTag tag) {
+ getInstructionTranslatedElement(result) = this and
+ getInstructionTag(result) = tag
+ }
+
+ /**
+ * Gets the temporary variable generated by this element with tag `tag`.
+ */
+ final IRTempVariable getTempVariable(TempVariableTag tag) {
+ exists(Locatable ast |
+ result.getAST() = ast and
+ result.getTag() = tag and
+ hasTempVariableAndAST(tag, ast)
+ )
+ }
+
+ pragma[noinline]
+ private predicate hasTempVariableAndAST(TempVariableTag tag, Locatable ast) {
+ hasTempVariable(tag, _) and
+ ast = getAST()
+ }
+
+ /**
+ * Gets the parent element of this element.
+ */
+ final TranslatedElement getParent() { result.getAChild() = this }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll
new file mode 100644
index 00000000000..a9f408bf161
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll
@@ -0,0 +1,3048 @@
+private import cpp
+private import semmle.code.cpp.ir.implementation.IRType
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.ir.internal.TempVariableTag
+private import InstructionTag
+private import TranslatedCondition
+private import TranslatedDeclarationEntry
+private import TranslatedElement
+private import TranslatedFunction
+private import TranslatedInitialization
+private import TranslatedFunction
+private import TranslatedStmt
+import TranslatedCall
+
+/**
+ * Gets the `TranslatedExpr` that produces the result of `expr`. When `expr` has a load or a
+ * synthesized temporary object, that is the `TranslatedExpr` for the load or for the synthetic
+ * temporary object portion.
+ */
+TranslatedExpr getTranslatedExpr(Expr expr) {
+  result.producesExprResult() and
+  expr = result.getExpr()
+}
+
+/**
+ * The IR translation of some part of an expression.
+ * A single `Expr` may consist of multiple `TranslatedExpr` objects. Every
+ * `Expr` has a single `TranslatedCoreExpr`, which produces the result of the
+ * expression before any implicit lvalue-to-rvalue conversion. Any expression
+ * with an lvalue-to-rvalue conversion will also have a `TranslatedLoad` to
+ * perform that conversion on the original result. A few expressions have
+ * additional `TranslatedExpr` objects that compute intermediate values, such
+ * as the `TranslatedAllocatorCall` and `TranslatedAllocationSize` within the
+ * translation of a `NewExpr`.
+ */
+abstract class TranslatedExpr extends TranslatedElement {
+ Expr expr; // The expression from which this element is generated.
+
+ /**
+ * Gets the instruction that produces the result of the expression.
+ */
+ abstract Instruction getResult();
+
+ /**
+ * Holds if this `TranslatedExpr` produces the final result of the original
+ * expression from the AST.
+ *
+ * For example, in `y = x;`, the TranslatedLoad for the VariableAccess `x`
+ * produces the result of that VariableAccess expression, but the
+ * TranslatedVariableAccess for `x` does not. The TranslatedVariableAccess
+ * for `y` does produce its result, however, because there is no load on `y`.
+ */
+ abstract predicate producesExprResult();
+
+ final CppType getResultType() { // Glvalue or prvalue type of `expr`, per `isResultGLValue()`.
+ if isResultGLValue()
+ then result = getTypeForGLValue(expr.getType())
+ else result = getTypeForPRValue(expr.getType())
+ }
+
+ /**
+ * Holds if the result of this `TranslatedExpr` is a glvalue.
+ */
+ predicate isResultGLValue() {
+ // This implementation is overridden in `TranslatedCoreExpr` to mark them as
+ // glvalues if they have loads on them. It's also overridden in
+ // `TranslatedLoad` to never mark loads as glvalues since a
+ // `TranslatedLoad` may have been created as a result of
+ // `needsLoadForParentExpr`. It's not overridden in `TranslatedResultCopy`
+ // since result copies never have loads.
+ expr.isGLValueCategory()
+ }
+
+ final override Locatable getAST() { result = expr }
+
+ final override Function getFunction() { result = expr.getEnclosingFunction() }
+
+ /**
+ * Gets the expression from which this `TranslatedExpr` is generated.
+ */
+ final Expr getExpr() { result = expr }
+
+ /**
+ * Gets the `TranslatedFunction` containing this expression.
+ */
+ final TranslatedFunction getEnclosingFunction() {
+ result = getTranslatedFunction(expr.getEnclosingFunction())
+ }
+}
+
+/**
+ * The IR translation of the "core" part of an expression: the part that
+ * computes the expression's result value before any lvalue-to-rvalue
+ * conversion is applied to that result. Every expression has a single
+ * `TranslatedCoreExpr`.
+ */
+abstract class TranslatedCoreExpr extends TranslatedExpr {
+  final override string toString() { result = expr.toString() }
+
+  /**
+   * Holds if this `TranslatedExpr` yields a glvalue as its result.
+   */
+  override predicate isResultGLValue() {
+    // The operand was forced to be a glvalue (such as for the qualifier of a member access), so
+    // the expression itself has to be treated as a glvalue too.
+    isPRValueConversionOnGLValue(expr)
+    or
+    // This element does not produce the expression's result, so it must represent the glvalue
+    // that a `TranslatedLoad` then loads from.
+    hasTranslatedLoad(expr)
+    or
+    super.isResultGLValue()
+  }
+
+  final override predicate producesExprResult() {
+    // When there is a result copy, the copy is the result of the expression.
+    not exprNeedsCopyIfNotLoaded(expr) and
+    // With neither a temp object nor a load, this is the only `TranslatedExpr` for the
+    // expression.
+    not hasTranslatedSyntheticTemporaryObject(expr) and
+    not hasTranslatedLoad(expr)
+  }
+}
+
+/**
+ * The IR translation of an expression that is naturally translated as control flow but is used
+ * where a value is expected. The true and false edges of the condition each store a constant
+ * (`1` or `0`) into one shared temporary variable, which is then loaded to produce the result.
+ */
+class TranslatedConditionValue extends TranslatedCoreExpr, ConditionContext,
+  TTranslatedConditionValue {
+  TranslatedConditionValue() { this = TTranslatedConditionValue(expr) }
+
+  override TranslatedElement getChild(int id) { result = getCondition() and id = 0 }
+
+  override Instruction getFirstInstruction() { result = getCondition().getFirstInstruction() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // The three address instructions all refer to the same temporary variable.
+    resultType = getTypeForGLValue(expr.getType()) and
+    opcode instanceof Opcode::VariableAddress and
+    (
+      tag = ConditionValueResultTempAddressTag() or
+      tag = ConditionValueFalseTempAddressTag() or
+      tag = ConditionValueTrueTempAddressTag()
+    )
+    or
+    // Every other instruction is typed with the result type of this element.
+    resultType = getResultType() and
+    (
+      opcode instanceof Opcode::Load and
+      tag = ConditionValueResultLoadTag()
+      or
+      opcode instanceof Opcode::Store and
+      (tag = ConditionValueFalseStoreTag() or tag = ConditionValueTrueStoreTag())
+      or
+      opcode instanceof Opcode::Constant and
+      (tag = ConditionValueFalseConstantTag() or tag = ConditionValueTrueConstantTag())
+    )
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    (
+      tag = ConditionValueFalseTempAddressTag() and
+      result = getInstruction(ConditionValueFalseConstantTag())
+      or
+      tag = ConditionValueFalseConstantTag() and
+      result = getInstruction(ConditionValueFalseStoreTag())
+      or
+      tag = ConditionValueFalseStoreTag() and
+      result = getInstruction(ConditionValueResultTempAddressTag())
+      or
+      tag = ConditionValueTrueTempAddressTag() and
+      result = getInstruction(ConditionValueTrueConstantTag())
+      or
+      tag = ConditionValueTrueConstantTag() and
+      result = getInstruction(ConditionValueTrueStoreTag())
+      or
+      tag = ConditionValueTrueStoreTag() and
+      result = getInstruction(ConditionValueResultTempAddressTag())
+      or
+      // Both arms meet again here, where the stored value is loaded back.
+      tag = ConditionValueResultTempAddressTag() and
+      result = getInstruction(ConditionValueResultLoadTag())
+      or
+      tag = ConditionValueResultLoadTag() and
+      result = getParent().getChildSuccessor(this)
+    ) and
+    kind instanceof GotoEdge
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = ConditionValueResultLoadTag() and
+    operandTag instanceof AddressOperandTag and
+    result = getInstruction(ConditionValueResultTempAddressTag())
+    or
+    tag = ConditionValueFalseStoreTag() and
+    (
+      operandTag instanceof StoreValueOperandTag and
+      result = getInstruction(ConditionValueFalseConstantTag())
+      or
+      operandTag instanceof AddressOperandTag and
+      result = getInstruction(ConditionValueFalseTempAddressTag())
+    )
+    or
+    tag = ConditionValueTrueStoreTag() and
+    (
+      operandTag instanceof StoreValueOperandTag and
+      result = getInstruction(ConditionValueTrueConstantTag())
+      or
+      operandTag instanceof AddressOperandTag and
+      result = getInstruction(ConditionValueTrueTempAddressTag())
+    )
+  }
+
+  override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+    type = getTypeForPRValue(expr.getType()) and
+    tag = ConditionValueTempVar()
+  }
+
+  override IRVariable getInstructionVariable(InstructionTag tag) {
+    result = getTempVariable(ConditionValueTempVar()) and
+    (
+      tag = ConditionValueResultTempAddressTag() or
+      tag = ConditionValueFalseTempAddressTag() or
+      tag = ConditionValueTrueTempAddressTag()
+    )
+  }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    result = "0" and tag = ConditionValueFalseConstantTag()
+    or
+    result = "1" and tag = ConditionValueTrueConstantTag()
+  }
+
+  override Instruction getResult() { result = getInstruction(ConditionValueResultLoadTag()) }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+    result = getInstruction(ConditionValueTrueTempAddressTag()) and
+    child = getCondition()
+  }
+
+  override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+    result = getInstruction(ConditionValueFalseTempAddressTag()) and
+    child = getCondition()
+  }
+
+  private TranslatedCondition getCondition() { result = getTranslatedCondition(expr) }
+}
+
+/**
+ * The IR translation of a node that is synthesized to change the value category of its operand.
+ * There are two kinds:
+ * - `TranslatedLoad` - turns a glvalue into a prvalue by loading from the location.
+ * - `TranslatedSyntheticTemporaryObject` - turns a prvalue into a glvalue by storing it to a
+ *   temporary variable.
+ */
+abstract class TranslatedValueCategoryAdjustment extends TranslatedExpr {
+  final override Instruction getFirstInstruction() { result = getOperand().getFirstInstruction() }
+
+  final override TranslatedElement getChild(int id) { result = getOperand() and id = 0 }
+
+  final override predicate producesExprResult() {
+    // Whether it is a load or a temp object, the adjustment always produces the expression's result.
+    any()
+  }
+
+  final TranslatedCoreExpr getOperand() { expr = result.getExpr() }
+}
+
+/**
+ * IR translation of an implicit lvalue-to-rvalue conversion on the result of an expression.
+ */
+class TranslatedLoad extends TranslatedValueCategoryAdjustment, TTranslatedLoad {
+  TranslatedLoad() { this = TTranslatedLoad(expr) }
+
+  override string toString() { result = "Load of " + expr.toString() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    resultType = getResultType() and
+    opcode instanceof Opcode::Load and
+    tag = LoadTag()
+  }
+
+  override predicate isResultGLValue() {
+    // The result of a load is never a glvalue, whatever the value category of `expr` is.
+    none()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = LoadTag() and
+    result = getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = getInstruction(LoadTag()) and child = getOperand()
+  }
+
+  override Instruction getResult() { result = getInstruction(LoadTag()) }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = getOperand().getResult() and
+    operandTag instanceof AddressOperandTag and
+    tag = LoadTag()
+  }
+}
+
+/**
+ * The IR translation of a temporary object that the IR synthesizes to hold a class prvalue on
+ * which a member access is going to be performed. Unlike `TranslatedTemporaryObjectExpr`, whose
+ * instances are created from `TemporaryObjectExpr` nodes in the AST, instances of
+ * `TranslatedSyntheticTemporaryObject` are synthesized during IR construction.
+ */
+class TranslatedSyntheticTemporaryObject extends TranslatedValueCategoryAdjustment,
+  TTranslatedSyntheticTemporaryObject {
+  TranslatedSyntheticTemporaryObject() { this = TTranslatedSyntheticTemporaryObject(expr) }
+
+  override string toString() { result = "Temporary materialization of " + expr.toString() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    resultType = getTypeForPRValue(expr.getType()) and
+    opcode instanceof Opcode::Store and
+    tag = InitializerStoreTag()
+    or
+    resultType = getTypeForGLValue(expr.getType()) and
+    opcode instanceof Opcode::VariableAddress and
+    tag = InitializerVariableAddressTag()
+  }
+
+  override predicate isResultGLValue() { any() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    (
+      tag = InitializerStoreTag() and
+      result = getParent().getChildSuccessor(this)
+      or
+      tag = InitializerVariableAddressTag() and
+      result = getInstruction(InitializerStoreTag())
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = getInstruction(InitializerVariableAddressTag()) and child = getOperand()
+  }
+
+  override Instruction getResult() { result = getInstruction(InitializerVariableAddressTag()) }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = InitializerStoreTag() and
+    (
+      operandTag instanceof StoreValueOperandTag and
+      result = getOperand().getResult()
+      or
+      operandTag instanceof AddressOperandTag and
+      result = getInstruction(InitializerVariableAddressTag())
+    )
+  }
+
+  final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+    type = getTypeForPRValue(expr.getType()) and
+    tag = TempObjectTempVar()
+  }
+
+  final override IRVariable getInstructionVariable(InstructionTag tag) {
+    result = getIRTempVariable(expr, TempObjectTempVar()) and
+    tag = InitializerVariableAddressTag()
+  }
+}
+
+/**
+ * IR translation of an expression that does nothing but return its result. An otherwise
+ * redundant `CopyValue` instruction is generated so that the expression has at least one
+ * instruction associated with it.
+ */
+class TranslatedResultCopy extends TranslatedExpr, TTranslatedResultCopy {
+  TranslatedResultCopy() { this = TTranslatedResultCopy(expr) }
+
+  override string toString() { result = "Result of " + expr.toString() }
+
+  /** Evaluation begins with the wrapped operand. */
+  override Instruction getFirstInstruction() {
+    result = this.getOperand().getFirstInstruction()
+  }
+
+  override TranslatedElement getChild(int id) { result = this.getOperand() and id = 0 }
+
+  /** The single `CopyValue` instruction has the same type as the operand's result. */
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::CopyValue and
+    tag = ResultCopyTag() and
+    resultType = this.getOperand().getResultType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = ResultCopyTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  /** After the operand, control moves on to the copy. */
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(ResultCopyTag()) and
+    child = this.getOperand()
+  }
+
+  override Instruction getResult() { result = this.getInstruction(ResultCopyTag()) }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = this.getOperand().getResult() and
+    tag = ResultCopyTag() and
+    operandTag instanceof UnaryOperandTag
+  }
+
+  final override predicate producesExprResult() { any() }
+
+  /** Gets the core translation of the same expression, whose result is copied. */
+  private TranslatedCoreExpr getOperand() { expr = result.getExpr() }
+}
+
+/**
+ * IR translation of a comma expression. No instruction is generated for the comma itself: the
+ * left operand is evaluated, then the right operand, and the result of the right operand is the
+ * result of the whole expression.
+ */
+class TranslatedCommaExpr extends TranslatedNonConstantExpr {
+  override CommaExpr expr;
+
+  override Instruction getFirstInstruction() {
+    result = this.getLeftOperand().getFirstInstruction()
+  }
+
+  override TranslatedElement getChild(int id) {
+    result = this.getLeftOperand() and id = 0
+    or
+    result = this.getRightOperand() and id = 1
+  }
+
+  override Instruction getResult() { result = this.getRightOperand().getResult() }
+
+  /** There are no instructions of our own, hence no instruction successors. */
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // Left operand first, then the right operand.
+    result = this.getRightOperand().getFirstInstruction() and
+    child = this.getLeftOperand()
+    or
+    // After the right operand, hand control back to the parent.
+    result = this.getParent().getChildSuccessor(this) and
+    child = this.getRightOperand()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  /** Gets the translation of the fully converted left operand. */
+  private TranslatedExpr getLeftOperand() {
+    result = getTranslatedExpr(expr.getLeftOperand().getFullyConverted())
+  }
+
+  /** Gets the translation of the fully converted right operand. */
+  private TranslatedExpr getRightOperand() {
+    result = getTranslatedExpr(expr.getRightOperand().getFullyConverted())
+  }
+}
+
+/**
+ * Gets the size of the base type of `type`, where `type` (with specifiers removed) must be a
+ * pointer type. If more than one size is available, the largest is returned.
+ */
+private int getElementSize(Type type) {
+  result =
+    max(int size | size = type.getUnspecifiedType().(PointerType).getBaseType().getSize() | size)
+}
+
+/**
+ * Common IR translation of increment and decrement operations. After the loaded operand, three
+ * instructions are emitted in order: a constant (tag `CrementConstantTag`), the arithmetic
+ * operation (tag `CrementOpTag`), and a store of the new value back to the operand's address
+ * (tag `CrementStoreTag`). Subclasses decide which instruction is the result.
+ */
+abstract class TranslatedCrementOperation extends TranslatedNonConstantExpr {
+  override CrementOperation expr;
+
+  final override TranslatedElement getChild(int id) {
+    result = this.getLoadedOperand() and id = 0
+  }
+
+  /** Gets the literal text of the constant that is added or subtracted. */
+  final override string getInstructionConstantValue(InstructionTag tag) {
+    tag = CrementConstantTag() and
+    exists(Type unspecified | unspecified = expr.getUnspecifiedType() |
+      unspecified instanceof FloatingPointType and result = "1.0"
+      or
+      unspecified instanceof IntegralOrEnumType and result = "1"
+      or
+      unspecified instanceof PointerType and result = "1"
+    )
+  }
+
+  /**
+   * Gets the type of the constant instruction: the expression's own prvalue type for arithmetic
+   * types, and the `int` type for pointers.
+   */
+  private CppType getConstantType() {
+    exists(Type unspecified | unspecified = expr.getUnspecifiedType() |
+      unspecified instanceof PointerType and result = getIntType()
+      or
+      unspecified instanceof ArithmeticType and
+      result = getTypeForPRValue(expr.getType())
+    )
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // The constant operand.
+    opcode instanceof Opcode::Constant and
+    tag = CrementConstantTag() and
+    resultType = this.getConstantType()
+    or
+    // The add/subtract itself.
+    opcode = this.getOpcode() and
+    tag = CrementOpTag() and
+    resultType = getTypeForPRValue(expr.getType())
+    or
+    // The write-back of the new value.
+    opcode instanceof Opcode::Store and
+    tag = CrementStoreTag() and
+    resultType = getTypeForPRValue(expr.getType())
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // Operation: <loaded value> op <constant>.
+    tag = CrementOpTag() and
+    (
+      result = this.getLoadedOperand().getResult() and
+      operandTag instanceof LeftOperandTag
+      or
+      result = this.getInstruction(CrementConstantTag()) and
+      operandTag instanceof RightOperandTag
+    )
+    or
+    // Store: write the operation's result to the address of the unloaded operand.
+    tag = CrementStoreTag() and
+    (
+      result = this.getUnloadedOperand().getResult() and
+      operandTag instanceof AddressOperandTag
+      or
+      result = this.getInstruction(CrementOpTag()) and
+      operandTag instanceof StoreValueOperandTag
+    )
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getLoadedOperand().getFirstInstruction()
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    (
+      tag = CrementConstantTag() and
+      result = this.getInstruction(CrementOpTag())
+      or
+      tag = CrementOpTag() and
+      result = this.getInstruction(CrementStoreTag())
+      or
+      tag = CrementStoreTag() and
+      result = this.getParent().getChildSuccessor(this)
+    ) and
+    kind instanceof GotoEdge
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(CrementConstantTag()) and
+    child = this.getLoadedOperand()
+  }
+
+  /** Gets the element size for the operation, which is defined only for pointer arithmetic. */
+  final override int getInstructionElementSize(InstructionTag tag) {
+    tag = CrementOpTag() and
+    exists(Opcode op | op = this.getOpcode() |
+      op instanceof Opcode::PointerAdd or
+      op instanceof Opcode::PointerSub
+    ) and
+    result = getElementSize(expr.getType())
+  }
+
+  /**
+   * Gets the `TranslatedLoad` on the `e` in this `e++`, i.e. the element holding the value that
+   * is to be cremented. The `needsLoadForParentExpr` predicate guarantees that `e` has a load.
+   */
+  final TranslatedLoad getLoadedOperand() {
+    result = getTranslatedExpr(expr.getOperand().getFullyConverted())
+  }
+
+  /**
+   * Gets the element that computes the address to which the result of this crement is stored.
+   */
+  final TranslatedExpr getUnloadedOperand() { result = this.getLoadedOperand().getOperand() }
+
+  /**
+   * Gets the opcode of the arithmetic instruction: `PointerAdd`/`PointerSub` when the result type
+   * is a pointer, and `Add`/`Sub` otherwise, for increments and decrements respectively.
+   */
+  final Opcode getOpcode() {
+    exists(Type unspecified | unspecified = expr.getUnspecifiedType() |
+      expr instanceof IncrementOperation and
+      (
+        unspecified instanceof PointerType and result instanceof Opcode::PointerAdd
+        or
+        not unspecified instanceof PointerType and result instanceof Opcode::Add
+      )
+      or
+      expr instanceof DecrementOperation and
+      (
+        unspecified instanceof PointerType and result instanceof Opcode::PointerSub
+        or
+        not unspecified instanceof PointerType and result instanceof Opcode::Sub
+      )
+    )
+  }
+}
+
+/**
+ * IR translation of a prefix increment or decrement (`++e` / `--e`).
+ */
+class TranslatedPrefixCrementOperation extends TranslatedCrementOperation {
+  override PrefixCrementOperation expr;
+
+  override Instruction getResult() {
+    // In C, the result of a prefix crement is a prvalue holding the new value assigned to the
+    // operand. In C++ the result is an lvalue, but it can still show up as a prvalue when that
+    // lvalue is loaded as part of this expression; EDG doesn't mark this as a load.
+    expr.isPRValueCategory() and
+    result = this.getInstruction(CrementOpTag())
+    or
+    // Otherwise this is C++, where the result is an lvalue for the operand and that lvalue is
+    // not loaded as part of this expression.
+    not expr.isPRValueCategory() and
+    result = this.getUnloadedOperand().getResult()
+  }
+}
+
+/**
+ * IR translation of a postfix increment or decrement (`e++` / `e--`). The result is the value
+ * produced by the load of the operand, not the value computed by the crement operation.
+ */
+class TranslatedPostfixCrementOperation extends TranslatedCrementOperation {
+  override PostfixCrementOperation expr;
+
+  override Instruction getResult() { result = this.getLoadedOperand().getResult() }
+}
+
+/**
+ * IR translation of an array access expression such as `a[i]`. The array being accessed is either
+ * a prvalue of pointer type (possibly as the result of an implicit array-to-pointer conversion)
+ * or a glvalue of a GNU vector type.
+ *
+ * The base is evaluated first, then the offset, and a single `PointerAdd` instruction combines
+ * them into the address of the element.
+ */
+class TranslatedArrayExpr extends TranslatedNonConstantExpr {
+  override ArrayExpr expr;
+
+  final override Instruction getFirstInstruction() {
+    result = this.getBaseOperand().getFirstInstruction()
+  }
+
+  final override TranslatedElement getChild(int id) {
+    result = this.getBaseOperand() and id = 0
+    or
+    result = this.getOffsetOperand() and id = 1
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // Base, then offset...
+    result = this.getOffsetOperand().getFirstInstruction() and
+    child = this.getBaseOperand()
+    or
+    // ...then the address computation.
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getOffsetOperand()
+  }
+
+  override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  /** The result is a glvalue of the expression's type, computed by `PointerAdd`. */
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::PointerAdd and
+    tag = OnlyInstructionTag() and
+    resultType = getTypeForGLValue(expr.getType())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = OnlyInstructionTag() and
+    (
+      result = this.getBaseOperand().getResult() and
+      operandTag instanceof LeftOperandTag
+      or
+      result = this.getOffsetOperand().getResult() and
+      operandTag instanceof RightOperandTag
+    )
+  }
+
+  /** Gets the element size of the `PointerAdd`: the size of the accessed element's type. */
+  override int getInstructionElementSize(InstructionTag tag) {
+    tag = OnlyInstructionTag() and
+    result = max(int size | size = expr.getUnspecifiedType().getSize() | size)
+  }
+
+  /** Gets the translation of the fully converted array base. */
+  private TranslatedExpr getBaseOperand() {
+    result = getTranslatedExpr(expr.getArrayBase().getFullyConverted())
+  }
+
+  /** Gets the translation of the fully converted array offset. */
+  private TranslatedExpr getOffsetOperand() {
+    result = getTranslatedExpr(expr.getArrayOffset().getFullyConverted())
+  }
+}
+
+/**
+ * IR translation of an expression that generates no instructions of its own: first instruction,
+ * result, and control flow are all forwarded to or from its single operand.
+ */
+abstract class TranslatedTransparentExpr extends TranslatedNonConstantExpr {
+  final override Instruction getFirstInstruction() {
+    result = this.getOperand().getFirstInstruction()
+  }
+
+  final override TranslatedElement getChild(int id) { result = this.getOperand() and id = 0 }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  /** After the operand, control goes straight back to the parent. */
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getParent().getChildSuccessor(this) and
+    child = this.getOperand()
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  final override Instruction getResult() { result = this.getOperand().getResult() }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  /** Gets the translated operand to which this expression is transparent. */
+  abstract TranslatedExpr getOperand();
+}
+
+/**
+ * IR translation of the unary operations that are treated as transparent: pointer dereference,
+ * address-of, and the built-in address-of operation.
+ */
+class TranslatedTransparentUnaryOperation extends TranslatedTransparentExpr {
+  override UnaryOperation expr;
+
+  TranslatedTransparentUnaryOperation() {
+    // `*p` is the same as `p` until the result is loaded.
+    expr instanceof PointerDereferenceExpr
+    or
+    // `&x` is the same as `x`. `&x` isn't loadable, but it is handled here to avoid having two
+    // nearly identical classes.
+    expr instanceof AddressOfExpr
+    or
+    expr instanceof BuiltInOperationBuiltInAddressOf
+  }
+
+  override TranslatedExpr getOperand() {
+    result = getTranslatedExpr(expr.getOperand().getFullyConverted())
+  }
+}
+
+/**
+ * IR translation of the conversions that are treated as transparent: parenthesis expressions,
+ * reference dereferences, and reference-to expressions.
+ */
+class TranslatedTransparentConversion extends TranslatedTransparentExpr {
+  override Conversion expr;
+
+  TranslatedTransparentConversion() {
+    expr instanceof ParenthesisExpr
+    or
+    expr instanceof ReferenceDereferenceExpr
+    or
+    expr instanceof ReferenceToExpr
+  }
+
+  /** Gets the translation of the expression being converted. */
+  override TranslatedExpr getOperand() { result = getTranslatedExpr(expr.getExpr()) }
+}
+
+/**
+ * IR translation of a `this` expression. Two instructions are generated: a `VariableAddress` of
+ * the enclosing function's `this` variable, followed by a `Load` from that address, which is the
+ * result.
+ */
+class TranslatedThisExpr extends TranslatedNonConstantExpr {
+  override ThisExpr expr;
+
+  final override TranslatedElement getChild(int id) { none() }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // Address of the `this` variable, typed as a glvalue of unknown type.
+    opcode instanceof Opcode::VariableAddress and
+    tag = ThisAddressTag() and
+    resultType = getTypeForGLValue(any(UnknownType t))
+    or
+    // Load of the `this` pointer itself.
+    opcode instanceof Opcode::Load and
+    tag = ThisLoadTag() and
+    resultType = this.getResultType()
+  }
+
+  final override Instruction getResult() { result = this.getInstruction(ThisLoadTag()) }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getInstruction(ThisAddressTag())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    (
+      tag = ThisAddressTag() and
+      result = this.getInstruction(ThisLoadTag())
+      or
+      tag = ThisLoadTag() and
+      result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  /** The load reads from the address produced by the `VariableAddress` instruction. */
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = this.getInstruction(ThisAddressTag()) and
+    tag = ThisLoadTag() and
+    operandTag instanceof AddressOperandTag
+  }
+
+  override IRVariable getInstructionVariable(InstructionTag tag) {
+    result = this.getEnclosingFunction().getThisVariable() and
+    tag = ThisAddressTag()
+  }
+}
+
+/**
+ * Common IR translation of variable accesses. A single instruction (tag `OnlyInstructionTag`)
+ * is the result; it follows the qualifier, if the access has one.
+ */
+abstract class TranslatedVariableAccess extends TranslatedNonConstantExpr {
+  override VariableAccess expr;
+
+  final override TranslatedElement getChild(int id) {
+    // The qualifier might not exist, in which case there are no children.
+    result = this.getQualifier() and id = 0
+  }
+
+  /** Gets the translation of the fully converted qualifier of the access, if any. */
+  final TranslatedExpr getQualifier() {
+    result = getTranslatedExpr(expr.getQualifier().getFullyConverted())
+  }
+
+  override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getQualifier()
+  }
+}
+
+/**
+ * IR translation of an access to a variable that is not a field. The access becomes a
+ * `VariableAddress` instruction for the target variable.
+ */
+class TranslatedNonFieldVariableAccess extends TranslatedVariableAccess {
+  TranslatedNonFieldVariableAccess() { not expr instanceof FieldAccess }
+
+  /** Starts with the qualifier when there is one, and with the address instruction otherwise. */
+  override Instruction getFirstInstruction() {
+    exists(this.getQualifier()) and
+    result = this.getQualifier().getFirstInstruction()
+    or
+    not exists(this.getQualifier()) and
+    result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::VariableAddress and
+    tag = OnlyInstructionTag() and
+    resultType = getTypeForGLValue(expr.getType())
+  }
+
+  /** Gets the IR variable for the access target, within the function enclosing the access. */
+  override IRVariable getInstructionVariable(InstructionTag tag) {
+    result = getIRUserVariable(expr.getEnclosingFunction(), expr.getTarget()) and
+    tag = OnlyInstructionTag()
+  }
+}
+
+/**
+ * IR translation of a field access. The qualifier is evaluated first, and a `FieldAddress`
+ * instruction then computes the address of the target field from the qualifier's result.
+ */
+class TranslatedFieldAccess extends TranslatedVariableAccess {
+  override FieldAccess expr;
+
+  override Instruction getFirstInstruction() {
+    result = this.getQualifier().getFirstInstruction()
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = this.getQualifier().getResult() and
+    tag = OnlyInstructionTag() and
+    operandTag instanceof UnaryOperandTag
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::FieldAddress and
+    tag = OnlyInstructionTag() and
+    resultType = getTypeForGLValue(expr.getType())
+  }
+
+  /** Gets the field whose address the instruction computes. */
+  override Field getInstructionField(InstructionTag tag) {
+    result = expr.getTarget() and
+    tag = OnlyInstructionTag()
+  }
+}
+
+/**
+ * IR translation of a function access. It has no children and generates a single
+ * `FunctionAddress` instruction for the target function.
+ */
+class TranslatedFunctionAccess extends TranslatedNonConstantExpr {
+  override FunctionAccess expr;
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override Instruction getFirstInstruction() {
+    result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::FunctionAddress and
+    tag = OnlyInstructionTag() and
+    resultType = this.getResultType()
+  }
+
+  /** Gets the function whose address the instruction yields. */
+  override Function getInstructionFunction(InstructionTag tag) {
+    result = expr.getTarget() and
+    tag = OnlyInstructionTag()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+}
+
+/**
+ * IR translation of an expression whose value is not known at compile time, i.e. a value
+ * expression for which `isIRConstant` does not hold.
+ */
+abstract class TranslatedNonConstantExpr extends TranslatedCoreExpr, TTranslatedValueExpr {
+  TranslatedNonConstantExpr() {
+    not isIRConstant(expr) and
+    this = TTranslatedValueExpr(expr)
+  }
+}
+
+/**
+ * IR translation of an expression whose value is a compile-time constant. Besides literals,
+ * this also covers "integral constant expressions" such as `1 + 2`.
+ *
+ * Such an expression has no children and produces exactly one instruction, whose opcode is
+ * supplied by the subclass through `getOpcode`.
+ */
+abstract class TranslatedConstantExpr extends TranslatedCoreExpr, TTranslatedValueExpr {
+  TranslatedConstantExpr() {
+    isIRConstant(expr) and
+    this = TTranslatedValueExpr(expr)
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  final override TranslatedElement getChild(int id) { none() }
+
+  final override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode = this.getOpcode() and
+    tag = OnlyInstructionTag() and
+    resultType = this.getResultType()
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  /** Gets the opcode of the single instruction generated for this constant. */
+  abstract Opcode getOpcode();
+}
+
+/**
+ * IR translation of a constant expression that is not a string literal. It becomes a `Constant`
+ * instruction carrying the expression's value.
+ */
+class TranslatedArithmeticLiteral extends TranslatedConstantExpr {
+  TranslatedArithmeticLiteral() { not expr instanceof StringLiteral }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    result = expr.getValue() and
+    tag = OnlyInstructionTag()
+  }
+
+  override Opcode getOpcode() { result instanceof Opcode::Constant }
+}
+
+/**
+ * IR translation of a constant string literal. It becomes a `StringConstant` instruction that
+ * refers back to the literal itself.
+ */
+class TranslatedStringLiteral extends TranslatedConstantExpr {
+  override StringLiteral expr;
+
+  override StringLiteral getInstructionStringLiteral(InstructionTag tag) {
+    result = expr and
+    tag = OnlyInstructionTag()
+  }
+
+  override Opcode getOpcode() { result instanceof Opcode::StringConstant }
+}
+
+/**
+ * IR translation of an expression that applies one operation to its operands and returns the
+ * result of that operation. Exactly one instruction (tag `OnlyInstructionTag`) is generated.
+ */
+abstract class TranslatedSingleInstructionExpr extends TranslatedNonConstantExpr {
+  /**
+   * Gets the `Opcode` of the operation that this expression performs.
+   */
+  abstract Opcode getOpcode();
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = OnlyInstructionTag() and
+    opcode = this.getOpcode() and
+    resultType = this.getResultType()
+  }
+
+  final override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+}
+
+/**
+ * IR translation of the unary operations that map to a single instruction: logical not,
+ * bitwise complement, unary plus, and unary minus.
+ */
+class TranslatedUnaryExpr extends TranslatedSingleInstructionExpr {
+  TranslatedUnaryExpr() {
+    expr instanceof NotExpr
+    or
+    expr instanceof ComplementExpr
+    or
+    expr instanceof UnaryPlusExpr
+    or
+    expr instanceof UnaryMinusExpr
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getOperand().getFirstInstruction()
+  }
+
+  final override TranslatedElement getChild(int id) { result = this.getOperand() and id = 0 }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getOperand()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    operandTag instanceof UnaryOperandTag and
+    tag = OnlyInstructionTag() and
+    result = this.getOperand().getResult()
+  }
+
+  /** Maps each supported kind of unary expression to its opcode. */
+  final override Opcode getOpcode() {
+    result instanceof Opcode::LogicalNot and expr instanceof NotExpr
+    or
+    result instanceof Opcode::BitComplement and expr instanceof ComplementExpr
+    or
+    // Unary plus just copies its operand.
+    result instanceof Opcode::CopyValue and expr instanceof UnaryPlusExpr
+    or
+    result instanceof Opcode::Negate and expr instanceof UnaryMinusExpr
+  }
+
+  /** Gets the translation of the fully converted operand. */
+  private TranslatedExpr getOperand() {
+    result = getTranslatedExpr(expr.(UnaryOperation).getOperand().getFullyConverted())
+  }
+}
+
+/**
+ * Common IR translation of conversion expressions. The single child is the translation of the
+ * expression being converted, and evaluation starts there.
+ */
+abstract class TranslatedConversion extends TranslatedNonConstantExpr {
+  override Conversion expr;
+
+  override Instruction getFirstInstruction() {
+    result = this.getOperand().getFirstInstruction()
+  }
+
+  final override TranslatedElement getChild(int id) { result = this.getOperand() and id = 0 }
+
+  /** Gets the translation of the expression being converted. */
+  final TranslatedExpr getOperand() { result = getTranslatedExpr(expr.getExpr()) }
+}
+
+/**
+ * The translation of a conversion expression that generates a single instruction, which takes
+ * the operand's result as its unary operand.
+ */
+abstract class TranslatedSingleInstructionConversion extends TranslatedConversion {
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getOperand()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode = this.getOpcode() and
+    tag = OnlyInstructionTag() and
+    resultType = this.getResultType()
+  }
+
+  override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = this.getOperand().getResult() and
+    tag = OnlyInstructionTag() and
+    operandTag instanceof UnaryOperandTag
+  }
+
+  /**
+   * Gets the opcode of the instruction generated for this conversion.
+   */
+  abstract Opcode getOpcode();
+}
+
+/**
+ * The translation of a conversion expression that generates a plain `Convert` instruction.
+ */
+class TranslatedSimpleConversion extends TranslatedSingleInstructionConversion {
+  TranslatedSimpleConversion() {
+    expr instanceof ArithmeticConversion
+    or
+    expr instanceof PointerConversion
+    or
+    expr instanceof PointerToMemberConversion
+    or
+    expr instanceof PointerToIntegralConversion
+    or
+    expr instanceof IntegralToPointerConversion
+    or
+    expr instanceof GlvalueConversion
+    or
+    expr instanceof ArrayToPointerConversion
+    or
+    expr instanceof PrvalueAdjustmentConversion
+    or
+    expr instanceof VoidConversion
+  }
+
+  override Opcode getOpcode() { result instanceof Opcode::Convert }
+}
+
+/**
+ * The translation of a `dynamic_cast` expression.
+ */
+class TranslatedDynamicCast extends TranslatedSingleInstructionConversion {
+  TranslatedDynamicCast() { expr instanceof DynamicCast }
+
+  /**
+   * Gets the opcode, chosen from the cast's result type: `CompleteObjectAddress` for a pointer
+   * to `void`, `CheckedConvertOrNull` for any other pointer, and `CheckedConvertOrThrow` for a
+   * non-pointer result type.
+   */
+  override Opcode getOpcode() {
+    exists(Type castType | castType = expr.getUnspecifiedType() |
+      castType instanceof PointerType and
+      castType.(PointerType).getBaseType() instanceof VoidType and
+      result instanceof Opcode::CompleteObjectAddress
+      or
+      castType instanceof PointerType and
+      not castType.(PointerType).getBaseType() instanceof VoidType and
+      result instanceof Opcode::CheckedConvertOrNull
+      or
+      not castType instanceof PointerType and
+      result instanceof Opcode::CheckedConvertOrThrow
+    )
+  }
+}
+
+/**
+ * The translation of a `BaseClassConversion` or `DerivedClassConversion` expression.
+ */
+class TranslatedInheritanceConversion extends TranslatedSingleInstructionConversion {
+  override InheritanceConversion expr;
+
+  /** Reports the base and derived classes involved in the conversion. */
+  override predicate getInstructionInheritance(
+    InstructionTag tag, Class baseClass, Class derivedClass
+  ) {
+    baseClass = expr.getBaseClass() and
+    derivedClass = expr.getDerivedClass() and
+    tag = OnlyInstructionTag()
+  }
+
+  /**
+   * Gets the opcode: `ConvertToVirtualBase` or `ConvertToNonVirtualBase` for a base class
+   * conversion, depending on whether it is virtual, and `ConvertToDerived` otherwise.
+   */
+  override Opcode getOpcode() {
+    expr instanceof BaseClassConversion and
+    (
+      expr.(BaseClassConversion).isVirtual() and
+      result instanceof Opcode::ConvertToVirtualBase
+      or
+      not expr.(BaseClassConversion).isVirtual() and
+      result instanceof Opcode::ConvertToNonVirtualBase
+    )
+    or
+    not expr instanceof BaseClassConversion and
+    result instanceof Opcode::ConvertToDerived
+  }
+}
+
+/**
+ * The translation of a `BoolConversion` expression, which is lowered to a comparison of the
+ * operand against zero: a `Constant` "0" of the operand's type, followed by a `CompareNE`.
+ */
+class TranslatedBoolConversion extends TranslatedConversion {
+  override BoolConversion expr;
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    (
+      tag = BoolConversionConstantTag() and
+      result = this.getInstruction(BoolConversionCompareTag())
+      or
+      tag = BoolConversionCompareTag() and
+      result = this.getParent().getChildSuccessor(this)
+    ) and
+    kind instanceof GotoEdge
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(BoolConversionConstantTag()) and
+    child = this.getOperand()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // The zero constant has the same type as the operand's result.
+    opcode instanceof Opcode::Constant and
+    tag = BoolConversionConstantTag() and
+    resultType = this.getOperand().getResultType()
+    or
+    // The comparison yields a `bool`.
+    opcode instanceof Opcode::CompareNE and
+    tag = BoolConversionCompareTag() and
+    resultType = getBoolType()
+  }
+
+  override Instruction getResult() { result = this.getInstruction(BoolConversionCompareTag()) }
+
+  /** The comparison is `<operand> != <zero constant>`. */
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = BoolConversionCompareTag() and
+    (
+      result = this.getOperand().getResult() and
+      operandTag instanceof LeftOperandTag
+      or
+      result = this.getInstruction(BoolConversionConstantTag()) and
+      operandTag instanceof RightOperandTag
+    )
+  }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    result = "0" and
+    tag = BoolConversionConstantTag()
+  }
+}
+
+/** Gets the opcode corresponding to the binary bitwise operation `expr`. */
+private Opcode binaryBitwiseOpcode(BinaryBitwiseOperation expr) {
+  result instanceof Opcode::ShiftLeft and expr instanceof LShiftExpr
+  or
+  result instanceof Opcode::ShiftRight and expr instanceof RShiftExpr
+  or
+  result instanceof Opcode::BitAnd and expr instanceof BitwiseAndExpr
+  or
+  result instanceof Opcode::BitOr and expr instanceof BitwiseOrExpr
+  or
+  result instanceof Opcode::BitXor and expr instanceof BitwiseXorExpr
+}
+
+/**
+ * Gets the opcode corresponding to the binary arithmetic operation `expr`. The real/imaginary
+ * variants of addition, subtraction, multiplication and division share the opcode of the plain
+ * operation.
+ */
+private Opcode binaryArithmeticOpcode(BinaryArithmeticOperation expr) {
+  result instanceof Opcode::Add and
+  (
+    expr instanceof AddExpr or
+    expr instanceof ImaginaryRealAddExpr or
+    expr instanceof RealImaginaryAddExpr
+  )
+  or
+  result instanceof Opcode::Sub and
+  (
+    expr instanceof SubExpr or
+    expr instanceof ImaginaryRealSubExpr or
+    expr instanceof RealImaginarySubExpr
+  )
+  or
+  result instanceof Opcode::Mul and
+  (
+    expr instanceof MulExpr or
+    expr instanceof ImaginaryMulExpr
+  )
+  or
+  result instanceof Opcode::Div and
+  (
+    expr instanceof DivExpr or
+    expr instanceof ImaginaryDivExpr
+  )
+  or
+  result instanceof Opcode::Rem and expr instanceof RemExpr
+  or
+  result instanceof Opcode::PointerAdd and expr instanceof PointerAddExpr
+  or
+  result instanceof Opcode::PointerSub and expr instanceof PointerSubExpr
+  or
+  result instanceof Opcode::PointerDiff and expr instanceof PointerDiffExpr
+}
+
+/** Gets the opcode corresponding to the comparison operation `expr`. */
+private Opcode comparisonOpcode(ComparisonOperation expr) {
+  result instanceof Opcode::CompareEQ and expr instanceof EQExpr
+  or
+  result instanceof Opcode::CompareNE and expr instanceof NEExpr
+  or
+  result instanceof Opcode::CompareLT and expr instanceof LTExpr
+  or
+  result instanceof Opcode::CompareGT and expr instanceof GTExpr
+  or
+  result instanceof Opcode::CompareLE and expr instanceof LEExpr
+  or
+  result instanceof Opcode::CompareGE and expr instanceof GEExpr
+}
+
+/**
+ * IR translation of a simple binary operation: a binary arithmetic operation, a binary bitwise
+ * operation, or a comparison. The left operand is evaluated before the right operand, and one
+ * instruction then combines their results.
+ */
+class TranslatedBinaryOperation extends TranslatedSingleInstructionExpr {
+  TranslatedBinaryOperation() {
+    expr instanceof BinaryArithmeticOperation
+    or
+    expr instanceof BinaryBitwiseOperation
+    or
+    expr instanceof ComparisonOperation
+  }
+
+  override Instruction getFirstInstruction() {
+    result = this.getLeftOperand().getFirstInstruction()
+  }
+
+  final override TranslatedElement getChild(int id) {
+    result = this.getLeftOperand() and id = 0
+    or
+    result = this.getRightOperand() and id = 1
+  }
+
+  /**
+   * Gets the instruction feeding operand `operandTag` of the operation. Source operands map to
+   * the same-side instruction operands, unless `swapOperandsOnOp` holds, in which case the two
+   * are exchanged.
+   */
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = OnlyInstructionTag() and
+    (
+      this.swapOperandsOnOp() and
+      (
+        result = this.getLeftOperand().getResult() and
+        operandTag instanceof RightOperandTag
+        or
+        result = this.getRightOperand().getResult() and
+        operandTag instanceof LeftOperandTag
+      )
+      or
+      not this.swapOperandsOnOp() and
+      (
+        result = this.getLeftOperand().getResult() and
+        operandTag instanceof LeftOperandTag
+        or
+        result = this.getRightOperand().getResult() and
+        operandTag instanceof RightOperandTag
+      )
+    )
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // Left operand, then right operand...
+    result = this.getRightOperand().getFirstInstruction() and
+    child = this.getLeftOperand()
+    or
+    // ...then the operation itself.
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getRightOperand()
+  }
+
+  override Opcode getOpcode() {
+    result = binaryArithmeticOpcode(expr.(BinaryArithmeticOperation))
+    or
+    result = binaryBitwiseOpcode(expr.(BinaryBitwiseOperation))
+    or
+    result = comparisonOpcode(expr.(ComparisonOperation))
+  }
+
+  /**
+   * Gets the element size for the instruction, which is defined only for the pointer arithmetic
+   * opcodes and is derived from the type of the pointer operand.
+   */
+  override int getInstructionElementSize(InstructionTag tag) {
+    tag = OnlyInstructionTag() and
+    (
+      this.getOpcode() instanceof Opcode::PointerAdd or
+      this.getOpcode() instanceof Opcode::PointerSub or
+      this.getOpcode() instanceof Opcode::PointerDiff
+    ) and
+    result = getElementSize(this.getPointerOperand().getExpr().getType())
+  }
+
+  /** Gets the operand treated as the pointer: the right one if swapped, else the left one. */
+  private TranslatedExpr getPointerOperand() {
+    this.swapOperandsOnOp() and result = this.getRightOperand()
+    or
+    not this.swapOperandsOnOp() and result = this.getLeftOperand()
+  }
+
+  /**
+   * Holds if the operands should be exchanged on the instruction. This is the case for a pointer
+   * add of the form `i + p`, so that the pointer operand always comes first. Note that the
+   * operands are still evaluated left-to-right.
+   */
+  private predicate swapOperandsOnOp() {
+    expr.(PointerAddExpr).getRightOperand().getUnspecifiedType() instanceof PointerType
+  }
+
+  /** Gets the translation of the fully converted left operand. */
+  private TranslatedExpr getLeftOperand() {
+    result = getTranslatedExpr(expr.(BinaryOperation).getLeftOperand().getFullyConverted())
+  }
+
+  /** Gets the translation of the fully converted right operand. */
+  private TranslatedExpr getRightOperand() {
+    result = getTranslatedExpr(expr.(BinaryOperation).getRightOperand().getFullyConverted())
+  }
+}
+
+/**
+ * The IR translation of a simple assignment expression (`AssignExpr`).
+ *
+ * The right operand is evaluated first, then the left operand, and finally a
+ * `Store` instruction writes the right operand's result to the address
+ * produced by the left operand.
+ */
+class TranslatedAssignExpr extends TranslatedNonConstantExpr {
+  override AssignExpr expr;
+
+  /** Gets the translation of the fully converted assignment target. */
+  final TranslatedExpr getLeftOperand() {
+    result = getTranslatedExpr(expr.getLValue().getFullyConverted())
+  }
+
+  /** Gets the translation of the fully converted assigned value. */
+  final TranslatedExpr getRightOperand() {
+    result = getTranslatedExpr(expr.getRValue().getFullyConverted())
+  }
+
+  abstract Instruction getStoredValue();
+
+  final override TranslatedElement getChild(int id) {
+    id = 1 and result = this.getRightOperand()
+    or
+    id = 0 and result = this.getLeftOperand()
+  }
+
+  final override Instruction getFirstInstruction() {
+    // The right operand is evaluated before the left one.
+    result = this.getRightOperand().getFirstInstruction()
+  }
+
+  final override Instruction getResult() {
+    // In C the result of an assignment is a prvalue holding the value that was
+    // assigned. In C++ the result is an lvalue, but when the expression is
+    // nevertheless categorized as a prvalue that lvalue is being loaded as
+    // part of this expression (EDG doesn't mark this as a load), so the
+    // assigned value is used in that case as well.
+    expr.isPRValueCategory() and
+    result = this.getRightOperand().getResult()
+    or
+    // Otherwise this is C++ and the result is the (unloaded) lvalue of the
+    // left operand.
+    not expr.isPRValueCategory() and
+    result = this.getLeftOperand().getResult()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::Store and
+    tag = AssignmentStoreTag() and
+    // The store always produces a prvalue.
+    resultType = getTypeForPRValue(expr.getType())
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = AssignmentStoreTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // Right-to-left evaluation: right operand, then left operand, then the store.
+    child = this.getLeftOperand() and
+    result = this.getInstruction(AssignmentStoreTag())
+    or
+    child = this.getRightOperand() and
+    result = this.getLeftOperand().getFirstInstruction()
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = AssignmentStoreTag() and
+    (
+      // The value being stored comes from the right operand ...
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getRightOperand().getResult()
+      or
+      // ... and the destination address from the left operand.
+      operandTag instanceof AddressOperandTag and
+      result = this.getLeftOperand().getResult()
+    )
+  }
+}
+
+/**
+ * The IR translation of a compound assignment (`AssignOperation`), such as
+ * `e += ...`.
+ *
+ * The generated instructions are, in order: an optional `Convert` of the
+ * loaded left operand (`AssignOperationConvertLeftTag`), the operation itself
+ * (`AssignOperationOpTag`), an optional `Convert` of the operation's result
+ * back to the left operand's type (`AssignOperationConvertResultTag`), and a
+ * `Store` (`AssignmentStoreTag`). The two conversions are only generated when
+ * `leftOperandNeedsConversion()` holds.
+ */
+class TranslatedAssignOperation extends TranslatedNonConstantExpr {
+ override AssignOperation expr;
+
+ final override TranslatedElement getChild(int id) {
+ id = 0 and result = getLoadedLeftOperand()
+ or
+ id = 1 and result = getRightOperand()
+ }
+
+ final override Instruction getFirstInstruction() {
+ // Evaluation is right-to-left
+ result = getRightOperand().getFirstInstruction()
+ }
+
+ final override Instruction getResult() {
+ if expr.isPRValueCategory()
+ then
+ // If this is C, then the result of an assignment is a prvalue for the new
+ // value assigned to the left operand. If this is C++, then the result is
+ // an lvalue, but that lvalue is being loaded as part of this expression.
+ // EDG doesn't mark this as a load.
+ result = getStoredValue()
+ else
+ // This is C++, where the result is an lvalue for the left operand,
+ // and that lvalue is not being loaded as part of this expression.
+ result = getUnloadedLeftOperand().getResult()
+ }
+
+ /**
+ * Gets the operand of the load on the left-hand side, i.e. the translation of
+ * the left operand before it is loaded. Its result is used as the address
+ * operand of the final store.
+ */
+ final TranslatedExpr getUnloadedLeftOperand() { result = getLoadedLeftOperand().getOperand() }
+
+ /**
+ * Gets the `TranslatedLoad` on the `e` in this `e += ...` which is the
+ * element that holds the value to be updated. It's guaranteed that there's
+ * a load on `e` because of the `needsLoadForParentExpr` predicate.
+ */
+ final TranslatedLoad getLoadedLeftOperand() {
+ result = getTranslatedExpr(expr.getLValue().getFullyConverted())
+ }
+
+ /**
+ * Gets the translation of the fully converted right operand, i.e. the `...`
+ * in `e += ...`.
+ */
+ final TranslatedExpr getRightOperand() {
+ result = getTranslatedExpr(expr.getRValue().getFullyConverted())
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ // All instructions are chained with plain `GotoEdge`s:
+ // [ConvertLeft ->] Op [-> ConvertResult] -> Store -> parent's successor.
+ kind instanceof GotoEdge and
+ (
+ tag = AssignOperationConvertLeftTag() and
+ result = getInstruction(AssignOperationOpTag())
+ or
+ (
+ tag = AssignOperationOpTag() and
+ if leftOperandNeedsConversion()
+ then result = getInstruction(AssignOperationConvertResultTag())
+ else result = getInstruction(AssignmentStoreTag())
+ )
+ or
+ tag = AssignOperationConvertResultTag() and
+ result = getInstruction(AssignmentStoreTag())
+ or
+ tag = AssignmentStoreTag() and
+ result = getParent().getChildSuccessor(this)
+ )
+ }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ // Operands are evaluated right-to-left.
+ child = getRightOperand() and
+ result = getLoadedLeftOperand().getFirstInstruction()
+ or
+ // After the left operand has been loaded, continue with the conversion of
+ // the left operand if one is needed, or directly with the operation.
+ child = getLoadedLeftOperand() and
+ if leftOperandNeedsConversion()
+ then result = getInstruction(AssignOperationConvertLeftTag())
+ else result = getInstruction(AssignOperationOpTag())
+ }
+
+ /**
+ * Gets the instruction whose result is written by the store: the result
+ * conversion if the left operand needs conversion, otherwise the operation
+ * itself.
+ */
+ private Instruction getStoredValue() {
+ if leftOperandNeedsConversion()
+ then result = getInstruction(AssignOperationConvertResultTag())
+ else result = getInstruction(AssignOperationOpTag())
+ }
+
+ /**
+ * Gets the type in which the operation is performed, which is also the type
+ * the loaded left operand is converted to before the operation.
+ */
+ private Type getConvertedLeftOperandType() {
+ if
+ expr instanceof AssignLShiftExpr or
+ expr instanceof AssignRShiftExpr or
+ expr instanceof AssignPointerAddExpr or
+ expr instanceof AssignPointerSubExpr
+ then
+ // No need to convert for a shift. Technically, the left side should
+ // undergo integral promotion, and then the result would be converted back
+ // to the destination type. There's not much point to this, though,
+ // because the result will be the same for any well-defined program
+ // anyway. If we really want to model this case perfectly, we'll need the
+ // extractor to tell us what the promoted type of the left operand would
+ // be.
+ // Note that this branch is also taken for pointer add/sub assignments,
+ // which likewise keep the type of the left operand.
+ result = getLoadedLeftOperand().getExpr().getType()
+ else
+ // The right operand has already been converted to the type of the op.
+ result = getRightOperand().getExpr().getType()
+ }
+
+ /**
+ * Holds if the unspecified type in which the operation is performed differs
+ * from the unspecified type of the loaded left operand, so that conversion
+ * instructions are required around the operation.
+ */
+ private predicate leftOperandNeedsConversion() {
+ getConvertedLeftOperandType().getUnspecifiedType() !=
+ getLoadedLeftOperand().getExpr().getUnspecifiedType()
+ }
+
+ /**
+ * Gets the opcode of the operation instruction, chosen by the concrete kind
+ * of compound assignment that `expr` is.
+ */
+ private Opcode getOpcode() {
+ expr instanceof AssignAddExpr and result instanceof Opcode::Add
+ or
+ expr instanceof AssignSubExpr and result instanceof Opcode::Sub
+ or
+ expr instanceof AssignMulExpr and result instanceof Opcode::Mul
+ or
+ expr instanceof AssignDivExpr and result instanceof Opcode::Div
+ or
+ expr instanceof AssignRemExpr and result instanceof Opcode::Rem
+ or
+ expr instanceof AssignAndExpr and result instanceof Opcode::BitAnd
+ or
+ expr instanceof AssignOrExpr and result instanceof Opcode::BitOr
+ or
+ expr instanceof AssignXorExpr and result instanceof Opcode::BitXor
+ or
+ expr instanceof AssignLShiftExpr and result instanceof Opcode::ShiftLeft
+ or
+ expr instanceof AssignRShiftExpr and result instanceof Opcode::ShiftRight
+ or
+ expr instanceof AssignPointerAddExpr and result instanceof Opcode::PointerAdd
+ or
+ expr instanceof AssignPointerSubExpr and result instanceof Opcode::PointerSub
+ }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ // The operation itself, performed in the converted left operand type.
+ tag = AssignOperationOpTag() and
+ opcode = getOpcode() and
+ resultType = getTypeForPRValue(getConvertedLeftOperandType())
+ or
+ tag = AssignmentStoreTag() and
+ opcode instanceof Opcode::Store and
+ resultType = getTypeForPRValue(expr.getType()) // Always a prvalue
+ or
+ // The two conversions only exist when the left operand needs conversion.
+ leftOperandNeedsConversion() and
+ opcode instanceof Opcode::Convert and
+ (
+ tag = AssignOperationConvertLeftTag() and
+ resultType = getTypeForPRValue(getConvertedLeftOperandType())
+ or
+ tag = AssignOperationConvertResultTag() and
+ resultType = getTypeForPRValue(getLoadedLeftOperand().getExpr().getType())
+ )
+ }
+
+ override int getInstructionElementSize(InstructionTag tag) {
+ // Only the pointer arithmetic opcodes get an element size, which is
+ // computed from the type of the assignment expression.
+ tag = AssignOperationOpTag() and
+ exists(Opcode opcode |
+ opcode = getOpcode() and
+ (opcode instanceof Opcode::PointerAdd or opcode instanceof Opcode::PointerSub)
+ ) and
+ result = getElementSize(expr.getType())
+ }
+
+ override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ // ConvertLeft: converts the loaded left operand.
+ leftOperandNeedsConversion() and
+ tag = AssignOperationConvertLeftTag() and
+ operandTag instanceof UnaryOperandTag and
+ result = getLoadedLeftOperand().getResult()
+ or
+ // Op: left operand is the (possibly converted) loaded value, right operand
+ // is the result of the right-hand side.
+ tag = AssignOperationOpTag() and
+ (
+ (
+ operandTag instanceof LeftOperandTag and
+ if leftOperandNeedsConversion()
+ then result = getInstruction(AssignOperationConvertLeftTag())
+ else result = getLoadedLeftOperand().getResult()
+ )
+ or
+ operandTag instanceof RightOperandTag and
+ result = getRightOperand().getResult()
+ )
+ or
+ // ConvertResult: converts the result of the operation.
+ leftOperandNeedsConversion() and
+ tag = AssignOperationConvertResultTag() and
+ operandTag instanceof UnaryOperandTag and
+ result = getInstruction(AssignOperationOpTag())
+ or
+ // Store: writes the stored value to the address of the unloaded left operand.
+ tag = AssignmentStoreTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getUnloadedLeftOperand().getResult()
+ or
+ operandTag instanceof StoreValueOperandTag and
+ result = getStoredValue()
+ )
+ }
+}
+
+/**
+ * The IR translation of the size argument that a `new` or `new[]` expression
+ * passes to `operator new`.
+ *
+ * This element is synthesized: not every `NewExpr` has an allocator call in
+ * the AST, and those that do pass an `ErrorExpr` as the size argument.
+ */
+abstract class TranslatedAllocationSize extends TranslatedExpr, TTranslatedAllocationSize {
+  override NewOrNewArrayExpr expr;
+
+  TranslatedAllocationSize() { this = TTranslatedAllocationSize(expr) }
+
+  /** Gets the instruction holding the computed allocation size. */
+  final override Instruction getResult() { result = this.getInstruction(AllocationSizeTag()) }
+
+  /** This element never provides the result of `expr` itself. */
+  final override predicate producesExprResult() { none() }
+
+  final override string toString() { result = "Allocation size for " + expr.toString() }
+}
+
+/** Gets the `TranslatedAllocationSize` whose AST node is `newExpr`. */
+TranslatedAllocationSize getTranslatedAllocationSize(NewOrNewArrayExpr newExpr) {
+  newExpr = result.getAST()
+}
+
+/**
+ * The IR translation of an allocation size that is a constant.
+ *
+ * This covers every allocation for which `expr` is not a `NewArrayExpr` with
+ * an extent expression: the size of a `new` expression is always constant, and
+ * the size of a `new[]` expression is constant if the array extent is a
+ * compile-time constant.
+ */
+class TranslatedConstantAllocationSize extends TranslatedAllocationSize {
+  TranslatedConstantAllocationSize() { not exists(expr.(NewArrayExpr).getExtent()) }
+
+  // There are no children; the single `Constant` instruction is all there is.
+  final override TranslatedElement getChild(int id) { none() }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getInstruction(AllocationSizeTag())
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::Constant and
+    tag = AllocationSizeTag() and
+    // The constant has the type of the allocator's first parameter.
+    resultType = getTypeForPRValue(expr.getAllocator().getParameter(0).getType())
+  }
+
+  final override string getInstructionConstantValue(InstructionTag tag) {
+    tag = AllocationSizeTag() and
+    result = expr.getAllocatedType().getSize().toString()
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = AllocationSizeTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+}
+
+/**
+ * The IR translation of an allocation size that is not a constant.
+ *
+ * This is used for `new[]` expressions that have an extent expression, i.e.
+ * where the array extent is not known at compile time. The extent is
+ * converted, and then multiplied by the constant element size.
+ */
+class TranslatedNonConstantAllocationSize extends TranslatedAllocationSize {
+  override NewArrayExpr expr;
+
+  TranslatedNonConstantAllocationSize() { exists(expr.getExtent()) }
+
+  /** Gets the translation of the fully converted array extent expression. */
+  private TranslatedExpr getExtent() {
+    result = getTranslatedExpr(expr.getExtent().getFullyConverted())
+  }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getExtent() }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getExtent().getFirstInstruction()
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    (
+      tag = AllocationSizeTag() and opcode instanceof Opcode::Mul // REVIEW: Overflow?
+      or
+      tag = AllocationElementSizeTag() and opcode instanceof Opcode::Constant
+      or
+      // Convert the extent to `size_t`, because the AST doesn't do this already.
+      tag = AllocationExtentConvertTag() and opcode instanceof Opcode::Convert
+    ) and
+    // All three instructions have the type of the allocator's first parameter.
+    resultType = getTypeForPRValue(expr.getAllocator().getParameter(0).getType())
+  }
+
+  final override string getInstructionConstantValue(InstructionTag tag) {
+    tag = AllocationElementSizeTag() and
+    result = expr.getAllocatedElementType().getSize().toString()
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getExtent() and
+    result = this.getInstruction(AllocationExtentConvertTag())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // Convert extent -> element size constant -> multiplication -> parent's successor.
+    kind instanceof GotoEdge and
+    (
+      tag = AllocationSizeTag() and
+      result = this.getParent().getChildSuccessor(this)
+      or
+      tag = AllocationElementSizeTag() and
+      result = this.getInstruction(AllocationSizeTag())
+      or
+      tag = AllocationExtentConvertTag() and
+      result = this.getInstruction(AllocationElementSizeTag())
+    )
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // The conversion consumes the result of the extent expression.
+    tag = AllocationExtentConvertTag() and
+    operandTag instanceof UnaryOperandTag and
+    result = this.getExtent().getResult()
+    or
+    // The multiplication consumes the converted extent and the element size.
+    tag = AllocationSizeTag() and
+    (
+      operandTag instanceof RightOperandTag and
+      result = this.getInstruction(AllocationElementSizeTag())
+      or
+      operandTag instanceof LeftOperandTag and
+      result = this.getInstruction(AllocationExtentConvertTag())
+    )
+  }
+}
+
+/**
+ * The IR translation of the call to `operator new` that is performed as part
+ * of a `new` or `new[]` expression.
+ */
+class TranslatedAllocatorCall extends TTranslatedAllocatorCall, TranslatedDirectCall {
+  override NewOrNewArrayExpr expr;
+
+  TranslatedAllocatorCall() { this = TTranslatedAllocatorCall(expr) }
+
+  final override string toString() { result = "Allocator call for " + expr.toString() }
+
+  /** This element never provides the result of `expr` itself. */
+  final override predicate producesExprResult() { none() }
+
+  /** Gets the allocator function, which is the target of the call. */
+  override Function getInstructionFunction(InstructionTag tag) {
+    result = expr.getAllocator() and tag = CallTargetTag()
+  }
+
+  final override Type getCallResultType() { result = expr.getAllocator().getType() }
+
+  /** An allocator call has no qualifier. */
+  final override TranslatedExpr getQualifier() { none() }
+
+  final override predicate hasArguments() {
+    // Every allocator call has at least one argument.
+    any()
+  }
+
+  final override int getNumberOfArguments() {
+    // Ensure that there is a result even when there is no allocator call, as
+    // otherwise TranslatedCall::getChild() will not return the side effects
+    // for this call.
+    not exists(expr.getAllocatorCall()) and
+    result = 0
+    or
+    result = expr.getAllocatorCall().getNumberOfArguments()
+  }
+
+  final override TranslatedExpr getArgument(int index) {
+    // The AST has no allocator call when the default `operator new` is used.
+    // Otherwise there is an allocator call carrying all of the allocator's
+    // arguments: the size, the alignment (if any), and the placement
+    // arguments. Since the size argument in the AST is an error node, the
+    // synthesized allocation size is always used for argument 0.
+    index = 0 and
+    result = getTranslatedAllocationSize(expr)
+    or
+    index != 0 and
+    if index = 1 and expr.hasAlignedAllocation()
+    then result = getTranslatedExpr(expr.getAlignmentArgument())
+    else result = getTranslatedExpr(expr.getAllocatorCall().getArgument(index).getFullyConverted())
+  }
+}
+
+/** Gets the `TranslatedAllocatorCall` whose AST node is `newExpr`. */
+TranslatedAllocatorCall getTranslatedAllocatorCall(NewOrNewArrayExpr newExpr) {
+  newExpr = result.getAST()
+}
+
+/**
+ * Abstract class implemented by any `TranslatedElement` that has a child
+ * expression that is a call to a constructor or destructor, in order to
+ * provide a pointer to the object being constructed or destroyed.
+ *
+ * Implementers only need to supply `getReceiver()`.
+ */
+abstract class StructorCallContext extends TranslatedElement {
+ /**
+ * Gets the instruction whose result value is the address of the object to be
+ * constructed or destroyed.
+ */
+ abstract Instruction getReceiver();
+}
+
+/**
+ * The IR translation of the destruction of a field, performed from within the
+ * destructor of the class that declares the field.
+ *
+ * A `FieldAddress` instruction computes the address of the field, and then the
+ * destructor call is run with that address as its receiver.
+ */
+class TranslatedDestructorFieldDestruction extends TranslatedNonConstantExpr, StructorCallContext {
+  override DestructorFieldDestruction expr;
+
+  /** Gets the translation of the destructor call for the field. */
+  private TranslatedExpr getDestructorCall() { result = getTranslatedExpr(expr.getExpr()) }
+
+  final override TranslatedElement getChild(int id) {
+    id = 0 and result = this.getDestructorCall()
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  /** This element produces no result value. */
+  final override Instruction getResult() { none() }
+
+  /** Gets the address of the field, which is the object being destroyed. */
+  final override Instruction getReceiver() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::FieldAddress and
+    tag = OnlyInstructionTag() and
+    resultType = getTypeForGLValue(expr.getTarget().getType())
+  }
+
+  final override Field getInstructionField(InstructionTag tag) {
+    result = expr.getTarget() and
+    tag = OnlyInstructionTag()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // The field address is computed relative to the `this` pointer of the
+    // enclosing function.
+    operandTag instanceof UnaryOperandTag and
+    tag = OnlyInstructionTag() and
+    result = getTranslatedFunction(expr.getEnclosingFunction()).getInitializeThisInstruction()
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // After the field address has been computed, run the destructor call.
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getDestructorCall().getFirstInstruction()
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getDestructorCall() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+}
+
+/**
+ * The IR translation of the `?:` operator. This class has the portions of the implementation that
+ * are shared between the standard three-operand form (`a ? b : c`) and the GCC-extension
+ * two-operand form (`a ?: c`).
+ *
+ * Unless the result is void, the value of each non-void branch is stored into a single temporary
+ * variable (`ConditionValueTempVar`), and the result of the expression is a load from that
+ * variable.
+ */
+abstract class TranslatedConditionalExpr extends TranslatedNonConstantExpr {
+ override ConditionalExpr expr;
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ // Note that the ternary flavor needs no explicit `ConditionalBranch` instruction here, because
+ // the condition is a `TranslatedCondition`, which will simply connect the successor edges of
+ // the condition directly to the appropriate then/else block via
+ // `getChild[True|False]Successor()`.
+ // The binary flavor will override this predicate to add the `ConditionalBranch`.
+ not resultIsVoid() and
+ (
+ // The address of the temporary variable: once for each non-void branch, and once for the
+ // final load of the result.
+ (
+ not thenIsVoid() and tag = ConditionValueTrueTempAddressTag()
+ or
+ not elseIsVoid() and tag = ConditionValueFalseTempAddressTag()
+ or
+ tag = ConditionValueResultTempAddressTag()
+ ) and
+ opcode instanceof Opcode::VariableAddress and
+ (
+ if expr.isGLValueCategory()
+ then resultType = getTypeForGLValue(any(UnknownType t)) // glvalue to a glvalue
+ else resultType = getTypeForGLValue(expr.getType()) // glvalue to the result type
+ )
+ or
+ // The store of the value of each non-void branch into the temporary variable.
+ (
+ not thenIsVoid() and tag = ConditionValueTrueStoreTag()
+ or
+ not elseIsVoid() and tag = ConditionValueFalseStoreTag()
+ ) and
+ opcode instanceof Opcode::Store and
+ resultType = getResultType()
+ or
+ // The load of the result from the temporary variable.
+ tag = ConditionValueResultLoadTag() and
+ opcode instanceof Opcode::Load and
+ resultType = getResultType()
+ )
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ // For each non-void branch: temp address -> store -> result temp address.
+ // Then: result temp address -> load -> parent's successor.
+ not resultIsVoid() and
+ kind instanceof GotoEdge and
+ (
+ not thenIsVoid() and
+ (
+ tag = ConditionValueTrueTempAddressTag() and
+ result = getInstruction(ConditionValueTrueStoreTag())
+ or
+ tag = ConditionValueTrueStoreTag() and
+ result = getInstruction(ConditionValueResultTempAddressTag())
+ )
+ or
+ not elseIsVoid() and
+ (
+ tag = ConditionValueFalseTempAddressTag() and
+ result = getInstruction(ConditionValueFalseStoreTag())
+ or
+ tag = ConditionValueFalseStoreTag() and
+ result = getInstruction(ConditionValueResultTempAddressTag())
+ )
+ or
+ tag = ConditionValueResultTempAddressTag() and
+ result = getInstruction(ConditionValueResultLoadTag())
+ or
+ tag = ConditionValueResultLoadTag() and
+ result = getParent().getChildSuccessor(this)
+ )
+ }
+
+ override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ not resultIsVoid() and
+ (
+ // The "then" store writes the result of `getThen()` to the temporary variable.
+ not thenIsVoid() and
+ tag = ConditionValueTrueStoreTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(ConditionValueTrueTempAddressTag())
+ or
+ operandTag instanceof StoreValueOperandTag and
+ result = getThen().getResult()
+ )
+ or
+ // The "else" store writes the result of `getElse()` to the temporary variable.
+ not elseIsVoid() and
+ tag = ConditionValueFalseStoreTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(ConditionValueFalseTempAddressTag())
+ or
+ operandTag instanceof StoreValueOperandTag and
+ result = getElse().getResult()
+ )
+ or
+ // The final load reads from the address of the temporary variable.
+ tag = ConditionValueResultLoadTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(ConditionValueResultTempAddressTag())
+ )
+ )
+ }
+
+ final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+ not resultIsVoid() and
+ tag = ConditionValueTempVar() and
+ type = getResultType()
+ }
+
+ final override IRVariable getInstructionVariable(InstructionTag tag) {
+ // All three address instructions refer to the same temporary variable.
+ not resultIsVoid() and
+ (
+ tag = ConditionValueTrueTempAddressTag() or
+ tag = ConditionValueFalseTempAddressTag() or
+ tag = ConditionValueResultTempAddressTag()
+ ) and
+ result = getTempVariable(ConditionValueTempVar())
+ }
+
+ final override Instruction getResult() {
+ not resultIsVoid() and
+ result = getInstruction(ConditionValueResultLoadTag())
+ }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ // Only the "else" child is handled here; a void "else" branch continues directly with the
+ // parent's successor, because there is no value to store.
+ child = getElse() and
+ if elseIsVoid()
+ then result = getParent().getChildSuccessor(this)
+ else result = getInstruction(ConditionValueFalseTempAddressTag())
+ }
+
+ /**
+ * Gets the `TranslatedExpr` for the "then" result. Note that nothing in the base implementation
+ * of this class assumes that `getThen()` is disjoint from `getCondition()`.
+ */
+ abstract TranslatedExpr getThen();
+
+ /**
+ * Gets the `TranslatedExpr` for the "else" result.
+ */
+ final TranslatedExpr getElse() { result = getTranslatedExpr(expr.getElse().getFullyConverted()) }
+
+ /**
+ * Holds if the "then" branch produces no value, either because its result type is void or
+ * because it is a `throw` expression.
+ */
+ final predicate thenIsVoid() {
+ getThen().getResultType().getIRType() instanceof IRVoidType
+ or
+ // A `ThrowExpr.getType()` incorrectly returns the type of exception being
+ // thrown, rather than `void`. Handle that case here.
+ expr.getThen() instanceof ThrowExpr
+ }
+
+ /**
+ * Holds if the "else" branch produces no value, either because its result type is void or
+ * because it is a `throw` expression.
+ */
+ private predicate elseIsVoid() {
+ getElse().getResultType().getIRType() instanceof IRVoidType
+ or
+ // A `ThrowExpr.getType()` incorrectly returns the type of exception being
+ // thrown, rather than `void`. Handle that case here.
+ expr.getElse() instanceof ThrowExpr
+ }
+
+ /** Holds if the result type of the whole conditional expression is void. */
+ private predicate resultIsVoid() { getResultType().getIRType() instanceof IRVoidType }
+}
+
+/**
+ * The IR translation of the ternary conditional operator (`a ? b : c`).
+ * In this form the condition is expanded as a `TranslatedCondition` rather than as a
+ * `TranslatedExpr`, which simplifies the control flow in the presence of short-circuit logical
+ * operators.
+ */
+class TranslatedTernaryConditionalExpr extends TranslatedConditionalExpr, ConditionContext {
+  TranslatedTernaryConditionalExpr() { not expr.isTwoOperand() }
+
+  /** Gets the translation of the fully converted condition. */
+  private TranslatedCondition getCondition() {
+    result = getTranslatedCondition(expr.getCondition().getFullyConverted())
+  }
+
+  final override TranslatedExpr getThen() {
+    result = getTranslatedExpr(expr.getThen().getFullyConverted())
+  }
+
+  final override TranslatedElement getChild(int id) {
+    id = 2 and result = this.getElse()
+    or
+    id = 1 and result = this.getThen()
+    or
+    id = 0 and result = this.getCondition()
+  }
+
+  override Instruction getFirstInstruction() {
+    result = this.getCondition().getFirstInstruction()
+  }
+
+  /** Gets the first instruction of the "then" branch, taken when the condition is true. */
+  override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+    child = this.getCondition() and
+    result = this.getThen().getFirstInstruction()
+  }
+
+  /** Gets the first instruction of the "else" branch, taken when the condition is false. */
+  override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+    child = this.getCondition() and
+    result = this.getElse().getFirstInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // A void "then" branch has no value to store, so it continues directly with the successor
+    // of the whole expression; otherwise its value is stored to the temporary variable.
+    child = this.getThen() and
+    (
+      this.thenIsVoid() and
+      result = this.getParent().getChildSuccessor(this)
+      or
+      not this.thenIsVoid() and
+      result = this.getInstruction(ConditionValueTrueTempAddressTag())
+    )
+    or
+    // The "else" branch is handled by the shared base implementation.
+    result = TranslatedConditionalExpr.super.getChildSuccessor(child)
+  }
+}
+
+/**
+ * The IR translation of the two-operand conditional operator (`a ?: b`), which is a GCC language
+ * extension.
+ * This form yields its first operand (the condition) if that operand is non-zero. Because the
+ * value of the condition is reused as the result, it is computed as a value before branching,
+ * even if it is a short-circuit logical expression.
+ */
+class TranslatedBinaryConditionalExpr extends TranslatedConditionalExpr {
+  TranslatedBinaryConditionalExpr() { expr.isTwoOperand() }
+
+  /** Gets the translation of the fully converted condition, evaluated as a value. */
+  private TranslatedExpr getCondition() {
+    result = getTranslatedExpr(expr.getCondition().getFullyConverted())
+  }
+
+  final override TranslatedExpr getThen() {
+    // The extractor reports the very same expression for `ConditionalExpr::getCondition()` and
+    // `ConditionalExpr::getThen()`, although the condition may have been converted to `bool`
+    // and the "then" may have been converted to the result type. Only the explicit conversions
+    // are kept here, which skips any implicit conversion to `bool`. There is not enough
+    // information to know how to convert the result to the destination type, especially in the
+    // class pointer case, so one operand may still end up with the wrong type. That is better
+    // than always converting the "then" operand to `bool`, which is almost always wrong.
+    result = getTranslatedExpr(expr.getThen().getExplicitlyConverted())
+  }
+
+  final override TranslatedElement getChild(int id) {
+    // There are really only two children, since the extractor treats the "condition" and the
+    // "then" as the same expression.
+    id = 1 and result = this.getElse()
+    or
+    id = 0 and result = this.getCondition()
+  }
+
+  override Instruction getFirstInstruction() {
+    result = this.getCondition().getFirstInstruction()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // The binary variant creates its own conditional branch ...
+    opcode instanceof Opcode::ConditionalBranch and
+    tag = ValueConditionConditionalBranchTag() and
+    resultType = getVoidType()
+    or
+    // ... in addition to the instructions shared with the ternary variant.
+    super.hasInstruction(opcode, tag, resultType)
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // The branch tests the value of the condition.
+    operandTag instanceof ConditionOperandTag and
+    tag = ValueConditionConditionalBranchTag() and
+    result = this.getCondition().getResult()
+    or
+    result = super.getInstructionRegisterOperand(tag, operandTag)
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = ValueConditionConditionalBranchTag() and
+    (
+      // False: evaluate the "else" operand.
+      kind instanceof FalseEdge and
+      result = this.getElse().getFirstInstruction()
+      or
+      // True: store the already computed condition value as the result.
+      kind instanceof TrueEdge and
+      result = this.getInstruction(ConditionValueTrueTempAddressTag())
+    )
+    or
+    result = super.getInstructionSuccessor(tag, kind)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // Once the condition has been evaluated, branch on it.
+    child = this.getCondition() and
+    result = this.getInstruction(ValueConditionConditionalBranchTag())
+    or
+    result = super.getChildSuccessor(child)
+  }
+}
+
+/**
+ * The IR translation of the materialization of a temporary object.
+ *
+ * A temporary variable is allocated and then initialized, with `expr.getExpr()` acting as its
+ * initializer. The result is the address of that temporary variable.
+ */
+class TranslatedTemporaryObjectExpr extends TranslatedNonConstantExpr,
+  TranslatedVariableInitialization {
+  override TemporaryObjectExpr expr;
+
+  override Type getTargetType() { result = expr.getType() }
+
+  final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+    type = getTypeForPRValue(expr.getType()) and
+    tag = TempObjectTempVar()
+  }
+
+  final override IRVariable getIRVariable() {
+    result = getIRTempVariable(expr, TempObjectTempVar())
+  }
+
+  final override TranslatedInitialization getInitialization() {
+    result = getTranslatedInitialization(expr.getExpr())
+  }
+
+  /** Gets the instruction that follows the initialization, as determined by the parent. */
+  final override Instruction getInitializationSuccessor() {
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  /** Gets the address of the temporary variable. */
+  final override Instruction getResult() { result = this.getTargetAddress() }
+}
+
+/**
+ * The IR translation of a `throw` expression. Subclasses choose the opcode of
+ * the throw instruction through `getThrowOpcode()`.
+ */
+abstract class TranslatedThrowExpr extends TranslatedNonConstantExpr {
+  override ThrowExpr expr;
+
+  /** Gets the opcode to use for the instruction tagged `ThrowTag`. */
+  abstract Opcode getThrowOpcode();
+
+  /** A `throw` produces no result value. */
+  override Instruction getResult() { none() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode = this.getThrowOpcode() and
+    tag = ThrowTag() and
+    resultType = getVoidType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // The only way out of a throw is the exception edge supplied by the parent.
+    kind instanceof ExceptionEdge and
+    tag = ThrowTag() and
+    result = this.getParent().getExceptionSuccessorInstruction()
+  }
+}
+
+/**
+ * The IR translation of a `throw` expression that has an argument
+ * (e.g. `throw std::bad_alloc()`).
+ *
+ * The thrown value is used to initialize a temporary variable, whose address
+ * is then the operand of the `ThrowValue` instruction.
+ */
+class TranslatedThrowValueExpr extends TranslatedThrowExpr, TranslatedVariableInitialization {
+  TranslatedThrowValueExpr() { not expr instanceof ReThrowExpr }
+
+  /** Gets the type of the exception, which is the type of `expr`. */
+  private Type getExceptionType() { result = expr.getType() }
+
+  final override Opcode getThrowOpcode() { result instanceof Opcode::ThrowValue }
+
+  override Type getTargetType() { result = this.getExceptionType() }
+
+  final override IRVariable getIRVariable() { result = getIRTempVariable(expr, ThrowTempVar()) }
+
+  final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+    type = getTypeForPRValue(this.getExceptionType()) and
+    tag = ThrowTempVar()
+  }
+
+  final override TranslatedInitialization getInitialization() {
+    result = getTranslatedInitialization(expr.getExpr().getFullyConverted())
+  }
+
+  /** The throw instruction runs once the temporary variable has been initialized. */
+  final override Instruction getInitializationSuccessor() {
+    result = this.getInstruction(ThrowTag())
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // The instructions of both supertypes are combined.
+    TranslatedVariableInitialization.super.hasInstruction(opcode, tag, resultType)
+    or
+    TranslatedThrowExpr.super.hasInstruction(opcode, tag, resultType)
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    result = TranslatedVariableInitialization.super.getInstructionSuccessor(tag, kind)
+    or
+    result = TranslatedThrowExpr.super.getInstructionSuccessor(tag, kind)
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // The throw instruction takes the address of the initialized temporary.
+    tag = ThrowTag() and
+    operandTag instanceof AddressOperandTag and
+    result = this.getInstruction(InitializerVariableAddressTag())
+    or
+    result = TranslatedVariableInitialization.super.getInstructionRegisterOperand(tag, operandTag)
+  }
+
+  final override CppType getInstructionMemoryOperandType(
+    InstructionTag tag, TypedOperandTag operandTag
+  ) {
+    // The throw instruction loads a value of the exception type.
+    operandTag instanceof LoadOperandTag and
+    tag = ThrowTag() and
+    result = getTypeForPRValue(this.getExceptionType())
+  }
+}
+
+/**
+ * The IR translation of a `throw` expression that has no argument
+ * (e.g. `throw;`). It consists of a single `ReThrow` instruction and has no
+ * children.
+ */
+class TranslatedReThrowExpr extends TranslatedThrowExpr {
+  override ReThrowExpr expr;
+
+  final override Opcode getThrowOpcode() { result instanceof Opcode::ReThrow }
+
+  override Instruction getFirstInstruction() { result = this.getInstruction(ThrowTag()) }
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+}
+
+/**
+ * The IR translation of a built-in operation (i.e. anything that extends
+ * `BuiltInOperation`).
+ *
+ * The children are evaluated in index order, followed by a single instruction
+ * that takes the results of the children as positional arguments.
+ */
+class TranslatedBuiltInOperation extends TranslatedNonConstantExpr {
+  override BuiltInOperation expr;
+
+  TranslatedBuiltInOperation() {
+    // These built-in operations are handled specially and are excluded here.
+    not (
+      expr instanceof BuiltInOperationBuiltInAddressOf
+      or
+      expr instanceof BuiltInVarArgsStart
+      or
+      expr instanceof BuiltInVarArg
+      or
+      expr instanceof BuiltInVarArgsEnd
+      or
+      expr instanceof BuiltInVarArgCopy
+    )
+  }
+
+  /** Gets the opcode of the single instruction generated for this operation. */
+  Opcode getOpcode() { result instanceof Opcode::BuiltIn }
+
+  final override TranslatedElement getChild(int id) {
+    result = getTranslatedExpr(expr.getChild(id).getFullyConverted())
+  }
+
+  final override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override Instruction getFirstInstruction() {
+    // Start with the first child if there is one, otherwise with the
+    // operation's own instruction.
+    result = this.getChild(0).getFirstInstruction()
+    or
+    not exists(this.getChild(0)) and
+    result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode = this.getOpcode() and
+    tag = OnlyInstructionTag() and
+    resultType = this.getResultType()
+  }
+
+  final override BuiltInOperation getInstructionBuiltInOperation(InstructionTag tag) {
+    result = expr and
+    tag = OnlyInstructionTag()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // The result of child `argIndex` is the positional argument `argIndex`.
+    tag = OnlyInstructionTag() and
+    exists(int argIndex | operandTag = positionalArgumentOperand(argIndex) |
+      result = this.getChild(argIndex).(TranslatedExpr).getResult()
+    )
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    // Each child is followed by the next child, and the last child by the
+    // operation's own instruction.
+    exists(int index | child = this.getChild(index) |
+      not exists(this.getChild(index + 1)) and
+      result = this.getInstruction(OnlyInstructionTag())
+      or
+      result = this.getChild(index + 1).getFirstInstruction()
+    )
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+}
+
+/**
+ * Holds if `expr` is the fully converted form of a `va_list` operand of one of the `va_*` macros.
+ */
+private predicate isVAListExpr(Expr expr) {
+  exists(VarArgsExpr macro, Expr operand |
+    expr = operand.getFullyConverted() and
+    (
+      operand = macro.(BuiltInVarArgCopy).getDestinationVAList()
+      or
+      operand = macro.(BuiltInVarArgCopy).getSourceVAList()
+      or
+      operand = macro.(BuiltInVarArg).getVAList()
+      or
+      operand = macro.(BuiltInVarArgsEnd).getVAList()
+      or
+      operand = macro.(BuiltInVarArgsStart).getVAList()
+    )
+  )
+}
+
+/**
+ * Gets the representative type of the `va_list` accessed by `expr`, where `expr` is a `va_list`
+ * operand of one of the `va_*` macros.
+ *
+ * Unix ABI: `va_list` is declared as `typedef struct __va_list_tag va_list[1];`.
+ * - A local variable of this type undergoes an implicit array-to-pointer conversion, so the
+ *   macro argument is a prvalue of type `__va_list_tag*`.
+ * - A function parameter of this type decays to `__va_list_tag*`, so the macro argument is
+ *   again a prvalue of type `__va_list_tag*`, with no implicit conversion needed.
+ *
+ * In both cases `__va_list_tag` is taken as the representative type.
+ *
+ * Windows ABI: `va_list` is declared as a pointer type (usually `char*`). For local variables
+ * and parameters alike, the macro argument is always an _lvalue_ of type `char*`, and `char*`
+ * itself is taken as the representative type.
+ */
+private Type getVAListType(Expr expr) {
+  isVAListExpr(expr) and
+  if not expr.isPRValueCategory()
+  then
+    // Windows ABI: an lvalue of some pointer type; that pointer type is the result.
+    result = expr.getType()
+  else
+    // Unix ABI: a prvalue of type `__va_list_tag*`; the result is the pointee type,
+    // `__va_list_tag`.
+    result = expr.getType().getUnderlyingType().(PointerType).getBaseType()
+}
+
+/**
+ * The IR translation of a `BuiltInVarArgsStart` expression. It produces no result instruction.
+ */
+class TranslatedVarArgsStart extends TranslatedNonConstantExpr {
+  override BuiltInVarArgsStart expr;
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = VarArgsStartEllipsisAddressTag() and
+    opcode instanceof Opcode::VariableAddress and
+    resultType = getEllipsisVariableGLValueType()
+    or
+    // `VarArgsStart` and the subsequent `Store` both have the `va_list` prvalue type.
+    resultType = getTypeForPRValue(getVAListType(expr.getVAList().getFullyConverted())) and
+    (
+      tag = VarArgsStartTag() and opcode instanceof Opcode::VarArgsStart
+      or
+      tag = VarArgsVAListStoreTag() and opcode instanceof Opcode::Store
+    )
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getInstruction(VarArgsStartEllipsisAddressTag())
+  }
+
+  final override Instruction getResult() { none() }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getVAList() }
+
+  private TranslatedExpr getVAList() {
+    result = getTranslatedExpr(expr.getVAList().getFullyConverted())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // Straight-line flow: ellipsis address -> `VarArgsStart` -> evaluation of the `va_list`
+    // operand -> `Store` -> successor in the parent.
+    kind instanceof GotoEdge and
+    (
+      tag = VarArgsStartEllipsisAddressTag() and result = this.getInstruction(VarArgsStartTag())
+      or
+      tag = VarArgsStartTag() and result = this.getVAList().getFirstInstruction()
+      or
+      tag = VarArgsVAListStoreTag() and result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(VarArgsVAListStoreTag()) and
+    child = this.getVAList()
+  }
+
+  final override IRVariable getInstructionVariable(InstructionTag tag) {
+    result = this.getEnclosingFunction().getEllipsisVariable() and
+    tag = VarArgsStartEllipsisAddressTag()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = VarArgsVAListStoreTag() and
+    (
+      operandTag instanceof StoreValueOperandTag and result = this.getInstruction(VarArgsStartTag())
+      or
+      operandTag instanceof AddressOperandTag and result = this.getVAList().getResult()
+    )
+    or
+    // `VarArgsStart` takes the address of the ellipsis variable as its operand.
+    tag = VarArgsStartTag() and operandTag instanceof UnaryOperandTag and
+    result = this.getInstruction(VarArgsStartEllipsisAddressTag())
+  }
+}
+
+/**
+ * The IR translation of a `BuiltInVarArg` expression; its result is the `VarArg` instruction.
+ */
+class TranslatedVarArg extends TranslatedNonConstantExpr {
+  override BuiltInVarArg expr;
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = VarArgsArgAddressTag() and
+    opcode instanceof Opcode::VarArg and
+    resultType = this.getResultType()
+    or
+    resultType = getTypeForPRValue(getVAListType(expr.getVAList().getFullyConverted())) and
+    (
+      tag = VarArgsVAListLoadTag() and
+      opcode instanceof Opcode::Load
+      or
+      tag = VarArgsMoveNextTag() and
+      opcode instanceof Opcode::NextVarArg
+      or
+      tag = VarArgsVAListStoreTag() and
+      opcode instanceof Opcode::Store
+    )
+  }
+
+  final override Instruction getFirstInstruction() { result = this.getVAList().getFirstInstruction() }
+
+  final override Instruction getResult() { result = this.getInstruction(VarArgsArgAddressTag()) }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getVAList() }
+
+  private TranslatedExpr getVAList() {
+    result = getTranslatedExpr(expr.getVAList().getFullyConverted())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // Straight-line flow: `Load` -> `VarArg` -> `NextVarArg` -> `Store` -> successor in the parent.
+    kind instanceof GotoEdge and
+    (
+      tag = VarArgsVAListLoadTag() and
+      result = this.getInstruction(VarArgsArgAddressTag())
+      or
+      tag = VarArgsArgAddressTag() and
+      result = this.getInstruction(VarArgsMoveNextTag())
+      or
+      tag = VarArgsMoveNextTag() and
+      result = this.getInstruction(VarArgsVAListStoreTag())
+      or
+      tag = VarArgsVAListStoreTag() and
+      result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(VarArgsVAListLoadTag()) and
+    child = this.getVAList()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // `Load` reads the `va_list` through the address produced by the operand.
+    tag = VarArgsVAListLoadTag() and
+    operandTag instanceof AddressOperandTag and
+    result = this.getVAList().getResult()
+    or
+    // `VarArg` and `NextVarArg` both take the loaded `va_list` value as their operand.
+    (tag = VarArgsArgAddressTag() or tag = VarArgsMoveNextTag()) and
+    operandTag instanceof UnaryOperandTag and
+    result = this.getInstruction(VarArgsVAListLoadTag())
+    or
+    // `Store` writes the `NextVarArg` result back through the same address that the
+    // `Load` read from.
+    tag = VarArgsVAListStoreTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getVAList().getResult()
+      or
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getInstruction(VarArgsMoveNextTag())
+    )
+  }
+}
+
+/**
+ * The IR translation of a `BuiltInVarArgsEnd` expression: one `void`-typed `VarArgsEnd`.
+ */
+class TranslatedVarArgsEnd extends TranslatedNonConstantExpr {
+  override BuiltInVarArgsEnd expr;
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::VarArgsEnd and
+    resultType = getVoidType() and
+    tag = OnlyInstructionTag()
+  }
+
+  final override Instruction getFirstInstruction() { result = this.getVAList().getFirstInstruction() }
+
+  final override Instruction getResult() { none() }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getVAList() }
+
+  private TranslatedExpr getVAList() {
+    result = getTranslatedExpr(expr.getVAList().getFullyConverted())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getVAList()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    operandTag instanceof UnaryOperandTag and
+    tag = OnlyInstructionTag() and
+    result = this.getVAList().getResult()
+  }
+}
+
+/**
+ * The IR translation of a `BuiltInVarArgCopy` expression: `Load` the source, `Store` it to the destination.
+ */
+class TranslatedVarArgCopy extends TranslatedNonConstantExpr {
+  override BuiltInVarArgCopy expr;
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = VarArgsVAListStoreTag() and
+    opcode instanceof Opcode::Store and
+    resultType = getTypeForPRValue(getVAListType(expr.getDestinationVAList().getFullyConverted()))
+    or
+    tag = VarArgsVAListLoadTag() and
+    opcode instanceof Opcode::Load and
+    resultType = getTypeForPRValue(getVAListType(expr.getSourceVAList().getFullyConverted()))
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getSourceVAList().getFirstInstruction()
+  }
+
+  final override Instruction getResult() { result = this.getInstruction(VarArgsVAListStoreTag()) }
+
+  final override TranslatedElement getChild(int id) {
+    id = 1 and result = this.getSourceVAList()
+    or
+    id = 0 and result = this.getDestinationVAList()
+  }
+
+  private TranslatedExpr getDestinationVAList() {
+    result = getTranslatedExpr(expr.getDestinationVAList().getFullyConverted())
+  }
+
+  private TranslatedExpr getSourceVAList() {
+    result = getTranslatedExpr(expr.getSourceVAList().getFullyConverted())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // After the `Load` comes the destination operand; after the `Store`, the parent's successor.
+    kind instanceof GotoEdge and
+    (
+      tag = VarArgsVAListLoadTag() and result = this.getDestinationVAList().getFirstInstruction()
+      or
+      tag = VarArgsVAListStoreTag() and result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getDestinationVAList() and
+    result = this.getInstruction(VarArgsVAListStoreTag())
+    or
+    child = this.getSourceVAList() and
+    result = this.getInstruction(VarArgsVAListLoadTag())
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // `Load` reads through the address produced by the source operand.
+    tag = VarArgsVAListLoadTag() and
+    operandTag instanceof AddressOperandTag and
+    result = this.getSourceVAList().getResult()
+    or
+    // `Store` writes the loaded value through the address produced by the destination operand.
+    tag = VarArgsVAListStoreTag() and
+    (
+      operandTag instanceof StoreValueOperandTag and result = this.getInstruction(VarArgsVAListLoadTag())
+      or
+      operandTag instanceof AddressOperandTag and result = this.getDestinationVAList().getResult()
+    )
+  }
+}
+
+/**
+ * The IR translation shared by `new` and `new[]` expressions.
+ */
+abstract class TranslatedNewOrNewArrayExpr extends TranslatedNonConstantExpr, InitializationContext {
+  override NewOrNewArrayExpr expr;
+
+  final override TranslatedElement getChild(int id) {
+    id = 1 and result = this.getInitialization()
+    or
+    id = 0 and result = this.getAllocatorCall()
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::Convert and
+    resultType = this.getResultType() and
+    tag = OnlyInstructionTag()
+  }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getAllocatorCall().getFirstInstruction()
+  }
+
+  final override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = OnlyInstructionTag() and
+    kind instanceof GotoEdge and
+    if not exists(this.getInitialization())
+    then result = this.getParent().getChildSuccessor(this)
+    else result = this.getInitialization().getFirstInstruction()
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getInitialization() and result = this.getParent().getChildSuccessor(this)
+    or
+    child = this.getAllocatorCall() and result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    operandTag instanceof UnaryOperandTag and
+    tag = OnlyInstructionTag() and
+    result = this.getAllocatorCall().getResult()
+  }
+
+  final override Instruction getTargetAddress() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  private TranslatedAllocatorCall getAllocatorCall() { getTranslatedAllocatorCall(expr) = result }
+
+  abstract TranslatedInitialization getInitialization();
+}
+
+/**
+ * The IR translation of a non-array `new` expression.
+ */
+class TranslatedNewExpr extends TranslatedNewOrNewArrayExpr {
+  override NewExpr expr;
+
+  final override TranslatedInitialization getInitialization() {
+    exists(Expr initializer | initializer = expr.getInitializer() | result = getTranslatedInitialization(initializer))
+  }
+
+  final override Type getTargetType() { expr.getAllocatedType().getUnspecifiedType() = result }
+}
+
+/**
+ * The IR translation of a `new[]` expression. No initialization is currently generated.
+ */
+class TranslatedNewArrayExpr extends TranslatedNewOrNewArrayExpr {
+  override NewArrayExpr expr;
+
+  final override Type getTargetType() { expr.getAllocatedType().getUnspecifiedType() = result }
+
+  final override TranslatedInitialization getInitialization() {
+    // REVIEW: Decide how array initialization should be modeled in the IR.
+    none()
+  }
+}
+
+/**
+ * Placeholder translation for `delete[]` expressions.
+ *
+ * A proper translation does not exist yet. This stub evaluates the operand and then emits a
+ * `NoOp`, which ensures that code following a `delete[]` is not unreachable.
+ */
+class TranslatedDeleteArrayExprPlaceHolder extends TranslatedSingleInstructionExpr {
+  override DeleteArrayExpr expr;
+
+  final override Opcode getOpcode() { result instanceof Opcode::NoOp }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getOperand() }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and child = this.getOperand()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  final override Instruction getFirstInstruction() { result = this.getOperand().getFirstInstruction() }
+
+  private TranslatedExpr getOperand() {
+    result = getTranslatedExpr(expr.getExpr().getFullyConverted())
+  }
+}
+
+/**
+ * Placeholder translation for `delete` expressions.
+ *
+ * A proper translation does not exist yet. This stub evaluates the operand and then emits a
+ * `NoOp`, which ensures that code following a `delete` is not unreachable.
+ */
+class TranslatedDeleteExprPlaceHolder extends TranslatedSingleInstructionExpr {
+  override DeleteExpr expr;
+
+  final override Opcode getOpcode() { result instanceof Opcode::NoOp }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getOperand() }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and child = this.getOperand()
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  final override Instruction getFirstInstruction() { result = this.getOperand().getFirstInstruction() }
+
+  private TranslatedExpr getOperand() {
+    result = getTranslatedExpr(expr.getExpr().getFullyConverted())
+  }
+}
+
+/**
+ * The IR translation of a `ConditionDeclExpr`: the value, after conversion to `bool`, of a
+ * variable declared in a condition, as in:
+ * ```
+ * if (int* p = &x) {
+ * }
+ * ```
+ */
+class TranslatedConditionDeclExpr extends TranslatedNonConstantExpr {
+  override ConditionDeclExpr expr;
+
+  final override Instruction getFirstInstruction() { result = this.getDecl().getFirstInstruction() }
+
+  final override TranslatedElement getChild(int id) {
+    id = 1 and result = this.getConditionExpr()
+    or
+    id = 0 and result = this.getDecl()
+  }
+
+  override Instruction getResult() { result = this.getConditionExpr().getResult() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getConditionExpr() and
+    result = this.getParent().getChildSuccessor(this)
+    or
+    child = this.getDecl() and result = this.getConditionExpr().getFirstInstruction()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  private TranslatedConditionDecl getDecl() { getTranslatedConditionDecl(expr) = result }
+
+  private TranslatedExpr getConditionExpr() {
+    result = getTranslatedExpr(expr.getVariableAccess().getFullyConverted())
+  }
+}
+
+/**
+ * The IR translation of a lambda expression. A temporary variable of the lambda's type is
+ * initialized from the initializer list representing the captures, and then loaded as the result.
+ */
+class TranslatedLambdaExpr extends TranslatedNonConstantExpr, InitializationContext {
+  override LambdaExpression expr;
+
+  final override Instruction getFirstInstruction() {
+    result = this.getInstruction(InitializerVariableAddressTag())
+  }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getInitialization() }
+
+  override Instruction getResult() { result = this.getInstruction(LoadTag()) }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    (
+      tag = InitializerVariableAddressTag() and
+      result = this.getInstruction(InitializerStoreTag())
+      or
+      tag = InitializerStoreTag() and
+      (
+        not this.hasInitializer() and result = this.getInstruction(LoadTag())
+        or
+        result = this.getInitialization().getFirstInstruction()
+      )
+      or
+      tag = LoadTag() and
+      result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(LoadTag()) and
+    child = this.getInitialization()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = InitializerVariableAddressTag() and
+    opcode instanceof Opcode::VariableAddress and
+    resultType = getTypeForGLValue(expr.getType())
+    or
+    // The `Uninitialized` instruction and the final `Load` both have the lambda's prvalue type.
+    resultType = getTypeForPRValue(expr.getType()) and
+    (
+      tag = InitializerStoreTag() and opcode instanceof Opcode::Uninitialized
+      or
+      tag = LoadTag() and opcode instanceof Opcode::Load
+    )
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    // Both the `Uninitialized` instruction and the final `Load` take the result of the
+    // `VariableAddress` instruction as their address operand.
+    (
+      tag = InitializerStoreTag()
+      or
+      tag = LoadTag()
+    ) and
+    operandTag instanceof AddressOperandTag and
+    result = this.getInstruction(InitializerVariableAddressTag())
+  }
+
+  override IRVariable getInstructionVariable(InstructionTag tag) {
+    result = this.getTempVariable(LambdaTempVar()) and
+    (
+      tag = InitializerStoreTag()
+      or
+      tag = InitializerVariableAddressTag()
+    )
+  }
+
+  override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+    type = getTypeForPRValue(expr.getType()) and
+    tag = LambdaTempVar()
+  }
+
+  final override Instruction getTargetAddress() {
+    result = this.getInstruction(InitializerVariableAddressTag())
+  }
+
+  final override Type getTargetType() { expr.getType() = result }
+
+  private predicate hasInitializer() { exists(this.getInitialization()) }
+
+  private TranslatedInitialization getInitialization() {
+    result = getTranslatedInitialization(expr.getChild(0).getFullyConverted())
+  }
+}
+
+/**
+ * The IR translation of a `StmtExpr` (the GNU statement-expression extension to C/C++), such as
+ * `({ doSomething(); a + b; })`. The result is a `CopyValue` of the result expression's value.
+ */
+class TranslatedStmtExpr extends TranslatedNonConstantExpr {
+  override StmtExpr expr;
+
+  final override Instruction getFirstInstruction() { result = this.getStmt().getFirstInstruction() }
+
+  final override TranslatedElement getChild(int id) { id = 0 and result = this.getStmt() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag instanceof OnlyInstructionTag and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = this.getInstruction(OnlyInstructionTag()) and
+    child = this.getStmt()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag instanceof OnlyInstructionTag and
+    resultType = this.getResultType() and
+    opcode instanceof Opcode::CopyValue
+  }
+
+  override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    operandTag instanceof UnaryOperandTag and
+    tag instanceof OnlyInstructionTag and
+    result = getTranslatedExpr(expr.getResultExpr().getFullyConverted()).getResult()
+  }
+
+  TranslatedStmt getStmt() { getTranslatedStmt(expr.getStmt()) = result }
+}
+
+/** The IR translation of an `ErrorExpr`: one `Error` instruction, no children or operands. */
+class TranslatedErrorExpr extends TranslatedSingleInstructionExpr {
+  override ErrorExpr expr;
+
+  final override Opcode getOpcode() { result instanceof Opcode::Error }
+
+  final override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override TranslatedElement getChild(int id) { none() }
+  final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+}
+
+/**
+ * Holds if translating `expr` does not directly produce an `Instruction` that can serve as
+ * its result. For such expressions a `CopyValue` instruction can be synthesized, so that
+ * there is a 1-to-1 mapping between expressions and result-bearing instructions.
+ * Expressions whose value is immediately discarded are excluded.
+ */
+// Ideally this would be a dispatch predicate on TranslatedNonConstantExpr,
+// but QL does not consider that formulation monotonic.
+predicate exprNeedsCopyIfNotLoaded(Expr expr) {
+  not exprImmediatelyDiscarded(expr) and
+  (
+    expr instanceof ConditionDeclExpr
+    // TODO: simplify TranslatedStmtExpr too
+    or
+    expr instanceof CommaExpr
+    or
+    expr instanceof ReferenceToExpr
+    or
+    // There is deliberately no case for ParenthesisExpr, to avoid getting too many instructions.
+    expr instanceof ReferenceDereferenceExpr
+    or
+    expr instanceof BuiltInOperationBuiltInAddressOf
+    or
+    expr instanceof AddressOfExpr
+    or
+    expr instanceof PointerDereferenceExpr
+    or
+    // Because the load is on the `e` in `e++`.
+    expr instanceof PostfixCrementOperation
+    or
+    expr instanceof PrefixCrementOperation and
+    not expr.isPRValueCategory() // is C++
+    or
+    expr instanceof AssignOperation and
+    not expr.isPRValueCategory() // is C++
+    or
+    expr instanceof AssignExpr
+  )
+}
+
+/**
+ * Holds if the value of `expr` is discarded right away. No `CopyValue` is needed for such
+ * expressions, since it is unlikely that anyone is interested in their value.
+ */
+private predicate exprImmediatelyDiscarded(Expr expr) {
+  expr = any(ForStmt loop).getUpdate()
+  or
+  expr = any(CommaExpr comma).getLeftOperand()
+  or
+  exists(ExprStmt stmt |
+    stmt = expr.getParent() and
+    not stmt = any(StmtExpr stmtExpr).getStmt().(BlockStmt).getLastStmt()
+  )
+}
+
+/**
+ * The IR translation of an `__assume` expression, currently a single `NoOp` instruction.
+ * In the future we will probably want to do something better; at a minimum, `__assume(0)`
+ * could be modeled as `Unreached`.
+ */
+class TranslatedAssumeExpr extends TranslatedSingleInstructionExpr {
+  override AssumeExpr expr;
+
+  final override Opcode getOpcode() { result instanceof Opcode::NoOp }
+
+  final override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override TranslatedElement getChild(int id) { none() }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedFunction.qll
new file mode 100644
index 00000000000..2a0b58ce96a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedFunction.qll
@@ -0,0 +1,795 @@
+private import cpp
+import semmle.code.cpp.ir.implementation.raw.IR
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.ir.internal.IRUtilities
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.TempVariableTag
+private import InstructionTag
+private import TranslatedElement
+private import TranslatedExpr
+private import TranslatedInitialization
+private import TranslatedStmt
+private import VarArgs
+
+/**
+ * Gets the `TranslatedFunction` that represents function `func`.
+ */
+TranslatedFunction getTranslatedFunction(Function func) { result.getAST() = func }
+
+/**
+ * Gets the size, in bytes, of the variable used to represent the `...` parameter in a varargs
+ * function. This is determined by finding the total size of all of the arguments passed to the
+ * `...` in each call in the program, and choosing the maximum of those, with a minimum of 8 bytes.
+ */
+private int getEllipsisVariableByteSize() {
+ result =
+ max(int variableSize |
+ variableSize =
+ max(Call call, int callSize |
+ callSize =
+ sum(int argIndex |
+ isEllipsisArgumentIndex(call, argIndex)
+ |
+ call.getArgument(argIndex).getType().getSize()
+ )
+ |
+ callSize
+ )
+ or
+ variableSize = 8
+ |
+ variableSize
+ )
+}
+
+ CppType getEllipsisVariablePRValueType() {
+   getUnknownOpaqueType(getEllipsisVariableByteSize()) = result
+ }
+
+ CppType getEllipsisVariableGLValueType() { exists(UnknownType t | getTypeForGLValue(t) = result) }
+
+/**
+ * Holds if the function returns a value, as opposed to returning `void`.
+ */
+predicate hasReturnValue(Function func) { not func.getUnspecifiedType() instanceof VoidType }
+
+/**
+ * Represents the IR translation of a function. This is the root elements for
+ * all other elements associated with this function.
+ */
+class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
+ Function func;
+
+ TranslatedFunction() { this = TTranslatedFunction(func) }
+
+ final override string toString() { result = func.toString() }
+
+ final override Locatable getAST() { result = func }
+
+ /**
+ * Gets the function being translated.
+ */
+ final override Function getFunction() { result = func }
+
+ final override TranslatedElement getChild(int id) {
+ id = -5 and result = getReadEffects()
+ or
+ id = -4 and result = getConstructorInitList()
+ or
+ id = -3 and result = getBody()
+ or
+ id = -2 and result = getDestructorDestructionList()
+ or
+ id >= -1 and result = getParameter(id)
+ }
+
+ final private TranslatedConstructorInitList getConstructorInitList() {
+ result = getTranslatedConstructorInitList(func)
+ }
+
+ final private TranslatedDestructorDestructionList getDestructorDestructionList() {
+ result = getTranslatedDestructorDestructionList(func)
+ }
+
+ final private TranslatedStmt getBody() { result = getTranslatedStmt(func.getEntryPoint()) }
+
+ final private TranslatedReadEffects getReadEffects() { result = getTranslatedReadEffects(func) }
+
+ final private TranslatedParameter getParameter(int index) {
+ result = getTranslatedThisParameter(func) and
+ index = -1
+ or
+ result = getTranslatedParameter(func.getParameter(index))
+ or
+ index = getEllipsisParameterIndexForFunction(func) and
+ result = getTranslatedEllipsisParameter(func)
+ }
+
+ final override Instruction getFirstInstruction() { result = getInstruction(EnterFunctionTag()) }
+
+ final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ kind instanceof GotoEdge and
+ (
+ tag = EnterFunctionTag() and
+ result = getInstruction(AliasedDefinitionTag())
+ or
+ tag = AliasedDefinitionTag() and
+ result = getInstruction(InitializeNonLocalTag())
+ or
+ (
+ tag = InitializeNonLocalTag() and
+ if exists(getThisType())
+ then result = getParameter(-1).getFirstInstruction()
+ else
+ if exists(getParameter(0))
+ then result = getParameter(0).getFirstInstruction()
+ else result = getBody().getFirstInstruction()
+ )
+ or
+ tag = ReturnValueAddressTag() and
+ result = getInstruction(ReturnTag())
+ or
+ tag = ReturnTag() and
+ result = getInstruction(AliasedUseTag())
+ or
+ tag = UnwindTag() and
+ result = getInstruction(AliasedUseTag())
+ or
+ tag = AliasedUseTag() and
+ result = getInstruction(ExitFunctionTag())
+ )
+ }
+
+ final override Instruction getChildSuccessor(TranslatedElement child) {
+ exists(int paramIndex |
+ child = getParameter(paramIndex) and
+ if
+ exists(func.getParameter(paramIndex + 1)) or
+ getEllipsisParameterIndexForFunction(func) = paramIndex + 1
+ then result = getParameter(paramIndex + 1).getFirstInstruction()
+ else result = getConstructorInitList().getFirstInstruction()
+ )
+ or
+ child = getConstructorInitList() and
+ result = getBody().getFirstInstruction()
+ or
+ child = getBody() and
+ result = getReturnSuccessorInstruction()
+ or
+ child = getDestructorDestructionList() and
+ result = getReadEffects().getFirstInstruction()
+ or
+ child = getReadEffects() and
+ if hasReturnValue()
+ then result = getInstruction(ReturnValueAddressTag())
+ else result = getInstruction(ReturnTag())
+ }
+
+ final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ (
+ tag = EnterFunctionTag() and
+ opcode instanceof Opcode::EnterFunction and
+ resultType = getVoidType()
+ or
+ tag = AliasedDefinitionTag() and
+ opcode instanceof Opcode::AliasedDefinition and
+ resultType = getUnknownType()
+ or
+ tag = InitializeNonLocalTag() and
+ opcode instanceof Opcode::InitializeNonLocal and
+ resultType = getUnknownType()
+ or
+ tag = ReturnValueAddressTag() and
+ opcode instanceof Opcode::VariableAddress and
+ resultType = getTypeForGLValue(getReturnType()) and
+ hasReturnValue()
+ or
+ (
+ tag = ReturnTag() and
+ resultType = getVoidType() and
+ if hasReturnValue()
+ then opcode instanceof Opcode::ReturnValue
+ else opcode instanceof Opcode::ReturnVoid
+ )
+ or
+ tag = UnwindTag() and
+ opcode instanceof Opcode::Unwind and
+ resultType = getVoidType() and
+ (
+ // Only generate the `Unwind` instruction if there is any exception
+ // handling present in the function.
+ exists(TryStmt try | try.getEnclosingFunction() = func) or
+ exists(ThrowExpr throw | throw.getEnclosingFunction() = func)
+ )
+ or
+ tag = AliasedUseTag() and
+ opcode instanceof Opcode::AliasedUse and
+ resultType = getVoidType()
+ or
+ tag = ExitFunctionTag() and
+ opcode instanceof Opcode::ExitFunction and
+ resultType = getVoidType()
+ )
+ }
+
+ final override Instruction getExceptionSuccessorInstruction() {
+ result = getInstruction(UnwindTag())
+ }
+
+ final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ tag = ReturnTag() and
+ hasReturnValue() and
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(ReturnValueAddressTag())
+ }
+
+ final override CppType getInstructionMemoryOperandType(
+ InstructionTag tag, TypedOperandTag operandTag
+ ) {
+ tag = ReturnTag() and
+ hasReturnValue() and
+ operandTag instanceof LoadOperandTag and
+ result = getTypeForPRValue(getReturnType())
+ or
+ tag = AliasedUseTag() and
+ operandTag instanceof SideEffectOperandTag and
+ result = getUnknownType()
+ }
+
+ final override IRVariable getInstructionVariable(InstructionTag tag) {
+ tag = ReturnValueAddressTag() and
+ result = getReturnVariable()
+ }
+
+ final override predicate needsUnknownOpaqueType(int byteSize) {
+ byteSize = getEllipsisVariableByteSize()
+ }
+
+ final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
+ tag = ReturnValueTempVar() and
+ hasReturnValue() and
+ type = getTypeForPRValue(getReturnType())
+ or
+ tag = EllipsisTempVar() and
+ func.isVarargs() and
+ type = getEllipsisVariablePRValueType()
+ or
+ tag = ThisTempVar() and
+ type = getTypeForGLValue(getThisType())
+ }
+
+ /**
+ * Gets the instruction to which control should flow after a `return`
+ * statement.
+ */
+ final Instruction getReturnSuccessorInstruction() {
+ result = getDestructorDestructionList().getFirstInstruction()
+ }
+
+ /**
+ * Gets the variable that represents the return value of this function.
+ */
+ final IRReturnVariable getReturnVariable() {
+ result = getIRTempVariable(func, ReturnValueTempVar())
+ }
+
+ /**
+ * Get the variable that represents the `...` parameter, if any.
+ */
+ final IREllipsisVariable getEllipsisVariable() { result.getEnclosingFunction() = func }
+
+ /**
+ * Gets the variable that represents the `this` pointer for this function, if any.
+ */
+ final IRThisVariable getThisVariable() { result = getIRTempVariable(func, ThisTempVar()) }
+
+ /**
+ * Holds if the function has a non-`void` return type.
+ */
+ final predicate hasReturnValue() { hasReturnValue(func) }
+
+ /**
+ * Gets the single `InitializeThis` instruction for this function. Holds only
+ * if the function is an instance member function, constructor, or destructor.
+ */
+ final Instruction getInitializeThisInstruction() {
+ result = getTranslatedThisParameter(func).getInstruction(InitializerStoreTag())
+ }
+
+ /**
+ * Gets the type pointed to by the `this` pointer for this function (i.e. `*this`).
+ * Holds only if the function is an instance member function, constructor, or destructor.
+ */
+ final Type getThisType() {
+ exists(MemberFunction mfunc |
+ mfunc = func and
+ not mfunc.isStatic() and
+ result = mfunc.getDeclaringType()
+ )
+ }
+
+ /**
+ * Holds if this function defines or accesses variable `var` with type `type`. This includes all
+ * parameters and local variables, plus any global variables or static data members that are
+ * directly accessed by the function.
+ */
+ final predicate hasUserVariable(Variable var, CppType type) {
+ (
+ (
+ var instanceof GlobalOrNamespaceVariable
+ or
+ var instanceof MemberVariable and not var instanceof Field
+ ) and
+ exists(VariableAccess access |
+ access.getTarget() = var and
+ access.getEnclosingFunction() = func
+ )
+ or
+ var.(LocalScopeVariable).getFunction() = func
+ or
+ var.(Parameter).getCatchBlock().getEnclosingFunction() = func
+ ) and
+ type = getTypeForPRValue(getVariableType(var))
+ }
+
+ final Type getReturnType() { result = func.getType() }
+}
+
+/**
+ * Gets the `TranslatedThisParameter` for function `func`, if one exists.
+ */
+TranslatedThisParameter getTranslatedThisParameter(Function func) { result.getFunction() = func }
+
+/**
+ * Gets the `TranslatedPositionalParameter` that represents parameter `param`.
+ */
+TranslatedPositionalParameter getTranslatedParameter(Parameter param) { result.getAST() = param }
+
+/**
+ * Gets the `TranslatedEllipsisParameter` for function `func`, if one exists.
+ */
+TranslatedEllipsisParameter getTranslatedEllipsisParameter(Function func) {
+ result.getFunction() = func
+}
+
+/**
+ * The IR translation of a parameter to a function. This can be either a user-declared parameter
+ * (`TranslatedPositionParameter`), the synthesized parameter used to represent `this`, or the
+ * synthesized parameter used to represent a `...` in a varargs function
+ * (`TranslatedEllipsisParameter`).
+ */
+abstract class TranslatedParameter extends TranslatedElement {
+ final override TranslatedElement getChild(int id) { none() }
+
+ final override Instruction getFirstInstruction() {
+ result = getInstruction(InitializerVariableAddressTag())
+ }
+
+ final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ kind instanceof GotoEdge and
+ (
+ tag = InitializerVariableAddressTag() and
+ result = getInstruction(InitializerStoreTag())
+ or
+ tag = InitializerStoreTag() and
+ if hasIndirection()
+ then result = getInstruction(InitializerIndirectAddressTag())
+ else result = getParent().getChildSuccessor(this)
+ or
+ tag = InitializerIndirectAddressTag() and
+ result = getInstruction(InitializerIndirectStoreTag())
+ or
+ tag = InitializerIndirectStoreTag() and
+ result = getParent().getChildSuccessor(this)
+ )
+ }
+
+ final override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+ final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ tag = InitializerVariableAddressTag() and
+ opcode instanceof Opcode::VariableAddress and
+ resultType = getGLValueType()
+ or
+ tag = InitializerStoreTag() and
+ opcode instanceof Opcode::InitializeParameter and
+ resultType = getPRValueType()
+ or
+ hasIndirection() and
+ tag = InitializerIndirectAddressTag() and
+ opcode instanceof Opcode::Load and
+ resultType = getPRValueType()
+ or
+ hasIndirection() and
+ tag = InitializerIndirectStoreTag() and
+ opcode instanceof Opcode::InitializeIndirection and
+ resultType = getInitializationResultType()
+ }
+
+ final override IRVariable getInstructionVariable(InstructionTag tag) {
+ (
+ tag = InitializerStoreTag() or
+ tag = InitializerVariableAddressTag() or
+ tag = InitializerIndirectStoreTag()
+ ) and
+ result = getIRVariable()
+ }
+
+ final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ tag = InitializerStoreTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(InitializerVariableAddressTag())
+ )
+ or
+ // this feels a little strange, but I think it's the best we can do
+ tag = InitializerIndirectAddressTag() and
+ (
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(InitializerVariableAddressTag())
+ )
+ or
+ tag = InitializerIndirectStoreTag() and
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(InitializerIndirectAddressTag())
+ }
+
+ abstract predicate hasIndirection();
+
+ abstract CppType getGLValueType();
+
+ abstract CppType getPRValueType();
+
+ abstract CppType getInitializationResultType();
+
+ abstract IRAutomaticVariable getIRVariable();
+}
+
+/**
+ * The IR translation of the synthesized parameter used to represent the `...` in a varargs
+ * function.
+ */
+class TranslatedThisParameter extends TranslatedParameter, TTranslatedThisParameter {
+ Function func;
+
+ TranslatedThisParameter() { this = TTranslatedThisParameter(func) }
+
+ final override string toString() { result = "this" }
+
+ final override Locatable getAST() { result = func }
+
+ final override Function getFunction() { result = func }
+
+ final override predicate hasIndirection() { any() }
+
+ final override CppType getGLValueType() { result = getTypeForGLValue(any(UnknownType t)) }
+
+ final override CppType getPRValueType() {
+ result = getTypeForGLValue(getTranslatedFunction(func).getThisType())
+ }
+
+ final override CppType getInitializationResultType() {
+ result = getTypeForPRValue(getTranslatedFunction(func).getThisType())
+ }
+
+ final override IRThisVariable getIRVariable() {
+ result = getTranslatedFunction(func).getThisVariable()
+ }
+}
+
+/**
+ * Represents the IR translation of a function parameter, including the
+ * initialization of that parameter with the incoming argument.
+ */
+class TranslatedPositionalParameter extends TranslatedParameter, TTranslatedParameter {
+ Parameter param;
+
+ TranslatedPositionalParameter() { this = TTranslatedParameter(param) }
+
+ final override string toString() { result = param.toString() }
+
+ final override Locatable getAST() { result = param }
+
+ final override Function getFunction() {
+ result = param.getFunction() or
+ result = param.getCatchBlock().getEnclosingFunction()
+ }
+
+ final override predicate hasIndirection() {
+ exists(Type t | t = param.getUnspecifiedType() |
+ t instanceof ArrayType or
+ t instanceof PointerType or
+ t instanceof ReferenceType
+ )
+ }
+
+ final override CppType getGLValueType() { result = getTypeForGLValue(getVariableType(param)) }
+
+ final override CppType getPRValueType() { result = getTypeForPRValue(getVariableType(param)) }
+
+ final override CppType getInitializationResultType() { result = getUnknownType() }
+
+ final override IRAutomaticUserVariable getIRVariable() {
+ result = getIRUserVariable(getFunction(), param)
+ }
+}
+
+/**
+ * The IR translation of the synthesized parameter used to represent the `...` in a varargs
+ * function.
+ */
+class TranslatedEllipsisParameter extends TranslatedParameter, TTranslatedEllipsisParameter {
+ Function func;
+
+ TranslatedEllipsisParameter() { this = TTranslatedEllipsisParameter(func) }
+
+ final override string toString() { result = "..." }
+
+ final override Locatable getAST() { result = func }
+
+ final override Function getFunction() { result = func }
+
+ final override predicate hasIndirection() { any() }
+
+ final override CppType getGLValueType() { result = getEllipsisVariableGLValueType() }
+
+ final override CppType getPRValueType() { result = getEllipsisVariablePRValueType() }
+
+ final override CppType getInitializationResultType() { result = getUnknownType() }
+
+ final override IREllipsisVariable getIRVariable() {
+ result = getTranslatedFunction(func).getEllipsisVariable()
+ }
+}
+
+ private TranslatedConstructorInitList getTranslatedConstructorInitList(Function func) {
+   func = result.getAST() // looked up through the function that is the list's AST element
+ }
+
+/**
+ * Represents the IR translation of a constructor initializer list. To simplify
+ * the implementation of `TranslatedFunction`, a `TranslatedConstructorInitList`
+ * exists for every function, not just for constructors. Of course, only the
+ * instances for constructors can actually contain initializers.
+ */
+class TranslatedConstructorInitList extends TranslatedElement, InitializationContext,
+ TTranslatedConstructorInitList {
+ Function func;
+
+ TranslatedConstructorInitList() { this = TTranslatedConstructorInitList(func) }
+
+ override string toString() { result = "ctor init: " + func.toString() }
+
+ override Locatable getAST() { result = func }
+
+ override TranslatedElement getChild(int id) {
+ exists(ConstructorFieldInit fieldInit |
+ fieldInit = func.(Constructor).getInitializer(id) and
+ result = getTranslatedConstructorFieldInitialization(fieldInit)
+ )
+ or
+ exists(ConstructorBaseInit baseInit |
+ baseInit = func.(Constructor).getInitializer(id) and
+ result = getTranslatedConstructorBaseInit(baseInit)
+ )
+ }
+
+ override Instruction getFirstInstruction() {
+ if exists(getChild(0))
+ then result = getChild(0).getFirstInstruction()
+ else result = getParent().getChildSuccessor(this)
+ }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none()
+ }
+
+ override Function getFunction() { result = func }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ exists(int id |
+ child = getChild(id) and
+ if exists(getChild(id + 1))
+ then result = getChild(id + 1).getFirstInstruction()
+ else result = getParent().getChildSuccessor(this)
+ )
+ }
+
+ override Instruction getTargetAddress() {
+ result = getTranslatedFunction(func).getInitializeThisInstruction()
+ }
+
+ override Type getTargetType() { result = getTranslatedFunction(func).getThisType() }
+}
+
+ private TranslatedDestructorDestructionList getTranslatedDestructorDestructionList(Function func) {
+   func = result.getAST() // looked up through the function that is the list's AST element
+ }
+
+/**
+ * Represents the IR translation of a destructor's implicit calls to destructors
+ * for fields and base classes. To simplify the implementation of `TranslatedFunction`,
+ * a `TranslatedDestructorDestructionList` exists for every function, not just for
+ * destructors. Of course, only the instances for destructors can actually contain
+ * destructions.
+ */
+class TranslatedDestructorDestructionList extends TranslatedElement,
+ TTranslatedDestructorDestructionList {
+ Function func;
+
+ TranslatedDestructorDestructionList() { this = TTranslatedDestructorDestructionList(func) }
+
+ override string toString() { result = "dtor destruction: " + func.toString() }
+
+ override Locatable getAST() { result = func }
+
+ override TranslatedElement getChild(int id) {
+ exists(DestructorFieldDestruction fieldDestruction |
+ fieldDestruction = func.(Destructor).getDestruction(id) and
+ result = getTranslatedExpr(fieldDestruction)
+ )
+ or
+ exists(DestructorBaseDestruction baseDestruction |
+ baseDestruction = func.(Destructor).getDestruction(id) and
+ result = getTranslatedDestructorBaseDestruction(baseDestruction)
+ )
+ }
+
+ override Instruction getFirstInstruction() {
+ if exists(getChild(0))
+ then result = getChild(0).getFirstInstruction()
+ else result = getParent().getChildSuccessor(this)
+ }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none()
+ }
+
+ override Function getFunction() { result = func }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ exists(int id |
+ child = getChild(id) and
+ if exists(getChild(id + 1))
+ then result = getChild(id + 1).getFirstInstruction()
+ else result = getParent().getChildSuccessor(this)
+ )
+ }
+}
+
+ TranslatedReadEffects getTranslatedReadEffects(Function func) { func = result.getAST() }
+
+ class TranslatedReadEffects extends TranslatedElement, TTranslatedReadEffects {
+ Function func;
+
+ TranslatedReadEffects() { this = TTranslatedReadEffects(func) }
+
+ override Locatable getAST() { result = func }
+
+ override Function getFunction() { result = func }
+
+ override string toString() { result = "read effects: " + func.toString() }
+
+ override TranslatedElement getChild(int id) {
+ result = getTranslatedThisReadEffect(func) and
+ id = -1 // the `this` read effect sits before the parameters, at index -1
+ or
+ result = getTranslatedParameterReadEffect(func.getParameter(id))
+ }
+
+ override Instruction getFirstInstruction() {
+ if exists(getAChild())
+ then
+ result =
+ min(TranslatedElement child, int id | child = getChild(id) | child order by id)
+ .getFirstInstruction()
+ else result = getParent().getChildSuccessor(this) // no read effects at all
+ }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ exists(int id | child = getChild(id) |
+ if exists(TranslatedReadEffect child2, int id2 | id2 > id and child2 = getChild(id2))
+ then
+ result =
+ min(TranslatedReadEffect child2, int id2 |
+ child2 = getChild(id2) and id2 > id
+ |
+ child2 order by id2
+ ).getFirstInstruction()
+ else result = getParent().getChildSuccessor(this) // `child` had the largest index
+ )
+ }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none() // this container emits no instructions; only its children do
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+ }
+
+ private TranslatedThisReadEffect getTranslatedThisReadEffect(Function func) {
+   func = result.getAST()
+ }
+
+ private TranslatedParameterReadEffect getTranslatedParameterReadEffect(Parameter param) {
+   param = result.getAST()
+ }
+
+ abstract class TranslatedReadEffect extends TranslatedElement {
+   override TranslatedElement getChild(int id) { none() }
+
+   override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+   override Instruction getFirstInstruction() { getInstruction(OnlyInstructionTag()) = result }
+
+   override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+     kind = EdgeKind::gotoEdge() and
+     tag = OnlyInstructionTag() and
+     getParent().getChildSuccessor(this) = result
+   }
+
+   override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+     tag = OnlyInstructionTag() and
+     resultType = getVoidType() and
+     opcode instanceof Opcode::ReturnIndirection
+   }
+
+   final override CppType getInstructionMemoryOperandType(
+     InstructionTag tag, TypedOperandTag operandTag
+   ) {
+     operandTag = sideEffectOperand() and
+     tag = OnlyInstructionTag() and
+     getUnknownType() = result
+   }
+ }
+
+ class TranslatedThisReadEffect extends TranslatedReadEffect, TTranslatedThisReadEffect {
+   Function func;
+
+   TranslatedThisReadEffect() { TTranslatedThisReadEffect(func) = this }
+
+   override string toString() { result = "read effect: this" }
+
+   override Function getFunction() { func = result }
+
+   override Locatable getAST() { func = result }
+
+   final override IRVariable getInstructionVariable(InstructionTag tag) {
+     getTranslatedFunction(func).getThisVariable() = result and
+     tag = OnlyInstructionTag()
+   }
+
+   final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+     operandTag = addressOperand() and
+     tag = OnlyInstructionTag() and
+     getTranslatedThisParameter(func).getInstruction(InitializerIndirectAddressTag()) = result
+   }
+ }
+
+ class TranslatedParameterReadEffect extends TranslatedReadEffect, TTranslatedParameterReadEffect {
+   Parameter param;
+
+   TranslatedParameterReadEffect() { TTranslatedParameterReadEffect(param) = this }
+
+   override string toString() { result = "read effect: " + param.toString() }
+
+   override Function getFunction() { param.getFunction() = result }
+
+   override Locatable getAST() { param = result }
+
+   final override IRVariable getInstructionVariable(InstructionTag tag) {
+     getIRUserVariable(getFunction(), param) = result and
+     tag = OnlyInstructionTag()
+   }
+
+   final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+     operandTag = addressOperand() and
+     tag = OnlyInstructionTag() and
+     getTranslatedParameter(param).getInstruction(InitializerIndirectAddressTag()) = result
+   }
+ }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedInitialization.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedInitialization.qll
new file mode 100644
index 00000000000..4b6538654db
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedInitialization.qll
@@ -0,0 +1,919 @@
+private import cpp
+private import semmle.code.cpp.ir.implementation.Opcode
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import InstructionTag
+private import TranslatedElement
+private import TranslatedExpr
+private import TranslatedFunction
+
+/**
+ * Gets the `TranslatedInitialization` for the expression `expr`.
+ */
+TranslatedInitialization getTranslatedInitialization(Expr expr) { result.getExpr() = expr }
+
+/**
+ * Base class for any `TranslatedElement` that has an initialization as a child.
+ * Provides the child with the address and type of the location to be
+ * initialized.
+ */
+abstract class InitializationContext extends TranslatedElement {
+ /**
+ * Gets the instruction that produces the address of the location to be
+ * initialized.
+ */
+ abstract Instruction getTargetAddress();
+
+ /**
+ * Gets the type of the location to be initialized.
+ */
+ abstract Type getTargetType();
+}
+
+/**
+ * Base class for any element that initializes a stack variable. Examples include local variable
+ * declarations, `return` statements, and `throw` expressions.
+ */
+abstract class TranslatedVariableInitialization extends TranslatedElement, InitializationContext {
+ final override TranslatedElement getChild(int id) { id = 0 and result = getInitialization() }
+
+ final override Instruction getFirstInstruction() {
+ result = getInstruction(InitializerVariableAddressTag())
+ }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ tag = InitializerVariableAddressTag() and
+ opcode instanceof Opcode::VariableAddress and
+ resultType = getTypeForGLValue(getTargetType())
+ or
+ hasUninitializedInstruction() and
+ tag = InitializerStoreTag() and
+ opcode instanceof Opcode::Uninitialized and
+ resultType = getTypeForPRValue(getTargetType())
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+ (
+ tag = InitializerVariableAddressTag() and
+ kind instanceof GotoEdge and
+ if hasUninitializedInstruction()
+ then result = getInstruction(InitializerStoreTag())
+ else result = getInitialization().getFirstInstruction()
+ )
+ or
+ hasUninitializedInstruction() and
+ kind instanceof GotoEdge and
+ tag = InitializerStoreTag() and
+ (
+ result = getInitialization().getFirstInstruction()
+ or
+ not exists(getInitialization()) and result = getInitializationSuccessor()
+ )
+ }
+
+ final override Instruction getChildSuccessor(TranslatedElement child) {
+ child = getInitialization() and result = getInitializationSuccessor()
+ }
+
+ override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+ hasUninitializedInstruction() and
+ tag = InitializerStoreTag() and
+ operandTag instanceof AddressOperandTag and
+ result = getInstruction(InitializerVariableAddressTag())
+ }
+
+ final override IRVariable getInstructionVariable(InstructionTag tag) {
+ (
+ tag = InitializerVariableAddressTag()
+ or
+ hasUninitializedInstruction() and tag = InitializerStoreTag()
+ ) and
+ result = getIRVariable()
+ }
+
+ final override Instruction getTargetAddress() {
+ result = getInstruction(InitializerVariableAddressTag())
+ }
+
+ /**
+ * Get the initialization for the variable.
+ */
+ abstract TranslatedInitialization getInitialization();
+
+ /**
+ * Get the `IRVariable` to be initialized. This may be an `IRTempVariable`.
+ */
+ abstract IRVariable getIRVariable();
+
+ /**
+ * Gets the `Instruction` to be executed immediately after the initialization.
+ */
+ abstract Instruction getInitializationSuccessor();
+
+ /**
+ * Holds if this initialization requires an `Uninitialized` instruction to be emitted before
+ * evaluating the initializer.
+ */
+ final predicate hasUninitializedInstruction() {
+ (
+ not exists(getInitialization()) or
+ getInitialization() instanceof TranslatedListInitialization or
+ getInitialization() instanceof TranslatedConstructorInitialization or
+ getInitialization().(TranslatedStringLiteralInitialization).zeroInitRange(_, _)
+ ) and
+ // Variables with static or thread-local storage duration are zero-initialized at program startup.
+ getIRVariable() instanceof IRAutomaticVariable
+ }
+}
+
+/**
+ * Represents the IR translation of any initialization, whether from an
+ * initializer list or from a direct initializer.
+ */
+abstract class TranslatedInitialization extends TranslatedElement, TTranslatedInitialization {
+ Expr expr;
+
+ TranslatedInitialization() { this = TTranslatedInitialization(expr) }
+
+ final override string toString() { result = "init: " + expr.toString() }
+
+ final override Function getFunction() { result = expr.getEnclosingFunction() }
+
+ final override Locatable getAST() { result = expr }
+
+ /**
+ * Gets the expression that is doing the initialization.
+ */
+ final Expr getExpr() { result = expr }
+
+ /**
+ * Gets the initialization context that describes the location being
+ * initialized.
+ */
+ final InitializationContext getContext() { result = getParent() }
+
+ final TranslatedFunction getEnclosingFunction() {
+ result = getTranslatedFunction(expr.getEnclosingFunction())
+ }
+}
+
+/**
+ * Represents the IR translation of an initialization from an initializer list.
+ */
+abstract class TranslatedListInitialization extends TranslatedInitialization, InitializationContext {
+ override Instruction getFirstInstruction() {
+ result = getChild(0).getFirstInstruction()
+ or
+ not exists(getChild(0)) and result = getParent().getChildSuccessor(this)
+ }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ exists(int index |
+ child = getChild(index) and
+ if exists(getChild(index + 1))
+ then result = getChild(index + 1).getFirstInstruction()
+ else result = getParent().getChildSuccessor(this)
+ )
+ }
+
+ final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none()
+ }
+
+ final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+ override Instruction getTargetAddress() { result = getContext().getTargetAddress() }
+
+ override Type getTargetType() { result = getContext().getTargetType() }
+}
+
+/**
+ * Represents the IR translation of an initialization of a class object from an
+ * initializer list.
+ */
+class TranslatedClassListInitialization extends TranslatedListInitialization {
+ override ClassAggregateLiteral expr;
+
+ override TranslatedElement getChild(int id) {
+ exists(TranslatedFieldInitialization fieldInit |
+ result = fieldInit and
+ fieldInit = getTranslatedFieldInitialization(expr, _) and
+ fieldInit.getOrder() = id
+ )
+ }
+}
+
+/**
+ * Represents the IR translation of the initialization of an array from
+ * an initializer list.
+ */
+class TranslatedArrayListInitialization extends TranslatedListInitialization {
+  override ArrayOrVectorAggregateLiteral expr;
+
+  override TranslatedElement getChild(int id) {
+    // Children are numbered by ascending element index.
+    result =
+      rank[id + 1](TranslatedElementInitialization elementInit |
+        expr = elementInit.getInitList()
+      |
+        elementInit order by elementInit.getElementIndex()
+      )
+  }
+}
+
+/**
+ * Represents the IR translation of an initialization whose initializer is a
+ * single expression rather than an aggregate literal.
+ */
+abstract class TranslatedDirectInitialization extends TranslatedInitialization {
+  TranslatedDirectInitialization() { not expr instanceof AggregateLiteral }
+
+  override TranslatedElement getChild(int id) { result = this.getInitializer() and id = 0 }
+
+  override Instruction getFirstInstruction() { result = this.getInitializer().getFirstInstruction() }
+
+  final TranslatedExpr getInitializer() { result = getTranslatedExpr(expr) }
+}
+
+/**
+ * Represents the IR translation of an initialization from a single initializer
+ * expression that is neither a constructor call nor a string literal; the
+ * value is written to the target with a single `Store`.
+ */
+class TranslatedSimpleDirectInitialization extends TranslatedDirectInitialization {
+  TranslatedSimpleDirectInitialization() {
+    not expr instanceof StringLiteral and
+    not expr instanceof ConstructorCall
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = InitializerStoreTag() and
+    opcode instanceof Opcode::Store and
+    resultType = getTypeForPRValue(this.getContext().getTargetType())
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = InitializerStoreTag() and
+    kind instanceof GotoEdge and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getInitializer() and result = this.getInstruction(InitializerStoreTag())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = InitializerStoreTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getContext().getTargetAddress()
+      or
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getInitializer().getResult()
+    )
+  }
+}
+
+/**
+ * Represents the IR translation of the initialization of an array from a
+ * string literal.
+ */
+class TranslatedStringLiteralInitialization extends TranslatedDirectInitialization {
+  override StringLiteral expr;
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    // Load the string literal, giving a prvalue of type `char[len]`.
+    tag = InitializerLoadStringTag() and
+    opcode instanceof Opcode::Load and
+    resultType = getTypeForPRValue(expr.getType())
+    or
+    // Store the loaded string into the target.
+    tag = InitializerStoreTag() and
+    opcode instanceof Opcode::Store and
+    resultType = getTypeForPRValue(expr.getType())
+    or
+    exists(int startIndex, int elementCount |
+      // When the initializer string is shorter than the target array, a
+      // second instruction sequence stores a constant zero into the
+      // remainder of the array.
+      this.zeroInitRange(startIndex, elementCount) and
+      (
+        // A constant zero whose size is the size of the space remaining
+        // in the target array.
+        tag = ZeroPadStringConstantTag() and
+        opcode instanceof Opcode::Constant and
+        resultType = getUnknownOpaqueType(elementCount * this.getElementType().getSize())
+        or
+        // The index of the first element to be zero initialized.
+        tag = ZeroPadStringElementIndexTag() and
+        opcode instanceof Opcode::Constant and
+        resultType = getIntType()
+        or
+        // The address of the first element to be zero initialized.
+        tag = ZeroPadStringElementAddressTag() and
+        opcode instanceof Opcode::PointerAdd and
+        resultType = getTypeForGLValue(this.getElementType())
+        or
+        // The store of the constant zero into the remainder of the string.
+        tag = ZeroPadStringStoreTag() and
+        opcode instanceof Opcode::Store and
+        resultType = getUnknownOpaqueType(elementCount * this.getElementType().getSize())
+      )
+    )
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    (
+      tag = InitializerLoadStringTag() and
+      result = this.getInstruction(InitializerStoreTag())
+      or
+      if this.zeroInitRange(_, _)
+      then (
+        tag = InitializerStoreTag() and
+        result = this.getInstruction(ZeroPadStringConstantTag())
+        or
+        tag = ZeroPadStringConstantTag() and
+        result = this.getInstruction(ZeroPadStringElementIndexTag())
+        or
+        tag = ZeroPadStringElementIndexTag() and
+        result = this.getInstruction(ZeroPadStringElementAddressTag())
+        or
+        tag = ZeroPadStringElementAddressTag() and
+        result = this.getInstruction(ZeroPadStringStoreTag())
+        or
+        tag = ZeroPadStringStoreTag() and
+        result = this.getParent().getChildSuccessor(this)
+      ) else (
+        tag = InitializerStoreTag() and
+        result = this.getParent().getChildSuccessor(this)
+      )
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getInitializer() and result = this.getInstruction(InitializerLoadStringTag())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = InitializerLoadStringTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getInitializer().getResult()
+    )
+    or
+    tag = InitializerStoreTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getContext().getTargetAddress()
+      or
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getInstruction(InitializerLoadStringTag())
+    )
+    or
+    tag = ZeroPadStringElementAddressTag() and
+    (
+      operandTag instanceof LeftOperandTag and
+      result = this.getContext().getTargetAddress()
+      or
+      operandTag instanceof RightOperandTag and
+      result = this.getInstruction(ZeroPadStringElementIndexTag())
+    )
+    or
+    tag = ZeroPadStringStoreTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getInstruction(ZeroPadStringElementAddressTag())
+      or
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getInstruction(ZeroPadStringConstantTag())
+    )
+  }
+
+  override int getInstructionElementSize(InstructionTag tag) {
+    tag = ZeroPadStringElementAddressTag() and
+    result = max(this.getElementType().getSize())
+  }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    exists(int startIndex |
+      this.zeroInitRange(startIndex, _) and
+      (
+        tag = ZeroPadStringConstantTag() and
+        result = "0"
+        or
+        tag = ZeroPadStringElementIndexTag() and
+        result = startIndex.toString()
+      )
+    )
+  }
+
+  override predicate needsUnknownOpaqueType(int byteSize) {
+    exists(int elementCount |
+      this.zeroInitRange(_, elementCount) and
+      byteSize = elementCount * this.getElementType().getSize()
+    )
+  }
+
+  private Type getElementType() {
+    result = this.getContext().getTargetType().getUnspecifiedType().(ArrayType).getBaseType()
+  }
+
+  /**
+   * Holds if `elementCount` array elements, beginning at `startIndex`, must
+   * be zero initialized.
+   */
+  predicate zeroInitRange(int startIndex, int elementCount) {
+    exists(int targetCount |
+      startIndex = expr.getUnspecifiedType().(ArrayType).getArraySize() and
+      targetCount = this.getContext().getTargetType().getUnspecifiedType().(ArrayType).getArraySize() and
+      elementCount = targetCount - startIndex and
+      elementCount > 0
+    )
+  }
+}
+
+class TranslatedConstructorInitialization extends TranslatedDirectInitialization,
+  StructorCallContext {
+  override ConstructorCall expr;
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getInitializer() and result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    none()
+  }
+
+  override Instruction getReceiver() { result = this.getContext().getTargetAddress() }
+}
+
+/**
+ * Gets the `TranslatedFieldInitialization` whose AST is initializer list
+ * `initList` and whose field is `field`.
+ */
+TranslatedFieldInitialization getTranslatedFieldInitialization(
+  ClassAggregateLiteral initList, Field field
+) {
+  field = result.getField() and initList = result.getAST()
+}
+
+TranslatedFieldInitialization getTranslatedConstructorFieldInitialization(ConstructorFieldInit init) {
+  init = result.getAST()
+}
+
+/**
+ * Represents the IR translation of the initialization of a single field.
+ * Every such element produces a `FieldAddress` instruction for the field.
+ */
+abstract class TranslatedFieldInitialization extends TranslatedElement {
+  Expr ast;
+  Field field;
+
+  final override string toString() { result = ast.toString() + "." + field.toString() }
+
+  final override Locatable getAST() { result = ast }
+
+  final override Function getFunction() { result = ast.getEnclosingFunction() }
+
+  final override Instruction getFirstInstruction() { result = this.getInstruction(this.getFieldAddressTag()) }
+
+  /**
+   * Gets the zero-based position at which this field is initialized,
+   * relative to the other fields in the class.
+   */
+  final int getOrder() { result = field.getInitializationOrder() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = this.getFieldAddressTag() and
+    opcode instanceof Opcode::FieldAddress and
+    resultType = getTypeForGLValue(field.getType())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = this.getFieldAddressTag() and
+    operandTag instanceof UnaryOperandTag and
+    result = this.getParent().(InitializationContext).getTargetAddress()
+  }
+
+  override Field getInstructionField(InstructionTag tag) {
+    tag = this.getFieldAddressTag() and result = field
+  }
+
+  final InstructionTag getFieldAddressTag() { result = InitializerFieldAddressTag() }
+
+  final Field getField() { result = field }
+}
+
+/**
+ * Represents the IR translation of the initialization of a field that has
+ * an explicit initializer expression.
+ */
+class TranslatedExplicitFieldInitialization extends TranslatedFieldInitialization,
+  InitializationContext, TTranslatedExplicitFieldInitialization {
+  Expr expr;
+
+  TranslatedExplicitFieldInitialization() {
+    this = TTranslatedExplicitFieldInitialization(ast, field, expr)
+  }
+
+  override Instruction getTargetAddress() { result = this.getInstruction(this.getFieldAddressTag()) }
+
+  override Type getTargetType() { result = field.getUnspecifiedType() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = this.getFieldAddressTag() and
+    kind instanceof GotoEdge and
+    result = this.getInitialization().getFirstInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getInitialization() and result = this.getParent().getChildSuccessor(this)
+  }
+
+  override TranslatedElement getChild(int id) { result = this.getInitialization() and id = 0 }
+
+  private TranslatedInitialization getInitialization() {
+    result = getTranslatedInitialization(expr)
+  }
+}
+
+private string getZeroValue(Type type) {
+  if not type instanceof FloatingPointType then result = "0" else result = "0.0"
+}
+
+/**
+ * Represents the IR translation of the initialization of a field that has no
+ * corresponding element in the initializer list; a zero constant is stored.
+ */
+class TranslatedFieldValueInitialization extends TranslatedFieldInitialization,
+  TTranslatedFieldValueInitialization {
+  TranslatedFieldValueInitialization() { this = TTranslatedFieldValueInitialization(ast, field) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    TranslatedFieldInitialization.super.hasInstruction(opcode, tag, resultType)
+    or
+    tag = this.getFieldDefaultValueTag() and
+    opcode instanceof Opcode::Constant and
+    resultType = getTypeForPRValue(field.getType())
+    or
+    tag = this.getFieldDefaultValueStoreTag() and
+    opcode instanceof Opcode::Store and
+    resultType = getTypeForPRValue(field.getUnspecifiedType())
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    (
+      tag = this.getFieldAddressTag() and
+      result = this.getInstruction(this.getFieldDefaultValueTag())
+      or
+      tag = this.getFieldDefaultValueTag() and
+      result = this.getInstruction(this.getFieldDefaultValueStoreTag())
+      or
+      tag = this.getFieldDefaultValueStoreTag() and
+      result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    tag = this.getFieldDefaultValueTag() and
+    result = getZeroValue(field.getUnspecifiedType())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = TranslatedFieldInitialization.super.getInstructionRegisterOperand(tag, operandTag)
+    or
+    tag = this.getFieldDefaultValueStoreTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getInstruction(this.getFieldAddressTag())
+      or
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getInstruction(this.getFieldDefaultValueTag())
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  override TranslatedElement getChild(int id) { none() }
+
+  private InstructionTag getFieldDefaultValueTag() { result = InitializerFieldDefaultValueTag() }
+
+  private InstructionTag getFieldDefaultValueStoreTag() {
+    result = InitializerFieldDefaultValueStoreTag()
+  }
+}
+
+/**
+ * Represents the IR translation of the initialization of an array element
+ * that belongs to an initializer list.
+ */
+abstract class TranslatedElementInitialization extends TranslatedElement {
+  ArrayOrVectorAggregateLiteral initList;
+
+  final override string toString() {
+    result = initList.toString() + "[" + this.getElementIndex().toString() + "]"
+  }
+
+  final override Locatable getAST() { result = initList }
+
+  final override Function getFunction() { result = initList.getEnclosingFunction() }
+
+  final override Instruction getFirstInstruction() { result = this.getInstruction(this.getElementIndexTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = this.getElementIndexTag() and
+    opcode instanceof Opcode::Constant and
+    resultType = getIntType()
+    or
+    tag = this.getElementAddressTag() and
+    opcode instanceof Opcode::PointerAdd and
+    resultType = getTypeForGLValue(this.getElementType())
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = this.getElementIndexTag() and
+    kind instanceof GotoEdge and
+    result = this.getInstruction(this.getElementAddressTag())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = this.getElementAddressTag() and
+    (
+      operandTag instanceof LeftOperandTag and
+      result = this.getParent().(InitializationContext).getTargetAddress()
+      or
+      operandTag instanceof RightOperandTag and
+      result = this.getInstruction(this.getElementIndexTag())
+    )
+  }
+
+  override int getInstructionElementSize(InstructionTag tag) {
+    tag = this.getElementAddressTag() and
+    result = max(this.getElementType().getSize())
+  }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    tag = this.getElementIndexTag() and
+    result = this.getElementIndex().toString()
+  }
+
+  abstract int getElementIndex();
+
+  final InstructionTag getElementAddressTag() { result = InitializerElementAddressTag() }
+
+  final InstructionTag getElementIndexTag() { result = InitializerElementIndexTag() }
+
+  final ArrayOrVectorAggregateLiteral getInitList() { result = initList }
+
+  final Type getElementType() { result = initList.getElementType() }
+}
+
+/**
+ * Represents the IR translation of the initialization of an array element
+ * that has an explicit element expression in the initializer list.
+ */
+class TranslatedExplicitElementInitialization extends TranslatedElementInitialization,
+  TTranslatedExplicitElementInitialization, InitializationContext {
+  int elementIndex;
+
+  TranslatedExplicitElementInitialization() {
+    this = TTranslatedExplicitElementInitialization(initList, elementIndex)
+  }
+
+  override Instruction getTargetAddress() { result = this.getInstruction(this.getElementAddressTag()) }
+
+  override Type getTargetType() { result = this.getElementType() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    result = TranslatedElementInitialization.super.getInstructionSuccessor(tag, kind)
+    or
+    tag = this.getElementAddressTag() and
+    kind instanceof GotoEdge and
+    result = this.getInitialization().getFirstInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getInitialization() and result = this.getParent().getChildSuccessor(this)
+  }
+
+  override TranslatedElement getChild(int id) { result = this.getInitialization() and id = 0 }
+
+  override int getElementIndex() { result = elementIndex }
+
+  TranslatedInitialization getInitialization() {
+    result = getTranslatedInitialization(initList.getElementExpr(elementIndex).getFullyConverted())
+  }
+}
+
+/**
+ * Represents the IR translation of the initialization of a range of array
+ * elements that have no corresponding elements in the initializer list.
+ */
+class TranslatedElementValueInitialization extends TranslatedElementInitialization,
+  TTranslatedElementValueInitialization {
+  int elementIndex;
+  int elementCount;
+
+  TranslatedElementValueInitialization() {
+    this = TTranslatedElementValueInitialization(initList, elementIndex, elementCount)
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    TranslatedElementInitialization.super.hasInstruction(opcode, tag, resultType)
+    or
+    tag = this.getElementDefaultValueTag() and
+    opcode instanceof Opcode::Constant and
+    resultType = this.getDefaultValueType()
+    or
+    tag = this.getElementDefaultValueStoreTag() and
+    opcode instanceof Opcode::Store and
+    resultType = this.getDefaultValueType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    result = TranslatedElementInitialization.super.getInstructionSuccessor(tag, kind)
+    or
+    kind instanceof GotoEdge and
+    (
+      tag = this.getElementAddressTag() and
+      result = this.getInstruction(this.getElementDefaultValueTag())
+      or
+      tag = this.getElementDefaultValueTag() and
+      result = this.getInstruction(this.getElementDefaultValueStoreTag())
+      or
+      tag = this.getElementDefaultValueStoreTag() and
+      result = this.getParent().getChildSuccessor(this)
+    )
+  }
+
+  override string getInstructionConstantValue(InstructionTag tag) {
+    result = TranslatedElementInitialization.super.getInstructionConstantValue(tag)
+    or
+    tag = this.getElementDefaultValueTag() and
+    result = getZeroValue(this.getElementType())
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = TranslatedElementInitialization.super.getInstructionRegisterOperand(tag, operandTag)
+    or
+    tag = this.getElementDefaultValueStoreTag() and
+    (
+      operandTag instanceof AddressOperandTag and
+      result = this.getInstruction(this.getElementAddressTag())
+      or
+      operandTag instanceof StoreValueOperandTag and
+      result = this.getInstruction(this.getElementDefaultValueTag())
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override int getElementIndex() { result = elementIndex }
+
+  override predicate needsUnknownOpaqueType(int byteSize) {
+    elementCount != 0 and byteSize = elementCount * this.getElementType().getSize()
+  }
+
+  private InstructionTag getElementDefaultValueTag() {
+    result = InitializerElementDefaultValueTag()
+  }
+
+  private InstructionTag getElementDefaultValueStoreTag() {
+    result = InitializerElementDefaultValueStoreTag()
+  }
+
+  private CppType getDefaultValueType() {
+    if elementCount = 1
+    then result = getTypeForPRValue(this.getElementType())
+    else result = getUnknownOpaqueType(elementCount * this.getElementType().getSize())
+  }
+}
+
+abstract class TranslatedStructorCallFromStructor extends TranslatedElement, StructorCallContext {
+  FunctionCall call;
+
+  final override Locatable getAST() { result = call }
+
+  final override TranslatedElement getChild(int id) {
+    result = this.getStructorCall() and
+    id = 0
+  }
+
+  final override Function getFunction() { result = call.getEnclosingFunction() }
+
+  final override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getStructorCall() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  final TranslatedExpr getStructorCall() { result = getTranslatedExpr(call) }
+}
+
+/**
+ * Represents the IR translation of a call to a base class constructor or
+ * destructor made from within a derived class constructor or destructor.
+ */
+abstract class TranslatedBaseStructorCall extends TranslatedStructorCallFromStructor {
+  final override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = OnlyInstructionTag() and
+    opcode instanceof Opcode::ConvertToNonVirtualBase and
+    resultType = getTypeForGLValue(call.getTarget().getDeclaringType())
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = OnlyInstructionTag() and
+    kind instanceof GotoEdge and
+    result = this.getStructorCall().getFirstInstruction()
+  }
+
+  final override Instruction getReceiver() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  final override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = OnlyInstructionTag() and
+    operandTag instanceof UnaryOperandTag and
+    result = getTranslatedFunction(this.getFunction()).getInitializeThisInstruction()
+  }
+
+  final override predicate getInstructionInheritance(
+    InstructionTag tag, Class baseClass, Class derivedClass
+  ) {
+    tag = OnlyInstructionTag() and
+    baseClass = call.getTarget().getDeclaringType().getUnspecifiedType() and
+    derivedClass = this.getFunction().getDeclaringType().getUnspecifiedType()
+  }
+}
+
+/**
+ * Represents a call, made from within a constructor, to a delegating or base class constructor.
+ */
+abstract class TranslatedConstructorCallFromConstructor extends TranslatedStructorCallFromStructor,
+  TTranslatedConstructorBaseInit {
+  TranslatedConstructorCallFromConstructor() { this = TTranslatedConstructorBaseInit(call) }
+}
+
+TranslatedConstructorCallFromConstructor getTranslatedConstructorBaseInit(ConstructorBaseInit init) {
+  init = result.getAST()
+}
+
+/**
+ * Represents the IR translation of a delegating constructor call made from within a constructor.
+ */
+class TranslatedConstructorDelegationInit extends TranslatedConstructorCallFromConstructor {
+  override ConstructorDelegationInit call;
+
+  final override string toString() { result = "delegation construct: " + call.toString() }
+
+  final override Instruction getFirstInstruction() {
+    result = this.getStructorCall().getFirstInstruction()
+  }
+
+  final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  final override Instruction getReceiver() {
+    result = getTranslatedFunction(this.getFunction()).getInitializeThisInstruction()
+  }
+}
+
+/**
+ * Represents the IR translation of a call to a base class constructor made from
+ * within a derived class constructor.
+ */
+class TranslatedConstructorBaseInit extends TranslatedConstructorCallFromConstructor,
+  TranslatedBaseStructorCall {
+  TranslatedConstructorBaseInit() { not call instanceof ConstructorDelegationInit }
+
+  final override string toString() { result = "construct base: " + call.toString() }
+}
+
+TranslatedDestructorBaseDestruction getTranslatedDestructorBaseDestruction(
+  DestructorBaseDestruction destruction
+) {
+  destruction = result.getAST()
+}
+
+/**
+ * Represents the IR translation of a call to a base class destructor made from
+ * within a derived class destructor.
+ */
+class TranslatedDestructorBaseDestruction extends TranslatedBaseStructorCall,
+  TTranslatedDestructorBaseDestruction {
+  TranslatedDestructorBaseDestruction() { this = TTranslatedDestructorBaseDestruction(call) }
+
+  final override string toString() { result = "destroy base: " + call.toString() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedStmt.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedStmt.qll
new file mode 100644
index 00000000000..ce08fc9367f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedStmt.qll
@@ -0,0 +1,792 @@
+private import cpp
+private import semmle.code.cpp.ir.internal.IRUtilities
+private import semmle.code.cpp.ir.implementation.internal.OperandTag
+private import semmle.code.cpp.ir.internal.CppType
+private import semmle.code.cpp.ir.internal.TempVariableTag
+private import InstructionTag
+private import TranslatedCondition
+private import TranslatedDeclarationEntry
+private import TranslatedElement
+private import TranslatedExpr
+private import TranslatedFunction
+private import TranslatedInitialization
+
+TranslatedStmt getTranslatedStmt(Stmt stmt) { stmt = result.getAST() }
+
+abstract class TranslatedStmt extends TranslatedElement, TTranslatedStmt {
+  Stmt stmt;
+
+  TranslatedStmt() { this = TTranslatedStmt(stmt) }
+
+  final override Locatable getAST() { result = stmt }
+
+  final override Function getFunction() { result = stmt.getEnclosingFunction() }
+
+  final override string toString() { result = stmt.toString() }
+}
+
+class TranslatedEmptyStmt extends TranslatedStmt {
+  TranslatedEmptyStmt() {
+    stmt instanceof EmptyStmt or
+    stmt instanceof LabelStmt or
+    stmt instanceof SwitchCase
+  }
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = OnlyInstructionTag() and
+    opcode instanceof Opcode::NoOp and
+    resultType = getVoidType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = OnlyInstructionTag() and
+    kind instanceof GotoEdge and
+    result = this.getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+}
+
+/**
+ * The IR translation of a declaration statement. It consists of the IR for each individual
+ * local variable that the statement declares. Declarations of extern variables and functions
+ * do not generate any instructions.
+ */
+class TranslatedDeclStmt extends TranslatedStmt {
+  override DeclStmt stmt;
+
+  override TranslatedElement getChild(int id) { result = this.getDeclarationEntry(id) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  override Instruction getFirstInstruction() {
+    result = this.getDeclarationEntry(0).getFirstInstruction()
+    or
+    not exists(this.getDeclarationEntry(0)) and result = this.getParent().getChildSuccessor(this)
+  }
+
+  private int getChildCount() { result = count(this.getDeclarationEntry(_)) }
+
+  /**
+   * Gets the `TranslatedDeclarationEntry` child at zero-based index `index`. Not every
+   * `DeclarationEntry` has a `TranslatedDeclarationEntry` (e.g. extern functions), so the
+   * original children are mapped onto a contiguous range that contains only those with an
+   * actual `TranslatedDeclarationEntry`.
+   */
+  private TranslatedDeclarationEntry getDeclarationEntry(int index) {
+    result =
+      rank[index + 1](TranslatedDeclarationEntry translated, int sourceIndex |
+        translated = getTranslatedDeclarationEntry(stmt.getDeclarationEntry(sourceIndex))
+      |
+        translated order by sourceIndex
+      )
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    exists(int childIndex |
+      child = this.getDeclarationEntry(childIndex) and
+      if childIndex = this.getChildCount() - 1
+      then result = this.getParent().getChildSuccessor(this)
+      else result = this.getDeclarationEntry(childIndex + 1).getFirstInstruction()
+    )
+  }
+}
+
+class TranslatedExprStmt extends TranslatedStmt {
+  override ExprStmt stmt;
+
+  TranslatedExpr getExpr() {
+    result = getTranslatedExpr(stmt.getExpr().getFullyConverted())
+  }
+
+  override TranslatedElement getChild(int id) { result = this.getExpr() and id = 0 }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  override Instruction getFirstInstruction() { result = this.getExpr().getFirstInstruction() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getExpr() and
+    result = this.getParent().getChildSuccessor(this)
+  }
+}
+
+abstract class TranslatedReturnStmt extends TranslatedStmt {
+  override ReturnStmt stmt;
+
+  final TranslatedFunction getEnclosingFunction() {
+    getTranslatedFunction(stmt.getEnclosingFunction()) = result
+  }
+}
+
+/**
+ * The IR translation of a `return` statement that has an expression, in a function that returns a value.
+ */
+class TranslatedReturnValueStmt extends TranslatedReturnStmt, TranslatedVariableInitialization {
+  TranslatedReturnValueStmt() { hasReturnValue(stmt.getEnclosingFunction()) and stmt.hasExpr() }
+
+  final override Instruction getInitializationSuccessor() {
+    result = this.getEnclosingFunction().getReturnSuccessorInstruction()
+  }
+
+  final override Type getTargetType() { result = this.getEnclosingFunction().getReturnType() }
+
+  final override TranslatedInitialization getInitialization() {
+    result = getTranslatedInitialization(stmt.getExpr().getFullyConverted())
+  }
+
+  final override IRVariable getIRVariable() { result = this.getEnclosingFunction().getReturnVariable() }
+}
+
+/**
+ * The IR translation of a `return` statement that has an expression, in a function with no return value.
+ */
+class TranslatedReturnVoidExpressionStmt extends TranslatedReturnStmt {
+  TranslatedReturnVoidExpressionStmt() {
+    not hasReturnValue(stmt.getEnclosingFunction()) and stmt.hasExpr()
+  }
+
+  override TranslatedElement getChild(int id) {
+    result = this.getExpr() and
+    id = 0
+  }
+
+  override Instruction getFirstInstruction() { result = this.getExpr().getFirstInstruction() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = OnlyInstructionTag() and
+    opcode instanceof Opcode::NoOp and
+    resultType = getVoidType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = OnlyInstructionTag() and
+    kind instanceof GotoEdge and
+    result = this.getEnclosingFunction().getReturnSuccessorInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = this.getExpr() and
+    result = this.getInstruction(OnlyInstructionTag())
+  }
+
+  private TranslatedExpr getExpr() { result = getTranslatedExpr(stmt.getExpr()) }
+}
+
+/**
+ * The IR translation of a `return` statement that does not return a value. This covers the
+ * implicit return statements at the end of `void`-returning functions.
+ */
+class TranslatedReturnVoidStmt extends TranslatedReturnStmt {
+  TranslatedReturnVoidStmt() {
+    not hasReturnValue(stmt.getEnclosingFunction()) and not stmt.hasExpr()
+  }
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override Instruction getFirstInstruction() { result = this.getInstruction(OnlyInstructionTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = OnlyInstructionTag() and
+    opcode instanceof Opcode::NoOp and
+    resultType = getVoidType()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = OnlyInstructionTag() and
+    kind instanceof GotoEdge and
+    result = this.getEnclosingFunction().getReturnSuccessorInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+}
+
+/**
+ * The IR translation of the implicit `return` statement that the extractor generates for control
+ * flow reaching the end of the body of a non-`void`-returning function. Such flow has undefined
+ * behavior, so the translation is a lone `Unreached` instruction with no successors; this keeps
+ * the flow from continuing on and polluting other analysis. The underlying assumption is that the
+ * developer knows the implicit `return` cannot be reached, even when the compiler cannot prove it.
+ */
+class TranslatedUnreachableReturnStmt extends TranslatedReturnStmt {
+  TranslatedUnreachableReturnStmt() {
+    hasReturnValue(stmt.getEnclosingFunction()) and not stmt.hasExpr()
+  }
+
+  override Instruction getFirstInstruction() { result = getInstruction(OnlyInstructionTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    resultType = getVoidType() and
+    opcode instanceof Opcode::Unreached and
+    tag = OnlyInstructionTag()
+  }
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+}
+
+/**
+ * The IR translation of a C++ `try` statement. It generates no instructions of its own, and its
+ * exception successor is the first instruction of its first handler.
+ */
+class TranslatedTryStmt extends TranslatedStmt {
+  override TryStmt stmt;
+
+  private TranslatedStmt getBody() { result = getTranslatedStmt(stmt.getStmt()) }
+
+  // Handler `index` is the AST child at position `index + 1` of the `try` statement.
+  private TranslatedHandler getHandler(int index) {
+    result = getTranslatedStmt(stmt.getChild(index + 1))
+  }
+
+  override TranslatedElement getChild(int id) {
+    result = getHandler(id - 1)
+    or
+    result = getBody() and id = 0
+  }
+
+  override Instruction getFirstInstruction() { result = getBody().getFirstInstruction() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    // Whichever child finishes, control continues at the successor of the `try`.
+    result = getParent().getChildSuccessor(this) and child = getAChild()
+  }
+
+  final override Instruction getExceptionSuccessorInstruction() {
+    result = getHandler(0).getFirstInstruction()
+  }
+
+  final Instruction getNextHandler(TranslatedHandler handler) {
+    // The exception successor of the `try` itself is the first catch clause, so the last catch
+    // clause flows to the exception successor of the parent of the `try`.
+    result = getParent().getExceptionSuccessorInstruction() and
+    handler = getHandler(stmt.getNumberOfCatchClauses() - 1)
+    or
+    exists(int pos | handler = getHandler(pos) |
+      result = getHandler(pos + 1).getFirstInstruction()
+    )
+  }
+}
+
+/** The IR translation of a block statement. An empty block is translated to a single `NoOp`. */
+class TranslatedBlock extends TranslatedStmt {
+  override BlockStmt stmt;
+
+  private predicate isEmpty() { not exists(stmt.getStmt(0)) }
+
+  private int getStmtCount() { result = stmt.getNumStmt() }
+
+  private TranslatedStmt getStmt(int index) { result = getTranslatedStmt(stmt.getStmt(index)) }
+
+  override TranslatedElement getChild(int id) { result = getStmt(id) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    tag = OnlyInstructionTag() and
+    resultType = getVoidType() and
+    opcode instanceof Opcode::NoOp and
+    isEmpty()
+  }
+
+  override Instruction getFirstInstruction() {
+    isEmpty() and result = getInstruction(OnlyInstructionTag())
+    or
+    not isEmpty() and result = getStmt(0).getFirstInstruction()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag() and
+    result = getParent().getChildSuccessor(this)
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    exists(int pos | child = getStmt(pos) |
+      pos = getStmtCount() - 1 and result = getParent().getChildSuccessor(this)
+      or
+      not pos = getStmtCount() - 1 and result = getStmt(pos + 1).getFirstInstruction()
+    )
+  }
+}
+
+/**
+ * The IR translation of a C++ `catch` handler. Each handler begins with its `CatchTag` instruction.
+ */
+abstract class TranslatedHandler extends TranslatedStmt {
+  override Handler stmt;
+
+  TranslatedStmt getBlock() { result = getTranslatedStmt(stmt.getBlock()) }
+
+  override TranslatedElement getChild(int id) { result = getBlock() and id = 1 }
+
+  override Instruction getFirstInstruction() { result = getInstruction(CatchTag()) }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = getParent().getChildSuccessor(this) and child = getBlock()
+  }
+
+  override Instruction getExceptionSuccessorInstruction() {
+    // An exception thrown inside a `catch` block flows to the handler for the parent of the
+    // enclosing `try`.
+    result = getParent().getParent().getExceptionSuccessorInstruction()
+  }
+}
+
+/**
+ * The IR translation of a C++ `catch` block that catches an exception of a specific type
+ * (e.g. `catch (const std::exception&)`).
+ */
+class TranslatedCatchByTypeHandler extends TranslatedHandler {
+  TranslatedCatchByTypeHandler() { exists(stmt.getParameter()) }
+
+  private TranslatedParameter getParameter() {
+    result = getTranslatedParameter(stmt.getParameter())
+  }
+
+  override TranslatedElement getChild(int id) {
+    result = getParameter() and id = 0
+    or
+    result = super.getChild(id)
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::CatchByType and
+    resultType = getVoidType() and
+    tag = CatchTag()
+  }
+
+  override CppType getInstructionExceptionType(InstructionTag tag) {
+    result = getTypeForPRValue(stmt.getParameter().getType()) and
+    tag = CatchTag()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = CatchTag() and
+    (
+      result = getParent().(TranslatedTryStmt).getNextHandler(this) and
+      kind instanceof ExceptionEdge
+      or
+      result = getParameter().getFirstInstruction() and
+      kind instanceof GotoEdge
+    )
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = getParameter() and result = getBlock().getFirstInstruction()
+    or
+    result = super.getChildSuccessor(child)
+  }
+}
+
+/**
+ * The IR translation of a C++ `catch (...)` block, i.e. a handler that declares no parameter.
+ */
+class TranslatedCatchAnyHandler extends TranslatedHandler {
+  TranslatedCatchAnyHandler() { not exists(stmt.getParameter()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::CatchAny and
+    resultType = getVoidType() and
+    tag = CatchTag()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    result = getBlock().getFirstInstruction() and
+    kind instanceof GotoEdge and
+    tag = CatchTag()
+  }
+}
+
+/** The IR translation of an `if` statement. Only its children generate instructions. */
+class TranslatedIfStmt extends TranslatedStmt, ConditionContext {
+  override IfStmt stmt;
+
+  private predicate hasElse() { exists(stmt.getElse()) }
+
+  private TranslatedStmt getThen() { result = getTranslatedStmt(stmt.getThen()) }
+
+  private TranslatedStmt getElse() { result = getTranslatedStmt(stmt.getElse()) }
+
+  private TranslatedCondition getCondition() {
+    result = getTranslatedCondition(stmt.getCondition().getFullyConverted())
+  }
+
+  override TranslatedElement getChild(int id) {
+    result = getCondition() and id = 0
+    or
+    result = getThen() and id = 1
+    or
+    result = getElse() and id = 2
+  }
+
+  override Instruction getFirstInstruction() { result = getCondition().getFirstInstruction() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+    result = getThen().getFirstInstruction() and child = getCondition()
+  }
+
+  override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+    child = getCondition() and
+    if not hasElse()
+    then result = getParent().getChildSuccessor(this)
+    else result = getElse().getFirstInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = getParent().getChildSuccessor(this) and
+    (child = getElse() or child = getThen())
+  }
+}
+
+/** The common IR translation of loops that consist of a condition and a body statement. */
+abstract class TranslatedLoop extends TranslatedStmt, ConditionContext {
+  override Loop stmt;
+
+  final predicate hasCondition() { exists(stmt.getCondition()) }
+
+  final TranslatedStmt getBody() { result = getTranslatedStmt(stmt.getStmt()) }
+
+  final TranslatedCondition getCondition() {
+    result = getTranslatedCondition(stmt.getCondition().getFullyConverted())
+  }
+
+  final Instruction getFirstConditionInstruction() {
+    // A loop without a condition goes straight (back) to the start of its body.
+    if not hasCondition()
+    then result = getBody().getFirstInstruction()
+    else result = getCondition().getFirstInstruction()
+  }
+
+  override TranslatedElement getChild(int id) {
+    id = 0 and result = getCondition() or id = 1 and result = getBody()
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    none()
+  }
+
+  final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+  final override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+    result = getBody().getFirstInstruction() and child = getCondition()
+  }
+
+  final override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+    result = getParent().getChildSuccessor(this) and child = getCondition()
+  }
+}
+
+class TranslatedWhileStmt extends TranslatedLoop {
+  TranslatedWhileStmt() { stmt instanceof WhileStmt }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = getFirstConditionInstruction() and child = getBody()
+  }
+
+  override Instruction getFirstInstruction() { result = getFirstConditionInstruction() }
+}
+
+class TranslatedDoStmt extends TranslatedLoop {
+  TranslatedDoStmt() { stmt instanceof DoStmt }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    result = getFirstConditionInstruction() and child = getBody()
+  }
+
+  override Instruction getFirstInstruction() { result = getBody().getFirstInstruction() }
+}
+
+/** The IR translation of a traditional `for` loop; initialization and update are optional. */
+class TranslatedForStmt extends TranslatedLoop {
+  override ForStmt stmt;
+
+  private predicate hasInitialization() { exists(stmt.getInitialization()) }
+
+  private predicate hasUpdate() { exists(stmt.getUpdate()) }
+
+  private TranslatedStmt getInitialization() {
+    result = getTranslatedStmt(stmt.getInitialization())
+  }
+
+  TranslatedExpr getUpdate() { result = getTranslatedExpr(stmt.getUpdate().getFullyConverted()) }
+
+  override TranslatedElement getChild(int id) {
+    result = getInitialization() and id = 0
+    or
+    result = getCondition() and id = 1
+    or
+    result = getUpdate() and id = 2
+    or
+    result = getBody() and id = 3
+  }
+
+  override Instruction getFirstInstruction() {
+    if not hasInitialization()
+    then result = getFirstConditionInstruction()
+    else result = getInitialization().getFirstInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    (
+      child = getBody() and
+      if hasUpdate()
+      then result = getUpdate().getFirstInstruction()
+      else result = getFirstConditionInstruction()
+    )
+    or
+    // Both the initialization and the update are followed by the loop test.
+    (child = getInitialization() or child = getUpdate()) and
+    result = getFirstConditionInstruction()
+  }
+}
+
+/**
+ * The IR translation of a range-based `for` loop. Control flows through the range, begin and end
+ * variable declarations, then loops: condition, per-iteration variable declaration, body, update.
+ * This class does not extend `TranslatedLoop`, because the "body" here is the per-iteration
+ * variable declaration followed by the user-written body statement. Handling the control flow
+ * separately is easier than synthesizing a single body or complicating `TranslatedLoop`.
+ */
+class TranslatedRangeBasedForStmt extends TranslatedStmt, ConditionContext {
+ override RangeBasedForStmt stmt;
+
+ override TranslatedElement getChild(int id) {
+ id = 0 and result = getRangeVariableDeclaration()
+ or
+ id = 1 and result = getBeginVariableDeclaration()
+ or
+ id = 2 and result = getEndVariableDeclaration()
+ or
+ id = 3 and result = getCondition()
+ or
+ id = 4 and result = getUpdate()
+ or
+ id = 5 and result = getVariableDeclaration()
+ or
+ id = 6 and result = getBody()
+ }
+
+ override Instruction getFirstInstruction() {
+ result = getRangeVariableDeclaration().getFirstInstruction()
+ }
+
+ override Instruction getChildSuccessor(TranslatedElement child) {
+ child = getRangeVariableDeclaration() and
+ result = getBeginVariableDeclaration().getFirstInstruction()
+ or
+ child = getBeginVariableDeclaration() and
+ result = getEndVariableDeclaration().getFirstInstruction()
+ or
+ child = getEndVariableDeclaration() and
+ result = getCondition().getFirstInstruction()
+ or
+ child = getVariableDeclaration() and
+ result = getBody().getFirstInstruction()
+ or
+ child = getBody() and
+ result = getUpdate().getFirstInstruction()
+ or
+ child = getUpdate() and
+ result = getCondition().getFirstInstruction()
+ }
+
+ override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+ none()
+ }
+
+ override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
+
+ override Instruction getChildTrueSuccessor(TranslatedCondition child) {
+ child = getCondition() and result = getVariableDeclaration().getFirstInstruction()
+ }
+
+ override Instruction getChildFalseSuccessor(TranslatedCondition child) {
+ child = getCondition() and result = getParent().getChildSuccessor(this)
+ }
+
+ private TranslatedRangeBasedForVariableDeclaration getRangeVariableDeclaration() {
+ result = getTranslatedRangeBasedForVariableDeclaration(stmt.getRangeVariable())
+ }
+
+ private TranslatedRangeBasedForVariableDeclaration getBeginVariableDeclaration() {
+ result = getTranslatedRangeBasedForVariableDeclaration(stmt.getBeginVariable())
+ }
+
+ private TranslatedRangeBasedForVariableDeclaration getEndVariableDeclaration() {
+ result = getTranslatedRangeBasedForVariableDeclaration(stmt.getEndVariable())
+ }
+
+ // Deliberately not `private`: `getInstructionBackEdgeSuccessor` needs the loop condition.
+ final TranslatedCondition getCondition() {
+ result = getTranslatedCondition(stmt.getCondition().getFullyConverted())
+ }
+
+ // Deliberately not `private`: `getInstructionBackEdgeSuccessor` needs the update expression.
+ final TranslatedExpr getUpdate() {
+ result = getTranslatedExpr(stmt.getUpdate().getFullyConverted())
+ }
+
+ private TranslatedRangeBasedForVariableDeclaration getVariableDeclaration() {
+ result = getTranslatedRangeBasedForVariableDeclaration(stmt.getVariable())
+ }
+
+ private TranslatedStmt getBody() { result = getTranslatedStmt(stmt.getStmt()) }
+}
+
+/** The IR translation of a jump statement: a `NoOp` that goes to the jump's target statement. */
+class TranslatedJumpStmt extends TranslatedStmt {
+  override JumpStmt stmt;
+
+  override TranslatedElement getChild(int id) { none() }
+
+  override Instruction getChildSuccessor(TranslatedElement child) { none() }
+
+  override Instruction getFirstInstruction() { result = getInstruction(OnlyInstructionTag()) }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::NoOp and resultType = getVoidType() and tag = OnlyInstructionTag()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    // Control continues at the first instruction of the statement being jumped to.
+    result = getTranslatedStmt(stmt.getTarget()).getFirstInstruction() and
+    kind instanceof GotoEdge and
+    tag = OnlyInstructionTag()
+  }
+}
+
+/** Gets the edge kind (a `CaseEdge` or a `DefaultEdge`) that leads to `switchCase`. */
+private EdgeKind getCaseEdge(SwitchCase switchCase) {
+  result instanceof DefaultEdge and switchCase instanceof DefaultCase
+  or
+  exists(CaseEdge caseEdge | result = caseEdge |
+    hasCaseEdge(switchCase, caseEdge.getMinValue(), caseEdge.getMaxValue())
+  )
+}
+
+/** The IR translation of a `switch` statement, built around one `Switch` instruction. */
+class TranslatedSwitchStmt extends TranslatedStmt {
+  override SwitchStmt stmt;
+
+  private TranslatedStmt getBody() { result = getTranslatedStmt(stmt.getStmt()) }
+
+  private TranslatedExpr getExpr() {
+    result = getTranslatedExpr(stmt.getExpr().getFullyConverted())
+  }
+
+  override TranslatedElement getChild(int id) {
+    result = getExpr() and id = 0
+    or
+    result = getBody() and id = 1
+  }
+
+  override Instruction getFirstInstruction() { result = getExpr().getFirstInstruction() }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::Switch and
+    resultType = getVoidType() and
+    tag = SwitchBranchTag()
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    result = getExpr().getResult() and
+    operandTag instanceof ConditionOperandTag and
+    tag = SwitchBranchTag()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    tag = SwitchBranchTag() and
+    exists(SwitchCase target | target = stmt.getASwitchCase() |
+      result = getTranslatedStmt(target).getFirstInstruction() and kind = getCaseEdge(target)
+    )
+    or
+    // Without a `default` case, the default edge leaves the `switch` via the parent's successor.
+    result = getParent().getChildSuccessor(this) and
+    kind instanceof DefaultEdge and
+    tag = SwitchBranchTag() and
+    not stmt.hasDefaultCase()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    child = getBody() and result = getParent().getChildSuccessor(this)
+    or
+    child = getExpr() and result = getInstruction(SwitchBranchTag())
+  }
+}
+
+/** The IR translation of an inline assembly statement: its operands, then one `InlineAsm`. */
+class TranslatedAsmStmt extends TranslatedStmt {
+  override AsmStmt stmt;
+
+  // Every `Expr` child of the statement is translated, in its fully converted form.
+  override TranslatedExpr getChild(int id) {
+    result = getTranslatedExpr(stmt.getChild(id).(Expr).getFullyConverted())
+  }
+
+  override Instruction getFirstInstruction() {
+    if not exists(getChild(0))
+    then result = getInstruction(AsmTag())
+    else result = getChild(0).getFirstInstruction()
+  }
+
+  override Instruction getChildSuccessor(TranslatedElement child) {
+    exists(int pos | child = getChild(pos) |
+      // Children are chained in index order; the `InlineAsm` follows the last one.
+      if not exists(getChild(pos + 1))
+      then result = getInstruction(AsmTag())
+      else result = getChild(pos + 1).getFirstInstruction()
+    )
+  }
+
+  override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
+    opcode instanceof Opcode::InlineAsm and
+    resultType = getUnknownType() and
+    tag = AsmTag()
+  }
+
+  override Instruction getInstructionRegisterOperand(InstructionTag tag, OperandTag operandTag) {
+    tag = AsmTag() and
+    exists(int pos | operandTag = asmOperand(pos) | result = getChild(pos).getResult())
+  }
+
+  final override CppType getInstructionMemoryOperandType(
+    InstructionTag tag, TypedOperandTag operandTag
+  ) {
+    // The side-effect operand of the `InlineAsm` has unknown type.
+    result = getUnknownType() and
+    operandTag instanceof SideEffectOperandTag and
+    tag = AsmTag()
+  }
+
+  override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
+    kind instanceof GotoEdge and
+    tag = AsmTag() and
+    result = getParent().getChildSuccessor(this)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/VarArgs.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/VarArgs.qll
new file mode 100644
index 00000000000..83b46497817
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/VarArgs.qll
@@ -0,0 +1,62 @@
+/**
+ * Utilities for determining which parameters and arguments correspond to the `...` parameter for
+ * varargs functions.
+ */
+
+private import cpp
+
+/**
+ * Gets the index of the `...` parameter of `func`, if `func` is a varargs function. When present,
+ * the value is always equal to `func.getNumberOfParameters()`.
+ */
+int getEllipsisParameterIndexForFunction(Function func) {
+  result = func.getNumberOfParameters() and func.isVarargs()
+}
+
+/**
+ * Gets the index of the `...` parameter of `type`, if any. This is the index of its last parameter.
+ */
+int getEllipsisParameterIndexForRoutineType(RoutineType type) {
+  // The extractor doesn't record this directly, so look for an `UnknownType` in the last position.
+  // "Last" is the highest index: counting `getAParameterType()` would merge repeated types.
+  type.getParameterType(result) instanceof UnknownType and
+  result = max(int index | exists(type.getParameterType(index)))
+}
+
+/**
+ * Gets the index of the `...` parameter of the target of `call`, if any. This will be one greater
+ * than the index of the last declared positional parameter.
+ */
+int getEllipsisParameterIndex(Call call) {
+  // An `ExprCall`: use the routine type underlying the type of the called expression.
+  exists(ExprCall exprCall | exprCall = call |
+    result = getEllipsisParameterIndexForRoutineType(exprCall.getExpr().getType().stripType())
+  )
+  or
+  // A `FunctionCall`: prefer the target's routine type, falling back to the target function.
+  exists(FunctionCall funcCall | funcCall = call |
+    if not funcCall.getTargetType() instanceof RoutineType
+    then result = getEllipsisParameterIndexForFunction(funcCall.getTarget())
+    else result = getEllipsisParameterIndexForRoutineType(funcCall.getTargetType())
+  )
+}
+
+/**
+ * Gets the index of the parameter that the argument at `argIndex` of `call` initializes. An
+ * ordinary positional argument maps to the parameter with the same index. In a call to a varargs
+ * function, every argument passed to the `...` maps to the index returned by
+ * `getEllipsisParameterIndex()`.
+ */
+int getParameterIndexForArgument(Call call, int argIndex) {
+  exists(call.getArgument(argIndex)) and
+  if not argIndex >= getEllipsisParameterIndex(call)
+  then result = argIndex
+  else result = getEllipsisParameterIndex(call)
+}
+
+/**
+ * Holds if argument `index` of `call` is passed to the `...` of a varargs function.
+ */
+predicate isEllipsisArgumentIndex(Call call, int index) {
+  getEllipsisParameterIndex(call) <= index and exists(call.getArgument(index))
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/Dominance.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/Dominance.qll
new file mode 100644
index 00000000000..cddc3e23d7e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/Dominance.qll
@@ -0,0 +1,22 @@
+private import DominanceInternal
+/** Holds if `dominator` is the immediate dominator of `block`. */
+predicate blockImmediatelyDominates(Graph::Block dominator, Graph::Block block) =
+ idominance(Graph::isEntryBlock/1, Graph::blockSuccessor/2)(_, dominator, block)
+/** Holds if `block` is reached from `dominator` by one or more immediate-dominance steps. */
+predicate blockStrictlyDominates(Graph::Block dominator, Graph::Block block) {
+ blockImmediatelyDominates+(dominator, block)
+}
+
+predicate blockDominates(Graph::Block dominator, Graph::Block block) {
+  dominator = block or blockStrictlyDominates(dominator, block)
+}
+
+/** Gets a block in the dominance frontier of `dominator`. */
+Graph::Block getDominanceFrontier(Graph::Block dominator) {
+  not blockImmediatelyDominates(dominator, result) and
+  Graph::blockSuccessor(dominator, result)
+  or
+  exists(Graph::Block child | blockImmediatelyDominates(dominator, child) |
+    not blockImmediatelyDominates(dominator, result) and result = getDominanceFrontier(child)
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/DominanceInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/DominanceInternal.qll
new file mode 100644
index 00000000000..cee8fa1543b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/DominanceInternal.qll
@@ -0,0 +1,9 @@
+private import ReachableBlock as Reachability
+
+private module ReachabilityGraph = Reachability::Graph;
+
+module Graph {
+ import Reachability::Graph
+ // The blocks of this graph are the reachable blocks only.
+ class Block = Reachability::ReachableBlock;
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/PrintDominance.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/PrintDominance.qll
new file mode 100644
index 00000000000..f26565bc278
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/PrintDominance.qll
@@ -0,0 +1,22 @@
+private import DominanceInternal
+private import ReachableBlockInternal
+private import Dominance
+import IR
+
+/** Provides each block's immediate dominator and dominance frontier as IR block properties. */
+private class DominancePropertyProvider extends IRPropertyProvider {
+  override string getBlockProperty(IRBlock block, string key) {
+    key = "DominanceFrontier" and
+    result =
+      strictconcat(IRBlock member |
+        member = getDominanceFrontier(block)
+      |
+        member.getDisplayIndex().toString(), ", " order by member.getDisplayIndex()
+      )
+    or
+    key = "ImmediateDominator" and
+    exists(IRBlock idom | blockImmediatelyDominates(idom, block) |
+      result = "Block " + idom.getDisplayIndex().toString()
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/PrintReachableBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/PrintReachableBlock.qll
new file mode 100644
index 00000000000..6befad72336
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/PrintReachableBlock.qll
@@ -0,0 +1,17 @@
+private import ReachableBlockInternal
+private import ReachableBlock
+import IR
+
+/** Provides block properties that flag unreachable blocks and infeasible outgoing edges. */
+private class ReachableBlockPropertyProvider extends IRPropertyProvider {
+  override string getBlockProperty(IRBlock block, string key) {
+    result = "true" and
+    (
+      key = "Unreachable" and not block instanceof ReachableBlock
+      or
+      exists(EdgeKind kind | isInfeasibleEdge(block, kind) |
+        key = "Infeasible(" + kind.toString() + ")"
+      )
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/ReachableBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/ReachableBlock.qll
new file mode 100644
index 00000000000..25a53bbefe8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/ReachableBlock.qll
@@ -0,0 +1,53 @@
+private import ReachableBlockInternal
+private import IR
+private import ConstantAnalysis
+
+/** Holds if `instr` branches on a constant condition that rules out its `kind` edge. */
+predicate isInfeasibleInstructionSuccessor(Instruction instr, EdgeKind kind) {
+  exists(int cond | cond = getConstantValue(instr.(ConditionalBranchInstruction).getCondition()) |
+    if cond = 0 then kind instanceof TrueEdge else kind instanceof FalseEdge
+  )
+}
+/** Holds if the `kind` successor of the last instruction of `block` is infeasible. */
+pragma[noinline]
+predicate isInfeasibleEdge(IRBlockBase block, EdgeKind kind) {
+ isInfeasibleInstructionSuccessor(block.getLastInstruction(), kind)
+}
+
+/** Gets a predecessor of `successor` whose edge to `successor` is not infeasible. */
+private IRBlock getAFeasiblePredecessorBlock(IRBlock successor) {
+  exists(EdgeKind kind | successor = result.getSuccessor(kind) |
+    not isInfeasibleEdge(result, kind)
+  )
+}
+
+private predicate isBlockReachable(IRBlock block) {
+  exists(IRFunction func | func.getEntryBlock() = getAFeasiblePredecessorBlock*(block))
+}
+
+/**
+ * An IR block that can be reached from the entry block of the function by following feasible
+ * edges only.
+ */
+class ReachableBlock extends IRBlockBase {
+  ReachableBlock() { isBlockReachable(this) }
+
+  final ReachableBlock getAFeasibleSuccessor() { getAFeasiblePredecessorBlock(result) = this }
+
+  final ReachableBlock getAFeasiblePredecessor() { getAFeasiblePredecessorBlock(this) = result }
+}
+
+/**
+ * An instruction whose enclosing block is a `ReachableBlock`.
+ */
+class ReachableInstruction extends Instruction {
+  ReachableInstruction() { exists(ReachableBlock block | block = this.getBlock()) }
+}
+
+module Graph {
+  predicate blockSuccessor(ReachableBlock pred, ReachableBlock succ) {
+    pred.getAFeasibleSuccessor() = succ
+  }
+
+  predicate isEntryBlock(ReachableBlock block) { block = any(IRFunction f).getEntryBlock() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/ReachableBlockInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/ReachableBlockInternal.qll
new file mode 100644
index 00000000000..3041b662962
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/reachability/ReachableBlockInternal.qll
@@ -0,0 +1,2 @@
+import semmle.code.cpp.ir.implementation.raw.IR as IR
+import semmle.code.cpp.ir.implementation.raw.constant.ConstantAnalysis as ConstantAnalysis
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll
new file mode 100644
index 00000000000..c96783fe6e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll
@@ -0,0 +1,80 @@
+/**
+ * Provides classes that describe the Intermediate Representation (IR) of the program.
+ *
+ * The IR is a representation of the semantics of the program, with very little dependence on the
+ * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`,
+ * and `++i` all have the same semantic effect, but appear in the AST as three different types of
+ * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental
+ * operations similar to:
+ *
+ * ```
+ * r1(int*) = VariableAddress[i] // Compute the address of variable `i`
+ * r2(int) = Load &:r1, m0 // Load the value of `i`
+ * r3(int) = Constant[1] // An integer constant with the value `1`
+ * r4(int) = Add r2, r3 // Add `1` to the value of `i`
+ * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i`
+ * ```
+ *
+ * This allows IR-based analysis to focus on the fundamental operations, rather than having to be
+ * concerned with the various ways of expressing those operations in source code.
+ *
+ * The key classes in the IR are:
+ *
+ * - `IRFunction` - Contains the IR for an entire function definition, including all of that
+ * function's `Instruction`s, `IRBlock`s, and `IRVariable`s.
+ * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be
+ * performed, the operands that produce the inputs to that operation, and the type of the result
+ * of the operation. Control flows from an `Instruction` to one of a set of successor
+ * `Instruction`s.
+ * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly
+ * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has
+ * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction`
+ * that produces its value (its "definition").
+ * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is
+ * created for each variable directly accessed by the function. In addition, `IRVariable`s are
+ * created to represent certain temporary storage locations that do not have explicitly declared
+ * variables in the source code, such as the return value of the function.
+ * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a
+ * sequence of instructions such that control flow can only enter the block at the first
+ * instruction, and can only leave the block from the last instruction.
+ * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType`
+ * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all
+ * be represented as the `IRType` `uint4`, a four-byte unsigned integer.
+ */
+
+import IRFunction
+import Instruction
+import IRBlock
+import IRVariable
+import Operand
+private import internal.IRImports as Imports
+import Imports::EdgeKind
+import Imports::IRType
+import Imports::MemoryAccessKind
+
+private newtype TIRPropertyProvider = MkIRPropertyProvider()
+
+/**
+ * A class that provides additional properties to be dumped for IR instructions, blocks and
+ * operands when using the PrintIR module. Libraries that compute additional facts about IR elements
+ * can extend the single instance of this class to specify the properties computed by the library.
+ */
+class IRPropertyProvider extends TIRPropertyProvider {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "IRPropertyProvider" }
+
+ /**
+ * Gets the value of the property named `key` for `instruction`. Has no result unless overridden.
+ */
+ string getInstructionProperty(Instruction instruction, string key) { none() }
+
+ /**
+ * Gets the value of the property named `key` for `block`. Has no result unless overridden.
+ */
+ string getBlockProperty(IRBlock block, string key) { none() }
+
+ /**
+ * Gets the value of the property named `key` for `operand`. Has no result unless overridden.
+ */
+ string getOperandProperty(Operand operand, string key) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll
new file mode 100644
index 00000000000..4b86f9a7cec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll
@@ -0,0 +1,331 @@
+/**
+ * Provides classes describing basic blocks in the IR of a function.
+ */
+
+private import internal.IRInternal
+import Instruction
+private import internal.IRBlockImports as Imports
+import Imports::EdgeKind
+private import Cached
+
+/**
+ * A basic block in the IR. A basic block consists of a sequence of `Instruction`s with the only
+ * incoming edges at the beginning of the sequence and the only outgoing edges at the end of the
+ * sequence.
+ *
+ * This class does not contain any members that query the predecessor or successor edges of the
+ * block. This allows different classes that extend `IRBlockBase` to expose different subsets of
+ * edges (e.g. ignoring unreachable edges).
+ *
+ * Most consumers should use the class `IRBlock`.
+ */
+class IRBlockBase extends TIRBlock {
+  /** Gets a textual representation of this block. */
+  final string toString() { result = getFirstInstruction(this).toString() }
+
+  /** Gets the source location of the first non-`Phi` instruction in this block. */
+  final Language::Location getLocation() { result = this.getFirstInstruction().getLocation() }
+
+  /**
+   * INTERNAL: Do not use.
+   *
+   * Gets the zero-based index of the block within its function. Only has a result if some
+   * `IRConfiguration` enables debug strings for the enclosing function.
+   * This predicate is used by debugging and printing code only.
+   */
+  int getDisplayIndex() {
+    exists(IRConfiguration::IRConfiguration config |
+      config.shouldEvaluateDebugStringsForFunction(this.getEnclosingFunction())
+    ) and
+    this =
+      rank[result + 1](IRBlock funcBlock, int sortOverride, int sortKey1, int sortKey2 |
+        funcBlock.getEnclosingFunction() = this.getEnclosingFunction() and
+        funcBlock.getFirstInstruction().hasSortKeys(sortKey1, sortKey2) and
+        // Ensure that the block containing `EnterFunction` always comes first.
+        if funcBlock.getFirstInstruction() instanceof EnterFunctionInstruction
+        then sortOverride = 0
+        else sortOverride = 1
+      |
+        funcBlock order by sortOverride, sortKey1, sortKey2
+      )
+  }
+
+  /**
+   * Gets the `index`th non-`Phi` instruction in this block.
+   */
+  final Instruction getInstruction(int index) { result = getInstruction(this, index) }
+
+  /**
+   * Gets a `Phi` instruction that appears at the start of this block.
+   */
+  final PhiInstruction getAPhiInstruction() {
+    Construction::getPhiInstructionBlockStart(result) = this.getFirstInstruction()
+  }
+
+  /**
+   * Gets an instruction in this block. This includes `Phi` instructions.
+   */
+  final Instruction getAnInstruction() {
+    result = this.getInstruction(_) or
+    result = this.getAPhiInstruction()
+  }
+
+  /**
+   * Gets the first non-`Phi` instruction in this block.
+   */
+  final Instruction getFirstInstruction() { result = getFirstInstruction(this) }
+
+  /** Gets the last instruction in this block. */
+  final Instruction getLastInstruction() {
+    result = this.getInstruction(this.getInstructionCount() - 1)
+  }
+
+  /**
+   * Gets the number of non-`Phi` instructions in this block.
+   */
+  final int getInstructionCount() { result = getInstructionCount(this) }
+
+  /**
+   * Gets the `IRFunction` that contains this block.
+   */
+  final IRFunction getEnclosingIRFunction() {
+    result = getFirstInstruction(this).getEnclosingIRFunction()
+  }
+
+  /**
+   * Gets the `Function` that contains this block.
+   */
+  final Language::Function getEnclosingFunction() {
+    result = getFirstInstruction(this).getEnclosingFunction()
+  }
+}
+
+/**
+ * A basic block with additional information about its predecessor and successor edges. Each edge
+ * corresponds to the control flow between the last instruction of one block and the first
+ * instruction of another block.
+ */
+class IRBlock extends IRBlockBase {
+  /**
+   * Gets a block to which control flows directly from this block.
+   */
+  final IRBlock getASuccessor() { blockSuccessor(this, result) }
+
+  /**
+   * Gets a block from which control flows directly to this block.
+   */
+  final IRBlock getAPredecessor() { blockSuccessor(result, this) }
+
+  /**
+   * Gets the block to which control flows directly from this block along an edge of kind `kind`.
+   */
+  final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) }
+
+  /**
+   * Gets the block to which control flows directly from this block along a back edge of kind
+   * `kind`.
+   */
+  final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) }
+
+  /**
+   * Holds if this block immediately dominates `block`.
+   *
+   * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and no
+   * other block strictly dominated by `A` strictly dominates `B`.
+   */
+  final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) }
+
+  /**
+   * Holds if this block strictly dominates `block`.
+   *
+   * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B`
+   * are not the same block.
+   */
+  final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) }
+
+  /**
+   * Holds if this block dominates `block`.
+   *
+   * Block `A` dominates block `B` if any control flow path from the entry block of the function to
+   * block `B` must pass through block `A`. A block always dominates itself.
+   */
+  final predicate dominates(IRBlock block) { this.strictlyDominates(block) or this = block }
+
+  /**
+   * Gets a block on the dominance frontier of this block.
+   *
+   * The dominance frontier of block `A` is the set of blocks `B` such that block `A` dominates an
+   * immediate predecessor of block `B`, but block `A` does not strictly dominate block `B`.
+   */
+  pragma[noinline]
+  final IRBlock dominanceFrontier() {
+    this.dominates(result.getAPredecessor()) and
+    not this.strictlyDominates(result)
+  }
+
+  /**
+   * Holds if this block immediately post-dominates `block`.
+   *
+   * Block `A` immediately post-dominates block `B` if block `A` strictly post-dominates block `B`
+   * and no other block strictly post-dominated by `A` strictly post-dominates `B`.
+   */
+  final predicate immediatelyPostDominates(IRBlock block) {
+    blockImmediatelyPostDominates(this, block)
+  }
+
+  /**
+   * Holds if this block strictly post-dominates `block`.
+   *
+   * Block `A` strictly post-dominates block `B` if block `A` post-dominates block `B` and blocks `A`
+   * and `B` are not the same block.
+   */
+  final predicate strictlyPostDominates(IRBlock block) {
+    blockImmediatelyPostDominates+(this, block)
+  }
+
+  /**
+   * Holds if this block is a post-dominator of `block`.
+   *
+   * Block `A` post-dominates block `B` if any control flow path from `B` to the exit block of the
+   * function must pass through block `A`. A block always post-dominates itself.
+   */
+  final predicate postDominates(IRBlock block) { this.strictlyPostDominates(block) or this = block }
+
+  /**
+   * Gets a block on the post-dominance frontier of this block. NOTE: the name is misspelled
+   * (`Pominance`); it is left unchanged because renaming it would alter the public interface.
+   * The post-dominance frontier of block `A` is the set of blocks `B` such that block `A`
+   * post-dominates an immediate successor of `B`, but does not strictly post-dominate `B`.
+   */
+  pragma[noinline]
+  final IRBlock postPominanceFrontier() {
+    this.postDominates(result.getASuccessor()) and
+    not this.strictlyPostDominates(result)
+  }
+
+  /**
+   * Holds if this block is reachable from the entry block of its function.
+   */
+  final predicate isReachableFromFunctionEntry() {
+    this = this.getEnclosingIRFunction().getEntryBlock() or
+    this.getAPredecessor().isReachableFromFunctionEntry()
+  }
+}
+
+private predicate startsBasicBlock(Instruction instr) {
+  // A block is led by a non-`Phi` instruction that does not follow another one within a block.
+  not (instr instanceof PhiInstruction or adjacentInBlock(_, instr))
+}
+
+/** Holds if `i2` directly follows `i1` within an `IRBlock`. */
+private predicate adjacentInBlock(Instruction i1, Instruction i2) {
+ // - i2 must be the only successor of i1
+ i2 = unique(Instruction i | i = i1.getASuccessor()) and
+ // - i1 must be the only predecessor of i2
+ i1 = unique(Instruction i | i.getASuccessor() = i2) and
+ // - The edge between the two must be a GotoEdge. We just check that one
+ // exists since we've already checked that it's unique.
+ exists(GotoEdge edgeKind | exists(i1.getSuccessor(edgeKind))) and
+ // - The edge must not be a back edge. This means we get the same back edges
+ // in the basic-block graph as we do in the raw CFG.
+ not exists(Construction::getInstructionBackEdgeSuccessor(i1, _))
+ // This predicate could be simplified to remove one of the `unique`s if we
+ // were willing to rely on the CFG being well-formed and thus never having
+ // more than one successor to an instruction that has a `GotoEdge` out of it.
+}
+
+private predicate isEntryBlock(TIRBlock block) {
+  exists(EnterFunctionInstruction enter | block = MkIRBlock(enter))
+}
+
+cached
+private module Cached {
+ cached
+ newtype TIRBlock = MkIRBlock(Instruction firstInstr) { startsBasicBlock(firstInstr) }
+
+ /** Gets the `index`th instruction in the block starting with `first`. */
+ private Instruction getInstructionFromFirst(Instruction first, int index) =
+ shortestDistances(startsBasicBlock/1, adjacentInBlock/2)(first, result, index)
+
+ /** Gets the `index`th instruction in `block`. */
+ cached
+ Instruction getInstruction(TIRBlock block, int index) {
+ result = getInstructionFromFirst(getFirstInstruction(block), index)
+ }
+
+ cached
+ int getInstructionCount(TIRBlock block) { result = strictcount(getInstruction(block, _)) }
+
+ cached
+ predicate blockSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ exists(Instruction predLast, Instruction succFirst |
+ predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
+ succFirst = predLast.getSuccessor(kind) and
+ succ = MkIRBlock(succFirst)
+ )
+ }
+
+ pragma[noinline]
+ private predicate blockIdentity(TIRBlock b1, TIRBlock b2) { b1 = b2 }
+
+ pragma[noopt]
+ cached
+ predicate backEdgeSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ backEdgeSuccessorRaw(pred, succ, kind)
+ or
+ // See the QLDoc on `backEdgeSuccessorRaw`.
+ exists(TIRBlock pred2 |
+ // Joining with `blockIdentity` is a performance trick to get
+ // `forwardEdgeRaw` on the RHS of a join, where it's fast.
+ blockIdentity(pred, pred2) and
+ forwardEdgeRaw+(pred, pred2)
+ ) and
+ blockSuccessor(pred, succ, kind)
+ }
+
+ /**
+ * Holds if there is an edge from `pred` to `succ` that is not a back edge.
+ */
+ private predicate forwardEdgeRaw(TIRBlock pred, TIRBlock succ) {
+ exists(EdgeKind kind |
+ blockSuccessor(pred, succ, kind) and
+ not backEdgeSuccessorRaw(pred, succ, kind)
+ )
+ }
+
+ /**
+ * Holds if the `kind`-edge from `pred` to `succ` is a back edge according to
+ * `Construction`.
+ *
+ * There could be loops of non-back-edges if there is a flaw in the IR
+ * construction or back-edge detection, and this could cause non-termination
+ * of subsequent analysis. To prevent that, a subsequent predicate further
+ * classifies all edges as back edges if they are involved in a loop of
+ * non-back-edges.
+ */
+ private predicate backEdgeSuccessorRaw(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
+ exists(Instruction predLast, Instruction succFirst |
+ predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
+ succFirst = Construction::getInstructionBackEdgeSuccessor(predLast, kind) and
+ succ = MkIRBlock(succFirst)
+ )
+ }
+
+ cached
+ predicate blockSuccessor(TIRBlock pred, TIRBlock succ) { blockSuccessor(pred, succ, _) }
+
+ cached
+ predicate blockImmediatelyDominates(TIRBlock dominator, TIRBlock block) =
+ idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block)
+}
+
+private Instruction getFirstInstruction(TIRBlock block) { MkIRBlock(result) = block }
+
+private predicate blockFunctionExit(IRBlock exit) {
+  exists(ExitFunctionInstruction last | last = exit.getLastInstruction())
+}
+
+private predicate blockPredecessor(IRBlock src, IRBlock pred) { pred = src.getAPredecessor() }
+
+private predicate blockImmediatelyPostDominates(IRBlock postDominator, IRBlock block) =
+  idominance(blockFunctionExit/1, blockPredecessor/2)(_, postDominator, block)
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRConsistency.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRConsistency.ql
new file mode 100644
index 00000000000..909a7a5fc24
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRConsistency.ql
@@ -0,0 +1,8 @@
+/**
+ * @name SSA IR Consistency Check
+ * @description Performs consistency checks on the Intermediate Representation. This query should have no results.
+ * @kind table
+ * @id cpp/ssa-ir-consistency-check
+ */
+
+import IRConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRConsistency.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRConsistency.qll
new file mode 100644
index 00000000000..31983d34247
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRConsistency.qll
@@ -0,0 +1,527 @@
+private import IR
+import InstructionConsistency // module is below
+import IRTypeConsistency // module is in IRType.qll
+
+module InstructionConsistency {
+ private import internal.InstructionImports as Imports
+ private import Imports::OperandTag
+ private import Imports::Overlap
+ private import internal.IRInternal
+
+ private newtype TOptionalIRFunction =
+ TPresentIRFunction(IRFunction irFunc) or
+ TMissingIRFunction()
+
+ /**
+ * An `IRFunction` that might not exist. This is used so that we can produce consistency failures
+ * for IR that also incorrectly lacks a `getEnclosingIRFunction()`.
+ */
+ abstract private class OptionalIRFunction extends TOptionalIRFunction {
+ abstract string toString();
+
+ abstract Language::Location getLocation();
+ }
+
+ private class PresentIRFunction extends OptionalIRFunction, TPresentIRFunction {
+ private IRFunction irFunc;
+
+ PresentIRFunction() { this = TPresentIRFunction(irFunc) }
+
+ override string toString() {
+ result = concat(Language::getIdentityString(irFunc.getFunction()), "; ")
+ }
+
+ override Language::Location getLocation() {
+ // To avoid an overwhelming number of results when the extractor merges functions with the
+ // same name, just pick a single location.
+ result =
+ min(Language::Location loc | loc = irFunc.getLocation() | loc order by loc.toString())
+ }
+ }
+
+ private class MissingIRFunction extends OptionalIRFunction, TMissingIRFunction {
+ override string toString() { result = "" }
+
+ override Language::Location getLocation() { result instanceof Language::UnknownDefaultLocation }
+ }
+
+ private OptionalIRFunction getInstructionIRFunction(Instruction instr) {
+ result = TPresentIRFunction(instr.getEnclosingIRFunction())
+ or
+ not exists(instr.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ pragma[inline]
+ private OptionalIRFunction getInstructionIRFunction(Instruction instr, string irFuncText) {
+ result = getInstructionIRFunction(instr) and
+ irFuncText = result.toString()
+ }
+
+ private OptionalIRFunction getOperandIRFunction(Operand operand) {
+ result = TPresentIRFunction(operand.getEnclosingIRFunction())
+ or
+ not exists(operand.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ pragma[inline]
+ private OptionalIRFunction getOperandIRFunction(Operand operand, string irFuncText) {
+ result = getOperandIRFunction(operand) and
+ irFuncText = result.toString()
+ }
+
+ private OptionalIRFunction getBlockIRFunction(IRBlock block) {
+ result = TPresentIRFunction(block.getEnclosingIRFunction())
+ or
+ not exists(block.getEnclosingIRFunction()) and result = TMissingIRFunction()
+ }
+
+ /**
+ * Holds if instruction `instr` is missing an operand that its opcode is expected to have.
+ */
+ query predicate missingOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag |
+ instr.getOpcode().hasOperand(tag) and
+ not exists(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ message =
+ "Instruction '" + instr.getOpcode().toString() +
+ "' is missing an expected operand with tag '" + tag.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` has an operand whose tag its opcode does not expect.
+ */
+ query predicate unexpectedOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag |
+ exists(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ not instr.getOpcode().hasOperand(tag) and
+ not (instr instanceof CallInstruction and tag instanceof ArgumentOperandTag) and
+ not (
+ instr instanceof BuiltInOperationInstruction and tag instanceof PositionalArgumentOperandTag
+ ) and
+ not (instr instanceof InlineAsmInstruction and tag instanceof AsmOperandTag) and
+ message =
+ "Instruction '" + instr.toString() + "' has unexpected operand '" + tag.toString() +
+ "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` has more than one operand with the same operand tag.
+ */
+ query predicate duplicateOperand(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(OperandTag tag, int operandCount |
+ operandCount =
+ strictcount(NonPhiOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getOperandTag() = tag
+ ) and
+ operandCount > 1 and
+ message =
+ "Instruction has " + operandCount + " operands with tag '" + tag.toString() + "'" +
+ " in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if `Phi` instruction `instr` is missing an operand corresponding to
+ * one of the predecessor blocks of its block.
+ */
+ query predicate missingPhiOperand(
+ PhiInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRBlock pred |
+ pred = instr.getBlock().getAPredecessor() and
+ not exists(PhiInputOperand operand |
+ operand = instr.getAnOperand() and
+ operand.getPredecessorBlock() = pred
+ ) and
+ message =
+ "Instruction '" + instr.toString() + "' is missing an operand for predecessor block '" +
+ pred.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ query predicate missingOperandType(
+ Operand operand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Instruction use |
+ not exists(operand.getType()) and
+ use = operand.getUse() and
+ message =
+ "Operand '" + operand.toString() + "' of instruction '" + use.getOpcode().toString() +
+ "' is missing a type in function '$@'." and
+ irFunc = getOperandIRFunction(operand, irFuncText)
+ )
+ }
+
+ query predicate duplicateChiOperand(
+ ChiInstruction chi, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ chi.getTotal() = chi.getPartial() and
+ message =
+ "Chi instruction for " + chi.getPartial().toString() +
+ " has duplicate operands in function '$@'." and
+ irFunc = getInstructionIRFunction(chi, irFuncText)
+ }
+
+ query predicate sideEffectWithoutPrimary(
+ SideEffectInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(instr.getPrimaryInstruction()) and
+ message =
+ "Side effect instruction '" + instr + "' is missing a primary instruction in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if an instruction, other than `ExitFunction`, has no successors.
+ */
+ query predicate instructionWithoutSuccessor(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(instr.getASuccessor()) and
+ not instr instanceof ExitFunctionInstruction and
+ // Phi instructions aren't linked into the instruction-level flow graph.
+ not instr instanceof PhiInstruction and
+ not instr instanceof UnreachedInstruction and
+ message = "Instruction '" + instr.toString() + "' has no successors in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if there are multiple edges of the same kind from `source`.
+ */
+ query predicate ambiguousSuccessors(
+ Instruction source, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(EdgeKind kind, int n |
+ n = strictcount(Instruction t | source.getSuccessor(kind) = t) and
+ n > 1 and
+ message =
+ "Instruction '" + source.toString() + "' has " + n.toString() + " successors of kind '" +
+ kind.toString() + "' in function '$@'." and
+ irFunc = getInstructionIRFunction(source, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if `instr` is part of a loop even though the AST of `instr`'s enclosing function
+ * contains no element that can cause loops.
+ */
+ query predicate unexplainedLoop(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Language::Function f |
+ exists(IRBlock block |
+ instr.getBlock() = block and
+ block.getEnclosingFunction() = f and
+ block.getASuccessor+() = block
+ ) and
+ not Language::hasPotentialLoop(f) and
+ message =
+ "Instruction '" + instr.toString() + "' is part of an unexplained loop in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if a `Phi` instruction is present in a block with fewer than two
+ * predecessors.
+ */
+ query predicate unnecessaryPhiInstruction(
+ PhiInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int n |
+ n = count(instr.getBlock().getAPredecessor()) and
+ n < 2 and
+ message =
+ "Instruction '" + instr.toString() + "' is in a block with only " + n.toString() +
+ " predecessors in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if a memory operand is connected to a definition with an unmodeled result.
+ */
+ query predicate memoryOperandDefinitionIsUnmodeled(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(MemoryOperand operand, Instruction def |
+ operand = instr.getAnOperand() and
+ def = operand.getAnyDef() and
+ not def.isResultModeled() and
+ message =
+ "Memory operand definition on instruction '" + instr.toString() +
+ "' has unmodeled result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if operand `operand` consumes a value that was defined in
+ * a different function.
+ */
+ query predicate operandAcrossFunctions(
+ Operand operand, string message, OptionalIRFunction useIRFunc, string useIRFuncText,
+ OptionalIRFunction defIRFunc, string defIRFuncText
+ ) {
+ exists(Instruction useInstr, Instruction defInstr |
+ operand.getUse() = useInstr and
+ operand.getAnyDef() = defInstr and
+ useIRFunc = getInstructionIRFunction(useInstr, useIRFuncText) and
+ defIRFunc = getInstructionIRFunction(defInstr, defIRFuncText) and
+ useIRFunc != defIRFunc and
+ message =
+ "Operand '" + operand.toString() + "' is used on instruction '" + useInstr.toString() +
+ "' in function '$@', but is defined on instruction '" + defInstr.toString() +
+ "' in function '$@'."
+ )
+ }
+
+ /**
+ * Holds if instruction `instr` is not in exactly one block.
+ */
+ query predicate instructionWithoutUniqueBlock(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int blockCount |
+ blockCount = count(instr.getBlock()) and
+ blockCount != 1 and
+ message =
+ "Instruction '" + instr.toString() + "' is a member of " + blockCount.toString() +
+ " blocks in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ private predicate forwardEdge(IRBlock b1, IRBlock b2) {
+ b1.getASuccessor() = b2 and
+ not b1.getBackEdgeSuccessor(_) = b2
+ }
+
+ /**
+ * Holds if `f` contains a loop in which no edge is a back edge.
+ *
+ * This check ensures we don't have too _few_ back edges.
+ */
+ query predicate containsLoopOfForwardEdges(IRFunction f, string message) {
+ exists(IRBlock block |
+ forwardEdge+(block, block) and
+ block.getEnclosingIRFunction() = f and
+ message = "Function contains a loop consisting of only forward edges."
+ )
+ }
+
+ /**
+ * Holds if `block` is reachable from its function entry point but would not
+ * be reachable by traversing only forward edges. This check is skipped for
+ * functions containing `goto` statements as the property does not generally
+ * hold there.
+ *
+ * This check ensures we don't have too _many_ back edges.
+ */
+ query predicate lostReachability(
+ IRBlock block, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRFunction f, IRBlock entry |
+ entry = f.getEntryBlock() and
+ entry.getASuccessor+() = block and
+ not forwardEdge+(entry, block) and
+ not Language::hasGoto(f.getFunction()) and
+ message =
+ "Block '" + block.toString() +
+ "' is not reachable by traversing only forward edges in function '$@'." and
+ irFunc = TPresentIRFunction(f) and
+ irFuncText = irFunc.toString()
+ )
+ }
+
+ /**
+ * Holds if the number of back edges differs between the `Instruction` graph
+ * and the `IRBlock` graph.
+ */
+ query predicate backEdgeCountMismatch(OptionalIRFunction irFunc, string message) {
+ exists(int fromInstr, int fromBlock |
+ fromInstr =
+ count(Instruction i1, Instruction i2 |
+ getInstructionIRFunction(i1) = irFunc and i1.getBackEdgeSuccessor(_) = i2
+ ) and
+ fromBlock =
+ count(IRBlock b1, IRBlock b2 |
+ getBlockIRFunction(b1) = irFunc and b1.getBackEdgeSuccessor(_) = b2
+ ) and
+ fromInstr != fromBlock and
+ message =
+ "The instruction graph for function '" + irFunc.toString() + "' contains " +
+ fromInstr.toString() + " back edges, but the block graph contains " + fromBlock.toString()
+ + " back edges."
+ )
+ }
+
+ /**
+ * Holds if `operand` is evaluated at position `index` of `block`. For most operands, this is
+ * the position of the instruction that consumes the use. For a `PhiInputOperand`, the effective
+ * point of evaluation is at the end of the corresponding predecessor block.
+ */
+ private predicate pointOfEvaluation(Operand operand, IRBlock block, int index) {
+ block = operand.(PhiInputOperand).getPredecessorBlock() and
+ index = block.getInstructionCount()
+ or
+ exists(Instruction use |
+ use = operand.(NonPhiOperand).getUse() and
+ block.getInstruction(index) = use
+ )
+ }
+
+ /**
+ * Holds if `useOperand` has a definition that does not dominate the use.
+ */
+ query predicate useNotDominatedByDefinition(
+ Operand useOperand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(IRBlock useBlock, int useIndex, Instruction defInstr, IRBlock defBlock, int defIndex |
+ pointOfEvaluation(useOperand, useBlock, useIndex) and
+ defInstr = useOperand.getAnyDef() and
+ (
+ defInstr instanceof PhiInstruction and
+ defBlock = defInstr.getBlock() and
+ defIndex = -1
+ or
+ defBlock.getInstruction(defIndex) = defInstr
+ ) and
+ not (
+ defBlock.strictlyDominates(useBlock)
+ or
+ defBlock = useBlock and
+ defIndex < useIndex
+ ) and
+ message =
+ "Operand '" + useOperand.toString() +
+ "' is not dominated by its definition in function '$@'." and
+ irFunc = getOperandIRFunction(useOperand, irFuncText)
+ )
+ }
+
+ query predicate switchInstructionWithoutDefaultEdge(
+ SwitchInstruction switchInstr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not exists(switchInstr.getDefaultSuccessor()) and
+ message =
+ "SwitchInstruction " + switchInstr.toString() + " without a DefaultEdge in function '$@'." and
+ irFunc = getInstructionIRFunction(switchInstr, irFuncText)
+ }
+
+ /**
+ * Holds if `instr` is on the chain of chi/phi instructions for all aliased
+ * memory.
+ */
+ private predicate isOnAliasedDefinitionChain(Instruction instr) {
+ instr instanceof AliasedDefinitionInstruction
+ or
+ isOnAliasedDefinitionChain(instr.(ChiInstruction).getTotal())
+ or
+ isOnAliasedDefinitionChain(instr.(PhiInstruction).getAnInputOperand().getAnyDef())
+ }
+
+ private predicate shouldBeConflated(Instruction instr) {
+ isOnAliasedDefinitionChain(instr)
+ or
+ instr.getOpcode() instanceof Opcode::InitializeNonLocal
+ }
+
+ query predicate notMarkedAsConflated(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ shouldBeConflated(instr) and
+ not instr.isResultConflated() and
+ message =
+ "Instruction '" + instr.toString() +
+ "' should be marked as having a conflated result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ query predicate wronglyMarkedAsConflated(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ instr.isResultConflated() and
+ not shouldBeConflated(instr) and
+ message =
+ "Instruction '" + instr.toString() +
+ "' should not be marked as having a conflated result in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ query predicate invalidOverlap(
+ MemoryOperand useOperand, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(Overlap overlap |
+ overlap = useOperand.getDefinitionOverlap() and
+ overlap instanceof MayPartiallyOverlap and
+ message =
+ "MemoryOperand '" + useOperand.toString() + "' has a `getDefinitionOverlap()` of '" +
+ overlap.toString() + "'." and
+ irFunc = getOperandIRFunction(useOperand, irFuncText)
+ )
+ }
+
+ query predicate nonUniqueEnclosingIRFunction(
+ Instruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(int irFuncCount |
+ irFuncCount = count(instr.getEnclosingIRFunction()) and
+ irFuncCount != 1 and
+ message =
+ "Instruction '" + instr.toString() + "' has " + irFuncCount.toString() +
+ " results for `getEnclosingIRFunction()` in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ )
+ }
+
+ /**
+ * Holds if the object address operand for the given `FieldAddress` instruction does not have an
+ * address type.
+ */
+ query predicate fieldAddressOnNonPointer(
+ FieldAddressInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ not instr.getObjectAddressOperand().getIRType() instanceof IRAddressType and
+ message =
+ "FieldAddress instruction '" + instr.toString() +
+ "' has an object address operand that is not an address, in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+
+ /**
+ * Holds if the `this` argument operand for the given `Call` instruction does not have an address
+ * type.
+ */
+ query predicate thisArgumentIsNonPointer(
+ CallInstruction instr, string message, OptionalIRFunction irFunc, string irFuncText
+ ) {
+ exists(ThisArgumentOperand thisOperand | thisOperand = instr.getThisArgumentOperand() |
+ not thisOperand.getIRType() instanceof IRAddressType
+ ) and
+ message =
+ "Call instruction '" + instr.toString() +
+ "' has a `this` argument operand that is not an address, in function '$@'." and
+ irFunc = getInstructionIRFunction(instr, irFuncText)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll
new file mode 100644
index 00000000000..5968e58f90b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll
@@ -0,0 +1,59 @@
+/**
+ * Provides the class `IRFunction`, which represents the Intermediate Representation for the
+ * definition of a function.
+ */
+
+private import internal.IRInternal
+private import internal.IRFunctionImports as Imports
+import Imports::IRFunctionBase
+import Instruction
+
+/**
+ * The IR for a function.
+ */
+class IRFunction extends IRFunctionBase {
+  /**
+   * Gets the entry point for this function.
+   */
+  pragma[noinline]
+  final EnterFunctionInstruction getEnterFunctionInstruction() {
+    result.getEnclosingIRFunction() = this
+  }
+
+  /**
+   * Gets the exit point for this function.
+   */
+  pragma[noinline]
+  final ExitFunctionInstruction getExitFunctionInstruction() {
+    result.getEnclosingIRFunction() = this
+  }
+
+  /**
+   * Gets the single return instruction for this function.
+   */
+  pragma[noinline]
+  final ReturnInstruction getReturnInstruction() { result.getEnclosingIRFunction() = this }
+
+  /**
+   * Gets the variable used to hold the return value of this function. If this
+   * function does not return a value, this predicate does not hold.
+   */
+  pragma[noinline]
+  final IRReturnVariable getReturnVariable() { result.getEnclosingIRFunction() = this }
+
+  /** Gets the block containing the entry point of this function. */
+  pragma[noinline]
+  final IRBlock getEntryBlock() {
+    result.getFirstInstruction() = this.getEnterFunctionInstruction()
+  }
+
+  /**
+   * Gets an instruction in this function.
+   */
+  final Instruction getAnInstruction() { result.getEnclosingIRFunction() = this }
+
+  /**
+   * Gets a block in this function.
+   */
+  final IRBlock getABlock() { result.getEnclosingIRFunction() = this }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll
new file mode 100644
index 00000000000..146fc270738
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll
@@ -0,0 +1,327 @@
+/**
+ * Provides classes that represent variables accessed by the IR.
+ */
+
+private import internal.IRInternal
+import IRFunction
+private import internal.IRVariableImports as Imports
+import Imports::TempVariableTag
+private import Imports::IRUtilities
+private import Imports::TTempVariableTag
+private import Imports::TIRVariable
+private import Imports::IRType
+
+/**
+ * A variable referenced by the IR for a function.
+ *
+ * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated
+ * by the AST-to-IR translation (`IRTempVariable`).
+ */
+class IRVariable extends TIRVariable {
+  Language::Function func;
+
+  IRVariable() {
+    this = TIRUserVariable(_, _, func) or
+    this = TIRTempVariable(func, _, _, _) or
+    this = TIRStringLiteral(func, _, _, _) or
+    this = TIRDynamicInitializationFlag(func, _, _)
+  }
+
+  /** Gets a textual representation of this element. */
+  string toString() { none() }
+
+  /**
+   * Holds if this variable's value cannot be changed within a function. Currently used for string
+   * literals, but could also apply to `const` global and static variables.
+   */
+  predicate isReadOnly() { none() }
+
+  /**
+   * Gets the type of the variable.
+   */
+  final Language::Type getType() { this.getLanguageType().hasType(result, false) }
+
+  /**
+   * Gets the language-neutral type of the variable.
+   */
+  final IRType getIRType() { result = this.getLanguageType().getIRType() }
+
+  /**
+   * Gets the language-specific type of the variable.
+   */
+  Language::LanguageType getLanguageType() { none() }
+
+  /**
+   * Gets the AST node that declared this variable, or that introduced this
+   * variable as part of the AST-to-IR translation.
+   */
+  Language::AST getAST() { none() }
+
+  /**
+   * Gets an identifier string for the variable. This identifier is unique
+   * within the function.
+   */
+  string getUniqueId() { none() }
+
+  /**
+   * Gets the source location of this variable, taken from its AST node.
+   */
+  final Language::Location getLocation() { result = this.getAST().getLocation() }
+
+  /**
+   * Gets the IR for the function that references this variable.
+   */
+  final IRFunction getEnclosingIRFunction() { func = result.getFunction() }
+
+  /**
+   * Gets the function that references this variable.
+   */
+  final Language::Function getEnclosingFunction() { result = func }
+}
+
+/**
+ * A user-declared variable referenced by the IR for a function.
+ */
+class IRUserVariable extends IRVariable, TIRUserVariable {
+  Language::Variable var;
+  Language::LanguageType type;
+
+  IRUserVariable() { this = TIRUserVariable(var, type, func) }
+
+  final override string toString() { result = this.getVariable().toString() }
+
+  final override Language::AST getAST() { result = var }
+
+  final override string getUniqueId() {
+    result = this.getVariable().toString() + " " + this.getVariable().getLocation().toString()
+  }
+
+  final override Language::LanguageType getLanguageType() { result = type }
+
+  /**
+   * Gets the original user-declared variable that this IR variable represents.
+   */
+  Language::Variable getVariable() { result = var }
+}
+
+/**
+ * A variable (user-declared or temporary) that is allocated on the stack. This includes all
+ * parameters, non-static local variables, and temporary variables.
+ */
+class IRAutomaticVariable extends IRVariable {
+  IRAutomaticVariable() {
+    exists(Language::Variable userVar |
+      Language::isVariableAutomatic(userVar) and
+      this = TIRUserVariable(userVar, _, func)
+    )
+    or
+    this = TIRTempVariable(func, _, _, _)
+  }
+}
+
+/**
+ * A user-declared variable that is allocated on the stack. This includes all parameters and
+ * non-static local variables.
+ */
+class IRAutomaticUserVariable extends IRUserVariable, IRAutomaticVariable {
+  override Language::AutomaticVariable var;
+
+  final override Language::AutomaticVariable getVariable() { var = result }
+}
+
+/**
+ * A user-declared variable that is not allocated on the stack. This includes all global variables,
+ * namespace-scope variables, static fields, and static local variables.
+ */
+class IRStaticUserVariable extends IRUserVariable {
+  override Language::StaticVariable var;
+
+  IRStaticUserVariable() { not Language::isVariableAutomatic(var) }
+
+  final override Language::StaticVariable getVariable() { var = result }
+}
+
+/**
+ * A variable that is not user-declared. This includes temporary variables generated as part of IR
+ * construction, string literals, and dynamic initialization flags.
+ */
+class IRGeneratedVariable extends IRVariable {
+  Language::AST ast;
+  Language::LanguageType type;
+
+  IRGeneratedVariable() {
+    this = TIRTempVariable(func, ast, _, type) or
+    this = TIRStringLiteral(func, ast, type, _) or
+    this = TIRDynamicInitializationFlag(func, ast, type)
+  }
+
+  final override Language::LanguageType getLanguageType() { result = type }
+
+  final override Language::AST getAST() { result = ast }
+
+  override string toString() { result = this.getBaseString() + this.getLocationString() }
+
+  override string getUniqueId() { none() }
+
+  /**
+   * INTERNAL: Do not use.
+   *
+   * Gets a string of the form `line:column` giving the start of the AST that generated this variable.
+   *
+   * This is used by debugging and printing code only.
+   */
+  final string getLocationString() {
+    result =
+      ast.getLocation().getStartLine().toString() + ":" +
+        ast.getLocation().getStartColumn().toString()
+  }
+
+  /**
+   * INTERNAL: Do not use.
+   *
+   * Gets the string that is combined with the location of the variable to generate the string
+   * representation of this variable.
+   *
+   * This is used by debugging and printing code only.
+   */
+  string getBaseString() { none() }
+}
+
+/**
+ * A temporary variable introduced by IR construction. The most common examples are the variable
+ * generated to hold the return value of a function, or the variable generated to hold the result of
+ * a conditional operator (`a ? b : c`).
+ */
+class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVariable {
+  TempVariableTag tag;
+
+  IRTempVariable() { this = TIRTempVariable(func, ast, tag, type) }
+
+  final override string getUniqueId() {
+    result = "Temp: " + Construction::getTempVariableUniqueId(this)
+  }
+
+  /**
+   * Gets the "tag" object that differentiates this temporary variable from other temporary
+   * variables generated for the same AST.
+   */
+  final TempVariableTag getTag() { tag = result }
+
+  override string getBaseString() { result = "#temp" }
+}
+
+/**
+ * A temporary variable generated to hold the return value of a function.
+ */
+class IRReturnVariable extends IRTempVariable {
+  IRReturnVariable() { ReturnValueTempVar() = tag }
+
+  final override string toString() { result = "#return" }
+}
+
+/**
+ * A temporary variable generated to hold the exception thrown by a `ThrowValue` instruction.
+ */
+class IRThrowVariable extends IRTempVariable {
+  IRThrowVariable() { ThrowTempVar() = tag }
+
+  final override string getBaseString() { result = "#throw" }
+}
+
+/**
+ * A temporary variable generated to hold the contents of all arguments passed to the `...` of a
+ * function that accepts a variable number of arguments.
+ */
+class IREllipsisVariable extends IRTempVariable, IRParameter {
+  IREllipsisVariable() { EllipsisTempVar() = tag }
+
+  final override string toString() { result = "#ellipsis" }
+
+  final override int getIndex() { func.getNumberOfParameters() = result }
+}
+
+/**
+ * A temporary variable generated to hold the `this` pointer. Its parameter index is -1.
+ */
+class IRThisVariable extends IRTempVariable, IRParameter {
+  IRThisVariable() { ThisTempVar() = tag }
+
+  final override string toString() { result = "#this" }
+
+  final override int getIndex() { result = -1 }
+}
+
+/**
+ * A variable generated to represent the contents of a string literal. This variable acts much like
+ * a read-only global variable.
+ */
+class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral {
+  Language::StringLiteral literal;
+
+  IRStringLiteral() { this = TIRStringLiteral(func, ast, type, literal) }
+
+  final override predicate isReadOnly() { any() }
+
+  final override string getUniqueId() {
+    result = "String: " + this.getLocationString() + "=" + Language::getStringLiteralText(literal)
+  }
+
+  final override string getBaseString() { result = "#string" }
+
+  /**
+   * Gets the AST of the string literal represented by this `IRStringLiteral`.
+   */
+  final Language::StringLiteral getLiteral() { literal = result }
+}
+
+/**
+ * A variable generated to track whether a specific non-stack variable has been initialized. This is
+ * used to model the runtime initialization of static local variables in C++, as well as static
+ * fields in C#.
+ */
+class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitializationFlag {
+  Language::Variable var;
+
+  IRDynamicInitializationFlag() {
+    ast = var and this = TIRDynamicInitializationFlag(func, var, type)
+  }
+
+  final override string toString() { result = var.toString() + "#init" }
+
+  /**
+   * Gets the variable whose initialization is guarded by this flag.
+   */
+  final Language::Variable getVariable() { var = result }
+
+  final override string getUniqueId() {
+    result = "Init: " + this.getVariable().toString() + " " + this.getVariable().getLocation().toString()
+  }
+
+  final override string getBaseString() { result = "#init:" + var.toString() + ":" }
+}
+
+/**
+ * An IR variable which acts like a function parameter, including positional parameters and the
+ * temporary variables generated for `this` and ellipsis parameters.
+ */
+class IRParameter extends IRAutomaticVariable {
+  IRParameter() {
+    this = TIRTempVariable(_, _, ThisTempVar(), _)
+    or
+    this = TIRTempVariable(_, _, EllipsisTempVar(), _)
+    or
+    this.(IRAutomaticUserVariable).getVariable() instanceof Language::Parameter
+  }
+
+  /**
+   * Gets the zero-based index of this parameter. The `this` parameter has index -1.
+   */
+  int getIndex() { none() }
+}
+
+/**
+ * An IR variable representing a positional (user-declared) parameter.
+ */
+class IRPositionalParameter extends IRParameter, IRAutomaticUserVariable {
+  final override int getIndex() { result = this.getVariable().(Language::Parameter).getIndex() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll
new file mode 100644
index 00000000000..6f471d8a7e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll
@@ -0,0 +1,2184 @@
+/**
+ * Provides classes that represent the individual instructions in the IR for a function.
+ */
+
+private import internal.IRInternal
+import IRFunction
+import IRBlock
+import IRVariable
+import Operand
+private import internal.InstructionImports as Imports
+import Imports::EdgeKind
+import Imports::IRType
+import Imports::MemoryAccessKind
+import Imports::Opcode
+private import Imports::OperandTag
+
+/**
+ * Gets an `Instruction` that is contained in `irFunc`, and has a location with the specified
+ * `file` and `line` number. Used for assigning register names when printing IR.
+ */
+private Instruction getAnInstructionAtLine(IRFunction irFunc, Language::File file, int line) {
+  exists(IRConfiguration::IRConfiguration config |
+    config.shouldEvaluateDebugStringsForFunction(irFunc.getFunction())
+  ) and
+  exists(Language::Location loc |
+    result.getEnclosingIRFunction() = irFunc and
+    result.getLocation() = loc and
+    loc.getFile() = file and
+    loc.getStartLine() = line
+  )
+}
+
+/**
+ * A single instruction in the IR.
+ */
+class Instruction extends Construction::TStageInstruction {
+  Instruction() {
+    // The base `TStageInstruction` type is a superset of the actual instructions appearing in this
+    // stage. This call lets the stage filter out the ones that are not reused from raw IR.
+    Construction::hasInstruction(this)
+  }
+
+  /** Gets a textual representation of this element. */
+  final string toString() { result = this.getOpcode().toString() + ": " + this.getAST().toString() }
+
+  /**
+   * Gets a string showing the result, opcode, and operands of the instruction, equivalent to what
+   * would be printed by PrintIR.ql. For example:
+   *
+   * `mu0_28(int) = Store r0_26, r0_27`
+   */
+  final string getDumpString() {
+    result = this.getResultString() + " = " + this.getOperationString() + " " + this.getOperandsString()
+  }
+
+  private predicate shouldGenerateDumpStrings() {
+    exists(IRConfiguration::IRConfiguration config |
+      config.shouldEvaluateDebugStringsForFunction(this.getEnclosingFunction())
+    )
+  }
+
+  /**
+   * Gets a string describing the operation of this instruction. This includes
+   * the opcode and the immediate value, if any. For example:
+   *
+   * `VariableAddress[x]`
+   */
+  final string getOperationString() {
+    this.shouldGenerateDumpStrings() and
+    if exists(this.getImmediateString())
+    then result = this.getOperationPrefix() + this.getOpcode().toString() + "[" + this.getImmediateString() + "]"
+    else result = this.getOperationPrefix() + this.getOpcode().toString()
+  }
+
+  /**
+   * Gets a string describing the immediate value of this instruction, if any.
+   */
+  string getImmediateString() { none() }
+
+  private string getOperationPrefix() {
+    this.shouldGenerateDumpStrings() and
+    if this instanceof SideEffectInstruction then result = "^" else result = ""
+  }
+
+  private string getResultPrefix() {
+    this.shouldGenerateDumpStrings() and
+    if this.getResultIRType() instanceof IRVoidType
+    then result = "v"
+    else
+      if this.hasMemoryResult()
+      then if this.isResultModeled() then result = "m" else result = "mu"
+      else result = "r"
+  }
+
+  /**
+   * Gets the zero-based index of this instruction within its block. This is
+   * used by debugging and printing code only.
+   */
+  int getDisplayIndexInBlock() {
+    this.shouldGenerateDumpStrings() and
+    exists(IRBlock block |
+      this = block.getInstruction(result)
+      or
+      this =
+        rank[-result - 1](PhiInstruction phi |
+          phi = block.getAPhiInstruction()
+        |
+          phi order by phi.getUniqueId()
+        )
+    )
+  }
+
+  private int getLineRank() {
+    this.shouldGenerateDumpStrings() and
+    this =
+      rank[result](Instruction instr |
+        instr =
+          getAnInstructionAtLine(this.getEnclosingIRFunction(), this.getLocation().getFile(),
+            this.getLocation().getStartLine())
+      |
+        instr order by instr.getBlock().getDisplayIndex(), instr.getDisplayIndexInBlock()
+      )
+  }
+
+  /**
+   * Gets a human-readable string that uniquely identifies this instruction
+   * within the function. This string is used to refer to this instruction when
+   * printing IR dumps.
+   *
+   * Example: `r1_1`
+   */
+  string getResultId() {
+    this.shouldGenerateDumpStrings() and
+    result = this.getResultPrefix() + this.getAST().getLocation().getStartLine() + "_" + this.getLineRank()
+  }
+
+  /**
+   * Gets a string describing the result of this instruction, suitable for
+   * display in IR dumps. This consists of the result ID plus the type of the
+   * result.
+   *
+   * Example: `r1_1(int*)`
+   */
+  final string getResultString() {
+    this.shouldGenerateDumpStrings() and
+    result = this.getResultId() + "(" + this.getResultLanguageType().getDumpString() + ")"
+  }
+
+  /**
+   * Gets a string describing the operands of this instruction, suitable for
+   * display in IR dumps.
+   *
+   * Example: `func:r3_4, this:r3_5`
+   */
+  string getOperandsString() {
+    this.shouldGenerateDumpStrings() and
+    result =
+      concat(Operand operand |
+        operand = this.getAnOperand()
+      |
+        operand.getDumpString(), ", " order by operand.getDumpSortOrder()
+      )
+  }
+
+  /**
+   * Gets a string identifier for this instruction that is unique among all
+   * instructions in the same function.
+   *
+   * This is used for sorting IR output for tests, and is likely to be
+   * inefficient for any other use.
+   */
+  final string getUniqueId() { result = Construction::getInstructionUniqueId(this) }
+
+  /**
+   * INTERNAL: Do not use.
+   *
+   * Gets two sort keys for this instruction - used to order instructions for printing
+   * in test outputs.
+   */
+  final predicate hasSortKeys(int key1, int key2) {
+    Construction::instructionHasSortKeys(this, key1, key2)
+  }
+
+  /**
+   * Gets the basic block that contains this instruction.
+   */
+  final IRBlock getBlock() { this = result.getAnInstruction() }
+
+  /**
+   * Gets the function that contains this instruction.
+   */
+  final Language::Function getEnclosingFunction() {
+    result = this.getEnclosingIRFunction().getFunction()
+  }
+
+  /**
+   * Gets the IRFunction object that contains the IR for this instruction.
+   */
+  final IRFunction getEnclosingIRFunction() {
+    result = Construction::getInstructionEnclosingIRFunction(this)
+  }
+
+  /**
+   * Gets the AST that caused this instruction to be generated.
+   */
+  final Language::AST getAST() { result = Construction::getInstructionAST(this) }
+
+  /**
+   * Gets the location of the source code for this instruction.
+   */
+  final Language::Location getLocation() { result = this.getAST().getLocation() }
+
+  /**
+   * Gets the `Expr` whose result is computed by this instruction, if any. The `Expr` may be a
+   * conversion.
+   */
+  final Language::Expr getConvertedResultExpression() {
+    result = Raw::getInstructionConvertedResultExpression(this)
+  }
+
+  /**
+   * Gets the unconverted form of the `Expr` whose result is computed by this instruction, if any.
+   */
+  final Language::Expr getUnconvertedResultExpression() {
+    result = Raw::getInstructionUnconvertedResultExpression(this)
+  }
+
+  /**
+   * Gets the language-specific type of the result produced by this instruction.
+   *
+   * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a
+   * less complex, language-neutral type system in which all semantically equivalent types share the
+   * same `IRType` instance. For example, in C++, four different `Instruction`s might have three
+   * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`,
+   * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`.
+   */
+  final Language::LanguageType getResultLanguageType() {
+    result = Construction::getInstructionResultType(this)
+  }
+
+  /**
+   * Gets the type of the result produced by this instruction. If the instruction does not produce
+   * a result, its result type will be `IRVoidType`.
+   */
+  cached
+  final IRType getResultIRType() { result = this.getResultLanguageType().getIRType() }
+
+  /**
+   * Gets the type of the result produced by this instruction. If the
+   * instruction does not produce a result, its result type will be `VoidType`.
+   *
+   * If `isGLValue()` holds, then the result type of this instruction should be
+   * thought of as "pointer to `getResultType()`".
+   */
+  final Language::Type getResultType() {
+    exists(Language::LanguageType resultType |
+      resultType = this.getResultLanguageType() and
+      (
+        resultType.hasUnspecifiedType(result, _)
+        or
+        not resultType.hasUnspecifiedType(_, _) and result instanceof Language::UnknownType
+      )
+    )
+  }
+
+  /**
+   * Holds if the result produced by this instruction is a glvalue. If this
+   * holds, the result of the instruction represents the address of a location,
+   * and the type of the location is given by `getResultType()`. If this does
+   * not hold, the result of the instruction represents a value whose type is
+   * given by `getResultType()`.
+   *
+   * For example, the statement `y = x;` generates the following IR:
+   * ```
+   * r1_0(glval: int) = VariableAddress[x]
+   * r1_1(int)        = Load r1_0, mu0_1
+   * r1_2(glval: int) = VariableAddress[y]
+   * mu1_3(int)       = Store r1_2, r1_1
+   * ```
+   *
+   * The result of each `VariableAddress` instruction is a glvalue of type
+   * `int`, representing the address of the corresponding integer variable. The
+   * result of the `Load` instruction is a prvalue of type `int`, representing
+   * the integer value loaded from variable `x`.
+   */
+  final predicate isGLValue() { this.getResultLanguageType().hasType(_, true) }
+
+  /**
+   * Gets the size of the result produced by this instruction, in bytes. If the
+   * result does not have a known constant size, this predicate does not hold.
+   *
+   * If `this.isGLValue()` holds for this instruction, the value of
+   * `getResultSize()` will always be the size of a pointer.
+   */
+  final int getResultSize() { result = this.getResultLanguageType().getByteSize() }
+
+  /**
+   * Gets the opcode that specifies the operation performed by this instruction.
+   */
+  pragma[inline]
+  final Opcode getOpcode() { Construction::getInstructionOpcode(result, this) }
+
+  /**
+   * Gets all direct uses of the result of this instruction. The result can be
+   * an `Operand` for which `isDefinitionInexact` holds.
+   */
+  final Operand getAUse() { this = result.getAnyDef() }
+
+  /**
+   * Gets all of this instruction's operands.
+   */
+  final Operand getAnOperand() { this = result.getUse() }
+
+  /**
+   * Holds if this instruction produces a memory result.
+   */
+  final predicate hasMemoryResult() { exists(this.getResultMemoryAccess()) }
+
+  /**
+   * Gets the kind of memory access performed by this instruction's result.
+   * Holds only for instructions with a memory result.
+   */
+  pragma[inline]
+  final MemoryAccessKind getResultMemoryAccess() { result = this.getOpcode().getWriteMemoryAccess() }
+
+  /**
+   * Holds if the memory access performed by this instruction's result will not always write to
+   * every bit in the memory location. This is most commonly used for memory accesses that may or
+   * may not actually occur depending on runtime state (for example, the write side effect of an
+   * output parameter that is not written to on all paths), or for accesses where the memory
+   * location is a conservative estimate of the memory that might actually be accessed at runtime
+   * (for example, the global side effects of a function call).
+   */
+  pragma[inline]
+  final predicate hasResultMayMemoryAccess() { this.getOpcode().hasMayWriteMemoryAccess() }
+
+  /**
+   * Gets the operand that holds the memory address to which this instruction stores its
+   * result, if any. For example, in `m3 = Store r1, r2`, the result of `getResultAddressOperand()`
+   * is `r1`.
+   */
+  final AddressOperand getResultAddressOperand() {
+    this.getResultMemoryAccess().usesAddressOperand() and
+    this = result.getUse()
+  }
+
+  /**
+   * Gets the instruction that holds the exact memory address to which this instruction stores its
+   * result, if any. For example, in `m3 = Store r1, r2`, the result of `getResultAddress()`
+   * is the instruction that defines `r1`.
+   */
+  final Instruction getResultAddress() { result = this.getResultAddressOperand().getDef() }
+
+  /**
+   * Holds if the result of this instruction is precisely modeled in SSA. Always
+   * holds for a register result. For a memory result, a modeled result is
+   * connected to its actual uses. An unmodeled result has no uses.
+   *
+   * For example:
+   * ```
+   * int x = 1;
+   * int *p = &x;
+   * int y = *p;
+   * ```
+   * In non-aliased SSA, `x` will not be modeled because it has its address
+   * taken. In that case, `isResultModeled()` would not hold for the result of
+   * the `Store` to `x`.
+   */
+  final predicate isResultModeled() {
+    // Register results are always in SSA form.
+    not this.hasMemoryResult() or
+    Construction::hasModeledMemoryResult(this)
+  }
+
+  /**
+   * Holds if this is an instruction with a memory result that represents a
+   * conflation of more than one memory allocation.
+   *
+   * This happens in practice when dereferencing a pointer that cannot be
+   * tracked back to a single local allocation. Such memory is instead modeled
+   * as originating on the `AliasedDefinitionInstruction` at the entry of the
+   * function.
+   */
+  final predicate isResultConflated() { Construction::hasConflatedMemoryResult(this) }
+
+  /**
+   * Gets the successor of this instruction along the control flow edge
+   * specified by `kind`.
+   */
+  final Instruction getSuccessor(EdgeKind kind) {
+    result = Construction::getInstructionSuccessor(this, kind)
+  }
+
+  /**
+   * Gets a _back-edge successor_ of this instruction along the control
+   * flow edge specified by `kind`. A back edge in the control-flow graph is
+   * intuitively the edge that goes back around a loop. If all back edges are
+   * removed from the control-flow graph, it becomes acyclic.
+   */
+  final Instruction getBackEdgeSuccessor(EdgeKind kind) {
+    // We don't take these edges from
+    // `Construction::getInstructionBackEdgeSuccessor` since that relation has
+    // not been treated to remove any loops that might be left over due to
+    // flaws in the IR construction or back-edge detection.
+    exists(IRBlock block |
+      block = this.getBlock() and
+      this = block.getLastInstruction() and
+      result = block.getBackEdgeSuccessor(kind).getFirstInstruction()
+    )
+  }
+
+  /**
+   * Gets all direct successors of this instruction.
+   */
+  final Instruction getASuccessor() { result = this.getSuccessor(_) }
+
+  /**
+   * Gets a predecessor of this instruction such that the predecessor reaches
+   * this instruction along the control flow edge specified by `kind`.
+   */
+  final Instruction getPredecessor(EdgeKind kind) { this = result.getSuccessor(kind) }
+
+  /**
+   * Gets all direct predecessors of this instruction.
+   */
+  final Instruction getAPredecessor() { result = this.getPredecessor(_) }
+}
+
+/**
+ * An instruction that refers to a variable.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * variable. For example, it is used for `VariableAddress`, which returns the address of a specific
+ * variable, and `InitializeParameter`, which returns the value that was passed to the specified
+ * parameter by the caller. `VariableInstruction` is not used for `Load` or `Store` instructions
+ * that happen to load from or store to a particular variable; in those cases, the memory location
+ * being accessed is specified by the `AddressOperand` on the instruction, which may or may not be
+ * defined by the result of a `VariableAddress` instruction.
+ */
+class VariableInstruction extends Instruction {
+  IRVariable var;
+
+  VariableInstruction() { Raw::getInstructionVariable(this) = var }
+
+  override string getImmediateString() { result = var.toString() }
+
+  /**
+   * Gets the IR variable that this instruction references.
+   */
+  final IRVariable getIRVariable() { var = result }
+
+  /**
+   * Gets the AST variable that this instruction's IR variable refers to, if one exists.
+   */
+  final Language::Variable getASTVariable() { var.(IRUserVariable).getVariable() = result }
+}
+
+/**
+ * An instruction that refers to a field of a class, struct, or union.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * field. For example, it is used for `FieldAddress`, which computes the address of a specific
+ * field on an object. `FieldInstruction` is not used for `Load` or `Store` instructions that happen
+ * to load from or store to a particular field; in those cases, the memory location being accessed
+ * is specified by the `AddressOperand` on the instruction, which may or may not be defined by the
+ * result of a `FieldAddress` instruction.
+ */
+class FieldInstruction extends Instruction {
+  Language::Field field;
+
+  FieldInstruction() { Raw::getInstructionField(this) = field }
+
+  final override string getImmediateString() { result = field.toString() }
+
+  /**
+   * Gets the field that this instruction references.
+   */
+  final Language::Field getField() { field = result }
+}
+
+/**
+ * An instruction that refers to a function.
+ *
+ * This class is used for any instruction whose operation fundamentally depends on a specific
+ * function. For example, it is used for `FunctionAddress`, which returns the address of a specific
+ * function. `FunctionInstruction` is not used for `Call` instructions that happen to call a
+ * particular function; in that case, the function being called is specified by the
+ * `CallTargetOperand` on the instruction, which may or may not be defined by the result of a
+ * `FunctionAddress` instruction.
+ */
+class FunctionInstruction extends Instruction {
+  Language::Function funcSymbol;
+
+  FunctionInstruction() { Raw::getInstructionFunction(this) = funcSymbol }
+
+  final override string getImmediateString() { result = funcSymbol.toString() }
+
+  /**
+   * Gets the function that this instruction references.
+   */
+  final Language::Function getFunctionSymbol() { funcSymbol = result }
+}
+
+/**
+ * An instruction whose result is a compile-time constant value.
+ */
+class ConstantValueInstruction extends Instruction {
+  string value;
+
+  ConstantValueInstruction() { Raw::getInstructionConstantValue(this) = value }
+
+  final override string getImmediateString() { result = value }
+
+  /**
+   * Gets the constant value of this instruction's result, as a string.
+   */
+  final string getValue() { value = result }
+}
+
+/**
+ * An instruction that refers to an argument of a `Call` instruction.
+ *
+ * This instruction is used for side effects of a `Call` instruction that read or write memory
+ * pointed to by one of the arguments of the call.
+ */
+class IndexedInstruction extends Instruction {
+  int index;
+
+  IndexedInstruction() { Raw::getInstructionIndex(this) = index }
+
+  final override string getImmediateString() { result = index.toString() }
+
+  /**
+   * Gets the zero-based index of the argument that this instruction references.
+   */
+  final int getIndex() { index = result }
+}
+
+/**
+ * An instruction representing the entry point to a function.
+ *
+ * Each `IRFunction` has exactly one `EnterFunction` instruction. Execution of the function begins
+ * at this instruction. This instruction has no predecessors.
+ */
+class EnterFunctionInstruction extends Instruction {
+  EnterFunctionInstruction() { this.getOpcode() instanceof Opcode::EnterFunction }
+}
+
+/**
+ * An instruction that returns the address of a variable.
+ *
+ * This instruction returns the address of a local variable, parameter, static field,
+ * namespace-scope variable, or global variable. For the address of a non-static field of a class,
+ * struct, or union, see `FieldAddressInstruction`.
+ */
+class VariableAddressInstruction extends VariableInstruction {
+  VariableAddressInstruction() { this.getOpcode() instanceof Opcode::VariableAddress }
+}
+
+/**
+ * An instruction that returns the address of a function.
+ *
+ * This instruction returns the address of a function, including non-member functions, static member
+ * functions, and non-static member functions.
+ *
+ * The result has an `IRFunctionAddress` type.
+ */
+class FunctionAddressInstruction extends FunctionInstruction {
+  FunctionAddressInstruction() { this.getOpcode() instanceof Opcode::FunctionAddress }
+}
+
+/**
+ * An instruction that initializes a parameter of the enclosing function with the value of the
+ * corresponding argument passed by the caller.
+ *
+ * Each parameter of a function will have exactly one `InitializeParameter` instruction that
+ * initializes that parameter.
+ */
+class InitializeParameterInstruction extends VariableInstruction {
+  InitializeParameterInstruction() { this.getOpcode() instanceof Opcode::InitializeParameter }
+
+  /**
+   * Gets the user-declared parameter initialized by this instruction, if any.
+   */
+  final Language::Parameter getParameter() { var.(IRUserVariable).getVariable() = result }
+
+  /**
+   * Holds if this instruction initializes the parameter with index `index`, or
+   * if `index` is `-1` and this instruction initializes `this`.
+   */
+  pragma[noinline]
+  final predicate hasIndex(int index) {
+    index = this.getParameter().getIndex() and index >= 0
+    or
+    this.getIRVariable() instanceof IRThisVariable and index = -1
+  }
+}
+
+/**
+ * An instruction that initializes all memory that existed before this function was called.
+ *
+ * This instruction provides a definition for memory that, because it was actually allocated and
+ * initialized elsewhere, would not otherwise have a definition in this function.
+ */
+class InitializeNonLocalInstruction extends Instruction {
+  InitializeNonLocalInstruction() { this.getOpcode() instanceof Opcode::InitializeNonLocal }
+}
+
+/**
+ * An instruction that initializes the memory pointed to by a parameter of the enclosing function
+ * with the value of that memory on entry to the function.
+ */
+class InitializeIndirectionInstruction extends VariableInstruction {
+  InitializeIndirectionInstruction() { this.getOpcode() instanceof Opcode::InitializeIndirection }
+
+  /**
+   * Gets the user-declared parameter whose pointed-to memory is initialized, if any.
+   */
+  final Language::Parameter getParameter() { var.(IRUserVariable).getVariable() = result }
+
+  /**
+   * Holds if this instruction initializes the memory pointed to by the parameter with
+   * index `index`, or if `index` is `-1` and this instruction initializes the memory
+   * pointed to by `this`.
+   */
+  pragma[noinline]
+  final predicate hasIndex(int index) {
+    index = this.getParameter().getIndex() and index >= 0
+    or
+    this.getIRVariable() instanceof IRThisVariable and index = -1
+  }
+}
+
+/**
+ * An instruction that initializes the enclosing function's `this` pointer parameter.
+ */
+class InitializeThisInstruction extends Instruction {
+  InitializeThisInstruction() { this.getOpcode() instanceof Opcode::InitializeThis }
+}
+
+/**
+ * An instruction that yields the address of a non-static field within an object.
+ */
+class FieldAddressInstruction extends FieldInstruction {
+  FieldAddressInstruction() { this.getOpcode() instanceof Opcode::FieldAddress }
+
+  /**
+   * Gets the operand holding the address of the object that contains the field.
+   */
+  final UnaryOperand getObjectAddressOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the address of the object that contains the field.
+   */
+  final Instruction getObjectAddress() { this.getObjectAddressOperand().getDef() = result }
+}
+
+/**
+ * An instruction that yields the address of the first element of a managed array.
+ *
+ * Element accesses on C# arrays are represented with this instruction.
+ */
+class ElementsAddressInstruction extends UnaryInstruction {
+  ElementsAddressInstruction() { this.getOpcode() instanceof Opcode::ElementsAddress }
+
+  /**
+   * Gets the operand holding the address of the array object.
+   */
+  final UnaryOperand getArrayObjectAddressOperand() { this.getAnOperand() = result }
+
+  /** Gets the instruction that defines the address of the array object. */
+  final Instruction getArrayObjectAddress() {
+    this.getArrayObjectAddressOperand().getDef() = result
+  }
+}
+
+/**
+ * An instruction whose result is well-defined but unknown, and whose side
+ * effects are unknown as well, including side effects that the SSA graph
+ * does not conservatively model.
+ *
+ * An instruction of this kind appears when the AST contains an `ErrorExpr`,
+ * that is, when the extractor failed to understand an expression and so
+ * emitted only a partial AST. A query that raises an alert when some action
+ * is _not_ taken may want to skip every function containing an `ErrorInstruction`.
+ */
+class ErrorInstruction extends Instruction {
+  ErrorInstruction() { this.getOpcode() instanceof Opcode::Error }
+}
+
+/**
+ * An instruction whose result is an uninitialized value.
+ *
+ * It supplies the initial definition of a stack variable that either has no initializer or has
+ * an initializer that covers only part of the variable.
+ */
+class UninitializedInstruction extends VariableInstruction {
+  UninitializedInstruction() { this.getOpcode() instanceof Opcode::Uninitialized }
+
+  /**
+   * Gets the variable that is left uninitialized.
+   */
+  final Language::Variable getLocalVariable() { var.(IRUserVariable).getVariable() = result }
+}
+
+/**
+ * An instruction with no effect.
+ *
+ * It is usually emitted so that a given AST has at least one instruction associated with it,
+ * even if that AST has no semantic effect.
+ */
+class NoOpInstruction extends Instruction {
+  NoOpInstruction() { this.getOpcode() instanceof Opcode::NoOp }
+}
+
+/**
+ * An instruction that hands control back to the caller of the function.
+ *
+ * It models the normal (non-exception) return from a function, whether caused by an explicit
+ * `return` statement or by control flow reaching the end of the function's body.
+ *
+ * A function has exactly one `ReturnInstruction`. Every `return` statement in the function is
+ * modeled as an initialization of the temporary variable holding the return value, after which
+ * control flows to that function's shared `ReturnInstruction`. Exception: a function that never
+ * returns has no `ReturnInstruction`.
+ *
+ * A function's `ReturnInstruction` has a control-flow successor edge to a block that contains
+ * the function's `ExitFunction` instruction.
+ *
+ * There are two different return instructions: `ReturnValueInstruction`, which returns a value
+ * from a non-`void`-returning function, and `ReturnVoidInstruction`, which returns from a
+ * `void`-returning function.
+ */
+class ReturnInstruction extends Instruction {
+  ReturnInstruction() { this.getOpcode() instanceof ReturnOpcode }
+}
+
+/**
+ * An instruction that hands control back to the caller of the function and returns no value.
+ */
+class ReturnVoidInstruction extends ReturnInstruction {
+  ReturnVoidInstruction() { this.getOpcode() instanceof Opcode::ReturnVoid }
+}
+
+/**
+ * An instruction that hands control back to the caller of the function together with a value.
+ */
+class ReturnValueInstruction extends ReturnInstruction {
+  ReturnValueInstruction() { this.getOpcode() instanceof Opcode::ReturnValue }
+
+  /**
+   * Gets the operand holding the value that the function returns.
+   */
+  final LoadOperand getReturnValueOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the value returned by the function, provided an exact
+   * definition is available.
+   */
+  final Instruction getReturnValue() { this.getReturnValueOperand().getDef() = result }
+}
+
+/**
+ * An instruction that models the use, after the function has returned control to its caller, of
+ * the value that one of the function's parameters points to.
+ *
+ * The instruction itself does not return control to the caller. It only models the possibility
+ * that a caller uses the memory pointed to by the parameter at some point after the call
+ * returns. It mirrors the `InitializeIndirection` instruction, which models the possibility
+ * that the caller initialized the memory pointed to by the parameter before the call.
+ */
+class ReturnIndirectionInstruction extends VariableInstruction {
+  ReturnIndirectionInstruction() { this.getOpcode() instanceof Opcode::ReturnIndirection }
+
+  /**
+   * Gets the operand holding the value of the pointed-to memory.
+   */
+  final SideEffectOperand getSideEffectOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the value of the pointed-to memory, provided an exact
+   * definition is available.
+   */
+  final Instruction getSideEffect() { this.getSideEffectOperand().getDef() = result }
+
+  /**
+   * Gets the operand holding the address of the pointed-to memory.
+   */
+  final AddressOperand getSourceAddressOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the address of the pointed-to memory.
+   */
+  final Instruction getSourceAddress() { this.getSourceAddressOperand().getDef() = result }
+
+  /**
+   * Gets the parameter whose final pointed-to value within the function is read by this
+   * instruction.
+   */
+  final Language::Parameter getParameter() { var.(IRUserVariable).getVariable() = result }
+
+  /**
+   * Holds if this instruction is the return indirection of `this`.
+   */
+  final predicate isThisIndirection() { var instanceof IRThisVariable }
+
+  /**
+   * Holds if `index` is the index of the parameter that this instruction is the return
+   * indirection for, or if `index` is `-1` and this is the return indirection for `this`.
+   */
+  pragma[noinline]
+  final predicate hasIndex(int index) {
+    index = -1 and this.isThisIndirection()
+    or
+    index = this.getParameter().getIndex() and index >= 0
+  }
+}
+
+/**
+ * An instruction whose result is a copy of its operand.
+ *
+ * Which copy instruction is used depends on where the copy operation reads from and writes
+ * to:
+ * - `CopyValueInstruction` - Copies a register operand to a register result.
+ * - `LoadInstruction` - Copies a memory operand to a register result.
+ * - `StoreInstruction` - Copies a register operand to a memory result.
+ */
+class CopyInstruction extends Instruction {
+  CopyInstruction() { this.getOpcode() instanceof CopyOpcode }
+
+  /**
+   * Gets the operand holding the input value of the copy.
+   */
+  Operand getSourceValueOperand() { none() }
+
+  /**
+   * Gets the instruction that defines the input value of the copy, provided an exact
+   * definition is available.
+   */
+  final Instruction getSourceValue() { this.getSourceValueOperand().getDef() = result }
+}
+
+/**
+ * An instruction whose register result is a copy of its register operand.
+ */
+class CopyValueInstruction extends CopyInstruction, UnaryInstruction {
+  CopyValueInstruction() { this.getOpcode() instanceof Opcode::CopyValue }
+
+  final override UnaryOperand getSourceValueOperand() { this.getAnOperand() = result }
+}
+
+/**
+ * Gets a string that describes the location the given address operand points to.
+ */
+private string getAddressOperandDescription(AddressOperand operand) {
+  // A variable address is described by its IR variable's string form; anything else is "?".
+  if operand.getDef() instanceof VariableAddressInstruction
+  then result = operand.getDef().(VariableAddressInstruction).getIRVariable().toString()
+  else result = "?"
+}
+
+/**
+ * An instruction whose register result is a copy of its memory operand.
+ */
+class LoadInstruction extends CopyInstruction {
+  LoadInstruction() { this.getOpcode() instanceof Opcode::Load }
+
+  final override string getImmediateString() {
+    getAddressOperandDescription(this.getSourceAddressOperand()) = result
+  }
+
+  /**
+   * Gets the operand holding the address of the value to be loaded.
+   */
+  final AddressOperand getSourceAddressOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the address of the value to be loaded.
+   */
+  final Instruction getSourceAddress() { this.getSourceAddressOperand().getDef() = result }
+
+  final override LoadOperand getSourceValueOperand() { this.getAnOperand() = result }
+}
+
+/**
+ * An instruction whose memory result is a copy of its register operand.
+ */
+class StoreInstruction extends CopyInstruction {
+  StoreInstruction() { this.getOpcode() instanceof Opcode::Store }
+
+  final override string getImmediateString() {
+    getAddressOperandDescription(this.getDestinationAddressOperand()) = result
+  }
+
+  /** Gets the operand holding the address of the location that the value is stored to. */
+  final AddressOperand getDestinationAddressOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the address of the location that the value is stored
+   * to, provided an exact definition is available.
+   */
+  final Instruction getDestinationAddress() {
+    this.getDestinationAddressOperand().getDef() = result
+  }
+
+  final override StoreValueOperand getSourceValueOperand() { this.getAnOperand() = result }
+}
+
+/**
+ * An instruction that transfers control to one of two successor instructions, selected by the
+ * value of a Boolean operand.
+ */
+class ConditionalBranchInstruction extends Instruction {
+  ConditionalBranchInstruction() { this.getOpcode() instanceof Opcode::ConditionalBranch }
+
+  /**
+   * Gets the operand holding the Boolean condition that controls the branch.
+   */
+  final ConditionOperand getConditionOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the Boolean condition that controls the branch.
+   */
+  final Instruction getCondition() { this.getConditionOperand().getDef() = result }
+
+  /**
+   * Gets the instruction that control flows to when the condition is true.
+   */
+  final Instruction getTrueSuccessor() { this.getSuccessor(EdgeKind::trueEdge()) = result }
+
+  /**
+   * Gets the instruction that control flows to when the condition is false.
+   */
+  final Instruction getFalseSuccessor() { this.getSuccessor(EdgeKind::falseEdge()) = result }
+}
+
+/**
+ * An instruction that marks the exit point of a function.
+ *
+ * Every `IRFunction` has exactly one `ExitFunction` instruction, except for a function that
+ * neither returns nor throws an exception. Both normal returns (`ReturnVoid`, `ReturnValue`) and
+ * propagated exceptions (`Unwind`) transfer control to the `ExitFunction` instruction, which
+ * itself has no successors.
+ */
+class ExitFunctionInstruction extends Instruction {
+  ExitFunctionInstruction() { this.getOpcode() instanceof Opcode::ExitFunction }
+}
+
+/**
+ * An instruction that produces a constant value as its result.
+ */
+class ConstantInstruction extends ConstantValueInstruction {
+  ConstantInstruction() { this.getOpcode() instanceof Opcode::Constant }
+}
+
+/**
+ * An instruction that produces a constant value of integer or Boolean type as its result.
+ */
+class IntegerConstantInstruction extends ConstantInstruction {
+  IntegerConstantInstruction() {
+    // Integer and Boolean result types are both accepted.
+    this.getResultIRType() instanceof IRIntegerType
+    or
+    this.getResultIRType() instanceof IRBooleanType
+  }
+}
+
+/**
+ * An instruction that produces a constant value of floating-point type as its result.
+ */
+class FloatConstantInstruction extends ConstantInstruction {
+  FloatConstantInstruction() { this.getResultIRType() instanceof IRFloatingPointType }
+}
+
+/**
+ * An instruction that produces the address of a string literal as its result.
+ */
+class StringConstantInstruction extends VariableInstruction {
+  override IRStringLiteral var;
+
+  final override string getImmediateString() {
+    Language::getStringLiteralText(this.getValue()) = result
+  }
+
+  /** Gets the string literal whose address this instruction returns. */
+  final Language::StringLiteral getValue() { var.getLiteral() = result }
+}
+
+/**
+ * An instruction that computes its result from two operands.
+ */
+class BinaryInstruction extends Instruction {
+  BinaryInstruction() { this.getOpcode() instanceof BinaryOpcode }
+
+  /**
+   * Gets this binary instruction's left operand.
+   */
+  final LeftOperand getLeftOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets this binary instruction's right operand.
+   */
+  final RightOperand getRightOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the value of this binary instruction's left
+   * operand.
+   */
+  final Instruction getLeft() { this.getLeftOperand().getDef() = result }
+
+  /**
+   * Gets the instruction that defines the value of this binary instruction's right
+   * operand.
+   */
+  final Instruction getRight() { this.getRightOperand().getDef() = result }
+
+  /**
+   * Holds if `op1` and `op2` are the two operands of this instruction, in either order.
+   */
+  final predicate hasOperands(Operand op1, Operand op2) {
+    op1 = this.getRightOperand() and op2 = this.getLeftOperand()
+    or
+    op1 = this.getLeftOperand() and op2 = this.getRightOperand()
+  }
+}
+
+/**
+ * An instruction whose result is the outcome of an arithmetic operation.
+ */
+class ArithmeticInstruction extends Instruction {
+  ArithmeticInstruction() { this.getOpcode() instanceof ArithmeticOpcode }
+}
+
+/**
+ * An arithmetic instruction that operates on two numeric operands.
+ */
+class BinaryArithmeticInstruction extends ArithmeticInstruction, BinaryInstruction { }
+
+/**
+ * An arithmetic instruction that computes its result from a single numeric
+ * operand.
+ */
+class UnaryArithmeticInstruction extends ArithmeticInstruction, UnaryInstruction { }
+
+/**
+ * An instruction whose result is the sum of two numeric operands.
+ *
+ * The two operands must share one numeric type, and the result has that type too. On integer
+ * overflow the result is the infinite-precision result modulo 2^n. Floating-point addition
+ * follows IEEE-754.
+ */
+class AddInstruction extends BinaryArithmeticInstruction {
+  AddInstruction() { this.getOpcode() instanceof Opcode::Add }
+}
+
+/**
+ * An instruction whose result is the difference of two numeric operands.
+ *
+ * The two operands must share one numeric type, and the result has that type too. On integer
+ * overflow the result is the infinite-precision result modulo 2^n. Floating-point subtraction
+ * follows IEEE-754.
+ */
+class SubInstruction extends BinaryArithmeticInstruction {
+  SubInstruction() { this.getOpcode() instanceof Opcode::Sub }
+}
+
+/**
+ * An instruction whose result is the product of two numeric operands.
+ *
+ * The two operands must share one numeric type, and the result has that type too. On integer
+ * overflow the result is the infinite-precision result modulo 2^n. Floating-point
+ * multiplication follows IEEE-754.
+ */
+class MulInstruction extends BinaryArithmeticInstruction {
+  MulInstruction() { this.getOpcode() instanceof Opcode::Mul }
+}
+
+/**
+ * An instruction whose result is the quotient of two numeric operands.
+ *
+ * The two operands must share one numeric type, and the result has that type too. The result
+ * is undefined on division by zero or integer overflow. Floating-point division follows
+ * IEEE-754.
+ */
+class DivInstruction extends BinaryArithmeticInstruction {
+  DivInstruction() { this.getOpcode() instanceof Opcode::Div }
+}
+
+/**
+ * An instruction whose result is the remainder of two integer operands.
+ *
+ * The two operands must share one integer type, and the result has that type too. The result
+ * is undefined on division by zero or integer overflow.
+ */
+class RemInstruction extends BinaryArithmeticInstruction {
+  RemInstruction() { this.getOpcode() instanceof Opcode::Rem }
+}
+
+/**
+ * An instruction whose result is the negation of a single numeric operand.
+ *
+ * The operand must have a numeric type, and the result has that type too. Integer negation
+ * uses two's complement and is computed modulo 2^n. Floating-point negation follows
+ * IEEE-754.
+ */
+class NegateInstruction extends UnaryArithmeticInstruction {
+  NegateInstruction() { this.getOpcode() instanceof Opcode::Negate }
+}
+
+/**
+ * An instruction whose result is the outcome of a bitwise operation.
+ */
+class BitwiseInstruction extends Instruction {
+  BitwiseInstruction() { this.getOpcode() instanceof BitwiseOpcode }
+}
+
+/**
+ * A bitwise instruction that operates on two integer operands.
+ */
+class BinaryBitwiseInstruction extends BitwiseInstruction, BinaryInstruction { }
+
+/**
+ * A bitwise instruction that operates on a single integer operand.
+ */
+class UnaryBitwiseInstruction extends BitwiseInstruction, UnaryInstruction { }
+
+/**
+ * An instruction whose result is the bitwise "and" of two integer operands.
+ *
+ * The two operands must share one integer type, and the result has that type too.
+ */
+class BitAndInstruction extends BinaryBitwiseInstruction {
+  BitAndInstruction() { this.getOpcode() instanceof Opcode::BitAnd }
+}
+
+/**
+ * An instruction whose result is the bitwise "or" of two integer operands.
+ *
+ * The two operands must share one integer type, and the result has that type too.
+ */
+class BitOrInstruction extends BinaryBitwiseInstruction {
+  BitOrInstruction() { this.getOpcode() instanceof Opcode::BitOr }
+}
+
+/**
+ * An instruction whose result is the bitwise "xor" of two integer operands.
+ *
+ * The two operands must share one integer type, and the result has that type too.
+ */
+class BitXorInstruction extends BinaryBitwiseInstruction {
+  BitXorInstruction() { this.getOpcode() instanceof Opcode::BitXor }
+}
+
+/**
+ * An instruction whose result is its left operand shifted to the left by the number of bits
+ * given by its right operand.
+ *
+ * Each operand must have an integer type, and the result takes the type of the left operand.
+ * Zeros are shifted in as the rightmost bits.
+ */
+class ShiftLeftInstruction extends BinaryBitwiseInstruction {
+  ShiftLeftInstruction() { this.getOpcode() instanceof Opcode::ShiftLeft }
+}
+
+/**
+ * An instruction whose result is its left operand shifted to the right by the number of bits
+ * given by its right operand.
+ *
+ * Each operand must have an integer type, and the result takes the type of the left operand.
+ * For a left operand of unsigned integer type, zeros are shifted in as the leftmost bits. For a
+ * left operand of signed integer type, the leftmost bits are copies of the left operand's most
+ * significant bit.
+ */
+class ShiftRightInstruction extends BinaryBitwiseInstruction {
+  ShiftRightInstruction() { this.getOpcode() instanceof Opcode::ShiftRight }
+}
+
+/**
+ * An instruction that carries out a binary arithmetic operation in which at least one operand
+ * is a pointer.
+ */
+class PointerArithmeticInstruction extends BinaryInstruction {
+  int elementSize;
+
+  PointerArithmeticInstruction() {
+    elementSize = Raw::getInstructionElementSize(this) and
+    this.getOpcode() instanceof PointerArithmeticOpcode
+  }
+
+  final override string getImmediateString() { elementSize.toString() = result }
+
+  /**
+   * Gets the size, in bytes, of the elements that the pointer operands point to.
+   *
+   * For `PointerAddInstruction` (pointer plus integer offset) and `PointerSubInstruction`
+   * (pointer minus integer offset), the integer offset is multiplied by the element size to
+   * obtain the number of bytes actually added to or subtracted from the pointer address. For
+   * `PointerDiffInstruction` (integer difference between two pointers), the result is the
+   * difference between the byte addresses of the two pointers, divided by the element
+   * size.
+   */
+  final int getElementSize() { elementSize = result }
+}
+
+/**
+ * An instruction that applies an integer offset to a pointer, by addition or by subtraction.
+ */
+class PointerOffsetInstruction extends PointerArithmeticInstruction {
+  PointerOffsetInstruction() { this.getOpcode() instanceof PointerOffsetOpcode }
+}
+
+/**
+ * An instruction whose result is a pointer plus an integer offset.
+ *
+ * The result is the byte address obtained by taking the value of the left (pointer) operand and
+ * adding the value of the right (integer) operand multiplied by the element size. Pointer
+ * overflow gives an undefined result.
+ */
+class PointerAddInstruction extends PointerOffsetInstruction {
+  PointerAddInstruction() { this.getOpcode() instanceof Opcode::PointerAdd }
+}
+
+/**
+ * An instruction whose result is a pointer minus an integer offset.
+ *
+ * The result is the byte address obtained by taking the value of the left (pointer) operand and
+ * subtracting the value of the right (integer) operand multiplied by the element size. Pointer
+ * underflow gives an undefined result.
+ */
+class PointerSubInstruction extends PointerOffsetInstruction {
+  PointerSubInstruction() { this.getOpcode() instanceof Opcode::PointerSub }
+}
+
+/**
+ * An instruction whose result is the difference between two pointers.
+ *
+ * The two operands must share one pointer type, and the result must have an integer type of the
+ * same size as the pointer operands. The result is the byte address in the left operand minus
+ * the byte address in the right operand, divided by the element size. The result is undefined
+ * if the element size does not evenly divide the difference in byte
+ * addresses.
+ */
+class PointerDiffInstruction extends PointerArithmeticInstruction {
+  PointerDiffInstruction() { this.getOpcode() instanceof Opcode::PointerDiff }
+}
+
+/**
+ * An instruction that computes its result from a single operand.
+ */
+class UnaryInstruction extends Instruction {
+  UnaryInstruction() { this.getOpcode() instanceof UnaryOpcode }
+
+  /**
+   * Gets the one operand of this instruction.
+   */
+  final UnaryOperand getUnaryOperand() { this.getAnOperand() = result }
+
+  /**
+   * Gets the instruction that defines the one operand of this instruction.
+   */
+  final Instruction getUnary() { this.getUnaryOperand().getDef() = result }
+}
+
+/**
+ * An instruction whose result is the value of its operand converted to a different type.
+ */
+class ConvertInstruction extends UnaryInstruction {
+  ConvertInstruction() { this.getOpcode() instanceof Opcode::Convert }
+}
+
+/**
+ * An instruction that takes the address of a polymorphic object and converts it to the address
+ * of a different subobject of that same object, yielding a null address when the object's
+ * dynamic type is not compatible with the result type.
+ *
+ * A null address in the operand yields a null address as the result.
+ *
+ * A C++ `dynamic_cast<>` to a pointer type, and a C# `is` or `as` expression, are represented by
+ * this instruction.
+ */
+class CheckedConvertOrNullInstruction extends UnaryInstruction {
+  CheckedConvertOrNullInstruction() { this.getOpcode() instanceof Opcode::CheckedConvertOrNull }
+}
+
+/**
+ * An instruction that takes the address of a polymorphic object and converts it to the address
+ * of a different subobject of that same object, throwing an exception when the object's
+ * dynamic type is not compatible with the result type.
+ *
+ * A null address in the operand yields a null address as the result.
+ *
+ * A C++ `dynamic_cast<>` to a reference type, and a C# cast expression, are represented by this
+ * instruction.
+ */
+class CheckedConvertOrThrowInstruction extends UnaryInstruction {
+  CheckedConvertOrThrowInstruction() { this.getOpcode() instanceof Opcode::CheckedConvertOrThrow }
+}
+
+/**
+ * An instruction whose result is the address of the complete object containing the subobject
+ * that its operand points to.
+ *
+ * A null address in the operand yields a null address as the result.
+ *
+ * This instruction represents `dynamic_cast<void*>` in C++, which yields a pointer to the
+ * most-derived object.
+ */
+class CompleteObjectAddressInstruction extends UnaryInstruction {
+  CompleteObjectAddressInstruction() { this.getOpcode() instanceof Opcode::CompleteObjectAddress }
+}
+
+/**
+ * An instruction that takes the address of an object and converts it to the address of a
+ * different subobject of that same object, with no type check performed at runtime.
+ */
+class InheritanceConversionInstruction extends UnaryInstruction {
+  Language::Class baseClass;
+  Language::Class derivedClass;
+
+  InheritanceConversionInstruction() {
+    Raw::getInstructionInheritance(this, baseClass, derivedClass)
+  }
+
+  final override string getImmediateString() {
+    derivedClass.toString() + " : " + baseClass.toString() = result
+  }
+
+  /**
+   * Gets the `ClassDerivation` describing the inheritance relationship
+   * between the base class and the derived class. There is no result when
+   * the conversion targets an indirect virtual base class.
+   */
+  final Language::ClassDerivation getDerivation() {
+    result.getDerivedClass() = derivedClass and result.getBaseClass() = baseClass
+  }
+
+  /**
+   * Gets the base class of the conversion, which is either a direct
+   * base class or a virtual base class of the derived
+   * class.
+   */
+  final Language::Class getBaseClass() { baseClass = result }
+
+  /**
+   * Gets the derived class of the conversion.
+   */
+  final Language::Class getDerivedClass() { derivedClass = result }
+}
+
+/**
+ * An instruction that converts the address of a derived class to the address of a base class.
+ */
+class ConvertToBaseInstruction extends InheritanceConversionInstruction {
+  ConvertToBaseInstruction() { this.getOpcode() instanceof ConvertToBaseOpcode }
+}
+
+/**
+ * An instruction that converts the address of a derived class to the address of a direct
+ * non-virtual base class. A null address in the operand yields a null address as the result.
+ */
+class ConvertToNonVirtualBaseInstruction extends ConvertToBaseInstruction {
+  ConvertToNonVirtualBaseInstruction() {
+    this.getOpcode() instanceof Opcode::ConvertToNonVirtualBase
+  }
+}
+
+/**
+ * An instruction that converts the address of a derived class to the address of a virtual
+ * base class.
+ *
+ * A null address in the operand yields a null address as the result.
+ */
+class ConvertToVirtualBaseInstruction extends ConvertToBaseInstruction {
+  ConvertToVirtualBaseInstruction() { this.getOpcode() instanceof Opcode::ConvertToVirtualBase }
+}
+
+/**
+ * An instruction that converts the address of a base class to the address of a direct
+ * non-virtual derived class.
+ *
+ * A null address in the operand yields a null address as the result.
+ */
+class ConvertToDerivedInstruction extends InheritanceConversionInstruction {
+  ConvertToDerivedInstruction() { this.getOpcode() instanceof Opcode::ConvertToDerived }
+}
+
+/**
+ * An instruction whose result is the bitwise complement of its operand.
+ *
+ * The operand must have an integer type, and the result has that type too.
+ */
+class BitComplementInstruction extends UnaryBitwiseInstruction {
+  BitComplementInstruction() { this.getOpcode() instanceof Opcode::BitComplement }
+}
+
+/**
+ * An instruction whose result is the logical complement of its operand.
+ *
+ * The operand must have a Boolean type, and the result has that type too.
+ */
+class LogicalNotInstruction extends UnaryInstruction {
+  LogicalNotInstruction() { this.getOpcode() instanceof Opcode::LogicalNot }
+}
+
+/**
+ * An instruction that performs a comparison of two numeric operands.
+ */
+class CompareInstruction extends BinaryInstruction {
+  CompareInstruction() { this.getOpcode() instanceof CompareOpcode }
+}
+
+/**
+ * An instruction whose result is `true` if its two operands are equal.
+ *
+ * The two operands must share one numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left == right`, and `false` if `left != right` or if the two
+ * operands are unordered. Floating-point comparison follows IEEE-754.
+ */
+class CompareEQInstruction extends CompareInstruction {
+  CompareEQInstruction() { this.getOpcode() instanceof Opcode::CompareEQ }
+}
+
+/**
+ * An instruction whose result is `true` if its two operands are not equal.
+ *
+ * The two operands must share one numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left != right` or if the two operands are unordered, and
+ * `false` if `left == right`. Floating-point comparison follows IEEE-754.
+ */
+class CompareNEInstruction extends CompareInstruction {
+  CompareNEInstruction() { this.getOpcode() instanceof Opcode::CompareNE }
+}
+
+/**
+ * An instruction that performs a relative comparison of two values, for example `<` or `>=`.
+ */
+class RelationalInstruction extends CompareInstruction {
+  RelationalInstruction() { this.getOpcode() instanceof RelationalOpcode }
+
+  /**
+   * Gets the operand on the "greater" (or "greater-or-equal") side of
+   * this relational instruction, meaning the side holding the larger
+   * value whenever the instruction as a whole evaluates to `true`. For
+   * example, this is the `20` in `x <= 20`, and the `y` in
+   * `y > 0`.
+   */
+  Instruction getGreater() { none() }
+
+  /**
+   * Gets the operand on the "lesser" (or "lesser-or-equal") side of
+   * this relational instruction, meaning the side holding the smaller
+   * value whenever the instruction as a whole evaluates to `true`. For
+   * example, this is the `x` in `x <= 20`, and the `0` in
+   * `y > 0`.
+   */
+  Instruction getLesser() { none() }
+
+  /**
+   * Holds if this relational instruction is strict, that is, not an "or-equal" instruction.
+   */
+  predicate isStrict() { none() }
+}
+
+/**
+ * An instruction whose result is `true` if its left operand is less than its right operand.
+ *
+ * The two operands must share one numeric or address type, and the result must have a Boolean
+ * type. The result is `true` if `left < right`, and `false` if `left >= right` or if the two
+ * operands are unordered. Floating-point comparison follows IEEE-754.
+ */
+class CompareLTInstruction extends RelationalInstruction {
+  CompareLTInstruction() { this.getOpcode() instanceof Opcode::CompareLT }
+
+  override Instruction getGreater() { this.getRight() = result }
+
+  override Instruction getLesser() { this.getLeft() = result }
+
+  override predicate isStrict() { any() }
+}
+
+/**
+ * An instruction that returns a `true` result if its left operand is greater than its right operand.
+ *
+ * Both operands must have the same numeric or address type. The result must have a Boolean type.
+ * The result is `true` if `left > right`, and `false` if `left <= right` or if the two operands
+ * are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareGTInstruction extends RelationalInstruction {
+ CompareGTInstruction() { getOpcode() instanceof Opcode::CompareGT }
+
+ override Instruction getLesser() { result = getRight() }
+
+ override Instruction getGreater() { result = getLeft() }
+
+ override predicate isStrict() { any() }
+}
+
+/**
+ * An instruction that returns a `true` result if its left operand is less than or equal to its
+ * right operand.
+ *
+ * Both operands must have the same numeric or address type. The result must have a Boolean type.
+ * The result is `true` if `left <= right`, and `false` if `left > right` or if the two operands
+ * are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareLEInstruction extends RelationalInstruction {
+ CompareLEInstruction() { getOpcode() instanceof Opcode::CompareLE }
+
+ override Instruction getLesser() { result = getLeft() }
+
+ override Instruction getGreater() { result = getRight() }
+
+ override predicate isStrict() { none() }
+}
+
+/**
+ * An instruction that returns a `true` result if its left operand is greater than or equal to its
+ * right operand.
+ *
+ * Both operands must have the same numeric or address type. The result must have a Boolean type.
+ * The result is `true` if `left >= right`, and `false` if `left < right` or if the two operands
+ * are unordered. Floating-point comparison is performed according to IEEE-754.
+ */
+class CompareGEInstruction extends RelationalInstruction {
+ CompareGEInstruction() { getOpcode() instanceof Opcode::CompareGE }
+
+ override Instruction getLesser() { result = getRight() }
+
+ override Instruction getGreater() { result = getLeft() }
+
+ override predicate isStrict() { none() }
+}
+
+/**
+ * An instruction that branches to one of multiple successor instructions based on the value of an
+ * integer operand.
+ *
+ * This instruction will have zero or more successors whose edge kind is `CaseEdge`, each
+ * representing the branch that will be taken if the controlling expression is within the range
+ * specified for that case edge. The range of a case edge must be disjoint from the range of each
+ * other case edge.
+ *
+ * The instruction may optionally have a successor edge whose edge kind is `DefaultEdge`,
+ * representing the branch that will be taken if the controlling expression is not within the range
+ * of any case edge.
+ */
+class SwitchInstruction extends Instruction {
+ SwitchInstruction() { getOpcode() instanceof Opcode::Switch }
+
+ /** Gets the operand that provides the integer value controlling the switch. */
+ final ConditionOperand getExpressionOperand() { result = getAnOperand() }
+
+ /** Gets the instruction whose result provides the integer value controlling the switch. */
+ final Instruction getExpression() { result = getExpressionOperand().getDef() }
+
+ /** Gets the successor instructions along the case edges of the switch. */
+ final Instruction getACaseSuccessor() { exists(CaseEdge edge | result = getSuccessor(edge)) }
+
+ /** Gets the successor instruction along the default edge of the switch, if any. */
+ final Instruction getDefaultSuccessor() { result = getSuccessor(EdgeKind::defaultEdge()) }
+}
+
+/**
+ * An instruction that calls a function.
+ */
+class CallInstruction extends Instruction {
+ CallInstruction() { getOpcode() instanceof Opcode::Call }
+
+ final override string getImmediateString() {
+ result = getStaticCallTarget().toString()
+ or
+ not exists(getStaticCallTarget()) and result = "?"
+ }
+
+ /**
+ * Gets the operand that specifies the target function of the call.
+ */
+ final CallTargetOperand getCallTargetOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the `Instruction` that computes the target function of the call. This is usually a
+ * `FunctionAddress` instruction, but can also be an arbitrary instruction that produces a
+ * function pointer.
+ */
+ final Instruction getCallTarget() { result = getCallTargetOperand().getDef() }
+
+ /**
+ * Gets all of the argument operands of the call, including the `this` pointer, if any.
+ */
+ final ArgumentOperand getAnArgumentOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the `Function` that the call targets, if this is statically known.
+ */
+ final Language::Function getStaticCallTarget() {
+ result = getCallTarget().(FunctionAddressInstruction).getFunctionSymbol()
+ }
+
+ /**
+ * Gets all of the arguments of the call, including the `this` pointer, if any.
+ */
+ final Instruction getAnArgument() { result = getAnArgumentOperand().getDef() }
+
+ /**
+ * Gets the `this` pointer argument operand of the call, if any.
+ */
+ final ThisArgumentOperand getThisArgumentOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the `this` pointer argument of the call, if any.
+ */
+ final Instruction getThisArgument() { result = getThisArgumentOperand().getDef() }
+
+ /**
+ * Gets the argument operand at the specified index.
+ */
+ pragma[noinline]
+ final PositionalArgumentOperand getPositionalArgumentOperand(int index) {
+ result = getAnOperand() and
+ result.getIndex() = index
+ }
+
+ /**
+ * Gets the argument at the specified index.
+ */
+ pragma[noinline]
+ final Instruction getPositionalArgument(int index) {
+ result = getPositionalArgumentOperand(index).getDef()
+ }
+
+ /**
+ * Gets the argument operand at the specified index, or `this` if `index` is `-1`.
+ */
+ pragma[noinline]
+ final ArgumentOperand getArgumentOperand(int index) {
+ index >= 0 and result = getPositionalArgumentOperand(index)
+ or
+ index = -1 and result = getThisArgumentOperand()
+ }
+
+ /**
+ * Gets the argument at the specified index, or `this` if `index` is `-1`.
+ */
+ pragma[noinline]
+ final Instruction getArgument(int index) { result = getArgumentOperand(index).getDef() }
+
+ /**
+ * Gets the number of arguments of the call, including the `this` pointer, if any.
+ */
+ final int getNumberOfArguments() { result = count(this.getAnArgumentOperand()) }
+
+ /**
+ * Gets a side effect instruction for the argument at the specified index, or for `this` if
+ * `index` is `-1`.
+ *
+ * This helper predicate makes it easy to join on both of these columns at once, avoiding
+ * pathological join orders in case the argument index should get joined first.
+ */
+ pragma[noinline]
+ final SideEffectInstruction getAParameterSideEffect(int index) {
+ this = result.getPrimaryInstruction() and
+ index = result.(IndexedInstruction).getIndex()
+ }
+}
+
+/**
+ * An instruction representing a side effect of a function call.
+ */
+class SideEffectInstruction extends Instruction {
+ SideEffectInstruction() { getOpcode() instanceof SideEffectOpcode }
+
+ /**
+ * Gets the instruction whose execution causes this side effect.
+ */
+ final Instruction getPrimaryInstruction() {
+ result = Construction::getPrimaryInstructionForSideEffect(this)
+ }
+}
+
+/**
+ * An instruction representing the side effect of a function call on any memory that might be
+ * accessed by that call.
+ */
+class CallSideEffectInstruction extends SideEffectInstruction {
+ CallSideEffectInstruction() { getOpcode() instanceof Opcode::CallSideEffect }
+}
+
+/**
+ * An instruction representing the side effect of a function call on any memory
+ * that might be read by that call.
+ *
+ * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the
+ * call target cannot write to escaped memory.
+ */
+class CallReadSideEffectInstruction extends SideEffectInstruction {
+ CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect }
+}
+
+/**
+ * An instruction representing a read side effect of a function call on a
+ * specific parameter.
+ */
+class ReadSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
+ ReadSideEffectInstruction() { getOpcode() instanceof ReadSideEffectOpcode }
+
+ /** Gets the operand for the value that will be read from this instruction, if known. */
+ final SideEffectOperand getSideEffectOperand() { result = getAnOperand() }
+
+ /** Gets the value that will be read from this instruction, if known. */
+ final Instruction getSideEffect() { result = getSideEffectOperand().getDef() }
+
+ /** Gets the operand for the address from which this instruction may read. */
+ final AddressOperand getArgumentOperand() { result = getAnOperand() }
+
+ /** Gets the address from which this instruction may read. */
+ final Instruction getArgumentDef() { result = getArgumentOperand().getDef() }
+}
+
+/**
+ * An instruction representing the read of an indirect parameter within a function call.
+ */
+class IndirectReadSideEffectInstruction extends ReadSideEffectInstruction {
+ IndirectReadSideEffectInstruction() { getOpcode() instanceof Opcode::IndirectReadSideEffect }
+}
+
+/**
+ * An instruction representing the read of an indirect buffer parameter within a function call.
+ */
+class BufferReadSideEffectInstruction extends ReadSideEffectInstruction {
+ BufferReadSideEffectInstruction() { getOpcode() instanceof Opcode::BufferReadSideEffect }
+}
+
+/**
+ * An instruction representing the read of an indirect buffer parameter within a function call.
+ */
+class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction {
+ SizedBufferReadSideEffectInstruction() {
+ getOpcode() instanceof Opcode::SizedBufferReadSideEffect
+ }
+
+ /**
+ * Gets the operand that holds the number of bytes read from the buffer.
+ */
+ final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the instruction whose result provides the number of bytes read from the buffer.
+ */
+ final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction representing a write side effect of a function call on a
+ * specific parameter.
+ */
+class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
+ WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode }
+
+ /**
+ * Gets the operand that holds the address of the memory to be written.
+ */
+ final AddressOperand getDestinationAddressOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the instruction whose result provides the address of the memory to be written.
+ */
+ Instruction getDestinationAddress() { result = getDestinationAddressOperand().getDef() }
+}
+
+/**
+ * An instruction representing the write of an indirect parameter within a function call.
+ */
+class IndirectMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ IndirectMustWriteSideEffectInstruction() {
+ getOpcode() instanceof Opcode::IndirectMustWriteSideEffect
+ }
+}
+
+/**
+ * An instruction representing the write of an indirect buffer parameter within a function call. The
+ * entire buffer is overwritten.
+ */
+class BufferMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ BufferMustWriteSideEffectInstruction() {
+ getOpcode() instanceof Opcode::BufferMustWriteSideEffect
+ }
+}
+
+/**
+ * An instruction representing the write of an indirect buffer parameter within a function call. The
+ * entire buffer is overwritten.
+ */
+class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ SizedBufferMustWriteSideEffectInstruction() {
+ getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect
+ }
+
+ /**
+ * Gets the operand that holds the number of bytes written to the buffer.
+ */
+ final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the instruction whose result provides the number of bytes written to the buffer.
+ */
+ final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction representing the potential write of an indirect parameter within a function call.
+ *
+ * Unlike `IndirectMustWriteSideEffectInstruction`, the location might not be completely
+ * overwritten.
+ */
+class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ IndirectMayWriteSideEffectInstruction() {
+ getOpcode() instanceof Opcode::IndirectMayWriteSideEffect
+ }
+}
+
+/**
+ * An instruction representing the write of an indirect buffer parameter within a function call.
+ *
+ * Unlike `BufferMustWriteSideEffectInstruction`, the buffer might not be completely overwritten.
+ */
+class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ BufferMayWriteSideEffectInstruction() { getOpcode() instanceof Opcode::BufferMayWriteSideEffect }
+}
+
+/**
+ * An instruction representing the write of an indirect buffer parameter within a function call.
+ *
+ * Unlike `SizedBufferMustWriteSideEffectInstruction`, the buffer might not be completely overwritten.
+ */
+class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction {
+ SizedBufferMayWriteSideEffectInstruction() {
+ getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect
+ }
+
+ /**
+ * Gets the operand that holds the number of bytes written to the buffer.
+ */
+ final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the instruction whose result provides the number of bytes written to the buffer.
+ */
+ final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() }
+}
+
+/**
+ * An instruction representing the initial value of newly allocated memory, such as the result of a
+ * call to `malloc`.
+ */
+class InitializeDynamicAllocationInstruction extends SideEffectInstruction {
+ InitializeDynamicAllocationInstruction() {
+ getOpcode() instanceof Opcode::InitializeDynamicAllocation
+ }
+
+ /**
+ * Gets the operand that represents the address of the allocation this instruction is initializing.
+ */
+ final AddressOperand getAllocationAddressOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the address for the allocation this instruction is initializing.
+ */
+ final Instruction getAllocationAddress() { result = getAllocationAddressOperand().getDef() }
+}
+
+/**
+ * An instruction representing a GNU or MSVC inline assembly statement.
+ */
+class InlineAsmInstruction extends Instruction {
+ InlineAsmInstruction() { getOpcode() instanceof Opcode::InlineAsm }
+}
+
+/**
+ * An instruction that throws an exception.
+ */
+class ThrowInstruction extends Instruction {
+ ThrowInstruction() { getOpcode() instanceof ThrowOpcode }
+}
+
+/**
+ * An instruction that throws a new exception.
+ */
+class ThrowValueInstruction extends ThrowInstruction {
+ ThrowValueInstruction() { getOpcode() instanceof Opcode::ThrowValue }
+
+ /**
+ * Gets the address operand of the exception thrown by this instruction.
+ */
+ final AddressOperand getExceptionAddressOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the address of the exception thrown by this instruction.
+ */
+ final Instruction getExceptionAddress() { result = getExceptionAddressOperand().getDef() }
+
+ /**
+ * Gets the operand for the exception thrown by this instruction.
+ */
+ final LoadOperand getExceptionOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the exception thrown by this instruction.
+ */
+ final Instruction getException() { result = getExceptionOperand().getDef() }
+}
+
+/**
+ * An instruction that re-throws the current exception.
+ */
+class ReThrowInstruction extends ThrowInstruction {
+ ReThrowInstruction() { getOpcode() instanceof Opcode::ReThrow }
+}
+
+/**
+ * An instruction that exits the current function by propagating an exception.
+ */
+class UnwindInstruction extends Instruction {
+ UnwindInstruction() { getOpcode() instanceof Opcode::Unwind }
+}
+
+/**
+ * An instruction that starts a `catch` handler.
+ */
+class CatchInstruction extends Instruction {
+ CatchInstruction() { getOpcode() instanceof CatchOpcode }
+}
+
+/**
+ * An instruction that catches an exception of a specific type.
+ */
+class CatchByTypeInstruction extends CatchInstruction {
+ Language::LanguageType exceptionType;
+
+ CatchByTypeInstruction() {
+ getOpcode() instanceof Opcode::CatchByType and
+ exceptionType = Raw::getInstructionExceptionType(this)
+ }
+
+ final override string getImmediateString() { result = exceptionType.toString() }
+
+ /**
+ * Gets the type of exception to be caught.
+ */
+ final Language::LanguageType getExceptionType() { result = exceptionType }
+}
+
+/**
+ * An instruction that catches any exception.
+ */
+class CatchAnyInstruction extends CatchInstruction {
+ CatchAnyInstruction() { getOpcode() instanceof Opcode::CatchAny }
+}
+
+/**
+ * An instruction that initializes all escaped memory.
+ */
+class AliasedDefinitionInstruction extends Instruction {
+ AliasedDefinitionInstruction() { getOpcode() instanceof Opcode::AliasedDefinition }
+}
+
+/**
+ * An instruction that consumes all escaped memory on exit from the function.
+ */
+class AliasedUseInstruction extends Instruction {
+ AliasedUseInstruction() { getOpcode() instanceof Opcode::AliasedUse }
+}
+
+/**
+ * An instruction representing the choice of one of multiple input values based on control flow.
+ *
+ * A `PhiInstruction` is inserted at the beginning of a block whenever two different definitions of
+ * the same variable reach that block. The `PhiInstruction` will have one operand corresponding to
+ * each control flow predecessor of the block, with that operand representing the version of the
+ * variable that flows from that predecessor. The result value of the `PhiInstruction` will be
+ * a copy of whichever operand corresponds to the actual predecessor that entered the block at
+ * runtime.
+ */
+class PhiInstruction extends Instruction {
+ PhiInstruction() { getOpcode() instanceof Opcode::Phi }
+
+ /**
+ * Gets all of the instruction's `PhiInputOperand`s, representing the values that flow from each predecessor block.
+ */
+ final PhiInputOperand getAnInputOperand() { result = this.getAnOperand() }
+
+ /**
+ * Gets an instruction that defines the input to one of the operands of this
+ * instruction. It's possible for more than one operand to have the same
+ * defining instruction, so this predicate will have the same number of
+ * results as `getAnInputOperand()` or fewer.
+ */
+ pragma[noinline]
+ final Instruction getAnInput() { result = this.getAnInputOperand().getDef() }
+
+ /**
+ * Gets the input operand representing the value that flows from the specified predecessor block.
+ */
+ final PhiInputOperand getInputOperand(IRBlock predecessorBlock) {
+ result = this.getAnOperand() and
+ result.getPredecessorBlock() = predecessorBlock
+ }
+}
+
+/**
+ * An instruction representing the effect that a write to a memory may have on potential aliases of
+ * that memory.
+ *
+ * A `ChiInstruction` is inserted immediately after an instruction that writes to memory. The
+ * `ChiInstruction` has two operands. The first operand, given by `getTotalOperand()`, represents
+ * the previous state of all of the memory that might be aliased by the memory write. The second
+ * operand, given by `getPartialOperand()`, represents the memory that was actually modified by the
+ * memory write. The result of the `ChiInstruction` represents the same memory as
+ * `getTotalOperand()`, updated to include the changes due to the value that was actually stored by
+ * the memory write.
+ *
+ * As an example, suppose that variable `p` and `q` are pointers that may or may not point to the
+ * same memory:
+ * ```
+ * *p = 5;
+ * x = *q;
+ * ```
+ *
+ * The IR would look like:
+ * ```
+ * r1_1 = VariableAddress[p]
+ * r1_2 = Load r1_1, m0_0 // Load the value of `p`
+ * r1_3 = Constant[5]
+ * m1_4 = Store r1_2, r1_3 // Store to `*p`
+ * m1_5 = ^Chi m0_1, m1_4 // Side effect of the previous Store on aliased memory
+ * r1_6 = VariableAddress[x]
+ * r1_7 = VariableAddress[q]
+ * r1_8 = Load r1_7, m0_2 // Load the value of `q`
+ * r1_9 = Load r1_8, m1_5 // Load the value of `*q`
+ * m1_10 = Store r1_6, r1_9 // Store to x
+ * ```
+ *
+ * Note the `Chi` instruction after the store to `*p`. This indicates that the previous contents of
+ * aliased memory (`m0_1`) are merged with the new value written by the store (`m1_4`), producing a
+ * new version of aliased memory (`m1_5`). On the subsequent load from `*q`, the source operand of
+ * `*q` is `m1_5`, indicating that the store to `*p` may (or may not) have updated the memory
+ * pointed to by `q`.
+ *
+ * For more information about how `Chi` instructions are used to model memory side effects, see
+ * https://link.springer.com/content/pdf/10.1007%2F3-540-61053-7_66.pdf.
+ */
+class ChiInstruction extends Instruction {
+ ChiInstruction() { getOpcode() instanceof Opcode::Chi }
+
+ /**
+ * Gets the operand that represents the previous state of all memory that might be aliased by the
+ * memory write.
+ */
+ final ChiTotalOperand getTotalOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the instruction whose result represents the previous state of all memory that might be
+ * aliased by the memory write.
+ */
+ final Instruction getTotal() { result = getTotalOperand().getDef() }
+
+ /**
+ * Gets the operand that represents the new value written by the memory write.
+ */
+ final ChiPartialOperand getPartialOperand() { result = getAnOperand() }
+
+ /**
+ * Gets the instruction whose result represents the new value written by the memory write.
+ */
+ final Instruction getPartial() { result = getPartialOperand().getDef() }
+
+ /**
+ * Holds if `[startBit, endBit)` is the bit range updated by the partial operand of this `ChiInstruction`, relative to the start address of the total operand.
+ */
+ final predicate getUpdatedInterval(int startBit, int endBit) {
+ Construction::getIntervalUpdatedByChi(this, startBit, endBit)
+ }
+
+ /**
+ * Holds if the `ChiPartialOperand` totally, but not exactly, overlaps with the `ChiTotalOperand`.
+ * This means that the `ChiPartialOperand` will not overwrite the entire memory associated with the
+ * `ChiTotalOperand`.
+ */
+ final predicate isPartialUpdate() { Construction::chiOnlyPartiallyUpdatesLocation(this) }
+}
+
+/**
+ * An instruction representing unreachable code.
+ *
+ * This instruction is inserted in place of the original target instruction of a `ConditionalBranch`
+ * or `Switch` instruction where that particular edge is infeasible.
+ */
+class UnreachedInstruction extends Instruction {
+ UnreachedInstruction() { getOpcode() instanceof Opcode::Unreached }
+}
+
+/**
+ * An instruction representing a built-in operation.
+ *
+ * This is used to represent a variety of intrinsic operations provided by the compiler
+ * implementation, such as vector arithmetic.
+ */
+class BuiltInOperationInstruction extends Instruction {
+ Language::BuiltInOperation operation;
+
+ BuiltInOperationInstruction() {
+ getOpcode() instanceof BuiltInOperationOpcode and
+ operation = Raw::getInstructionBuiltInOperation(this)
+ }
+
+ /**
+ * Gets the language-specific `BuiltInOperation` object that specifies the operation that is
+ * performed by this instruction.
+ */
+ final Language::BuiltInOperation getBuiltInOperation() { result = operation }
+}
+
+/**
+ * An instruction representing a built-in operation that does not have a specific opcode. The
+ * actual operation is specified by the `getBuiltInOperation()` predicate.
+ */
+class BuiltInInstruction extends BuiltInOperationInstruction {
+ BuiltInInstruction() { getOpcode() instanceof Opcode::BuiltIn }
+
+ final override string getImmediateString() { result = getBuiltInOperation().toString() }
+}
+
+/**
+ * An instruction that returns a `va_list` to access the arguments passed to the `...` parameter.
+ *
+ * The operand specifies the address of the `IREllipsisVariable` used to represent the `...`
+ * parameter. The result is a `va_list` that initially refers to the first argument that was passed
+ * to the `...` parameter.
+ */
+class VarArgsStartInstruction extends UnaryInstruction {
+ VarArgsStartInstruction() { getOpcode() instanceof Opcode::VarArgsStart }
+}
+
+/**
+ * An instruction that cleans up a `va_list` after it is no longer in use.
+ *
+ * The operand specifies the address of the `va_list` to clean up. This instruction does not return
+ * a result.
+ */
+class VarArgsEndInstruction extends UnaryInstruction {
+ VarArgsEndInstruction() { getOpcode() instanceof Opcode::VarArgsEnd }
+}
+
+/**
+ * An instruction that returns the address of the argument currently pointed to by a `va_list`.
+ *
+ * The operand is the `va_list` that points to the argument. The result is the address of the
+ * argument.
+ */
+class VarArgInstruction extends UnaryInstruction {
+ VarArgInstruction() { getOpcode() instanceof Opcode::VarArg }
+}
+
+/**
+ * An instruction that modifies a `va_list` to point to the next argument that was passed to the
+ * `...` parameter.
+ *
+ * The operand is the current `va_list`. The result is an updated `va_list` that points to the next
+ * argument of the `...` parameter.
+ */
+class NextVarArgInstruction extends UnaryInstruction {
+ NextVarArgInstruction() { getOpcode() instanceof Opcode::NextVarArg }
+}
+
+/**
+ * An instruction that allocates a new object on the managed heap.
+ *
+ * This instruction is used to represent the allocation of a new object in C# using the `new`
+ * expression. This instruction does not invoke a constructor for the object. Instead, there will be
+ * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the
+ * result of the `NewObj` as the `this` argument.
+ *
+ * The result is the address of the newly allocated object.
+ */
+class NewObjInstruction extends Instruction {
+ NewObjInstruction() { getOpcode() instanceof Opcode::NewObj }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll
new file mode 100644
index 00000000000..d7cf89ca9aa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll
@@ -0,0 +1,484 @@
+/**
+ * Provides classes that represent the input values of IR instructions.
+ */
+
+private import internal.IRInternal
+private import Instruction
+private import IRBlock
+private import internal.OperandImports as Imports
+private import Imports::MemoryAccessKind
+private import Imports::IRType
+private import Imports::Overlap
+private import Imports::OperandTag
+private import Imports::TOperand
+private import internal.OperandInternal
+
+/**
+ * An operand of an `Instruction` in this stage of the IR. Implemented as a union of the branches
+ * of `TOperand` that are used in this stage.
+ */
+private class TStageOperand =
+ TRegisterOperand or TNonSSAMemoryOperand or TPhiOperand or TChiOperand;
+
+/**
+ * An operand of an `Instruction`. The operand represents a use of the result of one instruction
+ * (the defining instruction) in another instruction (the use instruction).
+ */
+class Operand extends TStageOperand {
+ cached
+ Operand() {
+ // Ensure that the operand does not refer to instructions from earlier stages that are unreachable here
+ exists(Instruction use, Instruction def | this = registerOperand(use, _, def))
+ or
+ exists(Instruction use | this = nonSSAMemoryOperand(use, _))
+ or
+ exists(Instruction use, Instruction def, IRBlock predecessorBlock |
+ this = phiOperand(use, def, predecessorBlock, _) or
+ this = reusedPhiOperand(use, def, predecessorBlock, _)
+ )
+ or
+ exists(Instruction use | this = chiOperand(use, _))
+ }
+
+ /** Gets a textual representation of this element. */
+ string toString() { result = "Operand" }
+
+ /**
+ * Gets the location of the source code for this operand.
+ */
+ final Language::Location getLocation() { result = getUse().getLocation() }
+
+ /**
+ * Gets the function that contains this operand.
+ */
+ final IRFunction getEnclosingIRFunction() { result = getUse().getEnclosingIRFunction() }
+
+ /**
+ * Gets the `Instruction` that consumes this operand.
+ */
+ Instruction getUse() { none() }
+
+ /**
+ * Gets the `Instruction` whose result is the value of the operand. Unlike
+ * `getDef`, this also has a result when `isDefinitionInexact` holds, which
+ * means that the resulting instruction may only _partially_ or _potentially_
+ * be the value of this operand.
+ */
+ Instruction getAnyDef() { none() }
+
+ /**
+ * Gets the `Instruction` whose result is the value of the operand. Unlike
+ * `getAnyDef`, this has no result when `isDefinitionInexact` holds,
+ * which means that the result of the resulting instruction is always
+ * exactly the value of this operand.
+ */
+ final Instruction getDef() {
+ result = this.getAnyDef() and
+ getDefinitionOverlap() instanceof MustExactlyOverlap
+ }
+
+ /**
+ * DEPRECATED: renamed to `getUse`.
+ *
+ * Gets the `Instruction` that consumes this operand.
+ */
+ deprecated final Instruction getUseInstruction() { result = getUse() }
+
+ /**
+ * DEPRECATED: use `getAnyDef` or `getDef`. The exact replacement for this
+ * predicate is `getAnyDef`, but most uses of this predicate should probably
+ * be replaced with `getDef`.
+ *
+ * Gets the `Instruction` whose result is the value of the operand.
+ */
+ deprecated final Instruction getDefinitionInstruction() { result = getAnyDef() }
+
+ /**
+ * Gets the overlap relationship between the operand's definition and its use.
+ */
+ Overlap getDefinitionOverlap() { none() }
+
+ /**
+ * Holds if the result of the definition instruction does not exactly overlap this use.
+ */
+ final predicate isDefinitionInexact() { not getDefinitionOverlap() instanceof MustExactlyOverlap }
+
+ /**
+ * Gets a prefix to use when dumping the operand in an operand list.
+ */
+ string getDumpLabel() { result = "" }
+
+ /**
+ * Gets a string that uniquely identifies this operand on its use instruction.
+ */
+ string getDumpId() { result = "" }
+
+ /**
+ * Gets a string describing this operand, suitable for display in IR dumps. This consists of the
+ * result ID of the instruction consumed by the operand, plus a label identifying the operand
+ * kind.
+ *
+ * For example: `this:r3_5`
+ */
+ final string getDumpString() {
+ result = getDumpLabel() + getInexactSpecifier() + getDefinitionId()
+ }
+
+ /**
+ * Gets a string containing the identifier of the definition of this use, or `m?` if the
+ * definition is not modeled in SSA.
+ */
+ private string getDefinitionId() {
+ result = getAnyDef().getResultId()
+ or
+ not exists(getAnyDef()) and result = "m?"
+ }
+
+ /**
+ * Gets a string prefix to prepend to the operand's definition ID in an IR dump, specifying whether the operand is
+ * an exact or inexact use of its definition. For an inexact use, the prefix is "~". For an exact use, the prefix is
+ * the empty string.
+ */
+ private string getInexactSpecifier() {
+ if isDefinitionInexact() then result = "~" else result = ""
+ }
+
+ /**
+ * Gets the order in which the operand should be sorted in the operand list.
+ */
+ int getDumpSortOrder() { result = -1 }
+
+ /**
+ * Gets the type of the value consumed by this operand. This is usually the same as the
+ * result type of the definition instruction consumed by this operand. For register operands,
+ * this is always the case. For some memory operands, the operand type may be different from
+ * the definition type, such as in the case of a partial read or a read from a pointer that
+ * has been cast to a different type.
+ */
+ Language::LanguageType getLanguageType() { result = getAnyDef().getResultLanguageType() }
+
+ /**
+ * Gets the language-neutral type of the value consumed by this operand. This is usually the same
+ * as the result type of the definition instruction consumed by this operand. For register
+ * operands, this is always the case. For some memory operands, the operand type may be different
+ * from the definition type, such as in the case of a partial read or a read from a pointer that
+ * has been cast to a different type.
+ */
+ final IRType getIRType() { result = getLanguageType().getIRType() }
+
+ /**
+ * Gets the type of the value consumed by this operand. This is usually the same as the
+ * result type of the definition instruction consumed by this operand. For register operands,
+ * this is always the case. For some memory operands, the operand type may be different from
+ * the definition type, such as in the case of a partial read or a read from a pointer that
+ * has been cast to a different type.
+ */
+ final Language::Type getType() { getLanguageType().hasType(result, _) }
+
+ /**
+ * Holds if the value consumed by this operand is a glvalue. If this
+ * holds, the value of the operand represents the address of a location,
+ * and the type of the location is given by `getType()`. If this does
+ * not hold, the value of the operand represents a value whose type is
+ * given by `getType()`.
+ */
+ final predicate isGLValue() { getLanguageType().hasType(_, true) }
+
+ /**
+ * Gets the size of the value consumed by this operand, in bytes. If the operand does not have
+ * a known constant size, this predicate does not hold.
+ */
+ final int getSize() { result = getLanguageType().getByteSize() }
+}
+
+/**
+ * An operand that consumes a memory result (e.g. the `LoadOperand` on a `Load` instruction).
+ */
+class MemoryOperand extends Operand {
+ cached
+ MemoryOperand() {
+ this instanceof TNonSSAMemoryOperand or
+ this instanceof TPhiOperand or
+ this instanceof TChiOperand
+ }
+
+ /**
+ * Gets the kind of memory access performed by the operand.
+ */
+ MemoryAccessKind getMemoryAccess() { result = getUse().getOpcode().getReadMemoryAccess() }
+
+ /**
+ * Holds if the memory access performed by this operand will not always read from every bit in the
+ * memory location. This is most commonly used for memory accesses that may or may not actually
+ * occur depending on runtime state (for example, the write side effect of an output parameter
+ * that is not written to on all paths), or for accesses where the memory location is a
+ * conservative estimate of the memory that might actually be accessed at runtime (for example,
+ * the global side effects of a function call).
+ */
+ predicate hasMayReadMemoryAccess() { getUse().getOpcode().hasMayReadMemoryAccess() }
+
+ /**
+ * Gets the operand that holds the memory address from which the current operand loads its
+ * value, if any. For example, in `r3 = Load r1, m2`, the result of `getAddressOperand()` for `m2`
+ * is `r1`.
+ */
+ final AddressOperand getAddressOperand() {
+ getMemoryAccess().usesAddressOperand() and
+ result.getUse() = getUse()
+ }
+}
+
+/**
+ * An operand that is not an operand of a `PhiInstruction`.
+ */
+class NonPhiOperand extends Operand {
+ Instruction useInstr;
+ OperandTag tag;
+
+ NonPhiOperand() {
+ this = registerOperand(useInstr, tag, _) or
+ this = nonSSAMemoryOperand(useInstr, tag) or
+ this = chiOperand(useInstr, tag)
+ }
+
+ final override Instruction getUse() { result = useInstr }
+
+ final override string getDumpLabel() { result = tag.getLabel() }
+
+ final override string getDumpId() { result = tag.getId() }
+
+ final override int getDumpSortOrder() { result = tag.getSortOrder() }
+
+ /**
+ * Gets the `OperandTag` that specifies how this operand is used by its `Instruction`.
+ */
+ final OperandTag getOperandTag() { result = tag }
+}
+
+/**
+ * An operand that consumes a register (non-memory) result.
+ */
+class RegisterOperand extends NonPhiOperand, TRegisterOperand {
+ override RegisterOperandTag tag;
+ Instruction defInstr;
+
+ cached
+ RegisterOperand() { this = registerOperand(useInstr, tag, defInstr) }
+
+ final override string toString() { result = tag.toString() }
+
+ final override Instruction getAnyDef() { result = defInstr }
+
+ final override Overlap getDefinitionOverlap() {
+ // All register results overlap exactly with their uses.
+ result instanceof MustExactlyOverlap
+ }
+}
+
+/**
+ * A memory operand other than the operand of a `Phi` instruction.
+ */
+class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, TNonPhiMemoryOperand {
+ override MemoryOperandTag tag;
+
+ cached
+ NonPhiMemoryOperand() {
+ this = nonSSAMemoryOperand(useInstr, tag)
+ or
+ this = chiOperand(useInstr, tag)
+ }
+
+ final override string toString() { result = tag.toString() }
+
+ final override Instruction getAnyDef() {
+ result = unique(Instruction defInstr | hasDefinition(defInstr, _))
+ }
+
+ final override Overlap getDefinitionOverlap() { hasDefinition(_, result) }
+
+ pragma[noinline]
+ private predicate hasDefinition(Instruction defInstr, Overlap overlap) {
+ defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
+ not Construction::isInCycle(useInstr) and
+ strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
+ }
+
+ /**
+ * Holds if the operand totally overlaps with its definition and consumes the
+ * bit range `[startBitOffset, endBitOffset)` relative to the start address of the definition.
+ */
+ predicate getUsedInterval(int startBitOffset, int endBitOffset) {
+ Construction::getUsedInterval(this, startBitOffset, endBitOffset)
+ }
+}
+
+/**
+ * A memory operand whose type may be different from the type of the result of its definition.
+ */
+class TypedOperand extends NonPhiMemoryOperand {
+ override TypedOperandTag tag;
+
+ final override Language::LanguageType getLanguageType() {
+ result = Construction::getInstructionOperandType(useInstr, tag)
+ }
+}
+
+/**
+ * The address operand of an instruction that loads or stores a value from
+ * memory (e.g. `Load`, `Store`).
+ */
+class AddressOperand extends RegisterOperand {
+ override AddressOperandTag tag;
+}
+
+/**
+ * The buffer size operand of an instruction that represents a read or write of
+ * a buffer.
+ */
+class BufferSizeOperand extends RegisterOperand {
+ override BufferSizeOperandTag tag;
+}
+
+/**
+ * The source value operand of an instruction that loads a value from memory (e.g. `Load`,
+ * `ReturnValue`, `ThrowValue`).
+ */
+class LoadOperand extends TypedOperand {
+ override LoadOperandTag tag;
+}
+
+/**
+ * The source value operand of a `Store` instruction.
+ */
+class StoreValueOperand extends RegisterOperand {
+ override StoreValueOperandTag tag;
+}
+
+/**
+ * The sole operand of a unary instruction (e.g. `Convert`, `Negate`, `Copy`).
+ */
+class UnaryOperand extends RegisterOperand {
+ override UnaryOperandTag tag;
+}
+
+/**
+ * The left operand of a binary instruction (e.g. `Add`, `CompareEQ`).
+ */
+class LeftOperand extends RegisterOperand {
+ override LeftOperandTag tag;
+}
+
+/**
+ * The right operand of a binary instruction (e.g. `Add`, `CompareEQ`).
+ */
+class RightOperand extends RegisterOperand {
+ override RightOperandTag tag;
+}
+
+/**
+ * The condition operand of a `ConditionalBranch` or `Switch` instruction.
+ */
+class ConditionOperand extends RegisterOperand {
+ override ConditionOperandTag tag;
+}
+
+/**
+ * The operand representing the target function of a `Call` instruction.
+ */
+class CallTargetOperand extends RegisterOperand {
+ override CallTargetOperandTag tag;
+}
+
+/**
+ * An operand representing an argument to a function call. This includes both
+ * positional arguments (represented by `PositionalArgumentOperand`) and the
+ * implicit `this` argument, if any (represented by `ThisArgumentOperand`).
+ */
+class ArgumentOperand extends RegisterOperand {
+ override ArgumentOperandTag tag;
+}
+
+/**
+ * An operand representing the implicit 'this' argument to a member function
+ * call.
+ */
+class ThisArgumentOperand extends ArgumentOperand {
+ override ThisArgumentOperandTag tag;
+}
+
+/**
+ * An operand representing an argument to a function call.
+ */
+class PositionalArgumentOperand extends ArgumentOperand {
+ override PositionalArgumentOperandTag tag;
+
+ /**
+ * Gets the zero-based index of the argument.
+ */
+ final int getIndex() { result = tag.getArgIndex() }
+}
+
+/**
+ * An operand representing memory read as a side effect of evaluating another instruction.
+ */
+class SideEffectOperand extends TypedOperand {
+ override SideEffectOperandTag tag;
+}
+
+/**
+ * An operand of a `PhiInstruction`.
+ */
+class PhiInputOperand extends MemoryOperand, TPhiOperand {
+ PhiInstruction useInstr;
+ Instruction defInstr;
+ IRBlock predecessorBlock;
+ Overlap overlap;
+
+ cached
+ PhiInputOperand() {
+ this = phiOperand(useInstr, defInstr, predecessorBlock, overlap)
+ or
+ this = reusedPhiOperand(useInstr, defInstr, predecessorBlock, overlap)
+ }
+
+ override string toString() { result = "Phi" }
+
+ final override PhiInstruction getUse() { result = useInstr }
+
+ final override Instruction getAnyDef() { result = defInstr }
+
+ final override Overlap getDefinitionOverlap() { result = overlap }
+
+ final override int getDumpSortOrder() { result = 11 + getPredecessorBlock().getDisplayIndex() }
+
+ final override string getDumpLabel() {
+ result = "from " + getPredecessorBlock().getDisplayIndex().toString() + ":"
+ }
+
+ final override string getDumpId() { result = getPredecessorBlock().getDisplayIndex().toString() }
+
+ /**
+ * Gets the predecessor block from which this value comes.
+ */
+ final IRBlock getPredecessorBlock() { result = predecessorBlock }
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof PhiMemoryAccess }
+}
+
+/**
+ * The total operand of a Chi node, representing the previous value of the memory.
+ */
+class ChiTotalOperand extends NonPhiMemoryOperand {
+ override ChiTotalOperandTag tag;
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof ChiTotalMemoryAccess }
+}
+
+/**
+ * The partial operand of a Chi node, representing the value being written to part of the memory.
+ */
+class ChiPartialOperand extends NonPhiMemoryOperand {
+ override ChiPartialOperandTag tag;
+
+ final override MemoryAccessKind getMemoryAccess() { result instanceof ChiPartialMemoryAccess }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.ql
new file mode 100644
index 00000000000..83e2e37234b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Print SSA IR
+ * @description Outputs a representation of the SSA IR graph
+ * @id cpp/print-ssa-ir
+ * @kind graph
+ */
+
+import PrintIR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll
new file mode 100644
index 00000000000..59dadee7154
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll
@@ -0,0 +1,329 @@
+/**
+ * Outputs a representation of the IR as a control flow graph.
+ *
+ * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small
+ * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most
+ * uses, however, it is better to write a query that imports `PrintIR.qll`, extends
+ * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to
+ * dump.
+ */
+
+private import internal.IRInternal
+private import IR
+private import internal.PrintIRImports as Imports
+import Imports::IRConfiguration
+
+private newtype TPrintIRConfiguration = MkPrintIRConfiguration()
+
+/**
+ * The query can extend this class to control which functions are printed.
+ */
+class PrintIRConfiguration extends TPrintIRConfiguration {
+ /** Gets a textual representation of this configuration. */
+ string toString() { result = "PrintIRConfiguration" }
+
+ /**
+ * Holds if the IR for `func` should be printed. By default, holds for all
+ * functions.
+ */
+ predicate shouldPrintFunction(Language::Function func) { any() }
+}
+
+/**
+ * Override of `IRConfiguration` to only evaluate debug strings for the functions that are to be dumped.
+ */
+private class FilteredIRConfiguration extends IRConfiguration {
+ override predicate shouldEvaluateDebugStringsForFunction(Language::Function func) {
+ shouldPrintFunction(func)
+ }
+}
+
+private predicate shouldPrintFunction(Language::Function func) {
+ exists(PrintIRConfiguration config | config.shouldPrintFunction(func))
+}
+
+private string getAdditionalInstructionProperty(Instruction instr, string key) {
+ exists(IRPropertyProvider provider | result = provider.getInstructionProperty(instr, key))
+}
+
+private string getAdditionalBlockProperty(IRBlock block, string key) {
+ exists(IRPropertyProvider provider | result = provider.getBlockProperty(block, key))
+}
+
+/**
+ * Gets the properties of an operand from any active property providers.
+ */
+private string getAdditionalOperandProperty(Operand operand, string key) {
+ exists(IRPropertyProvider provider | result = provider.getOperandProperty(operand, key))
+}
+
+/**
+ * Gets a string listing the properties of the operand and their corresponding values. If the
+ * operand has no properties, this predicate has no result.
+ */
+private string getOperandPropertyListString(Operand operand) {
+ result =
+ strictconcat(string key, string value |
+ value = getAdditionalOperandProperty(operand, key)
+ |
+ key + ":" + value, ", "
+ )
+}
+
+/**
+ * Gets a string listing the properties of the operand and their corresponding values. The list is
+ * surrounded by curly braces. If the operand has no properties, this predicate returns an empty
+ * string.
+ */
+private string getOperandPropertyString(Operand operand) {
+ result = "{" + getOperandPropertyListString(operand) + "}"
+ or
+ not exists(getOperandPropertyListString(operand)) and result = ""
+}
+
+private newtype TPrintableIRNode =
+ TPrintableIRFunction(IRFunction irFunc) { shouldPrintFunction(irFunc.getFunction()) } or
+ TPrintableIRBlock(IRBlock block) { shouldPrintFunction(block.getEnclosingFunction()) } or
+ TPrintableInstruction(Instruction instr) { shouldPrintFunction(instr.getEnclosingFunction()) }
+
+/**
+ * A node to be emitted in the IR graph.
+ */
+abstract private class PrintableIRNode extends TPrintableIRNode {
+ abstract string toString();
+
+ /**
+ * Gets the location to be emitted for the node.
+ */
+ abstract Language::Location getLocation();
+
+ /**
+ * Gets the label to be emitted for the node.
+ */
+ abstract string getLabel();
+
+ /**
+ * Gets the order in which the node appears in its parent node.
+ */
+ abstract int getOrder();
+
+ /**
+ * Gets the parent of this node.
+ */
+ abstract PrintableIRNode getParent();
+
+ /**
+ * Gets the kind of graph represented by this node ("graph" or "tree").
+ */
+ string getGraphKind() { none() }
+
+ /**
+ * Holds if this node should always be rendered as text, even in a graphical
+ * viewer.
+ */
+ predicate forceText() { none() }
+
+ /**
+ * Gets the value of the node property with the specified key.
+ */
+ string getProperty(string key) {
+ key = "semmle.label" and result = getLabel()
+ or
+ key = "semmle.order" and result = getOrder().toString()
+ or
+ key = "semmle.graphKind" and result = getGraphKind()
+ or
+ key = "semmle.forceText" and forceText() and result = "true"
+ }
+}
+
+/**
+ * An IR graph node representing an `IRFunction` object.
+ */
+private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction {
+ IRFunction irFunc;
+
+ PrintableIRFunction() { this = TPrintableIRFunction(irFunc) }
+
+ override string toString() { result = irFunc.toString() }
+
+ override Language::Location getLocation() { result = irFunc.getLocation() }
+
+ override string getLabel() { result = Language::getIdentityString(irFunc.getFunction()) }
+
+ override int getOrder() {
+ this =
+ rank[result + 1](PrintableIRFunction orderedFunc, Language::Location location |
+ location = orderedFunc.getIRFunction().getLocation()
+ |
+ orderedFunc
+ order by
+ location.getFile().getAbsolutePath(), location.getStartLine(), location.getStartColumn(),
+ orderedFunc.getLabel()
+ )
+ }
+
+ final override PrintableIRNode getParent() { none() }
+
+ final IRFunction getIRFunction() { result = irFunc }
+}
+
+/**
+ * An IR graph node representing an `IRBlock` object.
+ */
+private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock {
+ IRBlock block;
+
+ PrintableIRBlock() { this = TPrintableIRBlock(block) }
+
+ override string toString() { result = getLabel() }
+
+ override Language::Location getLocation() { result = block.getLocation() }
+
+ override string getLabel() { result = "Block " + block.getDisplayIndex().toString() }
+
+ override int getOrder() { result = block.getDisplayIndex() }
+
+ final override string getGraphKind() { result = "tree" }
+
+ final override predicate forceText() { any() }
+
+ final override PrintableIRFunction getParent() {
+ result.getIRFunction() = block.getEnclosingIRFunction()
+ }
+
+ override string getProperty(string key) {
+ result = PrintableIRNode.super.getProperty(key) or
+ result = getAdditionalBlockProperty(block, key)
+ }
+
+ final IRBlock getBlock() { result = block }
+}
+
+/**
+ * An IR graph node representing an `Instruction`.
+ */
+private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction {
+ Instruction instr;
+
+ PrintableInstruction() { this = TPrintableInstruction(instr) }
+
+ override string toString() { result = instr.toString() }
+
+ override Language::Location getLocation() { result = instr.getLocation() }
+
+ override string getLabel() {
+ exists(IRBlock block |
+ instr = block.getAnInstruction() and
+ exists(
+ string resultString, string operationString, string operandsString, int resultWidth,
+ int operationWidth
+ |
+ resultString = instr.getResultString() and
+ operationString = instr.getOperationString() and
+ operandsString = getOperandsString() and
+ columnWidths(block, resultWidth, operationWidth) and
+ result =
+ resultString + getPaddingString(resultWidth - resultString.length()) + " = " +
+ operationString + getPaddingString(operationWidth - operationString.length()) + " : " +
+ operandsString
+ )
+ )
+ }
+
+ override int getOrder() { result = instr.getDisplayIndexInBlock() }
+
+ final override PrintableIRBlock getParent() { result.getBlock() = instr.getBlock() }
+
+ final Instruction getInstruction() { result = instr }
+
+ override string getProperty(string key) {
+ result = PrintableIRNode.super.getProperty(key) or
+ result = getAdditionalInstructionProperty(instr, key)
+ }
+
+ /**
+ * Gets the string representation of the operand list. This is the same as
+ * `Instruction::getOperandsString()`, except that each operand is annotated with any properties
+ * provided by active `IRPropertyProvider` instances.
+ */
+ private string getOperandsString() {
+ result =
+ concat(Operand operand |
+ operand = instr.getAnOperand()
+ |
+ operand.getDumpString() + getOperandPropertyString(operand), ", "
+ order by
+ operand.getDumpSortOrder()
+ )
+ }
+}
+
+private predicate columnWidths(IRBlock block, int resultWidth, int operationWidth) {
+ resultWidth = max(Instruction instr | instr.getBlock() = block | instr.getResultString().length()) and
+ operationWidth =
+ max(Instruction instr | instr.getBlock() = block | instr.getOperationString().length())
+}
+
+private int maxColumnWidth() {
+ result =
+ max(Instruction instr, int width |
+ width = instr.getResultString().length() or
+ width = instr.getOperationString().length() or
+ width = instr.getOperandsString().length()
+ |
+ width
+ )
+}
+
+private string getPaddingString(int n) {
+ n = 0 and result = ""
+ or
+ n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " "
+}
+
+/**
+ * Holds if `node` belongs to the output graph, and its property `key` has the given `value`.
+ */
+query predicate nodes(PrintableIRNode node, string key, string value) {
+ value = node.getProperty(key)
+}
+
+private int getSuccessorIndex(IRBlock pred, IRBlock succ) {
+ succ =
+ rank[result + 1](IRBlock aSucc, EdgeKind kind |
+ aSucc = pred.getSuccessor(kind)
+ |
+ aSucc order by kind.toString()
+ )
+}
+
+/**
+ * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key`
+ * has the given `value`.
+ */
+query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) {
+ exists(EdgeKind kind, IRBlock predBlock, IRBlock succBlock |
+ predBlock = pred.getBlock() and
+ succBlock = succ.getBlock() and
+ predBlock.getSuccessor(kind) = succBlock and
+ (
+ (
+ key = "semmle.label" and
+ if predBlock.getBackEdgeSuccessor(kind) = succBlock
+ then value = kind.toString() + " (back edge)"
+ else value = kind.toString()
+ )
+ or
+ key = "semmle.order" and
+ value = getSuccessorIndex(predBlock, succBlock).toString()
+ )
+ )
+}
+
+/**
+ * Holds if `parent` is the parent node of `child` in the output graph.
+ */
+query predicate parents(PrintableIRNode child, PrintableIRNode parent) {
+ parent = child.getParent()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/ConstantAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/ConstantAnalysis.qll
new file mode 100644
index 00000000000..76f52f8334a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/ConstantAnalysis.qll
@@ -0,0 +1,53 @@
+private import internal.ConstantAnalysisInternal
+private import semmle.code.cpp.ir.internal.IntegerPartial
+private import IR
+
+language[monotonicAggregates]
+int getConstantValue(Instruction instr) {
+ result = instr.(IntegerConstantInstruction).getValue().toInt()
+ or
+ result = getBinaryInstructionValue(instr)
+ or
+ result = neg(getConstantValue(instr.(NegateInstruction).getUnary()))
+ or
+ result = getConstantValue(instr.(CopyInstruction).getSourceValue())
+ or
+ exists(PhiInstruction phi |
+ phi = instr and
+ result = unique(Operand op | op = phi.getAnInputOperand() | getConstantValue(op.getDef()))
+ )
+}
+
+pragma[noinline]
+private predicate binaryInstructionOperands(BinaryInstruction instr, int left, int right) {
+ left = getConstantValue(instr.getLeft()) and
+ right = getConstantValue(instr.getRight())
+}
+
+pragma[noinline]
+private int getBinaryInstructionValue(BinaryInstruction instr) {
+ exists(int left, int right |
+ binaryInstructionOperands(instr, left, right) and
+ (
+ instr instanceof AddInstruction and result = add(left, right)
+ or
+ instr instanceof SubInstruction and result = sub(left, right)
+ or
+ instr instanceof MulInstruction and result = mul(left, right)
+ or
+ instr instanceof DivInstruction and result = div(left, right)
+ or
+ instr instanceof CompareEQInstruction and result = compareEQ(left, right)
+ or
+ instr instanceof CompareNEInstruction and result = compareNE(left, right)
+ or
+ instr instanceof CompareLTInstruction and result = compareLT(left, right)
+ or
+ instr instanceof CompareGTInstruction and result = compareGT(left, right)
+ or
+ instr instanceof CompareLEInstruction and result = compareLE(left, right)
+ or
+ instr instanceof CompareGEInstruction and result = compareGE(left, right)
+ )
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/PrintConstantAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/PrintConstantAnalysis.qll
new file mode 100644
index 00000000000..57a7cf594ca
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/PrintConstantAnalysis.qll
@@ -0,0 +1,11 @@
+private import internal.ConstantAnalysisInternal
+private import semmle.code.cpp.ir.internal.IntegerConstant
+private import ConstantAnalysis
+import IR
+
+private class ConstantAnalysisPropertyProvider extends IRPropertyProvider {
+ override string getInstructionProperty(Instruction instr, string key) {
+ key = "ConstantValue" and
+ result = getValue(getConstantValue(instr)).toString()
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/internal/ConstantAnalysisInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/internal/ConstantAnalysisInternal.qll
new file mode 100644
index 00000000000..9b4f813a10b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/constant/internal/ConstantAnalysisInternal.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as IR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/PrintValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/PrintValueNumbering.qll
new file mode 100644
index 00000000000..a7fb1b3c07e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/PrintValueNumbering.qll
@@ -0,0 +1,17 @@
+private import internal.ValueNumberingImports
+private import ValueNumbering
+
+/**
+ * Provides additional information about value numbering in IR dumps.
+ */
+class ValueNumberPropertyProvider extends IRPropertyProvider {
+ override string getInstructionProperty(Instruction instr, string key) {
+ exists(ValueNumber vn |
+ vn = valueNumber(instr) and
+ key = "valnum" and
+ if strictcount(vn.getAnInstruction()) > 1
+ then result = vn.getDebugString()
+ else result = "unique"
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll
new file mode 100644
index 00000000000..796fb792366
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll
@@ -0,0 +1,88 @@
+private import internal.ValueNumberingInternal
+private import internal.ValueNumberingImports
+
+/**
+ * A value number: the identity shared by a set of instructions that produce equivalent results.
+ */
+class ValueNumber extends TValueNumber {
+  /** Gets the fixed string `GVN`. */
+  final string toString() { result = "GVN" }
+
+  /** Gets the result IDs of the instructions that have this value number, joined by `, `. */
+  final string getDebugString() {
+    result = strictconcat(this.getAnInstruction().getResultId(), ", ")
+  }
+
+  /**
+   * Gets the first known location of a member instruction, or else the unknown default location.
+   */
+  final Language::Location getLocation() {
+    if
+      exists(Instruction member |
+        member = this.getAnInstruction() and
+        not member.getLocation() instanceof Language::UnknownLocation
+      )
+    then
+      result =
+        min(Language::Location candidate |
+          candidate = this.getAnInstruction().getLocation() and
+          not candidate instanceof Language::UnknownLocation
+        |
+          candidate
+          order by
+            candidate.getFile().getAbsolutePath(), candidate.getStartLine(),
+            candidate.getStartColumn(), candidate.getEndLine(), candidate.getEndColumn()
+        )
+    else result instanceof Language::UnknownDefaultLocation
+  }
+
+  /** Gets an instruction that has been assigned this value number. There is always one. */
+  final Instruction getAnInstruction() { valueNumber(result) = this }
+
+  /** Gets an arbitrary but deterministically chosen member instruction. For debugging only. */
+  final Instruction getExampleInstruction() {
+    result =
+      min(Instruction member | member = this.getAnInstruction() |
+        member order by member.getBlock().getDisplayIndex(), member.getDisplayIndexInBlock()
+      )
+  }
+
+  /** Gets an `Operand` whose definition is exact and has this value number. */
+  final Operand getAUse() { valueNumber(result.getDef()) = this }
+
+  /** Gets the name of the kind of this value number, for example `Binary` or `Unique`. */
+  final string getKind() {
+    result = "Binary" and this instanceof TBinaryValueNumber
+    or
+    result = "Constant" and this instanceof TConstantValueNumber
+    or
+    result = "FieldAddress" and this instanceof TFieldAddressValueNumber
+    or
+    result = "InheritanceConversion" and this instanceof TInheritanceConversionValueNumber
+    or
+    result = "InitializeParameter" and this instanceof TInitializeParameterValueNumber
+    or
+    result = "LoadTotalOverlap" and this instanceof TLoadTotalOverlapValueNumber
+    or
+    result = "PointerArithmetic" and this instanceof TPointerArithmeticValueNumber
+    or
+    result = "StringConstant" and this instanceof TStringConstantValueNumber
+    or
+    result = "Unary" and this instanceof TUnaryValueNumber
+    or
+    result = "Unique" and this instanceof TUniqueValueNumber
+    or
+    result = "VariableAddress" and this instanceof TVariableAddressValueNumber
+  }
+}
+
+/**
+ * Gets the value number of `instr`, if it has one. There is at most one result.
+ */
+ValueNumber valueNumber(Instruction instr) { tvalueNumber(instr) = result }
+
+/**
+ * Gets the value number of the exact definition of `op`, if it has one.
+ * There is at most one result.
+ */
+ValueNumber valueNumberOfOperand(Operand op) { tvalueNumberOfOperand(op) = result }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingImports.qll
new file mode 100644
index 00000000000..8482a5e4b14
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.aliased_ssa.IR
+import semmle.code.cpp.ir.internal.Overlap
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingInternal.qll
new file mode 100644
index 00000000000..2467d961892
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingInternal.qll
@@ -0,0 +1,311 @@
+private import ValueNumberingImports
+
+/**
+ * The underlying value numbers. Every branch is keyed by an enclosing `IRFunction`, among others.
+ */
+newtype TValueNumber =
+  TVariableAddressValueNumber(IRFunction func, Language::AST ast) {
+    variableAddressValueNumber(_, func, ast)
+  } or
+  TInitializeParameterValueNumber(IRFunction func, Language::AST param) {
+    initializeParameterValueNumber(_, func, param)
+  } or
+  TConstantValueNumber(IRFunction func, IRType ty, string text) {
+    constantValueNumber(_, func, ty, text)
+  } or
+  TStringConstantValueNumber(IRFunction func, IRType ty, string text) {
+    stringConstantValueNumber(_, func, ty, text)
+  } or
+  TFieldAddressValueNumber(IRFunction func, Language::Field fld, TValueNumber objectAddr) {
+    fieldAddressValueNumber(_, func, fld, objectAddr)
+  } or
+  TBinaryValueNumber(IRFunction func, Opcode op, TValueNumber lhs, TValueNumber rhs) {
+    binaryValueNumber(_, func, op, lhs, rhs)
+  } or
+  TPointerArithmeticValueNumber(
+    IRFunction func, Opcode op, int elemSize, TValueNumber lhs, TValueNumber rhs
+  ) {
+    pointerArithmeticValueNumber(_, func, op, elemSize, lhs, rhs)
+  } or
+  TUnaryValueNumber(IRFunction func, Opcode op, TValueNumber source) {
+    unaryValueNumber(_, func, op, source)
+  } or
+  TInheritanceConversionValueNumber(
+    IRFunction func, Opcode op, Language::Class baseClass, Language::Class derivedClass,
+    TValueNumber source
+  ) {
+    inheritanceConversionValueNumber(_, func, op, baseClass, derivedClass, source)
+  } or
+  TLoadTotalOverlapValueNumber(
+    IRFunction func, IRType ty, TValueNumber mem, TValueNumber addr
+  ) {
+    loadTotalOverlapValueNumber(_, func, ty, mem, addr)
+  } or
+  TUniqueValueNumber(IRFunction func, Instruction instr) { uniqueValueNumber(instr, func) }
+
+/**
+ * A `CopyInstruction` whose source value operand exactly overlaps the definition it reads, so
+ * that the copied value is congruent to that definition.
+ * For example:
+ * ```
+ * Point p = { 1, 2 };
+ * Point q = p;
+ * int a = p.x;
+ * ```
+ * On line 2, the use of `p` refers to the definition of `p` on line 1 and reads exactly the same
+ * memory, so it is congruent to that definition.
+ * On line 3, the use of `p.x` also refers to the definition of `p` on line 1, but it reads only
+ * part of the memory that the definition writes, so it is not congruent to it.
+ */
+class CongruentCopyInstruction extends CopyInstruction {
+  CongruentCopyInstruction() {
+    exists(MustExactlyOverlap exact | exact = this.getSourceValueOperand().getDefinitionOverlap())
+  }
+}
+
+class LoadTotalOverlapInstruction extends LoadInstruction {
+  LoadTotalOverlapInstruction() {
+    exists(MustTotallyOverlap total | total = this.getSourceValueOperand().getDefinitionOverlap())
+  }
+}
+
+/**
+ * Holds if `instr` is of a kind for which this library can compute a shared value number, as
+ * opposed to a `unique` value number that no other instruction ever has.
+ */
+private predicate numberableInstruction(Instruction instr) {
+  instr instanceof ConstantInstruction
+  or
+  instr instanceof StringConstantInstruction
+  or
+  instr instanceof VariableAddressInstruction
+  or
+  instr instanceof FieldAddressInstruction
+  or
+  instr instanceof InitializeParameterInstruction
+  or
+  instr instanceof BinaryInstruction
+  or
+  instr instanceof PointerArithmeticInstruction
+  or
+  not instr instanceof CopyInstruction and instr instanceof UnaryInstruction
+  or
+  instr instanceof CongruentCopyInstruction
+  or
+  instr instanceof LoadTotalOverlapInstruction
+}
+
+private predicate filteredNumberableInstruction(Instruction instr) {
+  // `count` is used instead of `strictcount` so that missing AST elements are handled.
+  // The `instanceof` test is kept apart from the inline cast so a failed cast counts as 0.
+  instr instanceof FieldAddressInstruction and
+  count(instr.(FieldAddressInstruction).getField()) != 1
+  or
+  instr instanceof ConstantInstruction and
+  count(instr.getResultIRType()) != 1
+  or
+  instr instanceof VariableAddressInstruction and
+  count(instr.(VariableAddressInstruction).getIRVariable().getAST()) != 1
+}
+
+private predicate variableAddressValueNumber(
+  VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
+) {
+  // Key on the underlying AST element rather than on the `IRVariable`: this works around
+  // variables or expressions with multiple types giving rise to multiple `IRVariable`s.
+  // Only instructions whose variable has exactly one AST element are numbered here.
+  strictcount(instr.getIRVariable().getAST()) = 1 and
+  ast = instr.getIRVariable().getAST() and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate initializeParameterValueNumber(
+  InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
+) {
+  // Key on the underlying AST element rather than on the `IRVariable`.
+  // This works around variables or expressions with multiple types giving rise to multiple
+  // `IRVariable`s.
+  var = instr.getIRVariable().getAST() and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate constantValueNumber(
+  ConstantInstruction instr, IRFunction irFunc, IRType type, string value
+) {
+  value = instr.getValue() and
+  type = instr.getResultIRType() and
+  strictcount(instr.getResultIRType()) = 1 and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate stringConstantValueNumber(
+  StringConstantInstruction instr, IRFunction irFunc, IRType type, string value
+) {
+  value = instr.getValue().getValue() and
+  type = instr.getResultIRType() and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate fieldAddressValueNumber(
+  FieldAddressInstruction instr, IRFunction irFunc, Language::Field field,
+  TValueNumber objectAddress
+) {
+  strictcount(instr.getField()) = 1 and
+  field = instr.getField() and
+  objectAddress = tvalueNumber(instr.getObjectAddress()) and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate binaryValueNumber(
+  BinaryInstruction instr, IRFunction irFunc, Opcode opcode, TValueNumber leftOperand,
+  TValueNumber rightOperand
+) {
+  not instr instanceof PointerArithmeticInstruction and
+  irFunc = instr.getEnclosingIRFunction() and
+  opcode = instr.getOpcode() and
+  leftOperand = tvalueNumber(instr.getLeft()) and
+  rightOperand = tvalueNumber(instr.getRight())
+}
+
+private predicate pointerArithmeticValueNumber(
+  PointerArithmeticInstruction instr, IRFunction irFunc, Opcode opcode, int elementSize,
+  TValueNumber leftOperand, TValueNumber rightOperand
+) {
+  rightOperand = tvalueNumber(instr.getRight()) and
+  leftOperand = tvalueNumber(instr.getLeft()) and
+  elementSize = instr.getElementSize() and
+  opcode = instr.getOpcode() and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate unaryValueNumber(
+  UnaryInstruction instr, IRFunction irFunc, Opcode opcode, TValueNumber operand
+) {
+  irFunc = instr.getEnclosingIRFunction() and
+  opcode = instr.getOpcode() and
+  operand = tvalueNumber(instr.getUnary()) and
+  not instr instanceof CopyInstruction and
+  not instr instanceof FieldAddressInstruction and
+  not instr instanceof InheritanceConversionInstruction
+}
+
+private predicate inheritanceConversionValueNumber(
+  InheritanceConversionInstruction instr, IRFunction irFunc, Opcode opcode,
+  Language::Class baseClass, Language::Class derivedClass, TValueNumber operand
+) {
+  operand = tvalueNumber(instr.getUnary()) and
+  derivedClass = instr.getDerivedClass() and
+  baseClass = instr.getBaseClass() and
+  opcode = instr.getOpcode() and
+  irFunc = instr.getEnclosingIRFunction()
+}
+
+private predicate loadTotalOverlapValueNumber(
+  LoadTotalOverlapInstruction instr, IRFunction irFunc, IRType type, TValueNumber memOperand,
+  TValueNumber operand
+) {
+  irFunc = instr.getEnclosingIRFunction() and
+  type = instr.getResultIRType() and
+  operand = tvalueNumberOfOperand(instr.getAnOperand().(AddressOperand)) and
+  memOperand = tvalueNumber(instr.getAnOperand().(MemoryOperand).getAnyDef())
+}
+
+/**
+ * Holds if `instr`, in `irFunc`, gets a unique value number because this library cannot tell
+ * whether two occurrences of such an instruction are equivalent.
+ */
+private predicate uniqueValueNumber(Instruction instr, IRFunction irFunc) {
+  irFunc = instr.getEnclosingIRFunction() and
+  (
+    filteredNumberableInstruction(instr)
+    or
+    not numberableInstruction(instr)
+  ) and
+  not instr.getResultIRType() instanceof IRVoidType
+}
+
+/**
+ * Gets the value number of `instr`, if it has one. There is at most one result.
+ */
+cached
+TValueNumber tvalueNumber(Instruction instr) {
+  exists(IRFunction func |
+    result = TUniqueValueNumber(func, instr) and
+    uniqueValueNumber(instr, func)
+  )
+  or
+  result = nonUniqueValueNumber(instr)
+}
+
+/**
+ * Gets the value number of the exact definition of `op`, if it has one.
+ * There is at most one result.
+ */
+TValueNumber tvalueNumberOfOperand(Operand op) { tvalueNumber(op.getDef()) = result }
+
+/**
+ * Gets the value number of `instr`, if it has one, provided that value number is not a unique
+ * one.
+ */
+private TValueNumber nonUniqueValueNumber(Instruction instr) {
+  exists(IRFunction func |
+    func = instr.getEnclosingIRFunction() and
+    (
+      // A congruent copy is not numbered in its own right: it takes the value number of the
+      // value that it copies.
+      result = tvalueNumber(instr.(CongruentCopyInstruction).getSourceValue())
+      or
+      exists(Language::AST ast |
+        result = TVariableAddressValueNumber(func, ast) and
+        variableAddressValueNumber(instr, func, ast)
+      )
+      or
+      exists(Language::AST param |
+        result = TInitializeParameterValueNumber(func, param) and
+        initializeParameterValueNumber(instr, func, param)
+      )
+      or
+      exists(IRType ty, string text |
+        result = TConstantValueNumber(func, ty, text) and
+        constantValueNumber(instr, func, ty, text)
+      )
+      or
+      exists(IRType ty, string text |
+        result = TStringConstantValueNumber(func, ty, text) and
+        stringConstantValueNumber(instr, func, ty, text)
+      )
+      or
+      exists(Language::Field fld, TValueNumber objectAddr |
+        result = TFieldAddressValueNumber(func, fld, objectAddr) and
+        fieldAddressValueNumber(instr, func, fld, objectAddr)
+      )
+      or
+      exists(Opcode op, TValueNumber lhs, TValueNumber rhs |
+        result = TBinaryValueNumber(func, op, lhs, rhs) and
+        binaryValueNumber(instr, func, op, lhs, rhs)
+      )
+      or
+      exists(Opcode op, int elemSize, TValueNumber lhs, TValueNumber rhs |
+        result = TPointerArithmeticValueNumber(func, op, elemSize, lhs, rhs) and
+        pointerArithmeticValueNumber(instr, func, op, elemSize, lhs, rhs)
+      )
+      or
+      exists(Opcode op, TValueNumber source |
+        result = TUnaryValueNumber(func, op, source) and
+        unaryValueNumber(instr, func, op, source)
+      )
+      or
+      exists(
+        Opcode op, Language::Class baseClass, Language::Class derivedClass, TValueNumber source
+      |
+        result = TInheritanceConversionValueNumber(func, op, baseClass, derivedClass, source) and
+        inheritanceConversionValueNumber(instr, func, op, baseClass, derivedClass, source)
+      )
+      or
+      exists(IRType ty, TValueNumber mem, TValueNumber addr |
+        result = TLoadTotalOverlapValueNumber(func, ty, mem, addr) and
+        loadTotalOverlapValueNumber(instr, func, ty, mem, addr)
+      )
+    )
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll
new file mode 100644
index 00000000000..9997b5b49a7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll
@@ -0,0 +1,461 @@
+private import AliasAnalysisInternal
+private import InputIR
+private import AliasAnalysisImports
+
+private class IntValue = Ints::IntValue;
+
+/**
+ * Gets the `CallInstruction` that `instr` belongs to: `instr` itself if it is a call, or the
+ * primary instruction of `instr` if it is a `SideEffectInstruction`.
+ */
+private CallInstruction getPrimaryCall(Instruction instr) {
+  result = instr.(SideEffectInstruction).getPrimaryInstruction()
+  or
+  result = instr
+}
+
+/**
+ * Holds if `operand` is an input to `call`, either as an argument or as the indirection read by
+ * one of its side effects, at the position described by `input`.
+ */
+private predicate isCallInput(
+  CallInstruction call, Operand operand, AliasModels::FunctionInput input
+) {
+  call = getPrimaryCall(operand.getUse()) and
+  (
+    exists(int pos, ReadSideEffectInstruction sideEffect |
+      operand = sideEffect.getSideEffectOperand() and
+      sideEffect = call.getAParameterSideEffect(pos) and
+      input.isParameterDerefOrQualifierObject(pos)
+    )
+    or
+    exists(int pos |
+      operand = call.getArgumentOperand(pos) and
+      input.isParameterOrQualifierAddress(pos)
+    )
+  )
+}
+
+/**
+ * Holds if `instr` is an output of `call`, either its return value or the indirection written by
+ * one of its side effects, at the position described by `output`.
+ */
+private predicate isCallOutput(
+  CallInstruction call, Instruction instr, AliasModels::FunctionOutput output
+) {
+  call = getPrimaryCall(instr) and
+  (
+    exists(int pos, WriteSideEffectInstruction sideEffect |
+      instr = sideEffect and
+      sideEffect = call.getAParameterSideEffect(pos) and
+      output.isParameterDerefOrQualifierObject(pos)
+    )
+    or
+    instr = call and output.isReturnValue()
+  )
+}
+
+/**
+ * Holds if a model of the called function says that the address in `operand` flows directly to
+ * the result of `resultInstr`.
+ */
+private predicate hasAddressFlowThroughCall(Operand operand, Instruction resultInstr) {
+  exists(
+    AliasModels::FunctionInput source, AliasModels::FunctionOutput sink, CallInstruction call
+  |
+    isCallInput(call, operand, source) and
+    isCallOutput(call, resultInstr, sink) and
+    call.getStaticCallTarget().(AliasModels::AliasFunction).hasAddressFlow(source, sink)
+  )
+}
+
+/**
+ * Holds if `operand` is used in a way that does not let any address held in it escape beyond
+ * the instruction that uses it.
+ */
+private predicate operandIsConsumedWithoutEscaping(Operand operand) {
+  // Certain arguments of standard functions never escape.
+  isNeverEscapesArgument(operand)
+  or
+  // An address used as the source or destination of a load or store does not escape, although
+  // the value that is loaded or stored might.
+  operand instanceof AddressOperand
+  or
+  exists(Instruction use |
+    use = operand.getUse() and
+    (
+      // A pointer difference lets neither of its operands escape.
+      use instanceof PointerDiffInstruction
+      or
+      // A comparison lets neither of its operands escape.
+      use instanceof CompareInstruction
+      or
+      // An address that is converted to `bool` does not escape.
+      use.(ConvertInstruction).getResultIRType() instanceof IRBooleanType
+      or
+      // Operands of a call do not escape unless some configuration wants sound escape analysis.
+      use instanceof CallInstruction and
+      not exists(IREscapeAnalysisConfiguration config | config.useSoundEscapeAnalysis())
+    )
+  )
+}
+
+private predicate operandEscapesDomain(Operand operand) {
+  not operand instanceof PhiInputOperand and
+  not operand.getUse() instanceof ReturnIndirectionInstruction and
+  not operand.getUse() instanceof ReturnValueInstruction and
+  not isOnlyEscapesViaReturnArgument(operand) and
+  not isArgumentForParameter(_, operand, _) and
+  not operandIsPropagated(operand, _, _) and
+  not operandIsConsumedWithoutEscaping(operand)
+}
+
+/**
+ * Gets the value of `instr` if its result is an integer constant, and the unknown value
+ * otherwise.
+ */
+IntValue getConstantValue(Instruction instr) {
+  result = instr.(IntegerConstantInstruction).getValue().toInt()
+  or
+  not instr instanceof IntegerConstantInstruction and result = Ints::unknown()
+}
+
+/**
+ * Gets the number of bits by which the result of `instr` is offset from the pointer operand
+ * of `instr`, when that amount is a constant.
+ * Gets the unknown value otherwise.
+ */
+IntValue getPointerBitOffset(PointerOffsetInstruction instr) {
+  exists(IntValue byteOffset, IntValue bits |
+    byteOffset = Ints::mul(getConstantValue(instr.getRight()), instr.getElementSize()) and
+    bits = Ints::mul(byteOffset, 8)
+  |
+    instr instanceof PointerSubInstruction and result = Ints::neg(bits)
+    or
+    instr instanceof PointerAddInstruction and result = bits
+  )
+}
+
+/**
+ * Holds if an address held in `operand` is propagated to the result of `instr`, displaced by
+ * `bitOffset` bits. When the address is propagated but the displacement is not known to be
+ * constant, `bitOffset` is `unknown()`.
+ */
+private predicate operandIsPropagated(Operand operand, IntValue bitOffset, Instruction instr) {
+  instr = operand.getUse() and
+  (
+    // A copy passes its source value through unchanged.
+    bitOffset = 0 and operand = instr.(CopyInstruction).getSourceValueOperand()
+    or
+    // A conversion whose result is again an address passes the source address through.
+    exists(ConvertInstruction conv, IRType convertedType |
+      conv = instr and
+      convertedType = conv.getResultIRType() and
+      convertedType instanceof IRAddressType and
+      bitOffset = 0
+    )
+    or
+    // Computing a field address from a pointer propagates that address, displaced by the
+    // bit offset of the field.
+    bitOffset = Language::getFieldBitOffset(instr.(FieldAddressInstruction).getField())
+    or
+    // Adding an integer to a pointer, or subtracting one from it, propagates the pointer
+    // (left) operand with an offset.
+    exists(PointerOffsetInstruction ptrArith |
+      ptrArith = instr and
+      operand = ptrArith.getLeftOperand() and
+      bitOffset = getPointerBitOffset(ptrArith)
+    )
+    or
+    // A conversion to a non-virtual base class adds the byte offset of that base class.
+    exists(ConvertToNonVirtualBaseInstruction toBase |
+      toBase = instr and
+      bitOffset = Ints::mul(toBase.getDerivation().getByteOffset(), 8)
+    )
+    or
+    // A conversion to a derived class subtracts the byte offset of the base class.
+    exists(ConvertToDerivedInstruction toDerived |
+      toDerived = instr and
+      bitOffset = Ints::neg(Ints::mul(toDerived.getDerivation().getByteOffset(), 8))
+    )
+    or
+    // A conversion to a virtual base class displaces the address by an unknown amount.
+    bitOffset = Ints::unknown() and
+    instr instanceof ConvertToVirtualBaseInstruction
+    or
+    // A conversion using `dynamic_cast` also results in an unknown offset.
+    bitOffset = Ints::unknown() and
+    instr instanceof CheckedConvertOrNullInstruction
+  )
+  or
+  // Some functions are modeled as passing the address in an argument through to an output.
+  bitOffset = 0 and
+  hasAddressFlowThroughCall(operand, instr)
+}
+
+private predicate operandEscapesNonReturn(Operand operand) {
+  // The use itself lets the address leave the domain of the analysis.
+  operandEscapesDomain(operand)
+  or
+  exists(Instruction instr |
+    // The address is propagated to the result of an instruction, and that result escapes.
+    operandIsPropagated(operand, _, instr) and resultEscapesNonReturn(instr)
+  )
+  or
+  // The operand is an argument bound to a parameter of the static call target: the address
+  // escapes if the parameter escapes, or if it may be returned and the call's result escapes.
+  exists(CallInstruction ci, Instruction init |
+    isArgumentForParameter(ci, operand, init) and
+    (
+      resultEscapesNonReturn(init)
+      or
+      resultMayReachReturn(init) and resultEscapesNonReturn(ci)
+    )
+  )
+  or
+  isOnlyEscapesViaReturnArgument(operand) and resultEscapesNonReturn(operand.getUse())
+  or
+  operand instanceof PhiInputOperand and resultEscapesNonReturn(operand.getUse())
+}
+
+private predicate operandMayReachReturn(Operand operand) {
+  // The address is returned directly.
+  operand.getUse() instanceof ReturnValueInstruction
+  or
+  // The address is propagated to the result of an instruction, and that result may be returned.
+  exists(Instruction instr |
+    operandIsPropagated(operand, _, instr) and
+    resultMayReachReturn(instr)
+  )
+  or
+  // The operand is an argument whose parameter may be returned by the call target, and the
+  // result of the call may itself be returned.
+  exists(CallInstruction ci, Instruction init |
+    isArgumentForParameter(ci, operand, init) and
+    resultMayReachReturn(ci) and
+    resultMayReachReturn(init)
+  )
+  or
+  isOnlyEscapesViaReturnArgument(operand) and resultMayReachReturn(operand.getUse())
+  or
+  operand instanceof PhiInputOperand and resultMayReachReturn(operand.getUse())
+}
+
+private predicate operandReturned(Operand operand, IntValue bitOffset) {
+  // The address is returned directly.
+  bitOffset = 0 and
+  operand.getUse() instanceof ReturnValueInstruction
+  or
+  // The address is propagated to the result of an instruction that is itself returned.
+  exists(Instruction instr, IntValue propagatedOffset, IntValue returnedOffset |
+    bitOffset = Ints::add(propagatedOffset, returnedOffset) and
+    operandIsPropagated(operand, propagatedOffset, instr) and
+    resultReturned(instr, returnedOffset)
+  )
+  or
+  // The operand is an argument whose parameter is returned, and the call result is returned.
+  exists(CallInstruction ci, Instruction init, IntValue calleeOffset, IntValue callerOffset |
+    bitOffset = Ints::add(calleeOffset, callerOffset) and
+    isArgumentForParameter(ci, operand, init) and
+    resultReturned(init, calleeOffset) and
+    resultReturned(ci, callerOffset)
+  )
+  or
+  bitOffset = Ints::unknown() and
+  isOnlyEscapesViaReturnArgument(operand) and
+  resultReturned(operand.getUse(), _)
+}
+
+private predicate isArgumentForParameter(
+  CallInstruction ci, Operand operand, InitializeParameterInstruction init
+) {
+  exists(Language::Function callee |
+    callee = ci.getStaticCallTarget() and
+    ci = operand.getUse() and
+    not callee instanceof AliasModels::AliasFunction and
+    not Language::isFunctionVirtual(callee) and
+    (
+      operand instanceof ThisArgumentOperand and
+      init.getIRVariable() instanceof IRThisVariable and
+      callee = unique( | | init.getEnclosingFunction())
+      or
+      init.getParameter() = callee.getParameter(operand.(PositionalArgumentOperand).getIndex())
+    )
+  )
+}
+
+private predicate isOnlyEscapesViaReturnArgument(Operand operand) {
+  exists(AliasModels::AliasFunction callee |
+    callee = operand.getUse().(CallInstruction).getStaticCallTarget() and
+    (
+      operand instanceof ThisArgumentOperand and
+      callee.parameterEscapesOnlyViaReturn(-1)
+      or
+      callee.parameterEscapesOnlyViaReturn(operand.(PositionalArgumentOperand).getIndex())
+    )
+  )
+}
+
+private predicate isNeverEscapesArgument(Operand operand) {
+  exists(AliasModels::AliasFunction callee |
+    callee = operand.getUse().(CallInstruction).getStaticCallTarget() and
+    (
+      operand instanceof ThisArgumentOperand and
+      callee.parameterNeverEscapes(-1)
+      or
+      callee.parameterNeverEscapes(operand.(PositionalArgumentOperand).getIndex())
+    )
+  )
+}
+
+private predicate resultReturned(Instruction instr, IntValue bitOffset) {
+  exists(Operand use | use = instr.getAUse() | operandReturned(use, bitOffset))
+}
+
+private predicate resultMayReachReturn(Instruction instr) { exists(Operand use | use = instr.getAUse() | operandMayReachReturn(use)) }
+
+/**
+ * Holds if any address held in the result of `instr` escapes outside the domain of the
+ * analysis.
+ */
+private predicate resultEscapesNonReturn(Instruction instr) {
+  // A result that is not modeled in SSA is taken to escape, because it is not known where
+  // that result might be used.
+  not instr.isResultModeled()
+  or
+  // A result also escapes when at least one of its uses escapes.
+  operandEscapesNonReturn(instr.getAUse())
+}
+
+/**
+ * Holds if the address of `allocation` escapes outside the domain of the analysis. This is the
+ * case when the allocation is marked as always escaping via `alwaysEscapes()`, or when sound
+ * escape analysis is requested and the result of one of its base instructions escapes.
+ */
+predicate allocationEscapes(Configuration::Allocation allocation) {
+  Configuration::phaseNeedsSoundEscapeAnalysis() and
+  resultEscapesNonReturn(allocation.getABaseInstruction())
+  or
+  exists(IREscapeAnalysisConfiguration config |
+    resultEscapesNonReturn(allocation.getABaseInstruction()) and config.useSoundEscapeAnalysis()
+  )
+  or
+  allocation.alwaysEscapes()
+}
+
+/**
+ * Like `operandIsPropagated()`, but also covers propagation from an argument to a call result.
+ */
+private predicate operandIsPropagatedIncludingByCall(
+  Operand operand, IntValue bitOffset, Instruction instr
+) {
+  exists(CallInstruction call, Instruction init |
+    call = instr and
+    resultReturned(init, bitOffset) and
+    isArgumentForParameter(call, operand, init)
+  )
+  or
+  operandIsPropagated(operand, bitOffset, instr)
+}
+
+/**
+ * Holds if the address in `addrOperand` is `bitOffset` bits away from the value of the
+ * instruction `base`.
+ * The offset may be `unknown()`.
+ */
+private predicate hasBaseAndOffset(AddressOperand addrOperand, Instruction base, IntValue bitOffset) {
+  // Base case: the definition of the operand itself, at offset zero.
+  bitOffset = 0 and base = addrOperand.getDef()
+  or
+  exists(Instruction mid, int offsetToMid, Operand midInput, IntValue offsetIntoMid |
+    // Recursive case: an offset from `mid` is already known...
+    hasBaseAndOffset(addrOperand, mid, offsetToMid) and
+    // ...and `mid` is propagated from an operand that is defined by `base`.
+    operandIsPropagatedIncludingByCall(midInput, offsetIntoMid, mid) and
+    base = midInput.getDef() and
+    bitOffset = Ints::add(offsetToMid, offsetIntoMid)
+  )
+}
+
+/**
+ * Holds if the address in `addrOperand` is a constant `bitOffset` bits away from the value of
+ * `base`. This only holds for the `base` with the longest chain of propagation to `addrOperand`.
+ */
+predicate addressOperandBaseAndConstantOffset(
+  AddressOperand addrOperand, Instruction base, int bitOffset
+) {
+  Ints::hasValue(bitOffset) and
+  hasBaseAndOffset(addrOperand, base, bitOffset) and
+  not exists(Instruction earlierBase, int earlierOffset |
+    earlierBase = base.getAnOperand().getDef() and
+    Ints::hasValue(earlierOffset) and
+    hasBaseAndOffset(addrOperand, earlierBase, earlierOffset)
+  )
+}
+
+/**
+ * Gets the allocation that `addrOperand` points into, if it is known.
+ */
+Configuration::Allocation getAddressOperandAllocation(AddressOperand addrOperand) {
+  exists(IntValue anyOffset | addressOperandAllocationAndOffset(addrOperand, result, anyOffset))
+}
+
+/**
+ * Holds if the address in `addrOperand` is `bitOffset` bits away from a base instruction of
+ * `allocation`, where `bitOffset` may be `unknown()`.
+ */
+predicate addressOperandAllocationAndOffset(
+  AddressOperand addrOperand, Configuration::Allocation allocation, IntValue bitOffset
+) {
+  exists(Instruction base |
+    hasBaseAndOffset(addrOperand, base, bitOffset) and
+    base = allocation.getABaseInstruction() and
+    not exists(Instruction earlierBase |
+      earlierBase = base.getAnOperand().getDef() and
+      hasBaseAndOffset(addrOperand, pragma[only_bind_out](earlierBase), _)
+    )
+  )
+}
+
+/**
+ * Predicates that exist solely to annotate printed IR dumps. Production queries should not
+ * depend on them.
+ */
+module Print {
+  string getOperandProperty(Operand operand, string key) {
+    key = "prop" and
+    result =
+      strictconcat(Instruction target, IntValue offset, string text |
+        operandIsPropagatedIncludingByCall(operand, offset, target) and
+        if target = operand.getUse()
+        then text = "@" + Ints::getBitOffsetString(offset) + "->result"
+        else text = "@" + Ints::getBitOffsetString(offset) + "->" + target.getResultId()
+      |
+        text, ", "
+      )
+    or
+    key = "alloc" and
+    result =
+      strictconcat(Configuration::Allocation alloc, IntValue offset |
+        addressOperandAllocationAndOffset(operand, alloc, offset)
+      |
+        alloc.toString() + Ints::getBitOffsetString(offset), ", "
+      )
+  }
+
+  string getInstructionProperty(Instruction instr, string key) {
+    key = "prop" and
+    result =
+      strictconcat(IntValue offset, Operand source, string text |
+        operandIsPropagatedIncludingByCall(source, offset, instr) and
+        if instr = source.getUse()
+        then text = source.getDumpId() + Ints::getBitOffsetString(offset) + "->@"
+        else
+          text =
+            source.getUse().getResultId() + "." + source.getDumpId() +
+              Ints::getBitOffsetString(offset) + "->@"
+      |
+        text, ", "
+      )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysisImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysisImports.qll
new file mode 100644
index 00000000000..c4aeaf93cce
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysisImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.IRConfiguration
+import semmle.code.cpp.ir.internal.IntegerConstant as Ints
+import semmle.code.cpp.models.interfaces.Alias as AliasModels
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysisInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysisInternal.qll
new file mode 100644
index 00000000000..08a563abc73
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysisInternal.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import semmle.code.cpp.ir.implementation.raw.IR as InputIR
+import AliasConfiguration as Configuration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasConfiguration.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasConfiguration.qll
new file mode 100644
index 00000000000..dbdd3c14c85
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasConfiguration.qll
@@ -0,0 +1,18 @@
+private import AliasConfigurationImports
+
+/**
+ * A memory allocation that can be tracked by the SimpleSSA alias analysis.
+ * All automatic variables are tracked.
+ */
+class Allocation extends IRAutomaticVariable {
+ VariableAddressInstruction getABaseInstruction() { result.getIRVariable() = this }
+
+ final string getAllocationString() { result = toString() }
+
+ predicate alwaysEscapes() {
+ // An automatic variable only escapes if its address is taken and escapes.
+ none()
+ }
+}
+
+predicate phaseNeedsSoundEscapeAnalysis() { any() }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasConfigurationImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasConfigurationImports.qll
new file mode 100644
index 00000000000..07cbc6308b7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasConfigurationImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.raw.IR
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRBlockImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRBlockImports.qll
new file mode 100644
index 00000000000..d1b46ed35c8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRBlockImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRFunctionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRFunctionImports.qll
new file mode 100644
index 00000000000..8ec63b7c1cb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRFunctionImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.implementation.internal.IRFunctionBase as IRFunctionBase
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRImports.qll
new file mode 100644
index 00000000000..42d6e7db693
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRImports.qll
@@ -0,0 +1,3 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRInternal.qll
new file mode 100644
index 00000000000..3a7a08accc0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRInternal.qll
@@ -0,0 +1,4 @@
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import SSAConstruction as Construction
+import semmle.code.cpp.ir.implementation.IRConfiguration as IRConfiguration
+import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction::Raw as Raw
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRVariableImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRVariableImports.qll
new file mode 100644
index 00000000000..8c60565defc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/IRVariableImports.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.TempVariableTag as TempVariableTag
+import semmle.code.cpp.ir.internal.IRUtilities as IRUtilities
+import semmle.code.cpp.ir.internal.TempVariableTag as TTempVariableTag
+import semmle.code.cpp.ir.implementation.internal.TIRVariable as TIRVariable
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/InstructionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/InstructionImports.qll
new file mode 100644
index 00000000000..946fd770e94
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/InstructionImports.qll
@@ -0,0 +1,6 @@
+import semmle.code.cpp.ir.implementation.EdgeKind as EdgeKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
+import semmle.code.cpp.ir.implementation.Opcode as Opcode
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.internal.Overlap as Overlap
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/OperandImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/OperandImports.qll
new file mode 100644
index 00000000000..d0e013d1fba
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/OperandImports.qll
@@ -0,0 +1,5 @@
+import semmle.code.cpp.ir.implementation.MemoryAccessKind as MemoryAccessKind
+import semmle.code.cpp.ir.implementation.IRType as IRType
+import semmle.code.cpp.ir.internal.Overlap as Overlap
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.implementation.internal.TOperand as TOperand
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/OperandInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/OperandInternal.qll
new file mode 100644
index 00000000000..80e06a381a1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/OperandInternal.qll
@@ -0,0 +1,2 @@
+private import semmle.code.cpp.ir.implementation.internal.TOperand
+import UnaliasedSSAOperands
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintAliasAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintAliasAnalysis.qll
new file mode 100644
index 00000000000..262088245e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintAliasAnalysis.qll
@@ -0,0 +1,19 @@
+/**
+ * Include this module to annotate IR dumps with information computed by `AliasAnalysis.qll`.
+ */
+
+private import AliasAnalysisInternal
+private import InputIR
+private import AliasAnalysisImports
+private import AliasAnalysis
+private import semmle.code.cpp.ir.internal.IntegerConstant
+
+private class AliasPropertyProvider extends IRPropertyProvider {
+ override string getOperandProperty(Operand operand, string key) {
+ result = Print::getOperandProperty(operand, key)
+ }
+
+ override string getInstructionProperty(Instruction instr, string key) {
+ result = Print::getInstructionProperty(instr, key)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintIRImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintIRImports.qll
new file mode 100644
index 00000000000..46254a6e3f2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintIRImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.IRConfiguration as IRConfiguration
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintSSA.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintSSA.qll
new file mode 100644
index 00000000000..72bb239c153
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintSSA.qll
@@ -0,0 +1,157 @@
+private import SSAConstructionInternal
+private import OldIR
+private import Alias
+private import SSAConstruction
+private import DebugSSA
+
+bindingset[offset]
+private string getKeySuffixForOffset(int offset) {
+ offset >= 0 and
+ if offset % 2 = 0 then result = "" else result = "_Chi"
+}
+
+bindingset[offset]
+private int getIndexForOffset(int offset) { offset >= 0 and result = offset / 2 }
+
+/**
+ * Property provider that dumps the memory access of each result. Useful for debugging SSA
+ * construction.
+ */
+class PropertyProvider extends IRPropertyProvider {
+ override string getInstructionProperty(Instruction instruction, string key) {
+ key = "ResultMemoryLocation" and
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getResultMemoryLocation(instruction)
+ |
+ loc.toString(), ","
+ )
+ or
+ key = "ResultVirtualVariable" and
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getResultMemoryLocation(instruction)
+ |
+ loc.getVirtualVariable().toString(), ","
+ )
+ or
+ key = "OperandMemoryLocation" and
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getOperandMemoryLocation(instruction.getAnOperand())
+ |
+ loc.toString(), ","
+ )
+ or
+ key = "OperandVirtualVariable" and
+ result =
+ strictconcat(MemoryLocation loc |
+ loc = getOperandMemoryLocation(instruction.getAnOperand())
+ |
+ loc.getVirtualVariable().toString(), ","
+ )
+ or
+ exists(MemoryLocation useLocation, IRBlock defBlock, int defRank, int defOffset |
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, defOffset) and
+ defBlock.getInstruction(getIndexForOffset(defOffset)) = instruction and
+ key = "DefinitionRank" + getKeySuffixForOffset(defOffset) + "[" + useLocation.toString() + "]" and
+ result = defRank.toString()
+ )
+ or
+ exists(MemoryLocation useLocation, IRBlock useBlock, int useRank |
+ hasUseAtRank(useLocation, useBlock, useRank, instruction) and
+ key = "UseRank[" + useLocation.toString() + "]" and
+ result = useRank.toString()
+ )
+ or
+ exists(MemoryLocation useLocation, IRBlock defBlock, int defRank, int defOffset |
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, defOffset) and
+ defBlock.getInstruction(getIndexForOffset(defOffset)) = instruction and
+ key =
+ "DefinitionReachesUse" + getKeySuffixForOffset(defOffset) + "[" + useLocation.toString() +
+ "]" and
+ result =
+ strictconcat(IRBlock useBlock, int useRank, int useIndex |
+ exists(Instruction useInstruction |
+ hasUseAtRank(useLocation, useBlock, useRank, useInstruction) and
+ useBlock.getInstruction(useIndex) = useInstruction and
+ definitionReachesUse(useLocation, defBlock, defRank, useBlock, useRank)
+ )
+ |
+ useBlock.getDisplayIndex().toString() + "_" + useIndex, ", "
+ order by
+ useBlock.getDisplayIndex(), useIndex
+ )
+ )
+ }
+
+ override string getBlockProperty(IRBlock block, string key) {
+ exists(MemoryLocation useLocation, int defRank, int defIndex |
+ hasDefinitionAtRank(useLocation, _, block, defRank, defIndex) and
+ defIndex = -1 and
+ key = "DefinitionRank(Phi)[" + useLocation.toString() + "]" and
+ result = defRank.toString()
+ )
+ or
+ exists(MemoryLocation useLocation, MemoryLocation defLocation, int defRank, int defIndex |
+ hasDefinitionAtRank(useLocation, defLocation, block, defRank, defIndex) and
+ defIndex = -1 and
+ key = "DefinitionReachesUse(Phi)[" + useLocation.toString() + "]" and
+ result =
+ strictconcat(IRBlock useBlock, int useRank, int useIndex |
+ exists(Instruction useInstruction |
+ hasUseAtRank(useLocation, useBlock, useRank, useInstruction) and
+ useBlock.getInstruction(useIndex) = useInstruction and
+ definitionReachesUse(useLocation, block, defRank, useBlock, useRank) and
+ exists(getOverlap(defLocation, useLocation))
+ )
+ |
+ useBlock.getDisplayIndex().toString() + "_" + useIndex, ", "
+ order by
+ useBlock.getDisplayIndex(), useIndex
+ )
+ )
+ or
+ exists(
+ MemoryLocation useLocation, IRBlock predBlock, IRBlock defBlock, int defIndex, Overlap overlap
+ |
+ hasPhiOperandDefinition(_, useLocation, block, predBlock, defBlock, defIndex) and
+ key =
+ "PhiUse[" + useLocation.toString() + " from " + predBlock.getDisplayIndex().toString() + "]" and
+ result =
+ defBlock.getDisplayIndex().toString() + "_" + defIndex + " (" + overlap.toString() + ")" // NOTE(review): `overlap` is not constrained by any conjunct here; presumably this yields one value per `Overlap` - confirm intended
+ )
+ or
+ key = "LiveOnEntry" and
+ result =
+ strictconcat(MemoryLocation useLocation |
+ locationLiveOnEntryToBlock(useLocation, block)
+ |
+ useLocation.toString(), ", " order by useLocation.toString()
+ )
+ or
+ key = "LiveOnExit" and
+ result =
+ strictconcat(MemoryLocation useLocation |
+ locationLiveOnExitFromBlock(useLocation, block)
+ |
+ useLocation.toString(), ", " order by useLocation.toString()
+ )
+ or
+ key = "DefsLiveOnEntry" and
+ result =
+ strictconcat(MemoryLocation defLocation |
+ definitionLiveOnEntryToBlock(defLocation, block)
+ |
+ defLocation.toString(), ", " order by defLocation.toString()
+ )
+ or
+ key = "DefsLiveOnExit" and
+ result =
+ strictconcat(MemoryLocation defLocation |
+ definitionLiveOnExitFromBlock(defLocation, block)
+ |
+ defLocation.toString(), ", " order by defLocation.toString()
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConsistency.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConsistency.ql
new file mode 100644
index 00000000000..25f9d5d454a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConsistency.ql
@@ -0,0 +1,8 @@
+/**
+ * @name Unaliased SSA Consistency Check
+ * @description Performs consistency checks on the SSA construction. This query should have no results.
+ * @kind table
+ * @id cpp/unaliased-ssa-consistency-check
+ */
+
+import SSAConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConsistency.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConsistency.qll
new file mode 100644
index 00000000000..5686bb439eb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConsistency.qll
@@ -0,0 +1,2 @@
+private import SSAConstruction as SSA
+import SSA::SSAConsistency
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll
new file mode 100644
index 00000000000..5092e921cb3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll
@@ -0,0 +1,1131 @@
+import SSAConstructionInternal
+private import SSAConstructionImports as Imports
+private import Imports::Opcode
+private import Imports::OperandTag
+private import Imports::Overlap
+private import Imports::TInstruction
+private import Imports::RawIR as RawIR
+private import SSAInstructions
+private import SSAOperands
+private import NewIR
+
+private class OldBlock = Reachability::ReachableBlock;
+
+private class OldInstruction = Reachability::ReachableInstruction;
+
+import Cached
+
+cached
+private module Cached {
+ cached
+ predicate hasPhiInstructionCached(
+ OldInstruction blockStartInstr, Alias::MemoryLocation defLocation
+ ) {
+ exists(OldBlock oldBlock |
+ definitionHasPhiNode(defLocation, oldBlock) and
+ blockStartInstr = oldBlock.getFirstInstruction()
+ )
+ }
+
+ cached
+ predicate hasChiInstructionCached(OldInstruction primaryInstruction) {
+ hasChiNode(_, primaryInstruction)
+ }
+
+ cached
+ predicate hasUnreachedInstructionCached(IRFunction irFunc) {
+ exists(OldInstruction oldInstruction |
+ irFunc = oldInstruction.getEnclosingIRFunction() and
+ Reachability::isInfeasibleInstructionSuccessor(oldInstruction, _)
+ )
+ }
+
+ class TStageInstruction =
+ TRawInstruction or TPhiInstruction or TChiInstruction or TUnreachedInstruction;
+
+ /**
+ * If `oldInstruction` is a `Phi` instruction that has exactly one reachable predecessor block,
+ * this predicate returns the `PhiInputOperand` corresponding to that predecessor block.
+ * Otherwise, this predicate does not hold.
+ */
+ private OldIR::PhiInputOperand getDegeneratePhiOperand(OldInstruction oldInstruction) {
+ result =
+ unique(OldIR::PhiInputOperand operand |
+ operand = oldInstruction.(OldIR::PhiInstruction).getAnInputOperand() and
+ operand.getPredecessorBlock() instanceof OldBlock
+ )
+ }
+
+ cached
+ predicate hasInstruction(TStageInstruction instr) {
+ instr instanceof TRawInstruction and instr instanceof OldInstruction
+ or
+ instr = phiInstruction(_, _)
+ or
+ instr = reusedPhiInstruction(_) and
+ // Check that the phi instruction is *not* degenerate. We can't use getDegeneratePhiOperand
+ // in the first stage with phi instructions, so its `unique` aggregate is repeated inline.
+ not exists(
+ unique(OldIR::PhiInputOperand operand |
+ operand = instr.(OldIR::PhiInstruction).getAnInputOperand() and
+ operand.getPredecessorBlock() instanceof OldBlock
+ )
+ )
+ or
+ instr instanceof TChiInstruction
+ or
+ instr instanceof TUnreachedInstruction
+ }
+
+ cached
+ IRBlock getNewBlock(OldBlock oldBlock) {
+ exists(Instruction newEnd, OldIR::Instruction oldEnd |
+ (
+ result.getLastInstruction() = newEnd and
+ not newEnd instanceof ChiInstruction
+ or
+ newEnd = result.getLastInstruction().(ChiInstruction).getAPredecessor() // does this work?
+ ) and
+ (
+ oldBlock.getLastInstruction() = oldEnd and
+ not oldEnd instanceof OldIR::ChiInstruction
+ or
+ oldEnd = oldBlock.getLastInstruction().(OldIR::ChiInstruction).getAPredecessor() // does this work?
+ ) and
+ oldEnd = getNewInstruction(newEnd)
+ )
+ }
+
+ /**
+ * Gets the block from the old IR that corresponds to `newBlock`.
+ */
+ private OldBlock getOldBlock(IRBlock newBlock) { getNewBlock(result) = newBlock }
+
+ /**
+ * Holds if this iteration of SSA can model the def/use information for the result of
+ * `oldInstruction`, either because alias analysis has determined a memory location for that
+ * result, or because a previous iteration of the IR already computed that def/use information
+ * completely.
+ */
+ private predicate canModelResultForOldInstruction(OldInstruction oldInstruction) {
+ // We're modeling the result's memory location ourselves.
+ exists(Alias::getResultMemoryLocation(oldInstruction))
+ or
+ // This result was already modeled by a previous iteration of SSA.
+ Alias::canReuseSSAForOldResult(oldInstruction)
+ }
+
+ cached
+ predicate hasModeledMemoryResult(Instruction instruction) {
+ canModelResultForOldInstruction(getOldInstruction(instruction)) or
+ instruction instanceof PhiInstruction or // Phis always have modeled results
+ instruction instanceof ChiInstruction // Chis always have modeled results
+ }
+
+ cached
+ predicate hasConflatedMemoryResult(Instruction instruction) {
+ instruction instanceof AliasedDefinitionInstruction
+ or
+ instruction.getOpcode() instanceof Opcode::InitializeNonLocal
+ or
+ // Chi instructions track virtual variables, and therefore a chi instruction is
+ // conflated if it's associated with the aliased virtual variable.
+ exists(OldInstruction oldInstruction | instruction = getChi(oldInstruction) |
+ Alias::getResultMemoryLocation(oldInstruction).getVirtualVariable() instanceof
+ Alias::AliasedVirtualVariable
+ )
+ or
+ // Phi instructions track locations, and therefore a phi instruction is
+ // conflated if it's associated with a conflated location.
+ exists(Alias::MemoryLocation location |
+ instruction = getPhi(_, location) and
+ not exists(location.getAllocation())
+ )
+ }
+
+ cached
+ Instruction getRegisterOperandDefinition(Instruction instruction, RegisterOperandTag tag) {
+ exists(OldInstruction oldInstruction, OldIR::RegisterOperand oldOperand |
+ oldInstruction = getOldInstruction(instruction) and
+ oldOperand = oldInstruction.getAnOperand() and
+ tag = oldOperand.getOperandTag() and
+ result = getNewInstruction(oldOperand.getAnyDef())
+ )
+ }
+
+ pragma[noopt]
+ private predicate hasMemoryOperandDefinition(
+ OldInstruction oldInstruction, OldIR::NonPhiMemoryOperand oldOperand, Overlap overlap,
+ Instruction instr
+ ) {
+ oldOperand = oldInstruction.getAnOperand() and
+ oldOperand instanceof OldIR::NonPhiMemoryOperand and
+ exists(
+ OldBlock useBlock, int useRank, Alias::MemoryLocation useLocation,
+ Alias::MemoryLocation defLocation, OldBlock defBlock, int defRank, int defOffset,
+ Alias::MemoryLocation actualDefLocation
+ |
+ useLocation = Alias::getOperandMemoryLocation(oldOperand) and
+ hasUseAtRank(useLocation, useBlock, useRank, oldInstruction) and
+ definitionReachesUse(useLocation, defBlock, defRank, useBlock, useRank) and
+ hasDefinitionAtRank(useLocation, defLocation, defBlock, defRank, defOffset) and
+ instr = getDefinitionOrChiInstruction(defBlock, defOffset, defLocation, actualDefLocation) and
+ overlap = Alias::getOverlap(actualDefLocation, useLocation)
+ )
+ }
+
+ /**
+ * Gets the new definition instruction for `oldOperand` based on `oldOperand`'s definition in the
+ * old IR. Usually, this will just get the old definition of `oldOperand` and map it to the
+ * corresponding new instruction. However, if the old definition of `oldOperand` is a `Phi`
+ * instruction that is now degenerate due to all but one of its predecessor branches being
+ * unreachable, this predicate will recurse through any degenerate `Phi` instructions to find the
+ * true definition.
+ */
+ private Instruction getNewDefinitionFromOldSSA(OldIR::MemoryOperand oldOperand, Overlap overlap) {
+ exists(Overlap originalOverlap |
+ originalOverlap = oldOperand.getDefinitionOverlap() and
+ (
+ result = getNewInstruction(oldOperand.getAnyDef()) and
+ overlap = originalOverlap
+ or
+ exists(OldIR::PhiInputOperand phiOperand, Overlap phiOperandOverlap |
+ phiOperand = getDegeneratePhiOperand(oldOperand.getAnyDef()) and
+ result = getNewDefinitionFromOldSSA(phiOperand, phiOperandOverlap) and
+ overlap =
+ combineOverlap(pragma[only_bind_out](phiOperandOverlap),
+ pragma[only_bind_out](originalOverlap))
+ )
+ )
+ )
+ }
+
+ cached
+ private Instruction getMemoryOperandDefinition0(
+ Instruction instruction, MemoryOperandTag tag, Overlap overlap
+ ) {
+ exists(OldInstruction oldInstruction, OldIR::NonPhiMemoryOperand oldOperand |
+ oldInstruction = getOldInstruction(instruction) and
+ oldOperand = oldInstruction.getAnOperand() and
+ tag = oldOperand.getOperandTag() and
+ hasMemoryOperandDefinition(oldInstruction, oldOperand, overlap, result)
+ )
+ or
+ instruction = getChi(getOldInstruction(result)) and
+ tag instanceof ChiPartialOperandTag and
+ overlap instanceof MustExactlyOverlap
+ or
+ tag instanceof ChiTotalOperandTag and
+ result = getChiInstructionTotalOperand(instruction) and
+ overlap instanceof MustExactlyOverlap
+ }
+
+ cached
+ Instruction getMemoryOperandDefinition(
+ Instruction instruction, MemoryOperandTag tag, Overlap overlap
+ ) {
+ // getMemoryOperandDefinition0 currently has a bug where it can match with multiple overlaps.
+ // This predicate ensures that the chosen overlap is the most conservative if there's any doubt.
+ result = getMemoryOperandDefinition0(instruction, tag, overlap) and
+ not (
+ overlap instanceof MustExactlyOverlap and
+ exists(MustTotallyOverlap o | exists(getMemoryOperandDefinition0(instruction, tag, o)))
+ )
+ or
+ exists(OldIR::NonPhiMemoryOperand oldOperand |
+ result = getNewDefinitionFromOldSSA(oldOperand, overlap) and
+ oldOperand.getUse() = instruction and
+ tag = oldOperand.getOperandTag()
+ )
+ }
+
+ /**
+ * Holds if the partial operand of this `ChiInstruction` updates the bit range
+ * `[startBitOffset, endBitOffset)` of the total operand.
+ */
+ cached
+ predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBitOffset, int endBitOffset) {
+ exists(Alias::MemoryLocation location, OldInstruction oldInstruction |
+ oldInstruction = getOldInstruction(chi.getPartial()) and
+ location = Alias::getResultMemoryLocation(oldInstruction) and
+ startBitOffset = Alias::getStartBitOffset(location) and
+ endBitOffset = Alias::getEndBitOffset(location)
+ )
+ }
+
+ /**
+ * Holds if `operand` totally overlaps with its definition and consumes the bit range
+ * `[startBitOffset, endBitOffset)`.
+ */
+ cached
+ predicate getUsedInterval(NonPhiMemoryOperand operand, int startBitOffset, int endBitOffset) {
+ exists(Alias::MemoryLocation location, OldIR::NonPhiMemoryOperand oldOperand |
+ oldOperand = operand.getUse().(OldInstruction).getAnOperand() and
+ location = Alias::getOperandMemoryLocation(oldOperand) and
+ startBitOffset = Alias::getStartBitOffset(location) and
+ endBitOffset = Alias::getEndBitOffset(location)
+ )
+ }
+
+ /**
+ * Holds if the `ChiPartialOperand` only partially overlaps with the `ChiTotalOperand`.
+ * This means that the `ChiPartialOperand` will not override the entire memory associated
+ * with the `ChiTotalOperand`.
+ */
+ cached
+ predicate chiOnlyPartiallyUpdatesLocation(ChiInstruction chi) {
+ exists(Alias::MemoryLocation location, OldInstruction oldInstruction |
+ oldInstruction = getOldInstruction(chi.getPartial()) and
+ location = Alias::getResultMemoryLocation(oldInstruction)
+ |
+ Alias::getStartBitOffset(location) != 0 or
+ Alias::getEndBitOffset(location) != 8 * location.getType().getByteSize()
+ )
+ }
+
+ /**
+ * Holds if `instr` is part of a cycle in the operand graph that doesn't go
+ * through a phi instruction and therefore should be impossible.
+ *
+ * For performance reasons, this predicate is not implemented (never holds)
+ * for the SSA stages of the IR.
+ */
+ cached
+ predicate isInCycle(Instruction instr) { none() }
+
+ cached
+ Language::LanguageType getInstructionOperandType(Instruction instr, TypedOperandTag tag) {
+ exists(OldInstruction oldInstruction, OldIR::TypedOperand oldOperand |
+ oldInstruction = getOldInstruction(instr) and
+ oldOperand = oldInstruction.getAnOperand() and
+ tag = oldOperand.getOperandTag() and
+ result = oldOperand.getLanguageType()
+ )
+ }
+
+ /**
+ * Gets the new definition instruction for the operand of `instr` that flows from the block
+ * `newPredecessorBlock`, based on that operand's definition in the old IR.
+ */
+ private Instruction getNewPhiOperandDefinitionFromOldSSA(
+ Instruction instr, IRBlock newPredecessorBlock, Overlap overlap
+ ) {
+ exists(OldIR::PhiInstruction oldPhi, OldIR::PhiInputOperand oldOperand |
+ oldPhi = getOldInstruction(instr) and
+ oldOperand = oldPhi.getInputOperand(getOldBlock(newPredecessorBlock)) and
+ result = getNewDefinitionFromOldSSA(oldOperand, overlap)
+ )
+ }
+
+ pragma[noopt]
+ cached
+ Instruction getPhiOperandDefinition(
+ Instruction instr, IRBlock newPredecessorBlock, Overlap overlap
+ ) {
+ exists(
+ Alias::MemoryLocation defLocation, Alias::MemoryLocation useLocation, OldBlock phiBlock,
+ OldBlock predBlock, OldBlock defBlock, int defOffset, Alias::MemoryLocation actualDefLocation
+ |
+ hasPhiOperandDefinition(defLocation, useLocation, phiBlock, predBlock, defBlock, defOffset) and
+ instr = getPhi(phiBlock, useLocation) and
+ newPredecessorBlock = getNewBlock(predBlock) and
+ result = getDefinitionOrChiInstruction(defBlock, defOffset, defLocation, actualDefLocation) and
+ overlap = Alias::getOverlap(actualDefLocation, useLocation)
+ )
+ or
+ result = getNewPhiOperandDefinitionFromOldSSA(instr, newPredecessorBlock, overlap)
+ }
+
+ cached
+ Instruction getChiInstructionTotalOperand(ChiInstruction chiInstr) {
+ exists(
+ Alias::VirtualVariable vvar, OldInstruction oldInstr, Alias::MemoryLocation defLocation,
+ OldBlock defBlock, int defRank, int defOffset, OldBlock useBlock, int useRank
+ |
+ chiInstr = getChi(oldInstr) and
+ vvar = Alias::getResultMemoryLocation(oldInstr).getVirtualVariable() and
+ hasDefinitionAtRank(vvar, defLocation, defBlock, defRank, defOffset) and
+ hasUseAtRank(vvar, useBlock, useRank, oldInstr) and
+ definitionReachesUse(vvar, defBlock, defRank, useBlock, useRank) and
+ result = getDefinitionOrChiInstruction(defBlock, defOffset, vvar, _)
+ )
+ }
+
+ cached
+ Instruction getPhiInstructionBlockStart(PhiInstruction instr) {
+ exists(OldBlock oldBlock |
+ (
+ instr = getPhi(oldBlock, _)
+ or
+ // Any `Phi` that we propagated from the previous iteration stays in the same block.
+ getOldInstruction(instr).getBlock() = oldBlock
+ ) and
+ result = getNewInstruction(oldBlock.getFirstInstruction())
+ )
+ }
+
+ /*
+ * This adds Chi nodes to the instruction successor relation; if an instruction has a Chi node,
+ * that node is its successor in the new successor relation, and the Chi node's successors are
+ * the new instructions generated from the successors of the old instruction
+ */
+
+ cached
+ Instruction getInstructionSuccessor(Instruction instruction, EdgeKind kind) {
+ if hasChiNode(_, getOldInstruction(instruction))
+ then
+ result = getChi(getOldInstruction(instruction)) and
+ kind instanceof GotoEdge
+ else (
+ exists(OldInstruction oldInstruction |
+ oldInstruction = getOldInstruction(instruction) and
+ (
+ if Reachability::isInfeasibleInstructionSuccessor(oldInstruction, kind)
+ then result = unreachedInstruction(instruction.getEnclosingIRFunction())
+ else result = getNewInstruction(oldInstruction.getSuccessor(kind))
+ )
+ )
+ or
+ exists(OldInstruction oldInstruction |
+ instruction = getChi(oldInstruction) and
+ result = getNewInstruction(oldInstruction.getSuccessor(kind))
+ )
+ )
+ }
+
+ cached
+ Instruction getInstructionBackEdgeSuccessor(Instruction instruction, EdgeKind kind) {
+ exists(OldInstruction oldInstruction |
+ not Reachability::isInfeasibleInstructionSuccessor(oldInstruction, kind) and
+ // There is only one case for the translation into `result` because the
+ // SSA construction never inserts extra instructions _before_ an existing
+ // instruction.
+ getOldInstruction(result) = oldInstruction.getBackEdgeSuccessor(kind) and
+ // There are two cases for the translation into `instruction` because the
+ // SSA construction might have inserted a chi node _after_
+ // `oldInstruction`, in which case the back edge should come out of the
+ // chi node instead.
+ if hasChiNode(_, oldInstruction)
+ then instruction = getChi(oldInstruction)
+ else instruction = getNewInstruction(oldInstruction)
+ )
+ }
+
+ cached
+ Language::AST getInstructionAST(Instruction instr) {
+ result = getOldInstruction(instr).getAST()
+ or
+ exists(RawIR::Instruction blockStartInstr |
+ instr = phiInstruction(blockStartInstr, _) and
+ result = blockStartInstr.getAST()
+ )
+ or
+ exists(RawIR::Instruction primaryInstr |
+ instr = chiInstruction(primaryInstr) and
+ result = primaryInstr.getAST()
+ )
+ or
+ exists(IRFunctionBase irFunc |
+ instr = unreachedInstruction(irFunc) and result = irFunc.getFunction()
+ )
+ }
+
+  cached
+  Language::LanguageType getInstructionResultType(Instruction instr) {
+    instr = unreachedInstruction(_) and result = Language::getVoidType()
+    or
+    instr = reusedPhiInstruction(_) and
+    result = instr.(OldInstruction).getResultLanguageType()
+    or
+    // A `Chi` has the type of the virtual variable it (re)defines.
+    exists(Instruction primary, Alias::VirtualVariable vvar |
+      hasChiNode(vvar, primary) and
+      instr = chiInstruction(primary) and
+      result = vvar.getType()
+    )
+    or
+    exists(Alias::MemoryLocation phiLocation |
+      instr = phiInstruction(_, phiLocation) and result = phiLocation.getType()
+    )
+    or
+    result = instr.(RawIR::Instruction).getResultLanguageType()
+  }
+
+  /**
+   * Holds if `instr` performs the operation identified by `opcode`.
+   *
+   * `opcode` is deliberately the first parameter: that ordering gives a clean join (no
+   * reordering needed) in the characteristic predicates of the `Instruction` subclasses.
+   */
+  cached
+  predicate getInstructionOpcode(Opcode opcode, Instruction instr) {
+    instr = unreachedInstruction(_) and opcode instanceof Opcode::Unreached
+    or
+    instr = chiInstruction(_) and opcode instanceof Opcode::Chi
+    or
+    instr = phiInstruction(_, _) and opcode instanceof Opcode::Phi
+    or
+    opcode = getOldInstruction(instr).getOpcode()
+  }
+
+  cached
+  IRFunctionBase getInstructionEnclosingIRFunction(Instruction instr) {
+    instr = unreachedInstruction(result)
+    or
+    exists(OldInstruction primary |
+      result = primary.getEnclosingIRFunction() and instr = chiInstruction(primary)
+    )
+    or
+    exists(OldInstruction blockStart |
+      result = blockStart.getEnclosingIRFunction() and
+      instr = phiInstruction(blockStart, _)
+    )
+    or
+    result = getOldInstruction(instr).getEnclosingIRFunction()
+  }
+
+  cached
+  Instruction getPrimaryInstructionForSideEffect(Instruction instruction) {
+    exists(OldIR::Instruction primary |
+      result = getNewInstruction(primary) and
+      instruction = getChi(primary)
+    )
+    or
+    exists(OldIR::SideEffectInstruction sideEffect |
+      result = getNewInstruction(sideEffect.getPrimaryInstruction()) and
+      sideEffect = getOldInstruction(instruction)
+    )
+  }
+}
+
+private Instruction getNewInstruction(OldInstruction instr) { instr = getOldInstruction(result) }
+
+private OldInstruction getOldInstruction(Instruction instr) { result = instr }
+
+private ChiInstruction getChi(OldInstruction primaryInstr) { chiInstruction(primaryInstr) = result }
+
+private PhiInstruction getPhi(OldBlock defBlock, Alias::MemoryLocation defLocation) {
+  phiInstruction(defBlock.getFirstInstruction(), defLocation) = result
+}
+
+/**
+ * Holds if a `Chi` instruction must be inserted after instruction `def` because `def` only partially defines the
+ * virtual variable `vvar`. The `Chi` instruction supplies a definition of the whole virtual variable that the
+ * original definition location belongs to.
+ */
+private predicate hasChiNode(Alias::VirtualVariable vvar, OldInstruction def) {
+  exists(Alias::MemoryLocation written |
+    // A definition that totally (or exactly) overlaps the virtual variable already defines all of it, so only a
+    // partial overlap calls for a `Chi` instruction.
+    Alias::getOverlap(written, vvar) instanceof MayPartiallyOverlap and
+    written.getVirtualVariable() = vvar and
+    written = Alias::getResultMemoryLocation(def)
+  )
+}
+
+private import PhiInsertion
+
+/**
+ * Module that decides at which blocks `Phi` instructions must be inserted. A `Phi` instruction for a location is
+ * placed at the start of a block when that block lies on the dominance frontier of a definition of the location
+ * and some use of the location can be reached from the block with no intervening definition of the
+ * location.
+ * Within that scheme, a location is handled slightly differently depending on whether it is a virtual variable.
+ * For a virtual variable, a `Phi` instruction is inserted on the dominance frontier if a use of any member
+ * location of that virtual variable is reachable from the `Phi` instruction. For a location that is not a virtual
+ * variable, a `Phi` instruction is inserted only if an exactly-overlapping use of the location is reachable from
+ * the `Phi` instruction. As a result, a non-virtual variable gets a `Phi` instruction only when that lets dataflow
+ * analysis obtain a more precise result than it would get from the `Phi` instruction for the virtual variable as
+ * a whole.
+ */
+private module PhiInsertion {
+  /**
+   * Holds if some block containing a definition of memory location `defLocation` has `phiBlock` in its
+   * dominance frontier.
+   */
+  pragma[noinline]
+  private predicate dominanceFrontierOfDefinition(
+    Alias::MemoryLocation defLocation, OldBlock phiBlock
+  ) {
+    exists(OldBlock definingBlock |
+      definitionHasDefinitionInBlock(defLocation, definingBlock) and
+      phiBlock = Dominance::getDominanceFrontier(definingBlock)
+    )
+  }
+
+  /**
+   * Holds if block `phiBlock` must start with a `Phi` instruction for location `defLocation`.
+   */
+  predicate definitionHasPhiNode(Alias::MemoryLocation defLocation, OldBlock phiBlock) {
+    // A `Phi` is pointless where the definition is not live on entry, so such blocks are filtered out.
+    definitionLiveOnEntryToBlock(defLocation, phiBlock) and
+    dominanceFrontierOfDefinition(defLocation, phiBlock)
+  }
+
+  /**
+   * Holds if block `block` contains a definition of memory location `defLocation`, whether that definition
+   * comes from an existing instruction, a `Phi` node, or a `Chi` node.
+   */
+  private predicate definitionHasDefinitionInBlock(Alias::MemoryLocation defLocation, OldBlock block) {
+    exists(OldInstruction writer, Alias::MemoryLocation written |
+      written = Alias::getResultMemoryLocation(writer) and
+      writer.getBlock() = block and
+      (
+        // Any definition of a member location of a virtual variable either totally overlaps the virtual
+        // variable or generates a `Chi` node that defines it. In both cases it counts as a definition of the
+        // virtual variable.
+        defLocation = written.getVirtualVariable()
+        or
+        defLocation = written
+      )
+    )
+    or
+    definitionHasPhiNode(defLocation, block)
+  }
+
+  /**
+   * Holds if the instruction at (`block`, `index`) contains a use that could consume the result of a `Phi`
+   * instruction for `defLocation`.
+   */
+  private predicate definitionHasUse(Alias::MemoryLocation defLocation, OldBlock block, int index) {
+    exists(OldInstruction consumer |
+      consumer = block.getInstruction(index) and
+      if defLocation instanceof Alias::VirtualVariable
+      then (
+        // The `Chi` instruction of a partial definition consumes the virtual variable enclosing its location.
+        hasChiNode(defLocation, consumer)
+        or
+        // Any use of a member location of the virtual variable counts as a use of the virtual variable.
+        exists(Alias::MemoryLocation accessed |
+          defLocation = accessed.getVirtualVariable() and
+          accessed = Alias::getOperandMemoryLocation(consumer.getAnOperand())
+        )
+      ) else (
+        // A location that is not a virtual variable is only used by an exactly-overlapping use of itself.
+        Alias::getOverlap(defLocation, defLocation) instanceof MustExactlyOverlap and
+        defLocation = Alias::getOperandMemoryLocation(consumer.getAnOperand())
+      )
+    )
+  }
+
+  /**
+   * Holds if the instruction at (`block`, `index`) redefines the location `defLocation`. A "redefinition" is a
+   * definition that stops an earlier definition of `defLocation` from being consumed as the operand of a `Phi`
+   * node occurring after the redefinition.
+   */
+  private predicate definitionHasRedefinition(
+    Alias::MemoryLocation defLocation, OldBlock block, int index
+  ) {
+    exists(OldInstruction writer, Alias::MemoryLocation written |
+      written = Alias::getResultMemoryLocation(writer) and
+      writer = block.getInstruction(index) and
+      if defLocation instanceof Alias::VirtualVariable
+      then
+        // A definition of a virtual variable may be consumed by a use of any member location of that virtual
+        // variable, so it stays live until the entire virtual variable is redefined, i.e. until a definition
+        // whose overlap with it is something other than a partial overlap.
+        exists(Overlap overlap |
+          not overlap instanceof MayPartiallyOverlap and
+          overlap = Alias::getOverlap(written, defLocation)
+        )
+      else
+        // A definition of any other location may only be consumed by an exactly-overlapping use of the same
+        // location, so it stays live only until the next definition of any location that may overlap the
+        // original definition location.
+        exists(Alias::getOverlap(written, defLocation))
+    )
+  }
+
+  /**
+   * Holds if the definition `defLocation` is live on entry to block `block`, i.e. if at least one use of that
+   * definition occurs before any intervening instruction that redefines the definition location.
+   */
+  predicate definitionLiveOnEntryToBlock(Alias::MemoryLocation defLocation, OldBlock block) {
+    // Nothing in the block redefines the location, and it is still live when the block exits.
+    not definitionHasRedefinition(defLocation, block, _) and
+    definitionLiveOnExitFromBlock(defLocation, block)
+    or
+    // Within the block, the earliest access (use or redefinition) of the location is a use.
+    exists(int earliest |
+      earliest = min(int index |
+          definitionHasRedefinition(defLocation, block, index) or
+          definitionHasUse(defLocation, block, index)
+        ) and
+      definitionHasUse(defLocation, block, earliest)
+    )
+  }
+
+  /** Holds if the definition `defLocation` is live on exit from block `block`. */
+  pragma[noinline]
+  predicate definitionLiveOnExitFromBlock(Alias::MemoryLocation defLocation, OldBlock block) {
+    // Live on exit means live on entry to at least one feasible successor of `block`.
+    exists(OldBlock succ |
+      succ = block.getAFeasibleSuccessor() and definitionLiveOnEntryToBlock(defLocation, succ)
+    )
+  }
+}
+
+private import DefUse
+
+/**
+ * Module containing the predicates that connect uses to their reaching definition. The reaching definitions are
+ * computed separately for each unique use `MemoryLocation`. An instruction is treated as a definition of a use location
+ * if the defined location overlaps the use location in any way. Thus, a single instruction may serve as a definition
+ * for multiple use locations, since a single definition location may overlap many use locations.
+ *
+ * Definitions and uses are identified by a block and an integer "offset". An offset of -1 indicates the definition
+ * from a `Phi` instruction at the beginning of the block. An offset of 2*i indicates a definition or use on the
+ * instruction at index `i` in the block. An offset of 2*i+1 indicates a definition or use on the `Chi` instruction that
+ * will be inserted immediately after the instruction at index `i` in the block.
+ *
+ * For a given use location, each definition and use is also assigned a "rank" within its block. The rank is simply the
+ * one-based index of that definition or use within the list of definitions and uses of that location within the block,
+ * ordered by offset. The rank allows the various reachability predicates to be computed more efficiently than they
+ * would if based solely on offset, since the set of possible ranks is dense while the set of possible offsets is
+ * potentially very sparse.
+ */
+module DefUse {
+ /**
+ * Gets the `Instruction` (original instruction, `Chi`, or `Phi`) for the definition at offset `defOffset` in block `defBlock`.
+ */
+ Instruction getDefinitionOrChiInstruction(
+ OldBlock defBlock, int defOffset, Alias::MemoryLocation defLocation,
+ Alias::MemoryLocation actualDefLocation
+ ) {
+ exists(OldInstruction oldInstr, int oldOffset |
+ oldInstr = defBlock.getInstruction(oldOffset) and
+ oldOffset >= 0
+ |
+ // An odd offset corresponds to the `Chi` instruction.
+ defOffset = oldOffset * 2 + 1 and
+ result = getChi(oldInstr) and
+ (
+ defLocation = Alias::getResultMemoryLocation(oldInstr) or
+ defLocation = Alias::getResultMemoryLocation(oldInstr).getVirtualVariable()
+ ) and
+ actualDefLocation = defLocation.getVirtualVariable()
+ or
+ // An even offset corresponds to the original instruction.
+ defOffset = oldOffset * 2 and
+ result = getNewInstruction(oldInstr) and
+ (
+ defLocation = Alias::getResultMemoryLocation(oldInstr) or
+ defLocation = Alias::getResultMemoryLocation(oldInstr).getVirtualVariable()
+ ) and
+ actualDefLocation = defLocation
+ )
+ or
+ defOffset = -1 and
+ hasDefinition(_, defLocation, defBlock, defOffset) and
+ result = getPhi(defBlock, defLocation) and
+ actualDefLocation = defLocation
+ }
+
+ /**
+ * Gets the rank index of a hypothetical use one instruction past the end of
+ * the block. This index can be used to determine if a definition reaches the
+ * end of the block, even if the definition is the last instruction in the
+ * block.
+ */
+ private int exitRank(Alias::MemoryLocation useLocation, OldBlock block) {
+ result = max(int rankIndex | defUseRank(useLocation, block, rankIndex, _)) + 1
+ }
+
+ /**
+ * Holds if a definition that overlaps `useLocation` at (`defBlock`, `defRank`) reaches the use of `useLocation` at
+ * (`useBlock`, `useRank`) without any intervening definitions that overlap `useLocation`, where `defBlock` and
+ * `useBlock` are the same block.
+ */
+ private predicate definitionReachesUseWithinBlock(
+ Alias::MemoryLocation useLocation, OldBlock defBlock, int defRank, OldBlock useBlock,
+ int useRank
+ ) {
+ defBlock = useBlock and
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, _) and
+ hasUseAtRank(useLocation, useBlock, useRank, _) and
+ definitionReachesRank(useLocation, defBlock, defRank, useRank)
+ }
+
+ /**
+ * Holds if a definition that overlaps `useLocation` at (`defBlock`, `defRank`) reaches the use of `useLocation` at
+ * (`useBlock`, `useRank`) without any intervening definitions that overlap `useLocation`.
+ */
+ predicate definitionReachesUse(
+ Alias::MemoryLocation useLocation, OldBlock defBlock, int defRank, OldBlock useBlock,
+ int useRank
+ ) {
+ hasUseAtRank(useLocation, useBlock, useRank, _) and
+ (
+ definitionReachesUseWithinBlock(useLocation, defBlock, defRank, useBlock, useRank)
+ or
+ definitionReachesEndOfBlock(useLocation, defBlock, defRank, useBlock.getAFeasiblePredecessor()) and
+ not definitionReachesUseWithinBlock(useLocation, useBlock, _, useBlock, useRank)
+ )
+ }
+
+ /**
+ * Holds if the definition that overlaps `useLocation` at `(block, defRank)` reaches the rank
+ * index `reachesRank` in block `block`.
+ */
+ private predicate definitionReachesRank(
+ Alias::MemoryLocation useLocation, OldBlock block, int defRank, int reachesRank
+ ) {
+ // The def always reaches the next use, even if there is also a def on the
+ // use instruction.
+ hasDefinitionAtRank(useLocation, _, block, defRank, _) and
+ reachesRank = defRank + 1
+ or
+ // If the def reached the previous rank, it also reaches the current rank,
+ // unless there was another def at the previous rank.
+ exists(int prevRank |
+ reachesRank = prevRank + 1 and
+ definitionReachesRank(useLocation, block, defRank, prevRank) and
+ not prevRank = exitRank(useLocation, block) and
+ not hasDefinitionAtRank(useLocation, _, block, prevRank, _)
+ )
+ }
+
+ /**
+ * Holds if the definition that overlaps `useLocation` at `(defBlock, defRank)` reaches the end of
+ * block `block` without any intervening definitions that overlap `useLocation`.
+ */
+ predicate definitionReachesEndOfBlock(
+ Alias::MemoryLocation useLocation, OldBlock defBlock, int defRank, OldBlock block
+ ) {
+ hasDefinitionAtRank(useLocation, _, defBlock, defRank, _) and
+ (
+ // If we're looking at the def's own block, just see if it reaches the exit
+ // rank of the block.
+ block = defBlock and
+ locationLiveOnExitFromBlock(useLocation, defBlock) and
+ definitionReachesRank(useLocation, defBlock, defRank, exitRank(useLocation, defBlock))
+ or
+ exists(OldBlock idom |
+ definitionReachesEndOfBlock(useLocation, defBlock, defRank, idom) and
+ noDefinitionsSinceIDominator(useLocation, idom, block)
+ )
+ )
+ }
+
+ pragma[noinline]
+ private predicate noDefinitionsSinceIDominator(
+ Alias::MemoryLocation useLocation, OldBlock idom, OldBlock block
+ ) {
+ Dominance::blockImmediatelyDominates(idom, block) and // It is sufficient to traverse the dominator graph, cf. discussion above.
+ locationLiveOnExitFromBlock(useLocation, block) and
+ not hasDefinition(useLocation, _, block, _)
+ }
+
+ /**
+ * Holds if the specified `useLocation` is live on entry to `block`. This holds if there is a use of `useLocation`
+ * that is reachable from the start of `block` without passing through a definition that overlaps `useLocation`.
+ * Note that even a partially-overlapping definition blocks liveness, because such a definition will insert a `Chi`
+ * instruction whose result totally overlaps the location.
+ */
+ predicate locationLiveOnEntryToBlock(Alias::MemoryLocation useLocation, OldBlock block) {
+ definitionHasPhiNode(useLocation, block)
+ or
+ exists(int firstAccess |
+ hasUse(useLocation, block, firstAccess, _) and
+ firstAccess =
+ min(int offset |
+ hasUse(useLocation, block, offset, _)
+ or
+ hasNonPhiDefinition(useLocation, _, block, offset)
+ )
+ )
+ or
+ locationLiveOnExitFromBlock(useLocation, block) and
+ not hasNonPhiDefinition(useLocation, _, block, _)
+ }
+
+ /**
+ * Holds if the specified `useLocation` is live on exit from `block`.
+ */
+ pragma[noinline]
+ predicate locationLiveOnExitFromBlock(Alias::MemoryLocation useLocation, OldBlock block) {
+ locationLiveOnEntryToBlock(useLocation, block.getAFeasibleSuccessor())
+ }
+
+ /**
+ * Holds if there is a definition at offset `offset` in block `block` that overlaps memory location `useLocation`.
+ * This predicate does not include definitions for Phi nodes.
+ */
+ private predicate hasNonPhiDefinition(
+ Alias::MemoryLocation useLocation, Alias::MemoryLocation defLocation, OldBlock block, int offset
+ ) {
+ exists(OldInstruction def, Overlap overlap, int index |
+ defLocation = Alias::getResultMemoryLocation(def) and
+ block.getInstruction(index) = def and
+ overlap = Alias::getOverlap(defLocation, useLocation) and
+ if overlap instanceof MayPartiallyOverlap
+ then offset = (index * 2) + 1 // The use will be connected to the definition on the `Chi` instruction.
+ else offset = index * 2 // The use will be connected to the definition on the original instruction.
+ )
+ }
+
+ /**
+ * Holds if there is a definition at offset `offset` in block `block` that overlaps memory location `useLocation`.
+ * This predicate includes definitions for Phi nodes (at offset -1).
+ */
+ private predicate hasDefinition(
+ Alias::MemoryLocation useLocation, Alias::MemoryLocation defLocation, OldBlock block, int offset
+ ) {
+ (
+ // If there is a Phi node for the use location itself, treat that as a definition at offset -1.
+ offset = -1 and
+ if definitionHasPhiNode(useLocation, block)
+ then defLocation = useLocation
+ else (
+ definitionHasPhiNode(defLocation, block) and
+ defLocation = useLocation.getVirtualVariable() and
+ // Handle the unusual case where a virtual variable does not overlap one of its member
+ // locations. For example, a definition of the virtual variable representing all aliased
+ // memory does not overlap a use of a string literal, because the contents of a string
+ // literal can never be redefined. The string literal's location could still be a member of
+ // the `AliasedVirtualVariable` due to something like:
+ // ```
+ // char s[10];
+ // strcpy(s, p);
+ // const char* p = b ? "SomeLiteral" : s;
+ // return p[3];
+ // ```
+ // In the above example, `p[3]` may access either the string literal or the local variable
+ // `s`, so both of those locations must be members of the `AliasedVirtualVariable`.
+ exists(Alias::getOverlap(defLocation, useLocation))
+ )
+ )
+ or
+ hasNonPhiDefinition(useLocation, defLocation, block, offset)
+ }
+
+ /**
+ * Holds if there is a definition at offset `offset` in block `block` that overlaps memory location `useLocation`.
+ * `rankIndex` is the rank of the definition as computed by `defUseRank()`.
+ */
+ predicate hasDefinitionAtRank(
+ Alias::MemoryLocation useLocation, Alias::MemoryLocation defLocation, OldBlock block,
+ int rankIndex, int offset
+ ) {
+ hasDefinition(useLocation, defLocation, block, offset) and
+ defUseRank(useLocation, block, rankIndex, offset)
+ }
+
+ /**
+ * Holds if there is a use of `useLocation` on instruction `use` at offset `offset` in block `block`.
+ */
+ private predicate hasUse(
+ Alias::MemoryLocation useLocation, OldBlock block, int offset, OldInstruction use
+ ) {
+ exists(int index |
+ block.getInstruction(index) = use and
+ (
+ // A direct use of the location.
+ useLocation = Alias::getOperandMemoryLocation(use.getAnOperand()) and offset = index * 2
+ or
+ // A `Chi` instruction will include a use of the virtual variable.
+ hasChiNode(useLocation, use) and offset = (index * 2) + 1
+ )
+ )
+ }
+
+ /**
+ * Holds if there is a use of memory location `useLocation` on instruction `use` in block `block`. `rankIndex` is the
+ * rank of the use as computed by `defUseRank`.
+ */
+ predicate hasUseAtRank(
+ Alias::MemoryLocation useLocation, OldBlock block, int rankIndex, OldInstruction use
+ ) {
+ exists(int offset |
+ hasUse(useLocation, block, offset, use) and
+ defUseRank(useLocation, block, rankIndex, offset)
+ )
+ }
+
+ /**
+ * Holds if there is a definition at offset `offset` in block `block` that overlaps memory location `useLocation`, or
+ * a use of `useLocation` at offset `offset` in block `block`. `rankIndex` is the sequence number of the definition
+ * or use within `block`, counting only uses of `useLocation` and definitions that overlap `useLocation`.
+ */
+ private predicate defUseRank(
+ Alias::MemoryLocation useLocation, OldBlock block, int rankIndex, int offset
+ ) {
+ offset =
+ rank[rankIndex](int j |
+ hasDefinition(useLocation, _, block, j) or hasUse(useLocation, block, j, _)
+ )
+ }
+
+ /**
+ * Holds if the `Phi` instruction for location `useLocation` at the beginning of block `phiBlock` has an operand along
+ * the incoming edge from `predBlock`, where that operand's definition is at offset `defOffset` in block `defBlock`.
+ */
+ pragma[noopt]
+ predicate hasPhiOperandDefinition(
+ Alias::MemoryLocation defLocation, Alias::MemoryLocation useLocation, OldBlock phiBlock,
+ OldBlock predBlock, OldBlock defBlock, int defOffset
+ ) {
+ exists(int defRank |
+ definitionHasPhiNode(useLocation, phiBlock) and
+ predBlock = phiBlock.getAFeasiblePredecessor() and
+ definitionReachesEndOfBlock(useLocation, defBlock, defRank, predBlock) and
+ hasDefinitionAtRank(useLocation, defLocation, defBlock, defRank, defOffset) and
+ exists(Alias::getOverlap(defLocation, useLocation))
+ )
+ }
+}
+
+predicate canReuseSSAForMemoryResult(Instruction instruction) {
+  // There is no case for `Chi` instructions: reusing SSA is unsupported for any location that could create one.
+  exists(Alias::MemoryLocation phiLocation |
+    // The result of a `Phi` for a reusable location is itself reusable.
+    phiLocation.canReuseSSA() and
+    instruction = phiInstruction(_, phiLocation)
+  )
+  or
+  exists(OldInstruction oldInstr |
+    oldInstr = getOldInstruction(instruction) and
+    (
+      // The current alias analysis considers the result location reusable.
+      Alias::getResultMemoryLocation(oldInstr).canReuseSSA()
+      or
+      // The previous iteration already marked the result as reusable, so that verdict is kept.
+      Alias::canReuseSSAForOldResult(oldInstr)
+    )
+  )
+}
+
+/**
+ * Exposes some of the internal predicates to PrintSSA.qll. This is done by publicly importing the relevant modules
+ * in the `DebugSSA` module, which PrintSSA then imports.
+ */
+module DebugSSA {
+  import DefUse
+  import PhiInsertion
+}
+
+import CachedForDebugging
+
+cached
+private module CachedForDebugging {
+  cached
+  string getTempVariableUniqueId(IRTempVariable var) {
+    exists(OldIR::IRTempVariable oldVar | oldVar = getOldTempVariable(var) | result = oldVar.getUniqueId())
+  }
+
+  cached
+  string getInstructionUniqueId(Instruction instr) {
+    instr = unreachedInstruction(_) and
+    result = "Unreached"
+    or
+    exists(Alias::MemoryLocation phiLocation, OldBlock block, string specificity |
+      (
+        // "g" sorts before "s", which puts the Phi nodes for virtual variables ahead of the Phi nodes for
+        // member locations.
+        if phiLocation instanceof Alias::VirtualVariable then specificity = "g" else specificity = "s"
+      ) and
+      instr = getPhi(block, phiLocation) and
+      result =
+        "Phi Block(" + block.getFirstInstruction().getUniqueId() + ")[" + specificity + "]: " +
+          phiLocation.getUniqueId()
+    )
+    or
+    exists(OldInstruction oldInstr |
+      result = "NonSSA: " + oldInstr.getUniqueId() and
+      oldInstr = getOldInstruction(instr)
+    )
+  }
+
+  private OldIR::IRTempVariable getOldTempVariable(IRTempVariable var) {
+    var.getTag() = result.getTag() and
+    var.getAST() = result.getAST() and
+    var.getEnclosingFunction() = result.getEnclosingFunction()
+  }
+
+  cached
+  predicate instructionHasSortKeys(Instruction instr, int key1, int key2) {
+    // The `Unreached` instruction gets the largest possible value for both keys.
+    instr instanceof TUnreachedInstruction and
+    key1 = maxValue() and
+    key2 = maxValue()
+    or
+    exists(OldInstruction oldInstr |
+      oldInstr.hasSortKeys(key1, key2) and oldInstr = getOldInstruction(instr)
+    )
+  }
+
+  /**
+   * Gets the largest value that an `int` can represent.
+   */
+  cached
+  int maxValue() { 2147483647 = result }
+}
+
+module SSAConsistency {
+  /**
+   * Holds if alias analysis assigned more than one `MemoryLocation` to the `MemoryOperand` `operand`.
+   */
+  query predicate multipleOperandMemoryLocations(
+    OldIR::MemoryOperand operand, string message, OldIR::IRFunction func, string funcText
+  ) {
+    exists(int n |
+      n = strictcount(Alias::getOperandMemoryLocation(operand)) and
+      n > 1 and
+      message =
+        operand.getUse().toString() + " " + "Operand has " + n.toString() +
+          " memory accesses in function '$@': " +
+          strictconcat(Alias::getOperandMemoryLocation(operand).toString(), ", ") and
+      funcText = Language::getIdentityString(func.getFunction()) and
+      func = operand.getEnclosingIRFunction()
+    )
+  }
+
+  /**
+   * Holds if no `VirtualVariable` is associated with the `MemoryLocation` `location`.
+   */
+  query predicate missingVirtualVariableForMemoryLocation(
+    Alias::MemoryLocation location, string message, OldIR::IRFunction func, string funcText
+  ) {
+    message = "Memory location has no virtual variable in function '$@'." and
+    funcText = Language::getIdentityString(func.getFunction()) and
+    func = location.getIRFunction() and
+    not exists(location.getVirtualVariable())
+  }
+
+  /**
+   * Holds if the `MemoryLocation` `location` belongs to more than one `VirtualVariable`.
+   */
+  query predicate multipleVirtualVariablesForMemoryLocation(
+    Alias::MemoryLocation location, string message, OldIR::IRFunction func, string funcText
+  ) {
+    exists(int n, string vvarList |
+      n = strictcount(location.getVirtualVariable()) and
+      n > 1 and
+      vvarList =
+        concat(Alias::VirtualVariable vvar | vvar = location.getVirtualVariable() |
+          vvar.toString(), ", "
+        ) and
+      message =
+        "Memory location has " + n.toString() + " virtual variables in function '$@': (" + vvarList +
+          ")." and
+      funcText = Language::getIdentityString(func.getFunction()) and
+      func = location.getIRFunction()
+    )
+  }
+}
+
+/**
+ * Provides the part of the parameterized IR interface that is needed to construct the SSA stages
+ * of the IR; the raw stage of the IR does not expose these predicates.
+ * Every predicate here is merely an alias for a predicate defined in the `Cached` module, which
+ * ensures that all of SSA construction is evaluated in the same stage.
+ */
+module SSA {
+  class MemoryLocation = Alias::MemoryLocation;
+
+  predicate hasUnreachedInstruction = Cached::hasUnreachedInstructionCached/1;
+
+  predicate hasChiInstruction = Cached::hasChiInstructionCached/1;
+
+  predicate hasPhiInstruction = Cached::hasPhiInstructionCached/2;
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionImports.qll
new file mode 100644
index 00000000000..219180d9f4d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionImports.qll
@@ -0,0 +1,6 @@
+import semmle.code.cpp.ir.implementation.Opcode as Opcode
+import semmle.code.cpp.ir.implementation.internal.OperandTag as OperandTag
+import semmle.code.cpp.ir.internal.Overlap as Overlap
+import semmle.code.cpp.ir.implementation.internal.TInstruction as TInstruction
+import semmle.code.cpp.ir.implementation.raw.IR as RawIR
+import semmle.code.cpp.ir.implementation.internal.TOperand as TOperand
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionInternal.qll
new file mode 100644
index 00000000000..70d44e03267
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionInternal.qll
@@ -0,0 +1,9 @@
+import semmle.code.cpp.ir.implementation.raw.IR as OldIR
+import semmle.code.cpp.ir.implementation.raw.internal.reachability.ReachableBlock as Reachability
+import semmle.code.cpp.ir.implementation.raw.internal.reachability.Dominance as Dominance
+import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as NewIR
+import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as RawStage
+import semmle.code.cpp.ir.implementation.internal.TInstruction::UnaliasedSSAInstructions as SSAInstructions
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
+import SimpleSSA as Alias
+import semmle.code.cpp.ir.implementation.internal.TOperand::UnaliasedSSAOperands as SSAOperands
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll
new file mode 100644
index 00000000000..f3e02c9f6a8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll
@@ -0,0 +1,108 @@
+import AliasAnalysis
+private import SimpleSSAImports
+import SimpleSSAPublicImports
+private import AliasConfiguration
+
+private predicate isTotalAccess(Allocation var, AddressOperand addrOperand, IRType type) {
+  // Total access: `type` is the IR type of `var` and the address is a base of `var` at offset 0.
+  type = var.getIRType() and
+  exists(Instruction base |
+    base = var.getABaseInstruction() and
+    addressOperandBaseAndConstantOffset(addrOperand, base, 0)
+  )
+}
+
+/**
+ * Holds if the specified variable should be modeled in SSA form. For unaliased SSA, we only model a
+ * variable if its address never escapes and all reads and writes of that variable access the entire
+ * variable using the original type of the variable.
+ */
+predicate isVariableModeled(Allocation var) {
+ not allocationEscapes(var) and
+ forall(Instruction instr, AddressOperand addrOperand, IRType type | // every instruction whose result address resolves to `var`
+ addrOperand = instr.getResultAddressOperand() and
+ type = instr.getResultIRType() and
+ var = getAddressOperandAllocation(addrOperand)
+ |
+ isTotalAccess(var, addrOperand, type) and not instr.hasResultMayMemoryAccess() // must be a total access and not a "may" access
+ ) and
+ forall(MemoryOperand memOperand, AddressOperand addrOperand, IRType type | // every memory operand whose address resolves to `var`
+ addrOperand = memOperand.getAddressOperand() and
+ type = memOperand.getIRType() and
+ var = getAddressOperandAllocation(addrOperand)
+ |
+ isTotalAccess(var, addrOperand, type) and not memOperand.hasMayReadMemoryAccess() // must be a total access and not a "may" read
+ )
+}
+
+/**
+ * Holds if the SSA use/def chain for the specified variable can be safely reused by later
+ * iterations of SSA construction. This will hold only if we modeled the variable soundly, so that
+ * subsequent iterations will recompute SSA for any variable that we assumed did not escape, but
+ * actually would have escaped if we had used a sound escape analysis.
+ */
+predicate canReuseSSAForVariable(IRAutomaticVariable var) {
+ isVariableModeled(var) and
+ not allocationEscapes(var) // NOTE: `isVariableModeled`, as written in this file, already requires this conjunct
+}
+
+private newtype TMemoryLocation = MkMemoryLocation(Allocation var) { isVariableModeled(var) } // one branch value per modeled allocation
+
+private MemoryLocation getMemoryLocation(Allocation var) { result.getAllocation() = var } // reverse lookup: allocation -> its `MemoryLocation`
+
+class MemoryLocation extends TMemoryLocation { // a memory location that wraps exactly one modeled `Allocation`
+ Allocation var; // the allocation carried by the `MkMemoryLocation` branch
+
+ MemoryLocation() { this = MkMemoryLocation(var) }
+
+ final string toString() { result = var.getAllocationString() }
+
+ final Allocation getAllocation() { result = var }
+
+ final Language::Location getLocation() { result = var.getLocation() }
+
+ final IRFunction getIRFunction() { result = var.getEnclosingIRFunction() }
+
+ final VirtualVariable getVirtualVariable() { result = this } // each location is its own virtual variable
+
+ final Language::LanguageType getType() { result = var.getLanguageType() }
+
+ final string getUniqueId() { result = var.getUniqueId() }
+
+ final predicate canReuseSSA() { canReuseSSAForVariable(var) } // delegates to the file-level predicate
+}
+
+predicate canReuseSSAForOldResult(Instruction instr) { none() } // never holds: the body is `none()`
+
+/**
+ * Represents a set of `MemoryLocation`s that cannot overlap with
+ * `MemoryLocation`s outside of the set. The `VirtualVariable` will be
+ * represented by a `MemoryLocation` that totally overlaps all other
+ * `MemoryLocation`s in the set.
+ */
+class VirtualVariable extends MemoryLocation { }
+
+/** A virtual variable that groups all escaped memory within a function. */
+class AliasedVirtualVariable extends VirtualVariable {
+ AliasedVirtualVariable() { none() } // `none()`: this class has no instances as defined here
+}
+
+Overlap getOverlap(MemoryLocation def, MemoryLocation use) {
+ def = use and result instanceof MustExactlyOverlap // the only producing case: identical locations overlap exactly
+ or
+ none() // Avoid compiler error in SSAConstruction
+}
+
+MemoryLocation getResultMemoryLocation(Instruction instr) {
+  result.getAllocation() = getAddressOperandAllocation(instr.getResultAddressOperand())
+}
+
+MemoryLocation getOperandMemoryLocation(MemoryOperand operand) {
+  result.getAllocation() = getAddressOperandAllocation(operand.getAddressOperand())
+}
+
+/** Gets the start bit offset of a `MemoryLocation`, if any. Has no result here: the body is `none()`. */
+int getStartBitOffset(MemoryLocation location) { none() }
+
+/** Gets the end bit offset of a `MemoryLocation`, if any. Has no result here: the body is `none()`. */
+int getEndBitOffset(MemoryLocation location) { none() }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSAImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSAImports.qll
new file mode 100644
index 00000000000..33d2bbbadec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSAImports.qll
@@ -0,0 +1,4 @@
+import semmle.code.cpp.ir.implementation.raw.IR
+import semmle.code.cpp.ir.internal.IntegerConstant as Ints
+import semmle.code.cpp.ir.implementation.internal.OperandTag
+import semmle.code.cpp.ir.internal.IRCppLanguage as Language
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSAPublicImports.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSAPublicImports.qll
new file mode 100644
index 00000000000..94e6fbf2a22
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSAPublicImports.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.Overlap
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/Dominance.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/Dominance.qll
new file mode 100644
index 00000000000..cddc3e23d7e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/Dominance.qll
@@ -0,0 +1,22 @@
+private import DominanceInternal
+
+predicate blockImmediatelyDominates(Graph::Block dominator, Graph::Block block) =
+ idominance(Graph::isEntryBlock/1, Graph::blockSuccessor/2)(_, dominator, block) // higher-order `idominance` over `Graph`'s entry blocks and successor edges
+
+predicate blockStrictlyDominates(Graph::Block dominator, Graph::Block block) {
+ blockImmediatelyDominates+(dominator, block) // one or more immediate-dominance steps (transitive closure)
+}
+
+predicate blockDominates(Graph::Block dominator, Graph::Block block) {
+  dominator = block or blockStrictlyDominates(dominator, block)
+}
+
+Graph::Block getDominanceFrontier(Graph::Block dominator) {
+ Graph::blockSuccessor(dominator, result) and // base case: a direct successor of `dominator` ...
+ not blockImmediatelyDominates(dominator, result) // ... that `dominator` does not immediately dominate
+ or
+ exists(Graph::Block prev | result = getDominanceFrontier(prev) | // recursive case: a frontier block of `prev` ...
+ blockImmediatelyDominates(dominator, prev) and // ... where `dominator` immediately dominates `prev` ...
+ not blockImmediatelyDominates(dominator, result) // ... but does not immediately dominate the frontier block
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/DominanceInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/DominanceInternal.qll
new file mode 100644
index 00000000000..cee8fa1543b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/DominanceInternal.qll
@@ -0,0 +1,9 @@
+private import ReachableBlock as Reachability
+
+private module ReachabilityGraph = Reachability::Graph; // NOTE(review): this alias is not referenced in the lines that follow
+
+module Graph {
+ import Reachability::Graph
+
+ class Block = Reachability::ReachableBlock; // the block type used by the dominance predicates is `ReachableBlock`
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/PrintDominance.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/PrintDominance.qll
new file mode 100644
index 00000000000..f26565bc278
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/PrintDominance.qll
@@ -0,0 +1,22 @@
+private import DominanceInternal
+private import ReachableBlockInternal
+private import Dominance
+import IR
+
+private class DominancePropertyProvider extends IRPropertyProvider { // contributes dominance information as per-block properties
+ override string getBlockProperty(IRBlock block, string key) {
+ exists(IRBlock dominator |
+ blockImmediatelyDominates(dominator, block) and
+ key = "ImmediateDominator" and
+ result = "Block " + dominator.getDisplayIndex().toString() // value: "Block <display index of the immediate dominator>"
+ )
+ or
+ key = "DominanceFrontier" and
+ result =
+ strictconcat(IRBlock frontierBlock | // `strictconcat` has no result when there is no frontier block
+ frontierBlock = getDominanceFrontier(block)
+ |
+ frontierBlock.getDisplayIndex().toString(), ", " order by frontierBlock.getDisplayIndex() // comma-separated display indices in ascending order
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/PrintReachableBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/PrintReachableBlock.qll
new file mode 100644
index 00000000000..6befad72336
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/PrintReachableBlock.qll
@@ -0,0 +1,17 @@
+private import ReachableBlockInternal
+private import ReachableBlock
+import IR
+
+private class ReachableBlockPropertyProvider extends IRPropertyProvider { // contributes reachability information as per-block properties
+ override string getBlockProperty(IRBlock block, string key) {
+ not block instanceof ReachableBlock and // blocks that are not `ReachableBlock`s get "Unreachable" = "true"
+ key = "Unreachable" and
+ result = "true"
+ or
+ exists(EdgeKind kind |
+ isInfeasibleEdge(block, kind) and
+ key = "Infeasible(" + kind.toString() + ")" and // one property per infeasible outgoing edge kind
+ result = "true"
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/ReachableBlock.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/ReachableBlock.qll
new file mode 100644
index 00000000000..25a53bbefe8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/ReachableBlock.qll
@@ -0,0 +1,53 @@
+private import ReachableBlockInternal
+private import IR
+private import ConstantAnalysis
+
+predicate isInfeasibleInstructionSuccessor(Instruction instr, EdgeKind kind) { // holds only for conditional branches whose condition has a constant value
+ exists(int conditionValue |
+ conditionValue = getConstantValue(instr.(ConditionalBranchInstruction).getCondition()) and
+ if conditionValue = 0 then kind instanceof TrueEdge else kind instanceof FalseEdge // constant 0 rules out the True edge; any other constant rules out the False edge
+ )
+}
+
+pragma[noinline]
+predicate isInfeasibleEdge(IRBlockBase block, EdgeKind kind) {
+ isInfeasibleInstructionSuccessor(block.getLastInstruction(), kind) // an edge is infeasible when the block's last instruction cannot take it
+}
+
+private IRBlock getAFeasiblePredecessorBlock(IRBlock successor) {
+  // A predecessor qualifies when some edge kind from it to `successor` is not infeasible.
+  exists(EdgeKind edge | successor = result.getSuccessor(edge) |
+    not isInfeasibleEdge(result, edge)
+  )
+}
+
+private predicate isBlockReachable(IRBlock block) {
+  getAFeasiblePredecessorBlock*(block) = any(IRFunction func).getEntryBlock()
+}
+
+/**
+ * An IR block that is reachable from the entry block of the function, considering only feasible
+ * edges.
+ */
+class ReachableBlock extends IRBlockBase {
+ ReachableBlock() { isBlockReachable(this) }
+
+ final ReachableBlock getAFeasiblePredecessor() { result = getAFeasiblePredecessorBlock(this) } // result is typed `ReachableBlock`, so it is itself reachable
+
+ final ReachableBlock getAFeasibleSuccessor() { this = getAFeasiblePredecessorBlock(result) } // inverse direction of `getAFeasiblePredecessor`
+}
+
+/**
+ * An instruction whose enclosing block is a `ReachableBlock`.
+ */
+class ReachableInstruction extends Instruction {
+  ReachableInstruction() { exists(ReachableBlock reachable | reachable = this.getBlock()) }
+}
+
+module Graph {
+  predicate isEntryBlock(ReachableBlock block) { block = any(IRFunction func).getEntryBlock() }
+
+  predicate blockSuccessor(ReachableBlock pred, ReachableBlock succ) {
+    pred.getAFeasibleSuccessor() = succ
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/ReachableBlockInternal.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/ReachableBlockInternal.qll
new file mode 100644
index 00000000000..61dc1a50399
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/reachability/ReachableBlockInternal.qll
@@ -0,0 +1,2 @@
+import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as IR
+import semmle.code.cpp.ir.implementation.unaliased_ssa.constant.ConstantAnalysis as ConstantAnalysis
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/ASTValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/ASTValueNumbering.qll
new file mode 100644
index 00000000000..dcc013fd387
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/ASTValueNumbering.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an implementation of Global Value Numbering.
+ * See https://en.wikipedia.org/wiki/Global_value_numbering
+ *
+ * The predicate `globalValueNumber` converts an expression into a `GVN`,
+ * which is an abstract type representing the value of the expression. If
+ * two expressions have the same `GVN` then they compute the same value.
+ * For example:
+ *
+ * ```
+ * void f(int x, int y) {
+ * g(x+y, x+y);
+ * }
+ * ```
+ *
+ * In this example, both arguments in the call to `g` compute the same value,
+ * so both arguments have the same `GVN`. In other words, we can find
+ * this call with the following query:
+ *
+ * ```
+ * from FunctionCall call, GVN v
+ * where v = globalValueNumber(call.getArgument(0))
+ * and v = globalValueNumber(call.getArgument(1))
+ * select call
+ * ```
+ *
+ * The analysis is conservative, so two expressions might have different
+ * `GVN`s even though they actually always compute the same value. The most
+ * common reason for this is that the analysis cannot prove that there
+ * are no side-effects that might cause the computed value to change.
+ */
+
+import cpp
+private import semmle.code.cpp.ir.implementation.aliased_ssa.gvn.internal.ValueNumberingInternal
+private import semmle.code.cpp.ir.IR
+
+/**
+ * A Global Value Number. A GVN is an abstract representation of the value
+ * computed by an expression. The relationship between `Expr` and `GVN` is
+ * many-to-one: every `Expr` has exactly one `GVN`, but multiple
+ * expressions can have the same `GVN`. If two expressions have the same
+ * `GVN`, it means that they compute the same value at run time. The `GVN`
+ * is an opaque value, so you cannot deduce what the run-time value of an
+ * expression will be from its `GVN`. The only use for the `GVN` of an
+ * expression is to find other expressions that compute the same value.
+ * Use the predicate `globalValueNumber` to get the `GVN` for an `Expr`.
+ *
+ * Note: `GVN` has `toString` and `getLocation` methods, so that it can be
+ * displayed in a results list. These work by picking an arbitrary
+ * expression with this `GVN` and using its `toString` and `getLocation`
+ * methods.
+ */
+class GVN extends TValueNumber {
+ pragma[noinline]
+ GVN() {
+ exists(Instruction instr | // keep only value numbers of instructions that have an unconverted result expression
+ this = tvalueNumber(instr) and exists(instr.getUnconvertedResultExpression())
+ )
+ }
+
+ private Instruction getAnInstruction() { this = tvalueNumber(result) }
+
+ final string toString() { result = "GVN" }
+
+ final string getDebugString() { result = strictconcat(getAnExpr().toString(), ", ") } // comma-separated `toString()` of the expressions with this GVN
+
+ final Location getLocation() {
+ if exists(Expr e | e = getAnExpr() and not e.getLocation() instanceof UnknownLocation)
+ then // at least one expression has a known location: take the minimum under the ordering below
+ result =
+ min(Location l |
+ l = getAnExpr().getLocation() and not l instanceof UnknownLocation
+ |
+ l
+ order by
+ l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
+ l.getEndColumn()
+ )
+ else result instanceof UnknownDefaultLocation // no expression with a known location
+ }
+
+ final string getKind() { // names the `TValueNumber` branch this value belongs to
+ this instanceof TVariableAddressValueNumber and result = "VariableAddress"
+ or
+ this instanceof TInitializeParameterValueNumber and result = "InitializeParameter"
+ or
+ this instanceof TStringConstantValueNumber and result = "StringConstant"
+ or
+ this instanceof TFieldAddressValueNumber and result = "FieldAddress"
+ or
+ this instanceof TBinaryValueNumber and result = "Binary"
+ or
+ this instanceof TPointerArithmeticValueNumber and result = "PointerArithmetic"
+ or
+ this instanceof TUnaryValueNumber and result = "Unary"
+ or
+ this instanceof TInheritanceConversionValueNumber and result = "InheritanceConversion"
+ or
+ this instanceof TLoadTotalOverlapValueNumber and result = "LoadTotalOverlap"
+ or
+ this instanceof TUniqueValueNumber and result = "Unique"
+ }
+
+ /** Gets an expression that has this GVN. Equivalent to `getAnUnconvertedExpr()`. */
+ Expr getAnExpr() { result = getAnUnconvertedExpr() }
+
+ /** Gets the unconverted result expression of an instruction that has this GVN. */
+ Expr getAnUnconvertedExpr() { result = getAnInstruction().getUnconvertedResultExpression() }
+
+ /** Gets the converted result expression of an instruction that has this GVN. */
+ Expr getAConvertedExpr() { result = getAnInstruction().getConvertedResultExpression() }
+}
+
+/** Gets the `GVN` that has `e` among its expressions (see `GVN.getAnExpr()`). */
+GVN globalValueNumber(Expr e) { result.getAnExpr() = e }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/CppType.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/CppType.qll
new file mode 100644
index 00000000000..2ce23f098a2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/CppType.qll
@@ -0,0 +1,558 @@
+private import cpp
+private import semmle.code.cpp.Print
+private import semmle.code.cpp.ir.implementation.IRType
+private import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction::Raw as Raw
+
+private int getPointerSize() { result = max(any(NullPointerType t).getSize()) } // largest size reported by any `NullPointerType`
+
+/**
+ * Works around an extractor bug where a function reference gets a size of one byte. Also gives pointer-based sizes to pointer-to-member types and to arrays without a declared size; has no result for any other type.
+ */
+private int getTypeSizeWorkaround(Type type) {
+ exists(Type unspecifiedType |
+ unspecifiedType = type.getUnspecifiedType() and
+ (
+ unspecifiedType instanceof FunctionReferenceType and
+ result = getPointerSize()
+ or
+ exists(PointerToMemberType ptmType |
+ ptmType = unspecifiedType and
+ (
+ if ptmType.getBaseType().getUnspecifiedType() instanceof RoutineType
+ then result = getPointerSize() * 2 // pointer to member whose base type is a routine: two pointers wide
+ else result = getPointerSize() // any other pointer to member: one pointer wide
+ )
+ )
+ or
+ exists(ArrayType arrayType |
+ // Treat `T[]` as `T*`.
+ arrayType = unspecifiedType and
+ not arrayType.hasArraySize() and
+ result = getPointerSize()
+ )
+ )
+ )
+}
+
+private int getTypeSize(Type type) {
+  result = getTypeSizeWorkaround(type)
+  or
+  not exists(getTypeSizeWorkaround(type)) and result = type.getSize()
+}
+
+/**
+ * Holds if an `IRErrorType` should exist.
+ */
+predicate hasErrorType() { exists(ErroneousType t) }
+
+/**
+ * Holds if an `IRBooleanType` with the specified `byteSize` should exist.
+ */
+predicate hasBooleanType(int byteSize) { byteSize = getTypeSize(any(BoolType type)) }
+
+private predicate isSigned(IntegralOrEnumType type) {
+ type.(IntegralType).isSigned()
+ or
+ exists(Enum enumType |
+ // If the enum has an explicit underlying type, we'll determine signedness from that. If not,
+ // we'll assume unsigned. The actual rules for the implicit underlying type of an enum vary
+ // between compilers, so we'll need an extractor change to get this 100% right. Until then,
+ // unsigned is a reasonable default.
+ enumType = type.getUnspecifiedType() and
+ enumType.getExplicitUnderlyingType().getUnspecifiedType().(IntegralType).isSigned()
+ )
+}
+
+private predicate isSignedIntegerType(IntegralOrEnumType type) {
+  not type instanceof BoolType and isSigned(type)
+}
+
+private predicate isUnsignedIntegerType(IntegralOrEnumType type) {
+  not type instanceof BoolType and not isSigned(type)
+}
+
+/**
+ * Holds if an `IRSignedIntegerType` with the specified `byteSize` should exist.
+ */
+predicate hasSignedIntegerType(int byteSize) {
+ byteSize = any(IntegralOrEnumType type | isSignedIntegerType(type)).getSize()
+}
+
+/**
+ * Holds if an `IRUnsignedIntegerType` with the specified `byteSize` should exist.
+ */
+predicate hasUnsignedIntegerType(int byteSize) {
+ byteSize = any(IntegralOrEnumType type | isUnsignedIntegerType(type)).getSize()
+}
+
+/**
+ * Holds if an `IRFloatingPointType` with the specified size, base, and type domain should exist.
+ */
+predicate hasFloatingPointType(int byteSize, int base, TypeDomain domain) {
+ exists(FloatingPointType type |
+ byteSize = type.getSize() and
+ base = type.getBase() and
+ domain = type.getDomain()
+ )
+}
+
+private predicate isPointerIshType(Type type) {
+ type instanceof PointerType
+ or
+ type instanceof ReferenceType
+ or
+ type instanceof NullPointerType
+ or
+ // Treat `T[]` as a pointer. The only place we should see these is as the type of a parameter. If
+ // the corresponding decayed `T*` type is available, we'll use that, but if it's not available,
+ // we're stuck with `T[]`. Just treat it as a pointer.
+ type instanceof ArrayType and not exists(type.getSize())
+}
+
+/**
+ * Holds if an `IRAddressType` with the specified `byteSize` should exist.
+ */
+predicate hasAddressType(int byteSize) {
+ // This covers all pointers, all references, and because it also looks at `NullPointerType`, it
+ // should always return a result that makes sense for arbitrary glvalues as well.
+ byteSize = any(Type type | isPointerIshType(type)).getSize()
+}
+
+/**
+ * Holds if an `IRFunctionAddressType` with the specified `byteSize` should exist.
+ */
+predicate hasFunctionAddressType(int byteSize) {
+ byteSize = getPointerSize() or // Covers function lvalues
+ byteSize = getTypeSize(any(FunctionPointerIshType type))
+}
+
+private predicate isOpaqueType(Type type) {
+ exists(type.getSize()) and // Only include complete types
+ (
+ type instanceof ArrayType or
+ type instanceof Class or
+ type instanceof GNUVectorType
+ )
+ or
+ type instanceof PointerToMemberType // PTMs are missing size info
+}
+
+/**
+ * Holds if an `IROpaqueType` with the specified `tag` and `byteSize` should exist.
+ */
+predicate hasOpaqueType(Type tag, int byteSize) {
+ isOpaqueType(tag) and byteSize = getTypeSize(tag)
+ or
+ tag instanceof UnknownType and Raw::needsUnknownOpaqueType(byteSize)
+}
+
+/**
+ * Gets the `IRType` that represents a prvalue of the specified `Type`.
+ */
+private IRType getIRTypeForPRValue(Type type) {
+ exists(Type unspecifiedType | unspecifiedType = type.getUnspecifiedType() | // classification is done on the unspecified type
+ isOpaqueType(unspecifiedType) and
+ exists(IROpaqueType opaqueType | opaqueType = result |
+ opaqueType.getByteSize() = getTypeSize(type) and // size comes from `getTypeSize`, i.e. with the size workaround applied
+ opaqueType.getTag() = unspecifiedType
+ )
+ or
+ unspecifiedType instanceof BoolType and result.(IRBooleanType).getByteSize() = type.getSize()
+ or
+ isSignedIntegerType(unspecifiedType) and
+ result.(IRSignedIntegerType).getByteSize() = type.getSize()
+ or
+ isUnsignedIntegerType(unspecifiedType) and
+ result.(IRUnsignedIntegerType).getByteSize() = type.getSize()
+ or
+ exists(FloatingPointType floatType, IRFloatingPointType irFloatType |
+ floatType = unspecifiedType and
+ irFloatType = result and
+ irFloatType.getByteSize() = floatType.getSize() and // size, base and domain must all match
+ irFloatType.getBase() = floatType.getBase() and
+ irFloatType.getDomain() = floatType.getDomain()
+ )
+ or
+ isPointerIshType(unspecifiedType) and result.(IRAddressType).getByteSize() = getTypeSize(type)
+ or
+ unspecifiedType instanceof FunctionPointerIshType and
+ result.(IRFunctionAddressType).getByteSize() = getTypeSize(type)
+ or
+ unspecifiedType instanceof VoidType and result instanceof IRVoidType
+ or
+ unspecifiedType instanceof ErroneousType and result instanceof IRErrorType
+ or
+ unspecifiedType instanceof UnknownType and result instanceof IRUnknownType
+ )
+}
+
+cached
+private newtype TCppType =
+ TPRValueType(Type type) { exists(getIRTypeForPRValue(type)) } or // prvalue of `type`; only for types that map to some `IRType`
+ TFunctionGLValueType() or // single value; wrapped by `CppFunctionGLValueType`
+ TGLValueAddressType(Type type) or // glvalue of `type`; no restriction on `type`
+ TUnknownOpaqueType(int byteSize) { Raw::needsUnknownOpaqueType(byteSize) } or // one value per size requested by `Raw::needsUnknownOpaqueType`
+ TUnknownType() // single value; wrapped by `CppUnknownType`
+
+/**
+ * The C++ type of an IR entity.
+ * This cannot just be `Type` for a couple of reasons:
+ * - Some types needed by the IR might not exist in the database (e.g. `RoutineType`s for functions
+ * that are always called directly)
+ * - Some types needed by the IR are not representable in the C++ type system (e.g. the result type
+ * of a `VariableAddress` where the variable is of reference type)
+ */
+class CppType extends TCppType {
+ /** Gets a textual representation of this type. */
+ string toString() { none() }
+
+ /** Gets a string used in IR dumps */
+ string getDumpString() { result = toString() }
+
+ /** Gets the size of the type in bytes, if known. */
+ final int getByteSize() { result = getIRType().getByteSize() }
+
+ /**
+ * Gets the `IRType` that represents this `CppType`. Many different `CppType`s can map to a single
+ * `IRType`.
+ */
+ cached
+ IRType getIRType() { none() }
+
+ /**
+ * Holds if the `CppType` represents a prvalue of type `Type` (if `isGLValue` is `false`), or if
+ * it represents a glvalue of type `Type` (if `isGLValue` is `true`).
+ */
+ predicate hasType(Type type, boolean isGLValue) { none() }
+
+ /**
+ * Holds if this type represents the C++ type `type`. If `isGLValue` is `true`, then this type
+ * represents a glvalue of type `type`. Otherwise, it represents a prvalue of type `type`.
+ */
+ final predicate hasUnspecifiedType(Type type, boolean isGLValue) {
+ exists(Type specifiedType |
+ hasType(specifiedType, isGLValue) and
+ type = specifiedType.getUnspecifiedType()
+ )
+ }
+}
+
+/**
+ * A `CppType` that wraps an existing `Type` (either as a prvalue or a glvalue).
+ */
+private class CppWrappedType extends CppType {
+ Type ctype;
+
+ CppWrappedType() {
+ this = TPRValueType(ctype) or
+ this = TGLValueAddressType(ctype)
+ }
+}
+
+/**
+ * A `CppType` that represents a prvalue of an existing `Type`.
+ */
+private class CppPRValueType extends CppWrappedType, TPRValueType {
+ final override string toString() { result = ctype.toString() }
+
+ final override string getDumpString() { result = ctype.getUnspecifiedType().toString() }
+
+ final override IRType getIRType() { result = getIRTypeForPRValue(ctype) }
+
+ final override predicate hasType(Type type, boolean isGLValue) {
+ type = ctype and
+ isGLValue = false
+ }
+}
+
+/**
+ * A `CppType` that has unknown type but a known size. Generally to represent synthesized types that
+ * occur in certain cases during IR construction, such as the type of a zero-initialized segment of
+ * a partially-initialized array.
+ */
+private class CppUnknownOpaqueType extends CppType, TUnknownOpaqueType {
+ int byteSize;
+
+ CppUnknownOpaqueType() { this = TUnknownOpaqueType(byteSize) }
+
+ final override string toString() { result = "unknown[" + byteSize.toString() + "]" }
+
+ final override IROpaqueType getIRType() {
+ result.getByteSize() = byteSize and result.getTag() instanceof UnknownType
+ }
+
+ override predicate hasType(Type type, boolean isGLValue) {
+ type instanceof UnknownType and isGLValue = false
+ }
+}
+
+/**
+ * A `CppType` that represents a glvalue of an existing `Type`.
+ */
+private class CppGLValueAddressType extends CppWrappedType, TGLValueAddressType {
+ final override string toString() { result = "glval<" + ctype.toString() + ">" }
+
+ final override string getDumpString() {
+ result = "glval<" + ctype.getUnspecifiedType().toString() + ">"
+ }
+
+ final override IRAddressType getIRType() { result.getByteSize() = getPointerSize() }
+
+ final override predicate hasType(Type type, boolean isGLValue) {
+ type = ctype and
+ isGLValue = true
+ }
+}
+
+/**
+ * A `CppType` that represents a function lvalue.
+ */
+private class CppFunctionGLValueType extends CppType, TFunctionGLValueType {
+ final override string toString() { result = "glval" } // NOTE(review): `CppGLValueAddressType` renders as "glval<...>"; confirm the bare "glval" is intended and not a lost "<...>" suffix
+
+ final override IRFunctionAddressType getIRType() { result.getByteSize() = getPointerSize() } // function address type of pointer size
+
+ final override predicate hasType(Type type, boolean isGLValue) {
+ type instanceof UnknownType and isGLValue = true // reported as a glvalue of `UnknownType`
+ }
+}
+
+/**
+ * A `CppType` that represents an unknown type.
+ */
+private class CppUnknownType extends CppType, TUnknownType {
+ final override string toString() { result = any(UnknownType type).toString() }
+
+ final override IRUnknownType getIRType() { any() }
+
+ final override predicate hasType(Type type, boolean isGLValue) {
+ type instanceof UnknownType and isGLValue = false
+ }
+}
+
+/**
+ * Gets the single instance of `CppUnknownType`.
+ */
+CppUnknownType getUnknownType() { any() }
+
+/**
+ * Gets the `CppType` that represents a prvalue of type `void`.
+ */
+CppPRValueType getVoidType() { exists(VoidType voidType | result.hasType(voidType, false)) }
+
+/**
+ * Gets the `CppType` for a prvalue of `type`; an `UnknownType` maps to `CppUnknownType`.
+ */
+CppType getTypeForPRValue(Type type) {
+  type instanceof UnknownType and result instanceof CppUnknownType
+  or
+  not type instanceof UnknownType and result.hasType(type, false)
+}
+
+/**
+ * Gets the `CppType` for a prvalue of type `type` when one exists; falls back to the
+ * `CppUnknownType` instance when `getTypeForPRValue(type)` has no result.
+ */
+CppType getTypeForPRValueOrUnknown(Type type) {
+  not exists(getTypeForPRValue(type)) and result = getUnknownType()
+  or
+  result = getTypeForPRValue(type)
+}
+
+/**
+ * Gets the `CppType` that represents a glvalue of type `type`.
+ */
+CppGLValueAddressType getTypeForGLValue(Type type) { result.hasType(type, true) }
+
+/**
+ * Gets the `CppType` that represents a prvalue of type `int`.
+ */
+CppPRValueType getIntType() {
+ exists(IntType type |
+ type.isImplicitlySigned() and
+ result.hasType(type, false)
+ )
+}
+
+/**
+ * Gets the `CppType` that represents a prvalue of type `bool`.
+ */
+CppPRValueType getBoolType() { exists(BoolType type | result.hasType(type, false)) }
+
+/**
+ * Gets the `CppType` that represents a glvalue of type `bool`.
+ */
+CppType getBoolGLValueType() { exists(BoolType type | result.hasType(type, true)) }
+
+/**
+ * Gets the `CppType` that represents a glvalue of function type.
+ */
+CppFunctionGLValueType getFunctionGLValueType() { any() }
+
+/**
+ * Gets the `CppType` that represents an opaque value of unknown type with size `byteSize`.
+ */
+CppUnknownOpaqueType getUnknownOpaqueType(int byteSize) { result.getByteSize() = byteSize }
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IRBooleanType` with the specified
+ * `byteSize`.
+ */
+CppWrappedType getCanonicalBooleanType(int byteSize) {
+ exists(BoolType type | result = TPRValueType(type) and byteSize = type.getSize())
+}
+
+/**
+ * Gets the sorting priority of an `IntegralType` based on its signedness.
+ */
+private int getSignPriority(IntegralType type) {
+ // Explicitly unsigned types sort first. Explicitly signed types sort last. Types with no explicit
+ // signedness sort in between. This lets us always choose `int` over `signed int`, while also
+ // choosing `unsigned char`+`char` when `char` is signed, and `unsigned char`+`signed char` when
+ // `char` is unsigned.
+ if type.isExplicitlyUnsigned()
+ then result = 2 // largest value: preferred, because the canonical integer predicates select with `max(...)`
+ else
+ if type.isExplicitlySigned()
+ then result = 0 // smallest value: least preferred
+ else result = 1 // no explicit signedness: in between
+}
+
+/**
+ * Gets the sort priority of an `IntegralType` based on its kind.
+ */
+private int getKindPriority(IntegralType type) {
+ // `CharType` sorts lower so that we prefer the plain integer types when they have the same size
+ // as a `CharType`.
+ if type instanceof CharType then result = 0 else result = 1
+}
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IRSignedIntegerType` with the specified
+ * `byteSize`.
+ */
+CppPRValueType getCanonicalSignedIntegerType(int byteSize) {
+ result =
+ TPRValueType(max(IntegralType type | // among signed integral types of exactly this size
+ type.isSigned() and type.getSize() = byteSize
+ |
+ type order by getKindPriority(type), getSignPriority(type), type.toString() desc // kind priority, then sign priority, then descending `toString()` as the last key
+ ))
+}
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IRUnsignedIntegerType` with the specified
+ * `byteSize`.
+ */
+CppPRValueType getCanonicalUnsignedIntegerType(int byteSize) {
+ result =
+ TPRValueType(max(IntegralType type | // among unsigned integral types of exactly this size
+ type.isUnsigned() and type.getSize() = byteSize
+ |
+ type order by getKindPriority(type), getSignPriority(type), type.toString() desc // kind priority, then sign priority, then descending `toString()` as the last key
+ ))
+}
+
+/**
+ * Gets the sort priority of a `RealNumberType` based on its precision.
+ */
+private int getPrecisionPriority(RealNumberType type) {
+ // Prefer `double`, `float`, `long double` in that order.
+ if type instanceof DoubleType
+ then result = 4
+ else
+ if type instanceof FloatType
+ then result = 3
+ else
+ if type instanceof LongDoubleType
+ then result = 2
+ else
+ // If we get this far, prefer non-extended-precision types.
+ if not type.isExtendedPrecision()
+ then result = 1
+ else result = 0 // extended-precision types other than the three named above rank last
+}
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IRFloatingPointType` with the specified
+ * size, base, and type domain.
+ */
+CppPRValueType getCanonicalFloatingPointType(int byteSize, int base, TypeDomain domain) {
+ result =
+ TPRValueType(max(FloatingPointType type |
+ type.getSize() = byteSize and
+ type.getBase() = base and
+ type.getDomain() = domain
+ |
+ type order by getPrecisionPriority(type.getRealType()), type.toString() desc
+ ))
+}
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IRAddressType` with the specified
+ * `byteSize`.
+ */
+CppPRValueType getCanonicalAddressType(int byteSize) {
+ // We just use `NullPointerType`, since it should be unique.
+ exists(NullPointerType type |
+ type.getSize() = byteSize and
+ result = TPRValueType(type)
+ )
+}
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IRFunctionAddressType` with the specified
+ * `byteSize`.
+ */
+CppFunctionGLValueType getCanonicalFunctionAddressType(int byteSize) {
+ result.getByteSize() = byteSize
+}
+
+/**
+ * Gets the `CppType` that is the canonical type for `IRErrorType`.
+ */
+CppPRValueType getCanonicalErrorType() { result = TPRValueType(any(ErroneousType type)) }
+
+/**
+ * Gets the `CppType` that is the canonical type for `IRUnknownType`.
+ */
+CppUnknownType getCanonicalUnknownType() { any() }
+
+/**
+ * Gets the `CppType` that is the canonical type for `IRVoidType`.
+ */
+CppPRValueType getCanonicalVoidType() { result = TPRValueType(any(VoidType type)) }
+
+/**
+ * Gets the `CppType` that is the canonical type for an `IROpaqueType` with the specified `tag` and
+ * `byteSize`.
+ */
+CppType getCanonicalOpaqueType(Type tag, int byteSize) {
+ isOpaqueType(tag) and
+ result = TPRValueType(tag.getUnspecifiedType()) and
+ getTypeSize(tag) = byteSize
+ or
+ tag instanceof UnknownType and result = getUnknownOpaqueType(byteSize)
+}
+
+/**
+ * Gets a string that uniquely identifies an `IROpaqueType` tag. This may be different from the usual
+ * `toString()` of the tag in order to ensure uniqueness.
+ */
+string getOpaqueTagIdentityString(Type tag) {
+ hasOpaqueType(tag, _) and
+ result = getTypeIdentityString(tag)
+}
+
+module LanguageTypeConsistency {
+ /**
+ * Consistency query to detect C++ `Type` objects which have no corresponding `CppType` object.
+ */
+ query predicate missingCppType(Type type, string message) {
+ not exists(getTypeForPRValue(type)) and
+ exists(type.getSize()) and
+ // `ProxyClass`es have a size, but only appear in uninstantiated templates
+ not type instanceof ProxyClass and
+ message = "Type does not have an associated `CppType`."
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IRCppLanguage.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IRCppLanguage.qll
new file mode 100644
index 00000000000..f047d6c4753
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IRCppLanguage.qll
@@ -0,0 +1,108 @@
+private import cpp as Cpp
+private import semmle.code.cpp.Print as Print
+private import IRUtilities
+private import semmle.code.cpp.ir.implementation.IRType
+private import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as IRConstruction
+import CppType
+
+class LanguageType = CppType; // the language-specific type representation is `CppType`
+
+class OpaqueTypeTag = Cpp::Type;
+
+class TypeDomain = Cpp::TypeDomain;
+
+class RealDomain = Cpp::RealDomain;
+
+class ComplexDomain = Cpp::ComplexDomain;
+
+class ImaginaryDomain = Cpp::ImaginaryDomain;
+
+class Function = Cpp::Function;
+
+class Location = Cpp::Location;
+
+class UnknownLocation = Cpp::UnknownLocation;
+
+class UnknownDefaultLocation = Cpp::UnknownDefaultLocation;
+
+class File = Cpp::File;
+
+class AST = Cpp::Locatable; // AST elements are the C++ `Locatable`s
+
+class Type = Cpp::Type;
+
+class UnknownType = Cpp::UnknownType;
+
+class VoidType = Cpp::VoidType;
+
+class IntegralType = Cpp::IntegralType;
+
+class FloatingPointType = Cpp::FloatingPointType;
+
+// REVIEW: May need to synthesize this for other languages. Or do we really need it at all?
+class ClassDerivation = Cpp::ClassDerivation;
+
+class StringLiteral = Cpp::StringLiteral;
+
+class Variable = Cpp::Variable;
+
+class AutomaticVariable = Cpp::StackVariable; // "automatic" variables are the C++ stack variables
+
+class StaticVariable = Cpp::Variable; // NOTE(review): aliases every `Cpp::Variable`, not only static ones - confirm intent
+
+class Parameter = Cpp::Parameter;
+
+class Field = Cpp::Field;
+
+class BuiltInOperation = Cpp::BuiltInOperation;
+
+// TODO: Remove necessity for these.
+class Expr = Cpp::Expr;
+
+class Class = Cpp::Class; // Used for inheritance conversions
+
+predicate getIdentityString = Print::getIdentityString/1; // forwards to `Print::getIdentityString`
+
+predicate hasCaseEdge(string minValue, string maxValue) {
+  hasCaseEdge(any(Cpp::SwitchCase label), minValue, maxValue) // some `case` label has this range
+}
+
+predicate hasPositionalArgIndex(int argIndex) {
+  exists(any(Cpp::BuiltInOperation builtin).getChild(argIndex)) or
+  exists(any(Cpp::FunctionCall invocation).getArgument(argIndex))
+}
+
+predicate hasAsmOperandIndex(int operandIndex) {
+  exists(any(Cpp::AsmStmt stmt).getChild(operandIndex)) // some `asm` statement has such a child
+}
+
+int getTypeSize(Type type) { type.getSize() = result } // size as reported by `Type.getSize()`
+
+int getPointerSize() { result = any(Cpp::NullPointerType nullType).getSize() }
+
+predicate isVariableAutomatic(Cpp::StackVariable var) { any() } // holds for every `StackVariable`
+
+string getStringLiteralText(StringLiteral s) { // value text with "\n" and "\t" turned into spaces
+ result = s.getValueText().replaceAll("\n", " ").replaceAll("\r", "").replaceAll("\t", " ")
+} // "\r" is removed rather than replaced
+
+predicate hasPotentialLoop(Function f) {
+  f = any(Cpp::GotoStmt jump).getEnclosingFunction() or
+  f = any(Cpp::Loop loop).getEnclosingFunction()
+}
+
+predicate hasGoto(Function f) { f = any(Cpp::GotoStmt jump).getEnclosingFunction() }
+
+/**
+ * Gets the offset of `field` in bits: eight times its byte offset, plus the bit offset for a bit field.
+ */
+int getFieldBitOffset(Field field) {
+  result = field.getByteOffset() * 8 + field.(Cpp::BitField).getBitOffset()
+  or
+  not field instanceof Cpp::BitField and result = field.getByteOffset() * 8
+}
+
+/**
+ * Holds if `f` is virtual (`f.isVirtual()`), so that it can be overridden in a derived class.
+ */
+predicate isFunctionVirtual(Function f) { f.isVirtual() }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IRUtilities.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IRUtilities.qll
new file mode 100644
index 00000000000..6b2b4c918af
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IRUtilities.qll
@@ -0,0 +1,45 @@
+private import cpp
+
+/**
+ * Gets the type obtained from `type` by "pointer decay": a function type decays
+ * to a pointer to that function type, and an array type decays to a pointer to
+ * its element type. There is no result when `type` is neither a function type
+ * nor an array type.
+ */
+private Type getDecayedType(Type type) {
+  type.(ArrayType).getBaseType() = result.(PointerType).getBaseType() or
+  type.(RoutineType) = result.(FunctionPointerType).getBaseType()
+}
+
+/**
+ * Gets the effective type of variable `v`, which can differ from its declared type.
+ * A parameter whose unspecified type is subject to pointer decay gets the decayed type, and
+ * a non-parameter array without a size takes the type of its initializer expression when it has one.
+ */
+Type getVariableType(Variable v) {
+  exists(Type unspecified | unspecified = v.getUnspecifiedType() |
+    if v instanceof Parameter
+    then
+      if exists(getDecayedType(unspecified))
+      then result = getDecayedType(unspecified)
+      else result = v.getType()
+    else
+      if unspecified.(ArrayType).hasArraySize() or not unspecified instanceof ArrayType
+      then result = v.getType()
+      else (
+        not exists(v.getInitializer()) and result = v.getType()
+        or
+        result = v.getInitializer().getExpr().getType()
+      )
+  )
+}
+
+/**
+ * Holds if `switchCase` is a `case` label whose values range from `minValue` to `maxValue`.
+ */
+predicate hasCaseEdge(SwitchCase switchCase, string minValue, string maxValue) {
+  switchCase.getExpr().getFullyConverted().getValue() = minValue and
+  if not exists(switchCase.getEndExpr())
+  then maxValue = minValue
+  else maxValue = switchCase.getEndExpr().getFullyConverted().getValue()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerConstant.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerConstant.qll
new file mode 100644
index 00000000000..4af31745ab2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerConstant.qll
@@ -0,0 +1,236 @@
+/**
+ * Provides predicates for manipulating integer constants that are tracked by constant folding and
+ * similar analyses.
+ */
+
+/**
+ * An alias used to represent the constant value of an integer, if one can be determined. If no
+ * single constant value can be determined, or if the constant value is out of the representable
+ * range, it will be represented as the special value `unknown()`. This allows `IntValue` to be used
+ * in contexts where there must always be a value for the `IntValue`, even if no constant value is
+ * known.
+ */
+class IntValue = int; // a plain `int`; `unknown()` is the reserved marker value
+
+/**
+ * Gets the largest integer that an `IntValue` can represent.
+ */
+int maxValue() { 2147483647 = result }
+
+/**
+ * Gets the smallest known `IntValue`; the value just below it is the `unknown()` marker.
+ */
+int minValue() { -2147483647 = result }
+
+/**
+ * Gets the reserved `IntValue` that stands for an unknown integer.
+ */
+IntValue unknown() { -2147483648 = result }
+
+/**
+ * Holds if `n` is a known value, that is, anything other than `unknown()`.
+ */
+bindingset[n]
+predicate hasValue(IntValue n) { not n = unknown() }
+
+/**
+ * Gets the text of `n` as produced by `toString()`, or "??" when `n` is unknown.
+ */
+bindingset[n]
+string intValueToString(IntValue n) { if not hasValue(n) then result = "??" else result = n.toString() }
+
+/**
+ * Holds if `f` lies between `minValue()` and `maxValue()`, inclusive.
+ */
+bindingset[f]
+pragma[inline]
+private predicate isRepresentable(float f) { minValue() <= f and maxValue() >= f }
+
+/**
+ * Gets `n` itself, provided that `n` is known; there is no result for an unknown `n`.
+ */
+bindingset[n]
+int getValue(IntValue n) { result = n and hasValue(n) }
+
+/**
+ * Gets the sum `a + b`. The result is `unknown()` when an input is unknown or
+ * when the sum does not fit in the representable range.
+ */
+bindingset[a, b]
+IntValue add(IntValue a, IntValue b) {
+  if not hasValue(a) or not hasValue(b) or not isRepresentable(a.(float) + b.(float))
+  then result = unknown()
+  else result = a + b
+}
+
+/**
+ * Gets the difference `a - b`. The result is `unknown()` when an input is unknown or
+ * when the difference does not fit in the representable range.
+ */
+bindingset[a, b]
+IntValue sub(IntValue a, IntValue b) {
+  if not hasValue(a) or not hasValue(b) or not isRepresentable(a.(float) - b.(float))
+  then result = unknown()
+  else result = a - b
+}
+
+/**
+ * Gets the product `a * b`. A zero operand always gives 0, even when the other
+ * operand is unknown. Otherwise the result is `unknown()` when an input is
+ * unknown or when the product does not fit in the representable range.
+ */
+bindingset[a, b]
+IntValue mul(IntValue a, IntValue b) {
+  if a != 0 and b != 0
+  then
+    if hasValue(a) and hasValue(b) and isRepresentable(a.(float) * b.(float))
+    then result = a * b
+    else result = unknown()
+  else result = 0
+}
+
+/**
+ * Gets the quotient `a / b`. The result is `unknown()` when an input is unknown
+ * or when `b` is zero.
+ */
+bindingset[a, b]
+IntValue div(IntValue a, IntValue b) {
+  // INT_MIN / -1 is the usual overflow case for integer division, but INT_MIN
+  // already encodes the unknown value here, so division by zero is the only
+  // special case left to handle.
+  if b = 0 or not hasValue(a) or not hasValue(b) then result = unknown() else result = a / b
+}
+
+/**
+ * Gets 1 if `a` equals `b` and 0 otherwise; the result is `unknown()` if either input is unknown.
+ */
+bindingset[a, b]
+IntValue compareEQ(IntValue a, IntValue b) {
+  if not (hasValue(a) and hasValue(b))
+  then result = unknown()
+  else if a = b then result = 1 else result = 0
+}
+
+/**
+ * Gets 1 if `a` differs from `b` and 0 otherwise; the result is `unknown()` if either input is unknown.
+ */
+bindingset[a, b]
+IntValue compareNE(IntValue a, IntValue b) {
+  if not (hasValue(a) and hasValue(b))
+  then result = unknown()
+  else if a != b then result = 1 else result = 0
+}
+
+/**
+ * Gets 1 if `a < b` and 0 otherwise; the result is `unknown()` if either input is unknown.
+ */
+bindingset[a, b]
+IntValue compareLT(IntValue a, IntValue b) {
+  if not (hasValue(a) and hasValue(b))
+  then result = unknown()
+  else if a < b then result = 1 else result = 0
+}
+
+/**
+ * Gets 1 if `a > b` and 0 otherwise; the result is `unknown()` if either input is unknown.
+ */
+bindingset[a, b]
+IntValue compareGT(IntValue a, IntValue b) {
+  if not (hasValue(a) and hasValue(b))
+  then result = unknown()
+  else if a > b then result = 1 else result = 0
+}
+
+/**
+ * Gets 1 if `a <= b` and 0 otherwise; the result is `unknown()` if either input is unknown.
+ */
+bindingset[a, b]
+IntValue compareLE(IntValue a, IntValue b) {
+  if not (hasValue(a) and hasValue(b))
+  then result = unknown()
+  else if a <= b then result = 1 else result = 0
+}
+
+/**
+ * Gets 1 if `a >= b` and 0 otherwise; the result is `unknown()` if either input is unknown.
+ */
+bindingset[a, b]
+IntValue compareGE(IntValue a, IntValue b) {
+  if not (hasValue(a) and hasValue(b))
+  then result = unknown()
+  else if a >= b then result = 1 else result = 0
+}
+
+/**
+ * Gets the negation `-a`. An unknown `a` gives an unknown result.
+ */
+bindingset[a]
+IntValue neg(IntValue a) {
+  -a = result // unknown is INT_MIN and -INT_MIN = INT_MIN, so unknown maps to unknown
+}
+
+/**
+ * Holds if `a` and `b` are both known and `a` equals `b`.
+ */
+bindingset[a, b]
+predicate isEQ(IntValue a, IntValue b) { a = b and hasValue(a) and hasValue(b) }
+
+/**
+ * Holds if `a` and `b` are both known and `a` differs from `b`.
+ */
+bindingset[a, b]
+predicate isNE(IntValue a, IntValue b) { a != b and hasValue(a) and hasValue(b) }
+
+/**
+ * Holds if `a` and `b` are both known and `a` is less than `b`.
+ */
+bindingset[a, b]
+predicate isLT(IntValue a, IntValue b) { a < b and hasValue(a) and hasValue(b) }
+
+/**
+ * Holds if `a` and `b` are both known and `a` is less than or equal to `b`.
+ */
+bindingset[a, b]
+predicate isLE(IntValue a, IntValue b) { a <= b and hasValue(a) and hasValue(b) }
+
+/**
+ * Holds if `a` and `b` are both known and `a` is greater than `b`.
+ */
+bindingset[a, b]
+predicate isGT(IntValue a, IntValue b) { a > b and hasValue(a) and hasValue(b) }
+
+/**
+ * Holds if `a` and `b` are both known and `a` is greater than or equal to `b`.
+ */
+bindingset[a, b]
+predicate isGE(IntValue a, IntValue b) { a >= b and hasValue(a) and hasValue(b) }
+
+/**
+ * Gets the text for the bit count `bits`, written as "bytes:bits". The ":bits"
+ * part is left out when `bits` is a whole number of bytes, and the result is
+ * "?" when `bits` is unknown.
+ */
+bindingset[bits]
+string bitsToBytesAndBits(IntValue bits) {
+  not hasValue(bits) and result = "?"
+  or
+  hasValue(bits) and
+  exists(int wholeBytes, int remainder | wholeBytes = bits / 8 and remainder = bits % 8 |
+    remainder = 0 and result = wholeBytes.toString()
+    or
+    remainder != 0 and result = wholeBytes + ":" + remainder
+  )
+}
+
+/**
+ * Gets the signed text for `bitOffset`: "+" or "-" then bytes and bits, or "+?" if unknown.
+ */
+bindingset[bitOffset]
+string getBitOffsetString(IntValue bitOffset) {
+  not hasValue(bitOffset) and result = "+?"
+  or
+  hasValue(bitOffset) and
+  if bitOffset < 0
+  then result = "-" + bitsToBytesAndBits(neg(bitOffset))
+  else result = "+" + bitsToBytesAndBits(bitOffset)
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerInterval.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerInterval.qll
new file mode 100644
index 00000000000..4f8f4b4e672
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerInterval.qll
@@ -0,0 +1,35 @@
+/**
+ * Support for integer intervals.
+ * An interval is represented by its inclusive lower bound, `start`, and its exclusive upper bound, `end`.
+ * Either or both of `start` and `end` may have an unknown value.
+ */
+
+import Overlap
+private import IntegerConstant
+
+/**
+ * Gets the overlap between the definition interval [`defStart`, `defEnd`) and the use
+ * interval [`useStart`, `useEnd`). There is no result if they are known to be disjoint.
+ */
+bindingset[defStart, defEnd, useStart, useEnd]
+Overlap getOverlap(IntValue defStart, IntValue defEnd, IntValue useStart, IntValue useEnd) {
+  if isEQ(useStart, defStart) and isEQ(useEnd, defEnd)
+  then result instanceof MustExactlyOverlap
+  else
+    if isGE(useStart, defStart) and isLE(useEnd, defEnd)
+    then result instanceof MustTotallyOverlap
+    else (
+      result instanceof MayPartiallyOverlap and
+      not isGE(useStart, defEnd) and
+      not isLE(useEnd, defStart)
+    )
+}
+
+/**
+ * Gets a string representation of the interval [`start`, `end`), with each bound formatted by `bitsToBytesAndBits`.
+ */
+bindingset[start, end]
+string getIntervalString(IntValue start, IntValue end) {
+ // We represent an interval as half-open, so print it as "[start..end)".
+ result = "[" + bitsToBytesAndBits(start) + ".." + bitsToBytesAndBits(end) + ")"
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerPartial.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerPartial.qll
new file mode 100644
index 00000000000..0e24f283b17
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/IntegerPartial.qll
@@ -0,0 +1,99 @@
+/**
+ * Provides basic arithmetic operations that have no result if their result
+ * would overflow a 32-bit two's complement integer.
+ */
+
+/**
+ * Gets the largest representable integer, 2147483647.
+ */
+int maxValue() { 2147483647 = result }
+
+/**
+ * Gets the smallest representable integer, -2147483648.
+ */
+int minValue() { -2147483648 = result }
+
+/**
+ * Holds if `f` lies between `minValue()` and `maxValue()`, inclusive.
+ */
+bindingset[f]
+pragma[inline]
+private predicate isRepresentable(float f) { minValue() <= f and maxValue() >= f }
+
+/**
+ * Gets the sum `a + b`; there is no result when the sum is not representable.
+ */
+bindingset[a, b]
+int add(int a, int b) {
+  result = a + b and
+  isRepresentable(a.(float) + b.(float))
+}
+
+/**
+ * Gets the difference `a - b`; there is no result when the difference is not representable.
+ */
+bindingset[a, b]
+int sub(int a, int b) {
+  result = a - b and
+  isRepresentable(a.(float) - b.(float))
+}
+
+/**
+ * Gets the product `a * b`; there is no result when the product is not
+ * representable. A zero operand always gives 0. If either input is not
+ * given, and the other input is non-zero, there is no result.
+ */
+bindingset[a, b]
+int mul(int a, int b) {
+  isRepresentable(a.(float) * b.(float)) and
+  result = a * b
+  or
+  result = 0 and
+  a = 0
+  or
+  result = 0 and
+  b = 0
+}
+
+/**
+ * Gets the quotient `a / b`; no result for a zero divisor or for `minValue() / -1`.
+ */
+bindingset[a, b]
+int div(int a, int b) {
+  not (a = minValue() and b = -1) and
+  b != 0 and
+  result = a / b
+}
+
+/** Gets 1 if `a` equals `b`, and 0 otherwise. */
+bindingset[a, b]
+int compareEQ(int a, int b) { a = b and result = 1 or a != b and result = 0 }
+
+/** Gets 1 if `a` differs from `b`, and 0 otherwise. */
+bindingset[a, b]
+int compareNE(int a, int b) { a != b and result = 1 or a = b and result = 0 }
+
+/** Gets 1 if `a < b`, and 0 otherwise. */
+bindingset[a, b]
+int compareLT(int a, int b) { a < b and result = 1 or a >= b and result = 0 }
+
+/** Gets 1 if `a > b`, and 0 otherwise. */
+bindingset[a, b]
+int compareGT(int a, int b) { a > b and result = 1 or a <= b and result = 0 }
+
+/** Gets 1 if `a <= b`, and 0 otherwise. */
+bindingset[a, b]
+int compareLE(int a, int b) { a <= b and result = 1 or a > b and result = 0 }
+
+/** Gets 1 if `a >= b`, and 0 otherwise. */
+bindingset[a, b]
+int compareGE(int a, int b) { a >= b and result = 1 or a < b and result = 0 }
+
+/**
+ * Gets the negation `-a`; there is no result for `minValue()`, whose negation would overflow.
+ */
+bindingset[a]
+int neg(int a) {
+  result = -a and
+  a != minValue()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/Overlap.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/Overlap.qll
new file mode 100644
index 00000000000..ca643b56cbb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/Overlap.qll
@@ -0,0 +1,70 @@
+private newtype TOverlap =
+ TMayPartiallyOverlap() or // wrapped by `MayPartiallyOverlap`, precision 0
+ TMustTotallyOverlap() or // wrapped by `MustTotallyOverlap`, precision 1
+ TMustExactlyOverlap() // wrapped by `MustExactlyOverlap`, precision 2
+
+/**
+ * Represents a possible overlap between two memory ranges.
+ */
+abstract class Overlap extends TOverlap {
+ abstract string toString();
+
+ /**
+ * Gets a value representing how precise this overlap is. The higher the value, the more
+ * precise the overlap. From most to least precise, the overlaps are ordered as
+ * follows (with precision values 2, 1 and 0 respectively):
+ * `MustExactlyOverlap`
+ * `MustTotallyOverlap`
+ * `MayPartiallyOverlap`
+ */
+ abstract int getPrecision();
+}
+
+/**
+ * An overlap that covers only part of the two memory ranges and that may or
+ * may not actually occur in practice. This is the least precise overlap.
+ */
+class MayPartiallyOverlap extends Overlap, TMayPartiallyOverlap {
+  final override int getPrecision() { 0 = result }
+
+  final override string toString() { "MayPartiallyOverlap" = result }
+}
+
+/**
+ * An overlap in which the first memory range is known to contain every bit of
+ * the second range, although it may be larger or have a different type.
+ */
+class MustTotallyOverlap extends Overlap, TMustTotallyOverlap {
+  final override int getPrecision() { 1 = result }
+
+  final override string toString() { "MustTotallyOverlap" = result }
+}
+
+/**
+ * An overlap between two memory ranges that have the same extent and the same
+ * type. This is the most precise overlap.
+ */
+class MustExactlyOverlap extends Overlap, TMustExactlyOverlap {
+  final override int getPrecision() { 2 = result }
+
+  final override string toString() { "MustExactlyOverlap" = result }
+}
+
+/**
+ * Gets the `Overlap` that best describes how memory locations `a` and `c` relate, given
+ * `getOverlap(a, b) = previousOverlap` and `getOverlap(b, c) = newOverlap` for some
+ * intermediate memory location `b`. The result is the less precise of the two inputs.
+ */
+Overlap combineOverlap(Overlap previousOverlap, Overlap newOverlap) {
+  // Two less precise overlaps can in fact compose into a more precise one. For instance,
+  // `previousOverlap` and `newOverlap` may both be `MustTotallyOverlap` while `a` and `c`
+  // really are related by `MustExactlyOverlap`. Without further input information this
+  // cannot be detected, so `MustTotallyOverlap` is still returned as the best
+  // conservative approximation.
+  result =
+    min(Overlap candidate |
+      candidate = previousOverlap or candidate = newOverlap
+    |
+      candidate order by candidate.getPrecision()
+    )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/TempVariableTag.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/TempVariableTag.qll
new file mode 100644
index 00000000000..e9fd299da45
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/ir/internal/TempVariableTag.qll
@@ -0,0 +1,24 @@
+newtype TTempVariableTag = // the id of each branch is given by `getTempVariableTagId`
+ ConditionValueTempVar() or // id "CondVal"
+ ReturnValueTempVar() or // id "Ret"
+ ThrowTempVar() or // id "Throw"
+ LambdaTempVar() or // id "Lambda"
+ EllipsisTempVar() or // id "Ellipsis"
+ ThisTempVar() or // id "This"
+ TempObjectTempVar() // id "Temp"
+
+string getTempVariableTagId(TTempVariableTag tag) {
+  result = "CondVal" and tag = ConditionValueTempVar()
+  or
+  result = "Ret" and tag = ReturnValueTempVar()
+  or
+  result = "Throw" and tag = ThrowTempVar()
+  or
+  result = "Lambda" and tag = LambdaTempVar()
+  or
+  result = "Ellipsis" and tag = EllipsisTempVar()
+  or
+  result = "This" and tag = ThisTempVar()
+  or
+  result = "Temp" and tag = TempObjectTempVar()
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricClass.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricClass.qll
new file mode 100644
index 00000000000..33a256ce3e5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricClass.qll
@@ -0,0 +1,604 @@
+import cpp
+
+/**
+ * A wrapper that provides metrics for a C++ class.
+ */
+class MetricClass extends Class {
+ /**
+  * Gets how deeply this class is nested in other classes. The level is 0 for a class that
+  * is not a `NestedClass`, and one more than that of the declaring type otherwise.
+  */
+ int getNestingLevel() {
+   if this instanceof NestedClass
+   then result = 1 + this.(NestedClass).getDeclaringType().(MetricClass).getNestingLevel()
+   else result = 0
+ }
+
+ /**
+  * Gets the length of *some* path from this class to a root of the hierarchy: 0 without a
+  * base class. A class that is transitively its own base class has no depth.
+  */
+ int getADepth() {
+   not this = this.getABaseClass+() and
+   if exists(this.getABaseClass())
+   then result = 1 + this.getABaseClass().(MetricClass).getADepth()
+   else result = 0
+ }
+
+ /**
+  * Gets the inheritance depth of this class, which is the largest value of
+  * `getADepth()`. A class with no base class has depth 0.
+  */
+ int getInheritanceDepth() { result = max(int depth | depth = this.getADepth()) }
+
+ /** Gets the number of member functions whose declaring type is this class. */
+ int getNumberOfMemberFunctions() {
+   result = count(MemberFunction fn | this = fn.getDeclaringType())
+ }
+
+ /** Gets the number of nested classes whose declaring type is this class. */
+ int getNumberOfNestedClasses() { result = count(NestedClass inner | this = inner.getDeclaringType()) }
+
+ /** Gets the number of fields (non-static data members) whose declaring type is this class. */
+ int getNumberOfFields() { result = count(Field fld | this = fld.getDeclaringType()) }
+
+ /** Gets the total number of declarations whose declaring type is this class. */
+ int getNumberOfMembers() { result = count(Declaration member | this = member.getDeclaringType()) }
+
+ /** Gets the number of classes that depend on this class (incoming dependencies). */
+ int getAfferentCoupling() { result = count(MetricClass client | this = client.getAClassDependency()) }
+
+ /** Gets the number of classes that this class depends on (outgoing dependencies). */
+ int getEfferentCoupling() { result = count(MetricClass target | target = this.getAClassDependency()) }
+
+ /** Gets the number of classes from source that this class depends on. */
+ int getEfferentSourceCoupling() {
+   result = count(MetricClass target | target.fromSource() and target = this.getAClassDependency())
+ }
+
+ /** Gets a class that this class depends on, as determined by `dependsOnClassSimple`. */
+ Class getAClassDependency() { dependsOnClassSimple(this, result) }
+
+ /*
+ * -------- HENDERSON-SELLERS LACK OF COHESION IN METHODS --------
+ *
+ * The aim of this metric is to try and determine whether a class
+ * represents one abstraction (good) or multiple abstractions (bad).
+ * If a class represents multiple abstractions, it should be split
+ * up into multiple classes.
+ *
+ * In the Henderson-Sellers method, this is measured as follows:
+ * M = set of methods in class
+ * F = set of fields in class
+ * r(f) = number of methods that access field f
+ * <r> = mean of r(f) over f in F
+ * The lack of cohesion is then given by
+ *
+ *   <r> - |M|
+ *   ---------
+ *    1 - |M|
+ *
+ * We follow the Eclipse metrics plugin by restricting M to methods
+ * that access some field in the same class, and restrict F to
+ * fields that are read by methods in the same class.
+ *
+ * Classes where the value of this metric is higher than 0.9 ought
+ * to be scrutinised for possible splitting. Here is a query
+ * to find such classes:
+ *
+ * from MetricClass t, float loc
+ * where loc = t.getLackOfCohesionHS() and loc > 0.9
+ * select t, loc order by loc desc
+ */
+
+ /** Holds if `func` is a member function of this class that accesses field `f` of this class. */
+ predicate accessesLocalField(Function func, Field f) {
+   func = this.getAMemberFunction() and
+   this = f.getDeclaringType() and
+   func.accesses(f)
+ }
+
+ /** Gets a member function of this class that accesses at least one field of this class. */
+ Function getAccessingMethod() { this.accessesLocalField(result, _) }
+
+ /** Gets a field of this class that is accessed by at least one of its member functions. */
+ Field getAccessedField() { this.accessesLocalField(_, result) }
+
+ /** Gets the Henderson-Sellers lack-of-cohesion metric, `(r - m) / (1 - m)`. */
+ float getLackOfCohesionHS() {
+   exists(int methodCount, float meanAccessors |
+     // number of member functions that access at least one local field
+     methodCount = count(this.getAccessingMethod()) and
+     // the denominator `1 - methodCount` must not be zero
+     methodCount != 1 and
+     // mean, over the accessed fields, of how many functions access each field
+     meanAccessors =
+       avg(Field fld |
+         fld = this.getAccessedField()
+       |
+         count(Function accessor | this.accessesLocalField(accessor, fld))
+       ) and
+     // the LCOM value itself
+     result = ((meanAccessors - methodCount) / (1 - methodCount))
+   )
+ }
+
+ /*
+ * -------- CHIDAMBER AND KEMERER LACK OF COHESION IN METHODS ------------
+ *
+ * The aim of this metric is to try and determine whether a class
+ * represents one abstraction (good) or multiple abstractions (bad).
+ * If a class represents multiple abstractions, it should be split
+ * up into multiple classes.
+ *
+ * In the Chidamber and Kemerer method, this is measured as follows:
+ * n1 = number of pairs of distinct methods in a class that do *not*
+ * have at least one commonly accessed field
+ * n2 = number of pairs of distinct methods in a class that do
+ * have at least one commonly accessed field
+ * lcom = ((n1 - n2)/2 max 0)
+ *
+ * We divide by 2 because each pair (m1,m2) is counted twice in n1 and n2.
+ */
+
+ /** Holds if `f` should be left out of the CK cohesion computation (`getLackOfCohesionCK`). */
+ predicate ignoreLackOfCohesionCK(Function f) {
+ none() // by default, nothing is ignored
+ }
+
+ /** Holds if `m1` and `m2` are two different member functions declared in this class. */
+ predicate distinctMembers(MemberFunction m1, MemberFunction m2) {
+   m1 != m2 and
+   this = m1.getDeclaringType() and
+   this = m2.getDeclaringType()
+ }
+
+ /**
+  * Holds if `m1` and `m2` are two different member functions declared in this class and
+  * some field is accessed by both of them.
+  */
+ predicate shareField(MemberFunction m1, MemberFunction m2) {
+   m1 != m2 and
+   m1.getDeclaringType() = this and
+   m2.getDeclaringType() = this and
+   exists(Field common |
+     m1.accesses(common) and
+     m2.accesses(common)
+   )
+ }
+
+ /** Gets the Chidamber and Kemerer lack-of-cohesion metric, `(n1 - n2) / 2` but at least 0. */
+ float getLackOfCohesionCK() {
+   exists(int n1, int n2, float n |
+     n1 =
+       count(MemberFunction m1, MemberFunction m2 |
+         not this.ignoreLackOfCohesionCK([m1, m2]) and
+         this.distinctMembers(m1, m2) and
+         not this.shareField(m1, m2)
+       ) and
+     // `n2` must apply the same exclusions as `n1`, or ignored functions skew the metric.
+     n2 =
+       count(MemberFunction m1, MemberFunction m2 |
+         not this.ignoreLackOfCohesionCK([m1, m2]) and
+         this.shareField(m1, m2)
+       ) and
+     n = (n1 - n2) / 2.0 and
+     if n < 0 then result = 0 else result = n
+   )
+ }
+
+ /*
+ * ----------------- RESPONSE FOR A CLASS ---------------------------------
+ */
+
+ /**
+  * Gets the _response_ for this class, an estimate of the number of different
+  * functions that can be executed when a function is invoked on this class. It
+  * is the sum, over the member functions of this class, of their number of calls.
+  */
+ int getResponse() {
+   result =
+     sum(MemberFunction member |
+       this = member.getDeclaringType()
+     |
+       count(Call invocation | member = invocation.getEnclosingFunction())
+     )
+ }
+
+ /*
+ * ----------------- SPECIALIZATION INDEX --------------------------------
+ */
+
+ /**
+ * Holds if `m` should be excluded when reporting the number of overriding
+ * methods (see `getOverrides`). By default, no functions are excluded.
+ */
+ predicate ignoreOverride(MemberFunction m) { none() }
+
+ /** Gets a non-ignored member function of this class that overrides a function that is not pure virtual. */
+ MemberFunction getOverrides() {
+   not this.ignoreOverride(result) and
+   result = this.getAMemberFunction() and
+   exists(MemberFunction overridden |
+     not overridden instanceof PureVirtualFunction and
+     result.overrides(overridden)
+   )
+ }
+
+ /** Gets the number of overriding methods of this class (NORM), i.e. the count of `getOverrides()`. */
+ int getNumberOverridden() { result = count(MemberFunction m | m = this.getOverrides()) }
+
+ /**
+  * Gets the _specialization index_ of this class: the number of overriding
+  * methods times the inheritance depth, divided by the number of member
+  * functions. There is no result for a class without member functions.
+  *
+  * The index measures the extent to which derived classes override (replace)
+  * the behavior of their base classes. Overriding many methods indicates that
+  * the original abstraction in the base classes may have been inappropriate:
+  * derived classes should add behavior, not alter it dramatically.
+  */
+ float getSpecialisationIndex() {
+   exists(int total | total = this.getNumberOfMemberFunctions() |
+     total != 0 and
+     result = (this.getNumberOverridden() * this.getInheritanceDepth()) / total.(float)
+   )
+ }
+
+ /*
+ * ----------------- HALSTEAD METRICS ------------------------------------
+ */
+
+ /**
+  * Gets the Halstead "N1" metric for this class, the total number of operators
+  * in the class. Operators are all operators in expressions (`+`, `*`, `&`,
+  * `->`, `=`, ...) as well as most statements.
+  */
+ int getHalsteadN1() {
+   result =
+     // friend declarations
+     count(FriendDecl fd | this = fd.getDeclaringClass()) +
+     // every member variable declaration is one operator
+     count(MemberVariable var | this = var.getDeclaringType()) +
+     sum(MetricFunction fn, int n1 |
+       n1 = fn.getHalsteadN1() and this = fn.(MemberFunction).getDeclaringType()
+     |
+       n1
+     ) +
+     // plus one for the class itself
+     1
+ }
+
+ /**
+  * Gets the Halstead "N2" metric for this class, the total number of operands. An
+  * operand is a variable, constant, type name, class name, or function name.
+  */
+ int getHalsteadN2() {
+   result =
+     // every member variable declaration is one operand
+     count(MemberVariable var | this = var.getDeclaringType()) +
+     sum(MetricFunction fn, int n2 |
+       n2 = fn.getHalsteadN2() and this = fn.(MemberFunction).getDeclaringType()
+     |
+       n2
+     ) +
+     // plus one for the class itself
+     1
+ }
+
+ /**
+  * Gets an expression of this class: one enclosed by a member function of the class, or
+  * one in the initializer of a member variable of the class.
+  */
+ Expr getAnEnclosedExpression() {
+   exists(MemberVariable var |
+     this = var.getDeclaringType() and
+     result = var.getInitializer().getExpr().getAChild*()
+   )
+   or
+   exists(MemberFunction fn |
+     this = fn.getDeclaringType() and
+     fn = result.getEnclosingFunction()
+   )
+ }
+
+ /** Gets a statement whose enclosing function is a member function of this class. */
+ Stmt getAnEnclosedStmt() {
+   this = result.getEnclosingFunction().(MemberFunction).getDeclaringType()
+ }
+
+ private string getAUsedHalsteadN1Operator() { // one tag per kind of construct used in this class
+   this.getAnEnclosedExpression() instanceof CommaExpr and result = "comma"
+   or
+   this.getAnEnclosedExpression() instanceof ReferenceToExpr and result = "refTo"
+   or
+   this.getAnEnclosedExpression() instanceof PointerDereferenceExpr and result = "dereference"
+   or
+   this.getAnEnclosedExpression() instanceof CStyleCast and result = "cCast"
+   or
+   this.getAnEnclosedExpression() instanceof StaticCast and result = "staticCast"
+   or
+   this.getAnEnclosedExpression() instanceof ConstCast and result = "constCast"
+   or
+   this.getAnEnclosedExpression() instanceof ReinterpretCast and result = "reinterpretCast"
+   or
+   this.getAnEnclosedExpression() instanceof DynamicCast and result = "dynamicCast"
+   or
+   this.getAnEnclosedExpression() instanceof SizeofExprOperator and result = "sizeofExpr"
+   or
+   this.getAnEnclosedExpression() instanceof SizeofTypeOperator and result = "sizeofType"
+   or
+   this.getAnEnclosedStmt() instanceof IfStmt and result = "ifVal"
+   or
+   this.getAnEnclosedStmt() instanceof SwitchStmt and result = "switchVal"
+   or
+   this.getAnEnclosedStmt() instanceof ForStmt and result = "forVal"
+   or
+   this.getAnEnclosedStmt() instanceof DoStmt and result = "doVal"
+   or
+   this.getAnEnclosedStmt() instanceof WhileStmt and result = "whileVal"
+   or
+   this.getAnEnclosedStmt() instanceof GotoStmt and result = "gotoVal"
+   or
+   this.getAnEnclosedStmt() instanceof ContinueStmt and result = "continueVal"
+   or
+   this.getAnEnclosedStmt() instanceof BreakStmt and result = "breakVal"
+   or
+   this.getAnEnclosedStmt() instanceof ReturnStmt and result = "returnVal"
+   or
+   this.getAnEnclosedStmt() instanceof SwitchCase and result = "caseVal"
+   or
+   this.getAnEnclosedStmt().(IfStmt).hasElse() and
+   result = "elseVal"
+   or
+   this = any(MemberFunction fn).getDeclaringType() and result = "function"
+   or
+   this = any(FriendDecl fd).getDeclaringClass() and result = "friendDecl"
+ }
+
+ /**
+  * Gets the Halstead "n1" metric: this is the total number of distinct operators
+  * in this class. Operators are defined as in the "N1" metric (`getHalsteadN1`).
+  *
+  * The result is one more than the number of distinct operators found, so it is
+  * never zero.
+  */
+ int getHalsteadN1Distinct() {
+   result =
+     // avoid 0 values
+     1 +
+       // distinct operator spellings among the `Operation`s enclosed in this class
+       count(string s |
+         exists(Operation op | op = this.getAnEnclosedExpression() and s = op.getOperator())
+       ) +
+       // distinct kinds of non-`Operation` operators; the receiver is written
+       // explicitly, like every other member call in this class
+       count(string s | s = this.getAUsedHalsteadN1Operator())
+ }
+
+ /**
+  * Gets the Halstead "n2" metric, i.e. the number of distinct operands in this
+  * class. An operand is either a variable, constant, type name, or function name.
+  */
+ int getHalsteadN2Distinct() {
+   exists(int accessedNames, int calledFunctions, int declarations |
+     // distinct names of the targets of accesses enclosed in this class
+     accessedNames =
+       count(string name |
+         exists(Access acc | acc = this.getAnEnclosedExpression() and name = acc.getTarget().getName())
+       ) and
+     // distinct functions called from expressions enclosed in this class
+     calledFunctions =
+       count(Function callee |
+         exists(FunctionCall call | call = this.getAnEnclosedExpression() and callee = call.getTarget())
+       ) and
+     // Approximate: count declarations once more to account for the type name
+     declarations = count(Declaration d | d.getParentScope*() = this) and
+     // the leading 1 avoids 0 values
+     result = 1 + accessedNames + calledFunctions + declarations
+   )
+ }
+
+ /**
+  * Gets the Halstead length of this class, computed as the N1 Halstead metric
+  * plus the N2 Halstead metric.
+  */
+ int getHalsteadLength() {
+   exists(int n1Total, int n2Total |
+     n1Total = this.getHalsteadN1() and
+     n2Total = this.getHalsteadN2() and
+     result = n1Total + n2Total
+   )
+ }
+
+ /**
+  * Gets the Halstead vocabulary size of this class, computed as the n1 Halstead
+  * metric (`getHalsteadN1Distinct`) plus the n2 Halstead metric (`getHalsteadN2Distinct`).
+  */
+ int getHalsteadVocabulary() {
+   exists(int n1Distinct, int n2Distinct |
+     n1Distinct = this.getHalsteadN1Distinct() and
+     n2Distinct = this.getHalsteadN2Distinct() and
+     result = n1Distinct + n2Distinct
+   )
+ }
+
+ /**
+  * Gets the Halstead volume of this class: the Halstead length multiplied by the
+  * base-2 logarithm of the Halstead vocabulary. It represents the information
+  * content of the class.
+  */
+ float getHalsteadVolume() {
+   exists(int length, int vocabulary |
+     length = this.getHalsteadLength() and
+     vocabulary = this.getHalsteadVocabulary() and
+     result = length.(float) * vocabulary.log2()
+   )
+ }
+
+ /**
+  * Gets the Halstead difficulty value of this class, computed as
+  * `(n1 * N2) / (2 * n2)`: proportional to the number of unique operators, and
+  * to the ratio of total operands to unique operands.
+  */
+ float getHalsteadDifficulty() {
+   exists(int distinctOperators, int totalOperands, int distinctOperands |
+     distinctOperators = this.getHalsteadN1Distinct() and
+     totalOperands = this.getHalsteadN2() and
+     distinctOperands = this.getHalsteadN2Distinct() and
+     result = (distinctOperators * totalOperands).(float) / (2 * distinctOperands).(float)
+   )
+ }
+
+ /**
+  * Gets the Halstead level of this class: the inverse of its _difficulty_, or
+  * `0.0` when the difficulty is itself `0.0`.
+  */
+ float getHalsteadLevel() {
+   exists(float difficulty | difficulty = this.getHalsteadDifficulty() |
+     difficulty != 0.0 and result = 1.0 / difficulty
+     or
+     // guard against dividing by zero
+     not difficulty != 0.0 and result = 0.0
+   )
+ }
+
+ /**
+  * Gets the Halstead implementation effort for this class, computed as the
+  * volume multiplied by the difficulty.
+  */
+ float getHalsteadEffort() {
+   exists(float volume, float difficulty |
+     volume = this.getHalsteadVolume() and
+     difficulty = this.getHalsteadDifficulty() and
+     result = volume * difficulty
+   )
+ }
+
+ /**
+  * Gets the Halstead _delivered bugs_ metric for this class, computed as the
+  * effort raised to the power 2/3 and divided by 3000. This metric correlates
+  * with the complexity of the software but is known to be an underestimate of
+  * bug counts.
+  */
+ float getHalsteadDeliveredBugs() {
+   exists(float effort | effort = this.getHalsteadEffort() |
+     result = effort.pow(2.0 / 3.0) / 3000.0
+   )
+ }
+}
+
+/**
+ * Holds if class `source` directly depends on class `dest` through one of the
+ * relationships listed in the body: inheritance, nesting, friend declarations,
+ * the types of member variables, member function signatures, nested typedefs
+ * and casts, and calls to or accesses of members of `dest`.
+ *
+ * NOTE(review): this predicate is `pragma[noopt]`, so the conjuncts are
+ * presumably evaluated in the order written, and the seemingly redundant
+ * `instanceof` checks look like explicit type restrictions needed for that.
+ * Confirm before reordering or removing any conjunct.
+ */
+pragma[noopt]
+private predicate dependsOnClassSimple(Class source, Class dest) {
+ (
+ // a class depends on the classes it inherits from
+ source.derivesFrom(dest)
+ or
+ // a nested class depends on its enclosing class
+ source.getDeclaringType() = dest and source instanceof Class
+ or
+ // a class depends on its friend classes
+ exists(FriendDecl fd | source.getAFriendDecl() = fd and fd.getFriend() = dest)
+ or
+ // a class depends on the return types of its friend functions
+ exists(FriendDecl fd, Function f, Type t |
+ source.getAFriendDecl() = fd and fd.getFriend() = f and f.getType() = t and t.refersTo(dest)
+ )
+ or
+ // a class depends on the parameter types of its friend functions
+ exists(FriendDecl fd, Function f, Parameter p, Type t |
+ source.getAFriendDecl() = fd and
+ fd.getFriend() = f and
+ f.getAParameter() = p and
+ p.getType() = t and
+ t.refersTo(dest)
+ )
+ or
+ // a class depends on the types of its member variables
+ exists(MemberVariable v, Type t |
+ v.getDeclaringType() = source and
+ v.getType() = t and
+ t.refersTo(dest) and
+ v instanceof MemberVariable
+ )
+ or
+ // a class depends on the return types of its member functions
+ exists(MemberFunction f, Type t |
+ f.getDeclaringType() = source and
+ f instanceof MemberFunction and
+ f.getType() = t and
+ t.refersTo(dest)
+ )
+ or
+ // a class depends on the argument types of its member functions
+ exists(MemberFunction f, Parameter p, Type t |
+ f.getDeclaringType() = source and
+ f instanceof MemberFunction and
+ f.getAParameter() = p and
+ p.getType() = t and
+ t.refersTo(dest)
+ )
+ or
+ // a class depends on the base types of the typedef types nested in it
+ exists(NestedTypedefType t, Type td |
+ t.getDeclaringType() = source and
+ t.getBaseType() = td and
+ t instanceof NestedTypedefType and
+ td.refersTo(dest)
+ )
+ or
+ // a class depends on the type names used in casts in functions nested in it
+ exists(Cast c, Function m, Type t |
+ m.getDeclaringType() = source and
+ m = c.getEnclosingFunction() and
+ c instanceof Cast and
+ c.getType() = t and
+ t.refersTo(dest)
+ )
+ or
+ // a class depends on the type names used in casts in initialization of member variables
+ exists(Cast c, Variable m, Type t |
+ m.getDeclaringType() = source and
+ m = c.getEnclosingVariable() and
+ c instanceof Cast and
+ c.getType() = t and
+ t.refersTo(dest)
+ )
+ or
+ // a class depends on classes for which a call to its member function is done from a function
+ exists(MemberFunction target, MemberFunction f, Locatable l |
+ f.getDeclaringType() = source and
+ f instanceof MemberFunction and
+ f.calls(target, l) and
+ target instanceof MemberFunction and
+ target.getDeclaringType() = dest
+ )
+ or
+ // a class depends on classes for which a call to its member function is done from a member variable initializer
+ exists(MemberFunction target, FunctionCall c, MemberVariable v |
+ v.getDeclaringType() = source and
+ v instanceof MemberVariable and
+ c.getEnclosingVariable() = v and
+ c instanceof FunctionCall and
+ c.getTarget() = target and
+ target instanceof MemberFunction and
+ target.getDeclaringType() = dest
+ )
+ or
+ // a class(source) depends on classes(dest) for which its member functions(mf) are accessed(fa) from a member function(f)
+ exists(MemberFunction f, FunctionAccess fa, MemberFunction mf |
+ f.getDeclaringType() = source and
+ f instanceof MemberFunction and
+ fa.getEnclosingFunction() = f and
+ fa.getTarget() = mf and
+ mf.getDeclaringType() = dest and
+ mf instanceof MemberFunction and
+ fa instanceof FunctionAccess
+ )
+ or
+ // a class depends on classes for which its member functions are accessed from a member variable initializer
+ exists(MemberVariable v, FunctionAccess fa, MemberFunction mf |
+ v.getDeclaringType() = source and
+ v instanceof MemberVariable and
+ fa.getEnclosingVariable() = v and
+ fa.getTarget() = mf and
+ mf.getDeclaringType() = dest and
+ fa instanceof FunctionAccess and
+ mf instanceof MemberFunction
+ )
+ or
+ // a class depends on classes for which its member variables are accessed from a member function
+ exists(MemberFunction f, VariableAccess va, MemberVariable mv |
+ f.getDeclaringType() = source and
+ f instanceof MemberFunction and
+ va.getEnclosingFunction() = f and
+ va instanceof VariableAccess and
+ va.getTarget() = mv and
+ mv.getDeclaringType() = dest and
+ mv instanceof MemberVariable
+ )
+ or
+ // a class depends on classes for which its member variables are accessed from a member variable initializer
+ exists(MemberVariable v, VariableAccess va, MemberVariable mv |
+ v.getDeclaringType() = source and
+ v instanceof MemberVariable and
+ va.getEnclosingVariable() = v and
+ va instanceof VariableAccess and
+ va.getTarget() = mv and
+ mv.getDeclaringType() = dest and
+ mv instanceof MemberVariable
+ )
+ or
+ // a class depends on enums for which its enum constants are accessed from a member function
+ // NOTE(review): `dest` is typed `Class`, so this disjunct (and the next one) can
+ // only hold if the declaring enum is also a `Class` - verify these are reachable.
+ exists(MemberFunction f, EnumConstantAccess ea, EnumConstant e |
+ f.getDeclaringType() = source and
+ f instanceof MemberFunction and
+ ea.getEnclosingFunction() = f and
+ ea.getTarget() = e and
+ e.getDeclaringEnum() = dest and
+ ea instanceof EnumConstantAccess
+ )
+ or
+ // a class depends on enums for which its enum constants are accessed from a member variable initializer
+ exists(MemberVariable v, EnumConstantAccess ea, EnumConstant e |
+ v.getDeclaringType() = source and
+ v instanceof MemberVariable and
+ ea.getEnclosingVariable() = v and
+ ea instanceof EnumConstantAccess and
+ ea.getTarget() = e and
+ e.getDeclaringEnum() = dest
+ )
+ ) and
+ // explicit restriction of `dest`, applied after all the disjuncts above
+ dest instanceof Class
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricFile.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricFile.qll
new file mode 100644
index 00000000000..f12d1011865
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricFile.qll
@@ -0,0 +1,298 @@
+import cpp
+
+/**
+ * A wrapper that provides metrics for a C/C++ file: size (lines, declarations),
+ * coupling with other files, and the Halstead family of complexity metrics.
+ */
+class MetricFile extends File {
+  /** Gets the number of top-level functions in this file. */
+  int getNumberOfTopLevelFunctions() {
+    result = count(Function f | f.isTopLevel() and f.getFile() = this)
+  }
+
+  /** Gets the number of classes in this file. */
+  int getNumberOfClasses() { result = count(Class c | c.getFile() = this) }
+
+  /** Gets the number of user-defined types in this file. */
+  int getNumberOfUserTypes() { result = count(UserType t | t.getFile() = this) }
+
+  /** Gets the number of lines in this file. */
+  int getNumberOfLines() { numlines(underlyingElement(this), result, _, _) }
+
+  /** Gets the number of lines of code in this file. */
+  int getNumberOfLinesOfCode() { numlines(underlyingElement(this), _, result, _) }
+
+  /** Gets the number of lines of comments in this file. */
+  int getNumberOfLinesOfComments() { numlines(underlyingElement(this), _, _, result) }
+
+  /** Gets the number of incoming file dependencies: files that have this file as a dependency. */
+  int getAfferentCoupling() { result = count(MetricFile that | that.getAFileDependency() = this) }
+
+  /** Gets the number of outgoing file dependencies: files that this file depends on. */
+  int getEfferentCoupling() { result = count(MetricFile that | this.getAFileDependency() = that) }
+
+  /*
+   * HALSTEAD METRICS
+   */
+
+  /**
+   * Gets the Halstead "N1" metric for this file. This is the total number of
+   * operators in the file. Operators are taken to be all operators in
+   * expressions (`+`, `*`, `&`, `->`, `=`, ...) as well as most statements.
+   *
+   * The value is the sum of the N1 metrics of the functions in this file, plus
+   * one for each class, global variable, member variable, type declaration
+   * entry and friend declaration in the file.
+   */
+  int getHalsteadN1() {
+    result =
+      sum(MetricFunction mf, int toSum | mf.getFile() = this and toSum = mf.getHalsteadN1() | toSum)
+        +
+        // Each class counts once as an operator
+        count(Class c | c.getFile() = this) +
+        // Each variable declaration that is not in a function counts once as an operator
+        count(GlobalVariable gv | gv.getFile() = this) +
+        count(MemberVariable mv | mv.getFile() = this) +
+        // Type declarations - to count the definition tokens
+        count(TypeDeclarationEntry decl | decl.getFile() = this) +
+        // Friend declarations
+        count(FriendDecl f | f.getFile() = this)
+  }
+
+  /**
+   * Gets the Halstead "N2" metric for this file: this is the total number of operands.
+   * An operand is either a variable, constant, type name, class name, or function name.
+   *
+   * The value is the sum of the N2 metrics of the functions in this file, plus
+   * one for each class, global variable, member variable, type declaration
+   * entry and enum constant in the file.
+   */
+  int getHalsteadN2() {
+    result =
+      sum(MetricFunction mf, int toSum | mf.getFile() = this and toSum = mf.getHalsteadN2() | toSum)
+        +
+        // Each class counts once as an operand
+        count(Class c | c.getFile() = this) +
+        // Each variable declaration that is not in a function counts once as an operand
+        count(GlobalVariable gv | gv.getFile() = this) +
+        count(MemberVariable mv | mv.getFile() = this) +
+        // Type declarations - to count the type names
+        count(TypeDeclarationEntry decl | decl.getFile() = this) +
+        // Enum constant declarations to count the name
+        count(EnumConstant ec | ec.getFile() = this)
+  }
+
+  /**
+   * Gets a label for a kind of non-`Operation` operator (certain expressions,
+   * statements and declarations) that occurs at least once in this file. Each
+   * kind yields one fixed string, so counting the distinct results counts the
+   * distinct kinds in use.
+   */
+  private string getAUsedHalsteadN1Operator() {
+    exists(CommaExpr e | e.getFile() = this) and result = "comma"
+    or
+    exists(ReferenceToExpr e | e.getFile() = this) and result = "refTo"
+    or
+    exists(PointerDereferenceExpr e | e.getFile() = this) and result = "dereference"
+    or
+    exists(CStyleCast e | e.getFile() = this) and result = "cCast"
+    or
+    exists(StaticCast e | e.getFile() = this) and result = "staticCast"
+    or
+    exists(ConstCast e | e.getFile() = this) and result = "constCast"
+    or
+    exists(ReinterpretCast e | e.getFile() = this) and result = "reinterpretCast"
+    or
+    exists(DynamicCast e | e.getFile() = this) and result = "dynamicCast"
+    or
+    exists(SizeofExprOperator e | e.getFile() = this) and result = "sizeofExpr"
+    or
+    exists(SizeofTypeOperator e | e.getFile() = this) and result = "sizeofType"
+    or
+    exists(IfStmt e | e.getFile() = this) and result = "ifVal"
+    or
+    exists(SwitchStmt e | e.getFile() = this) and result = "switchVal"
+    or
+    exists(ForStmt e | e.getFile() = this) and result = "forVal"
+    or
+    exists(DoStmt e | e.getFile() = this) and result = "doVal"
+    or
+    exists(WhileStmt e | e.getFile() = this) and result = "whileVal"
+    or
+    exists(GotoStmt e | e.getFile() = this) and result = "gotoVal"
+    or
+    exists(ContinueStmt e | e.getFile() = this) and result = "continueVal"
+    or
+    exists(BreakStmt e | e.getFile() = this) and result = "breakVal"
+    or
+    exists(ReturnStmt e | e.getFile() = this) and result = "returnVal"
+    or
+    exists(SwitchCase e | e.getFile() = this) and result = "caseVal"
+    or
+    // an `if` statement that has an `else` branch
+    exists(IfStmt s | s.getFile() = this and s.hasElse()) and
+    result = "elseVal"
+    or
+    exists(Function f | f.getFile() = this) and result = "function"
+    or
+    exists(Class c | c.getFile() = this) and result = "classDef"
+    or
+    exists(TypeDeclarationEntry e | e.getFile() = this) and result = "typeDecl"
+    or
+    exists(FriendDecl e | e.getFile() = this) and result = "friendDecl"
+  }
+
+  /**
+   * Gets the Halstead "n1" metric: this is the total number of distinct operators
+   * in this file. Operators are defined as in the "N1" metric (`getHalsteadN1`).
+   *
+   * The result is one more than the number of distinct operators found, so it
+   * is never zero.
+   */
+  int getHalsteadN1Distinct() {
+    result =
+      // avoid 0 values
+      1 + count(string s | exists(Operation op | op.getFile() = this and s = op.getOperator())) +
+        // explicit receiver, consistent with the other member calls in this class
+        count(string s | s = this.getAUsedHalsteadN1Operator())
+  }
+
+  /**
+   * Gets the Halstead "n2" metric: this is the number of distinct operands in this
+   * file. An operand is either a variable, constant, type name, or function name.
+   *
+   * The result is one more than the number of distinct operands found, so it
+   * is never zero.
+   */
+  int getHalsteadN2Distinct() {
+    result =
+      // avoid 0 values
+      1 + count(string s | exists(Access a | a.getFile() = this and s = a.getTarget().getName())) +
+        count(Function f | exists(FunctionCall fc | fc.getFile() = this and f = fc.getTarget())) +
+        // Approximate: count declarations once more to account for the type name
+        count(Declaration d | d.getFile() = this)
+  }
+
+  /**
+   * Gets the Halstead length of this file. This is the sum of the N1 and N2 Halstead metrics.
+   */
+  int getHalsteadLength() { result = this.getHalsteadN1() + this.getHalsteadN2() }
+
+  /**
+   * Gets the Halstead vocabulary size of this file. This is the sum of the n1 and n2 Halstead metrics.
+   */
+  int getHalsteadVocabulary() {
+    result = this.getHalsteadN1Distinct() + this.getHalsteadN2Distinct()
+  }
+
+  /**
+   * Gets the Halstead volume of this file. This is the Halstead length multiplied by the
+   * base-2 logarithm of the Halstead vocabulary. It represents the information content of the file.
+   */
+  float getHalsteadVolume() {
+    result = this.getHalsteadLength().(float) * this.getHalsteadVocabulary().log2()
+  }
+
+  /**
+   * Gets the Halstead difficulty value of this file, computed as `(n1 * N2) / (2 * n2)`.
+   * This is proportional to the number of unique operators, and further proportional
+   * to the ratio of total operands to unique operands.
+   */
+  float getHalsteadDifficulty() {
+    result =
+      (this.getHalsteadN1Distinct() * this.getHalsteadN2()).(float) /
+        (2 * this.getHalsteadN2Distinct()).(float)
+  }
+
+  /**
+   * Gets the Halstead level of this file. This is the inverse of the difficulty of the file,
+   * or `0.0` when the difficulty is `0.0`.
+   */
+  float getHalsteadLevel() {
+    exists(float difficulty |
+      difficulty = this.getHalsteadDifficulty() and
+      // guard against dividing by zero
+      if difficulty != 0.0 then result = 1.0 / difficulty else result = 0.0
+    )
+  }
+
+  /**
+   * Gets the Halstead implementation effort for this file. This is the product of the volume and difficulty.
+   */
+  float getHalsteadEffort() { result = this.getHalsteadVolume() * this.getHalsteadDifficulty() }
+
+  /**
+   * Gets the Halstead 'delivered bugs' metric for this file, computed as the effort raised to the
+   * power 2/3 and divided by 3000. This metric correlates with the complexity of
+   * the software, but is known to be an underestimate of bug counts.
+   */
+  float getHalsteadDeliveredBugs() { result = this.getHalsteadEffort().pow(2.0 / 3.0) / 3000.0 }
+
+  /** Gets a file dependency of this file, as computed by `dependsOnFileSimple`. */
+  File getAFileDependency() { dependsOnFileSimple(this, result.getMetrics()) }
+}
+
+/** Holds if the definition location of class `c` is in `file`. */
+private predicate aClassFile(Class c, File file) {
+  exists(Location definition |
+    definition = c.getDefinitionLocation() and
+    file = definition.getFile()
+  )
+}
+
+/**
+ * Holds if file `source` directly depends on file `dest` through one of the
+ * relationships listed in the body: class inheritance, nesting and friendship,
+ * function calls, accesses, the types of declared variables and functions,
+ * macro invocations, typedef base types and casts.
+ *
+ * NOTE(review): this predicate is `pragma[noopt]`, so the conjuncts are
+ * presumably evaluated in the order written, and the seemingly redundant
+ * `instanceof` checks look like explicit type restrictions needed for that.
+ * Confirm before reordering or removing any conjunct.
+ */
+pragma[noopt]
+private predicate dependsOnFileSimple(MetricFile source, MetricFile dest) {
+ // class derives from class
+ exists(Class fromClass, Class toClass |
+ aClassFile(fromClass, source) and
+ fromClass.derivesFrom(toClass) and
+ aClassFile(toClass, dest)
+ )
+ or
+ // class nested in another class
+ exists(Class fromClass, Class toClass |
+ aClassFile(fromClass, source) and
+ fromClass.getDeclaringType() = toClass and
+ toClass.getFile() = dest
+ )
+ or
+ // class has friend class
+ exists(Class fromClass, Class toClass, FriendDecl fd |
+ aClassFile(fromClass, source) and
+ fromClass.getAFriendDecl() = fd and
+ fd.getFriend() = toClass and
+ toClass instanceof Class and
+ dest = toClass.getFile()
+ )
+ or
+ // function call in `source` whose target is in `dest`; multiply-defined
+ // targets and calls inside multiply-defined functions are ignored
+ exists(FunctionCall ca, Function f |
+ ca instanceof FunctionCall and
+ ca.getFile() = source and
+ ca.getTarget() = f and
+ f.getFile() = dest and
+ not f.isMultiplyDefined() and
+ not exists(Function ef | ef = ca.getEnclosingFunction() and ef.isMultiplyDefined())
+ )
+ or
+ // access in `source` whose target declaration is in `dest`; accesses inside
+ // multiply-defined functions are ignored
+ exists(Access a, Declaration d |
+ a instanceof Access and
+ a.getFile() = source and
+ a.getTarget() = d and
+ d.getFile() = dest and
+ not exists(Function ef | ef = a.getEnclosingFunction() and ef.isMultiplyDefined())
+ )
+ or
+ // variable declaration entry in `source` whose type refers to a user type in `dest`
+ exists(Variable v, VariableDeclarationEntry e, Type vt, UserType t |
+ e instanceof VariableDeclarationEntry and
+ e.getFile() = source and
+ v.getADeclarationEntry() = e and
+ vt = v.getType() and
+ vt.refersTo(t) and
+ t instanceof UserType and
+ t.getFile() = dest
+ )
+ or
+ // function declaration entry in `source` whose function type (`getType()`) refers to a user type in `dest`
+ exists(Function f, FunctionDeclarationEntry e, Type ft, UserType t |
+ e instanceof FunctionDeclarationEntry and
+ e.getFile() = source and
+ f.getADeclarationEntry() = e and
+ ft = f.getType() and
+ ft.refersTo(t) and
+ t instanceof UserType and
+ t.getFile() = dest
+ )
+ or
+ // macro invocation in `source` of a macro in `dest`
+ exists(MacroInvocation mi, Macro m |
+ mi instanceof MacroInvocation and
+ mi.getFile() = source and
+ mi.getMacro() = m and
+ m.getFile() = dest
+ )
+ or
+ // typedef declaration entry in `source` whose base type refers to a user type in `dest`
+ exists(TypedefType t, TypeDeclarationEntry e, Type bt, UserType u |
+ e instanceof TypeDeclarationEntry and
+ e.getFile() = source and
+ t.getADeclarationEntry() = e and
+ bt = t.getBaseType() and
+ bt.refersTo(u) and
+ u instanceof UserType and
+ u.getFile() = dest
+ )
+ or
+ // cast in `source` whose type refers to a user type in `dest`; casts inside
+ // multiply-defined functions are ignored
+ exists(Cast c, Type t, UserType u |
+ c instanceof Cast and
+ c.getFile() = source and
+ c.getType() = t and
+ t.refersTo(u) and
+ u instanceof UserType and
+ u.getFile() = dest and
+ not exists(Function ef | ef = c.getEnclosingFunction() and ef.isMultiplyDefined())
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricFunction.qll
new file mode 100644
index 00000000000..45036cfddf3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricFunction.qll
@@ -0,0 +1,387 @@
+import cpp
+
+/**
+ * A wrapper that provides metrics for a C/C++ function: size, call counts,
+ * cyclomatic and branching complexity, coupling, Halstead metrics and
+ * statement nesting depth.
+ */
+class MetricFunction extends Function {
+  /** Gets the number of parameters. */
+  override int getNumberOfParameters() { result = count(this.getAParameter()) }
+
+  /** Gets the number of lines in this function. */
+  int getNumberOfLines() { numlines(underlyingElement(this), result, _, _) }
+
+  /** Gets the number of lines of code in this function. */
+  int getNumberOfLinesOfCode() { numlines(underlyingElement(this), _, result, _) }
+
+  /** Gets the number of lines of comments in this function. */
+  int getNumberOfLinesOfComments() { numlines(underlyingElement(this), _, _, result) }
+
+  /**
+   * Gets the ratio of lines of comments to total lines in this function (between 0.0 and 1.0).
+   * The ratio is `0.0` when the function has no lines.
+   */
+  float getCommentRatio() {
+    if this.getNumberOfLines() = 0
+    then result = 0.0
+    else result = this.getNumberOfLinesOfComments().(float) / this.getNumberOfLines().(float)
+  }
+
+  /**
+   * Gets the number of function calls in this function. Calls whose target is
+   * an `Operator` are not counted.
+   */
+  int getNumberOfCalls() {
+    // Checking that the name of the target exists is a workaround for a DB inconsistency
+    result =
+      count(FunctionCall c |
+        c.getEnclosingFunction() = this and
+        not c.getTarget() instanceof Operator and
+        exists(c.getTarget().getName())
+      )
+  }
+
+  /**
+   * Gets the cyclomatic complexity of this function. This is defined as the
+   * number of branching statements (`if`, `while`, `do`, `for`, and
+   * non-fallthrough `case`) plus the number of branching expressions (`?`,
+   * `&&`, `||` and, as implemented by `branchingExpr`, `NotExpr`) plus one.
+   *
+   * There is no result for multiply-defined functions.
+   */
+  int getCyclomaticComplexity() {
+    result = 1 + cyclomaticComplexityBranches(this.getBlock()) and
+    not this.isMultiplyDefined()
+  }
+
+  /**
+   * Gets the branching complexity of this function. This is a measure derived
+   * from cyclomatic complexity, but it reflects only the branches that make
+   * the code difficult to read (as opposed to cyclomatic complexity, which
+   * attempts to evaluate how difficult the code is to test).
+   *
+   * It is one plus the number of `if`, `while`, `do`, `for` and `switch`
+   * statements that are not in a macro expansion. There is no result for
+   * multiply-defined functions.
+   */
+  int getBranchingComplexity() {
+    result =
+      count(IfStmt stmt | stmt.getEnclosingFunction() = this and not stmt.isInMacroExpansion()) +
+        count(WhileStmt stmt | stmt.getEnclosingFunction() = this and not stmt.isInMacroExpansion())
+        + count(DoStmt stmt | stmt.getEnclosingFunction() = this and not stmt.isInMacroExpansion()) +
+        count(ForStmt stmt | stmt.getEnclosingFunction() = this and not stmt.isInMacroExpansion()) +
+        count(SwitchStmt stmt | stmt.getEnclosingFunction() = this and not stmt.isInMacroExpansion())
+        + 1 and
+    not this.isMultiplyDefined()
+  }
+
+  /**
+   * Gets the number of incoming dependencies: functions that call or access
+   * this function.
+   */
+  int getAfferentCoupling() {
+    result =
+      count(Function f |
+        exists(Locatable l |
+          f.calls(this, l) or
+          f.accesses(this, l)
+        )
+      )
+  }
+
+  /**
+   * Gets the number of outgoing dependencies: functions that are called or
+   * accessed by this function.
+   */
+  int getEfferentCoupling() {
+    result =
+      count(Function f |
+        exists(Locatable l |
+          this.calls(f, l) or
+          this.accesses(f, l)
+        )
+      )
+  }
+
+  /*
+   * Halstead Metrics
+   */
+
+  /**
+   * Gets the Halstead "N1" metric: this is the total number of operators in
+   * this function. Operators are taken to be all operators in expressions
+   * (`+`, `*`, `&`, `->`, `=`, ...) as well as most statements.
+   */
+  int getHalsteadN1() {
+    // The `1 +` is to account for the function itself
+    result =
+      1 + count(Operation op | op.getEnclosingFunction() = this) +
+        count(CommaExpr e | e.getEnclosingFunction() = this) +
+        count(ReferenceToExpr e | e.getEnclosingFunction() = this) +
+        count(PointerDereferenceExpr e | e.getEnclosingFunction() = this) +
+        count(Cast e | e.getEnclosingFunction() = this) +
+        count(SizeofOperator e | e.getEnclosingFunction() = this) +
+        count(TypeidOperator e | e.getEnclosingFunction() = this) +
+        count(ControlStructure s | s.getEnclosingFunction() = this) +
+        count(JumpStmt s | s.getEnclosingFunction() = this) +
+        count(ReturnStmt s | s.getEnclosingFunction() = this) +
+        count(SwitchCase c | c.getEnclosingFunction() = this) +
+        // Count the 'else' branches
+        count(IfStmt s | s.getEnclosingFunction() = this and s.hasElse())
+  }
+
+  /**
+   * Gets the Halstead "N2" metric: this is the total number of operands in this
+   * function. An operand is either a variable, constant, type name, or function name.
+   */
+  int getHalsteadN2() {
+    // The `1 +` is to account for the function itself
+    result =
+      1 + count(Access a | a.getEnclosingFunction() = this) +
+        count(FunctionCall fc | fc.getEnclosingFunction() = this) +
+        // Approximate: count declarations twice to account for the type name
+        // and the identifier
+        2 * count(Declaration d | d.getParentScope+() = this)
+  }
+
+  /**
+   * Gets a label for a kind of non-`Operation` operator (certain expressions
+   * and statements) that occurs at least once in this function. Each kind
+   * yields one fixed string, so counting the distinct results counts the
+   * distinct kinds in use.
+   */
+  private string getAUsedHalsteadN1Operator() {
+    exists(CommaExpr e | e.getEnclosingFunction() = this) and result = "comma"
+    or
+    exists(ReferenceToExpr e | e.getEnclosingFunction() = this) and result = "refTo"
+    or
+    exists(PointerDereferenceExpr e | e.getEnclosingFunction() = this) and result = "dereference"
+    or
+    exists(CStyleCast e | e.getEnclosingFunction() = this) and result = "cCast"
+    or
+    exists(StaticCast e | e.getEnclosingFunction() = this) and result = "staticCast"
+    or
+    exists(ConstCast e | e.getEnclosingFunction() = this) and result = "constCast"
+    or
+    exists(ReinterpretCast e | e.getEnclosingFunction() = this) and result = "reinterpretCast"
+    or
+    exists(DynamicCast e | e.getEnclosingFunction() = this) and result = "dynamicCast"
+    or
+    exists(SizeofExprOperator e | e.getEnclosingFunction() = this) and result = "sizeofExpr"
+    or
+    exists(SizeofTypeOperator e | e.getEnclosingFunction() = this) and result = "sizeofType"
+    or
+    exists(IfStmt e | e.getEnclosingFunction() = this) and result = "ifVal"
+    or
+    exists(SwitchStmt e | e.getEnclosingFunction() = this) and result = "switchVal"
+    or
+    exists(ForStmt e | e.getEnclosingFunction() = this) and result = "forVal"
+    or
+    exists(DoStmt e | e.getEnclosingFunction() = this) and result = "doVal"
+    or
+    exists(WhileStmt e | e.getEnclosingFunction() = this) and result = "whileVal"
+    or
+    exists(GotoStmt e | e.getEnclosingFunction() = this) and result = "gotoVal"
+    or
+    exists(ContinueStmt e | e.getEnclosingFunction() = this) and result = "continueVal"
+    or
+    exists(BreakStmt e | e.getEnclosingFunction() = this) and result = "breakVal"
+    or
+    exists(ReturnStmt e | e.getEnclosingFunction() = this) and result = "returnVal"
+    or
+    exists(SwitchCase e | e.getEnclosingFunction() = this) and result = "caseVal"
+    or
+    // an `if` statement that has an `else` branch
+    exists(IfStmt s | s.getEnclosingFunction() = this and s.hasElse()) and
+    result = "elseVal"
+  }
+
+  /**
+   * Gets the Halstead "n1" metric: this is the total number of distinct operators
+   * in this function. Operators (as in the N1 metric) are all operators in expressions
+   * as well as most statements.
+   */
+  int getHalsteadN1Distinct() {
+    // The `1 +` is to account for the function itself
+    result =
+      1 +
+        // distinct operator spellings among the `Operation`s in this function
+        count(string s |
+          exists(Operation op | op.getEnclosingFunction() = this and s = op.getOperator())
+        ) +
+        // one for each kind of non-`Operation` operator that is used at least once
+        count(string s | s = this.getAUsedHalsteadN1Operator())
+  }
+
+  /**
+   * Gets the Halstead "n2" metric: this is the number of distinct operands in this
+   * function. An operand is either a variable, constant, type name, or function name.
+   */
+  int getHalsteadN2Distinct() {
+    // The `1 +` is to account for the function itself
+    result =
+      1 +
+        count(string s |
+          exists(Access a | a.getEnclosingFunction() = this and s = a.getTarget().getName())
+        ) +
+        count(Function f |
+          exists(FunctionCall fc | fc.getEnclosingFunction() = this and f = fc.getTarget())
+        ) +
+        // Approximate: count declarations once more to account for the type name
+        count(Declaration d | d.getParentScope+() = this)
+  }
+
+  /**
+   * Gets the Halstead length of this function. This is the sum of the N1 and N2 Halstead metrics.
+   */
+  int getHalsteadLength() { result = this.getHalsteadN1() + this.getHalsteadN2() }
+
+  /**
+   * Gets the Halstead vocabulary size of this function. This is the sum of the n1 and n2 Halstead metrics.
+   */
+  int getHalsteadVocabulary() {
+    result = this.getHalsteadN1Distinct() + this.getHalsteadN2Distinct()
+  }
+
+  /**
+   * Gets the Halstead volume of this function. This is the Halstead length multiplied by the
+   * base-2 logarithm of the Halstead vocabulary. It represents the information content of the function.
+   */
+  float getHalsteadVolume() {
+    result = this.getHalsteadLength().(float) * this.getHalsteadVocabulary().log2()
+  }
+
+  /**
+   * Gets the Halstead difficulty value of this function, computed as `(n1 * N2) / (2 * n2)`.
+   * This is proportional to the number of unique operators, and further proportional
+   * to the ratio of total operands to unique operands.
+   */
+  float getHalsteadDifficulty() {
+    result =
+      (this.getHalsteadN1Distinct() * this.getHalsteadN2()).(float) /
+        (2 * this.getHalsteadN2Distinct()).(float)
+  }
+
+  /**
+   * Gets the Halstead level of this function. This is the inverse of the difficulty of the
+   * function, or `0.0` when the difficulty is `0.0`.
+   */
+  float getHalsteadLevel() {
+    exists(float difficulty |
+      difficulty = this.getHalsteadDifficulty() and
+      // guard against dividing by zero
+      if difficulty != 0.0 then result = 1.0 / difficulty else result = 0.0
+    )
+  }
+
+  /**
+   * Gets the Halstead implementation effort for this function. This is the product of the volume and difficulty.
+   */
+  float getHalsteadEffort() { result = this.getHalsteadVolume() * this.getHalsteadDifficulty() }
+
+  /**
+   * Gets the Halstead 'delivered bugs' metric for this function, computed as the effort raised to
+   * the power 2/3 and divided by 3000. This metric correlates with the complexity of
+   * the software, but is known to be an underestimate of bug counts.
+   */
+  float getHalsteadDeliveredBugs() { result = this.getHalsteadEffort().pow(2.0 / 3.0) / 3000.0 }
+
+  /**
+   * Gets the maximum nesting level of complex statements such as if, while in the function. A nesting depth of
+   * 2 would mean that there is, for example, an if statement nested in another if statement.
+   *
+   * There is no result for multiply-defined functions.
+   */
+  int getNestingDepth() {
+    result =
+      max(Stmt s, int aDepth | s.getEnclosingFunction() = this and nestingDepth(s, aDepth) | aDepth) and
+    not this.isMultiplyDefined()
+  }
+}
+
+// Cyclomatic-complexity branching points are binary, so every non-default
+// switch case should contribute one branching point, except for cases that
+// merely fall through to the next case.
+private predicate branchingSwitchCase(SwitchCase sc) {
+  not (
+    sc.isDefault()
+    or
+    // the case is directly followed by another case
+    sc.getASuccessor() instanceof SwitchCase
+    or
+    defaultFallThrough(sc)
+  )
+}
+
+// Holds if `sc` is the default case, or if one of its predecessors is a
+// switch case for which this predicate holds.
+private predicate defaultFallThrough(SwitchCase sc) {
+  defaultFallThrough(sc.getAPredecessor())
+  or
+  sc.isDefault()
+}
+
+// Holds if `stmt` counts as a branching statement for the computation of
+// cyclomatic complexity: a branching switch case, or an `if`, `for`, `do` or
+// `while` statement.
+private predicate branchingStmt(Stmt stmt) {
+  branchingSwitchCase(stmt)
+  or
+  stmt instanceof ForStmt
+  or
+  stmt instanceof DoStmt
+  or
+  stmt instanceof WhileStmt
+  or
+  stmt instanceof IfStmt
+}
+
+// Holds if `expr` counts as a branching expression for the computation of
+// cyclomatic complexity: a `ConditionalExpr`, `LogicalOrExpr`,
+// `LogicalAndExpr` or `NotExpr`.
+private predicate branchingExpr(Expr expr) {
+  expr instanceof ConditionalExpr
+  or
+  expr instanceof LogicalOrExpr
+  or
+  expr instanceof LogicalAndExpr
+  or
+  expr instanceof NotExpr
+}
+
+/**
+ * Gets the number of branching statements and branching expressions nested
+ * in block `b`, ignoring those in macro expansions. This is used for
+ * computing cyclomatic complexity.
+ */
+int cyclomaticComplexityBranches(BlockStmt b) {
+  exists(int stmtBranches, int exprBranches |
+    // branching statements that are transitive children of the block
+    stmtBranches =
+      count(Stmt s |
+        s = b.getAChild+() and
+        branchingStmt(s) and
+        not s.isInMacroExpansion()
+      ) and
+    // branching expressions whose enclosing statement is a transitive child of the block
+    exprBranches =
+      count(Expr e |
+        e.getEnclosingStmt() = b.getAChild+() and
+        branchingExpr(e) and
+        not e.isInMacroExpansion()
+      ) and
+    result = stmtBranches + exprBranches
+  )
+}
+
+/**
+ * Holds if `outer` is the parent of statement `inner` for the purposes of
+ * nesting depth. Parents that do not really make sense for nesting depth are
+ * skipped (see `skipParent`). An example is:
+ * `if (...) { } else if (...) { }`: we don't consider the second if nested.
+ * Blocks are also skipped, as are parents that have the same location as the
+ * child (typically they come from macros).
+ */
+private predicate realParent(Stmt inner, Stmt outer) {
+  // the direct parent is skipped: continue the search from that parent
+  skipParent(inner) and
+  realParent(inner.getParentStmt(), outer)
+  or
+  // otherwise the direct parent is the real parent
+  not skipParent(inner) and
+  outer = inner.getParentStmt()
+}
+
+/** Holds if the location of `s` is in file `f` and starts at line `line`, column `col`. */
+private predicate startsAt(Stmt s, File f, int line, int col) {
+  exists(Location where |
+    where = s.getLocation() and
+    col = where.getStartColumn() and
+    line = where.getStartLine() and
+    f = where.getFile()
+  )
+}
+
+/**
+ * Holds if the parent statement of `s` should not count as a nesting level:
+ * the parent is a block, `s` is an `if` statement forming the `else` branch
+ * of its parent `if`, or parent and child start at the same position.
+ */
+private predicate skipParent(Stmt s) {
+  exists(Stmt outer | outer = s.getParentStmt() |
+    // parent and child start at the same file, line and column
+    exists(File file, int line, int col |
+      startsAt(outer, file, line, col) and
+      startsAt(s, file, line, col)
+    )
+    or
+    outer instanceof BlockStmt
+    or
+    // `else if`: the child `if` is the `else` branch of the parent `if`
+    s = outer.(IfStmt).getElse() and
+    s instanceof IfStmt
+  )
+}
+
+/**
+ * Holds if `depth` is the number of statements reachable from `s` by
+ * following `realParent` one or more times.
+ */
+private predicate nestingDepth(Stmt s, int depth) {
+  count(Stmt ancestor | realParent+(s, ancestor)) = depth
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricNamespace.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricNamespace.qll
new file mode 100644
index 00000000000..41482c605b0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/metrics/MetricNamespace.qll
@@ -0,0 +1,113 @@
+import cpp
+
+/**
+ * A wrapper that provides metrics for a C/C++ namespace.
+ */
+class MetricNamespace extends Namespace {
+ /** Gets the number of incoming dependencies from other namespaces. */
+ int getAfferentCoupling() {
+ result = count(MetricNamespace that | that.getANamespaceDependency() = this)
+ }
+
+ /** Gets the number of outgoing dependencies on other namespaces. */
+ int getEfferentCoupling() {
+ result = count(MetricNamespace that | this.getANamespaceDependency() = that)
+ }
+
+ /**
+ * Gets the _instability_ of this namespace. Instability is a measure of how
+ * likely a namespace is to be influenced by changes to other namespaces. If
+ * this metric value is high, it is easily influenced, if it is low, the
+ * impact is likely to be minimal. Instability is estimated as the number of
+ * outgoing dependencies relative to the total number of dependencies.
+ */
+ float getInstability() {
+ exists(int ecoupling, int sumcoupling |
+ ecoupling = this.getEfferentCoupling() and
+ sumcoupling = ecoupling + this.getAfferentCoupling() and
+ sumcoupling > 0 and // no result when there are no dependencies at all
+ result = ecoupling / sumcoupling.(float)
+ )
+ }
+
+ /**
+ * Gets the _abstractness_ of this namespace. Abstractness measures the
+ * proportion of abstract classes in a namespace relative to the total number
+ * of classes in that namespace. A highly abstract namespace (where the
+ * metric value is close to 1) that is furthermore instable is likely to be
+ * useless: the class hierarchy has been over-engineered, and all those
+ * abstract classes are not heavily used.
+ */
+ float getAbstractness() {
+ exists(int i, int j |
+ i = count(Class c | c.getNamespace() = this) and // all classes
+ j =
+ count(Class c |
+ c.getNamespace() = this and
+ c.isAbstract()
+ ) and
+ result = j / i.(float) and
+ i > 0 // no result for a namespace without classes
+ )
+ }
+
+ /**
+ * Gets the _distance from main sequence_ of this namespace. This measure
+ * intends to capture the tradeoff between abstractness and instability: the
+ * ideal situation occurs when the sum of abstractness and instability is
+ * one. That is, a namespace is completely abstract and stable
+ * (abstractness=1 and instability=0) or it is concrete and instable
+ * (abstractness=0 and instability=1). We thus measure the distance from that
+ * ideal situation.
+ */
+ float getDistanceFromMain() {
+ exists(float r |
+ r = this.getAbstractness() + this.getInstability() - 1 and
+ ( // result is the absolute value of `r`
+ r >= 0 and result = r
+ or
+ r < 0 and result = -r
+ )
+ )
+ }
+
+ /** Gets a namespace dependency of this element. */
+ MetricNamespace getANamespaceDependency() {
+ exists(MetricClass c | // a class here depends on a class in `result`
+ c.getNamespace() = this and
+ c.getAClassDependency().getNamespace() = result
+ )
+ or
+ exists(FunctionCall c | // a call inside a function here targets `result`
+ c.getEnclosingFunction().getNamespace() = this and
+ c.getTarget().getNamespace() = result
+ )
+ or
+ exists(FunctionCall c | // a call inside a variable (initializer) here targets `result`
+ c.getEnclosingVariable().getNamespace() = this and
+ c.getTarget().getNamespace() = result
+ )
+ or
+ exists(Access a | // an access inside a function here targets `result`
+ a.getEnclosingFunction().getNamespace() = this and
+ a.getTarget().getNamespace() = result
+ )
+ or
+ exists(Access a | // an access inside a variable (initializer) here targets `result`
+ a.getEnclosingVariable().getNamespace() = this and
+ a.getTarget().getNamespace() = result
+ )
+ or
+ exists(Variable v, UserType t | // a variable's type refers to a user type in `result`
+ v.getNamespace() = this and
+ v.getType().refersTo(t) and
+ t.getNamespace() = result
+ )
+ or
+ exists(Function f, UserType t | // a function's type refers to a user type in `result`
+ f.getNamespace() = this and
+ f.getType().refersTo(t) and
+ t.getNamespace() = result
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/Models.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/Models.qll
new file mode 100644
index 00000000000..3eed4341cce
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/Models.qll
@@ -0,0 +1,39 @@
+private import implementations.Allocation
+private import implementations.Deallocation
+private import implementations.Fread
+private import implementations.Getenv
+private import implementations.Gets
+private import implementations.IdentityFunction
+private import implementations.Inet
+private import implementations.Iterator
+private import implementations.MemberFunction
+private import implementations.Memcpy
+private import implementations.Memset
+private import implementations.Printf
+private import implementations.Pure
+private import implementations.Strcat
+private import implementations.Strcpy
+private import implementations.Strdup
+private import implementations.Strftime
+private import implementations.Strtok
+private import implementations.Strset
+private import implementations.Strcrement
+private import implementations.Strnextc
+private import implementations.StdContainer
+private import implementations.StdPair
+private import implementations.StdMap
+private import implementations.StdSet
+private import implementations.StdString
+private import implementations.Swap
+private import implementations.GetDelim
+private import implementations.SmartPointer
+private import implementations.Sscanf
+private import implementations.Send
+private import implementations.Recv
+private import implementations.Accept
+private import implementations.Poll
+private import implementations.Select
+private import implementations.MySql
+private import implementations.SqLite3
+private import implementations.PostgreSql
+private import implementations.System
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Accept.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Accept.qll
new file mode 100644
index 00000000000..cea4598acf3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Accept.qll
@@ -0,0 +1,54 @@
+/**
+ * Provides implementation classes modeling `accept` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * The function `accept` and its assorted variants
+ */
+private class Accept extends ArrayFunction, AliasFunction, TaintFunction, SideEffectFunction {
+ Accept() { this.hasGlobalName(["accept", "accept4", "WSAAccept"]) }
+
+ override predicate hasArrayWithUnknownSize(int bufParam) { bufParam = 1 }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 1 }
+
+ override predicate hasArrayOutput(int bufParam) { bufParam = 1 }
+
+ override predicate parameterNeverEscapes(int index) { exists(this.getParameter(index)) } // holds for every parameter
+
+ override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+ override predicate parameterIsAlwaysReturned(int index) { none() }
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ (input.isParameter(0) or input.isParameterDeref(1)) and
+ (output.isReturnValue() or output.isParameterDeref(1))
+ }
+
+ override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+ i = 1 and buffer = true and mustWrite = false
+ or
+ i = 2 and buffer = false and mustWrite = false
+ }
+
+ override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+ i = 0 and buffer = true
+ or
+ i = 1 and buffer = false
+ }
+
+ // NOTE: The size parameter is a pointer to the size. So we can't implement `getParameterSizeIndex` for
+ // this model.
+ // NOTE: We implement these two predicates as none because we can't model the low-level changes made to
+ // the structure pointed to by the file-descriptor argument.
+ override predicate hasOnlySpecificReadSideEffects() { none() }
+
+ override predicate hasOnlySpecificWriteSideEffects() { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Allocation.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Allocation.qll
new file mode 100644
index 00000000000..25dae1c2fd1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Allocation.qll
@@ -0,0 +1,291 @@
+/**
+ * Provides implementation classes modeling various methods of allocation
+ * (`malloc`, `new` etc). See `semmle.code.cpp.models.interfaces.Allocation`
+ * for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.Allocation
+
+/**
+ * An allocation function (such as `malloc`) that has an argument for the size
+ * in bytes.
+ */
+private class MallocAllocationFunction extends AllocationFunction {
+ int sizeArg; // index of the size-in-bytes argument
+
+ MallocAllocationFunction() {
+ // --- C library allocation
+ hasGlobalOrStdOrBslName("malloc") and // malloc(size)
+ sizeArg = 0
+ or
+ hasGlobalName([
+ // --- Windows Memory Management for Windows Drivers
+ "MmAllocateContiguousMemory", // MmAllocateContiguousMemory(size, maxaddress)
+ "MmAllocateContiguousNodeMemory", // MmAllocateContiguousNodeMemory(size, minaddress, maxaddress, bound, flag, prefer)
+ "MmAllocateContiguousMemorySpecifyCache", // MmAllocateContiguousMemorySpecifyCache(size, minaddress, maxaddress, bound, type)
+ "MmAllocateContiguousMemorySpecifyCacheNode", // MmAllocateContiguousMemorySpecifyCacheNode(size, minaddress, maxaddress, bound, type, prefer)
+ "MmAllocateNonCachedMemory", // MmAllocateNonCachedMemory(size)
+ "MmAllocateMappingAddress", // MmAllocateMappingAddress(size, tag)
+ // --- Windows COM allocation
+ "CoTaskMemAlloc", // CoTaskMemAlloc(size)
+ // --- Solaris/BSD kernel memory allocator
+ "kmem_alloc", // kmem_alloc(size, flags)
+ "kmem_zalloc", // kmem_zalloc(size, flags)
+ // --- OpenSSL memory allocation
+ "CRYPTO_malloc", // CRYPTO_malloc(size_t num, const char *file, int line)
+ "CRYPTO_zalloc", // CRYPTO_zalloc(size_t num, const char *file, int line)
+ "CRYPTO_secure_malloc", // CRYPTO_secure_malloc(size_t num, const char *file, int line)
+ "CRYPTO_secure_zalloc" // CRYPTO_secure_zalloc(size_t num, const char *file, int line)
+ ]) and
+ sizeArg = 0
+ or
+ hasGlobalName([
+ // --- Windows Memory Management for Windows Drivers
+ "ExAllocatePool", // ExAllocatePool(type, size)
+ "ExAllocatePoolWithTag", // ExAllocatePoolWithTag(type, size, tag)
+ "ExAllocatePoolWithTagPriority", // ExAllocatePoolWithTagPriority(type, size, tag, priority)
+ "ExAllocatePoolWithQuota", // ExAllocatePoolWithQuota(type, size)
+ "ExAllocatePoolWithQuotaTag", // ExAllocatePoolWithQuotaTag(type, size, tag)
+ "IoAllocateMdl", // IoAllocateMdl(address, size, flag, flag, irp)
+ "IoAllocateErrorLogEntry", // IoAllocateErrorLogEntry(object, size)
+ // --- Windows Global / Local legacy allocation
+ "LocalAlloc", // LocalAlloc(flags, size)
+ "GlobalAlloc", // GlobalAlloc(flags, size)
+ // --- Windows System Services allocation
+ "VirtualAlloc" // VirtualAlloc(address, size, type, flag)
+ ]) and
+ sizeArg = 1
+ or
+ hasGlobalName(["HeapAlloc"]) and // HeapAlloc(heap, flags, size)
+ sizeArg = 2
+ or
+ hasGlobalName([
+ // --- Windows Memory Management for Windows Drivers
+ "MmAllocatePagesForMdl", // MmAllocatePagesForMdl(minaddress, maxaddress, skip, size)
+ "MmAllocatePagesForMdlEx", // MmAllocatePagesForMdlEx(minaddress, maxaddress, skip, size, type, flags)
+ "MmAllocateNodePagesForMdlEx" // MmAllocateNodePagesForMdlEx(minaddress, maxaddress, skip, size, type, prefer, flags)
+ ]) and
+ sizeArg = 3
+ }
+
+ override int getSizeArg() { result = sizeArg }
+}
+
+/**
+ * An allocation function (such as `alloca`) that does not require a
+ * corresponding free (and has an argument for the size in bytes).
+ */
+private class AllocaAllocationFunction extends AllocationFunction {
+ int sizeArg; // index of the size-in-bytes argument
+
+ AllocaAllocationFunction() {
+ hasGlobalName([
+ // --- stack allocation
+ "alloca", // alloca(size)
+ "__builtin_alloca", // __builtin_alloca(size)
+ "_alloca", // _alloca(size)
+ "_malloca" // _malloca(size)
+ ]) and
+ sizeArg = 0
+ }
+
+ override int getSizeArg() { result = sizeArg }
+
+ override predicate requiresDealloc() { none() } // never holds: no corresponding free is required
+}
+
+/**
+ * An allocation function (such as `calloc`) that has one argument for the size
+ * in bytes of each unit and another argument that multiplies it (the count).
+ */
+private class CallocAllocationFunction extends AllocationFunction {
+ int sizeArg;
+ int multArg;
+
+ CallocAllocationFunction() {
+ // --- C library allocation
+ hasGlobalOrStdOrBslName("calloc") and // calloc(num, size)
+ sizeArg = 1 and // `size`
+ multArg = 0 // `num`
+ }
+
+ override int getSizeArg() { result = sizeArg }
+
+ override int getSizeMult() { result = multArg }
+}
+
+/**
+ * An allocation function (such as `realloc`) that has an argument for the size
+ * in bytes, and an argument for an existing pointer that is to be reallocated.
+ */
+private class ReallocAllocationFunction extends AllocationFunction {
+ int sizeArg; // index of the new size-in-bytes argument
+ int reallocArg; // index of the existing pointer argument
+
+ ReallocAllocationFunction() {
+ // --- C library allocation
+ hasGlobalOrStdOrBslName("realloc") and // realloc(ptr, size)
+ sizeArg = 1 and
+ reallocArg = 0
+ or
+ hasGlobalName([
+ // --- Windows Global / Local legacy allocation
+ "LocalReAlloc", // LocalReAlloc(ptr, size, flags)
+ "GlobalReAlloc", // GlobalReAlloc(ptr, size, flags)
+ // --- Windows COM allocation
+ "CoTaskMemRealloc", // CoTaskMemRealloc(ptr, size)
+ // --- OpenSSL memory allocation
+ "CRYPTO_realloc" // CRYPTO_realloc(void *addr, size_t num, const char *file, int line)
+ ]) and
+ sizeArg = 1 and
+ reallocArg = 0
+ or
+ hasGlobalName("HeapReAlloc") and // HeapReAlloc(heap, flags, ptr, size)
+ sizeArg = 3 and
+ reallocArg = 2
+ }
+
+ override int getSizeArg() { result = sizeArg }
+
+ override int getReallocPtrArg() { result = reallocArg }
+}
+
+/**
+ * A miscellaneous allocation function that has no explicit argument for
+ * the size of the allocation.
+ */
+private class SizelessAllocationFunction extends AllocationFunction {
+ SizelessAllocationFunction() { // note: this class overrides no size predicates
+ hasGlobalName([
+ // --- Windows Memory Management for Windows Drivers
+ "ExAllocateFromLookasideListEx", // ExAllocateFromLookasideListEx(list)
+ "ExAllocateFromPagedLookasideList", // ExAllocateFromPagedLookasideList(list)
+ "ExAllocateFromNPagedLookasideList", // ExAllocateFromNPagedLookasideList(list)
+ "ExAllocateTimer", // ExAllocateTimer(callback, context, attributes)
+ "IoAllocateWorkItem", // IoAllocateWorkItem(object)
+ "MmMapLockedPagesWithReservedMapping", // MmMapLockedPagesWithReservedMapping(address, tag, list, type)
+ "MmMapLockedPages", // MmMapLockedPages(list, mode)
+ "MmMapLockedPagesSpecifyCache", // MmMapLockedPagesSpecifyCache(list, mode, type, address, flag, flag)
+ // --- NetBSD pool manager
+ "pool_get", // pool_get(pool, flags)
+ "pool_cache_get" // pool_cache_get(pool, flags)
+ ])
+ }
+}
+
+/**
+ * Holds if `sizeExpr` is an expression consisting of a subexpression
+ * `lengthExpr` multiplied by a constant `sizeof` that is the result of a
+ * `sizeof()` expression. Alternatively if there isn't a suitable `sizeof()`
+ * expression, `lengthExpr = sizeExpr` and `sizeof = 1`. For example:
+ * ```
+ * malloc(a * 2 * sizeof(char32_t));
+ * ```
+ * In this case if the `sizeExpr` is the argument to `malloc`, the `lengthExpr`
+ * is `a * 2` and `sizeof` is `4`.
+ */
+private predicate deconstructSizeExpr(Expr sizeExpr, Expr lengthExpr, int sizeof) {
+ exists(SizeofOperator sizeofOp | // case 1: `lengthExpr * sizeof(...)` (operands in either order)
+ sizeofOp = sizeExpr.(MulExpr).getAnOperand() and
+ lengthExpr = sizeExpr.(MulExpr).getAnOperand() and
+ not lengthExpr instanceof SizeofOperator and
+ sizeof = sizeofOp.getValue().toInt()
+ )
+ or
+ not exists(SizeofOperator sizeofOp, Expr lengthOp | // case 2: fallback when case 1 has no result
+ sizeofOp = sizeExpr.(MulExpr).getAnOperand() and
+ lengthOp = sizeExpr.(MulExpr).getAnOperand() and
+ not lengthOp instanceof SizeofOperator and
+ exists(sizeofOp.getValue().toInt())
+ ) and
+ lengthExpr = sizeExpr and
+ sizeof = 1
+}
+
+/**
+ * An allocation expression that is a function call, such as call to `malloc`.
+ */
+private class CallAllocationExpr extends AllocationExpr, FunctionCall {
+ AllocationFunction target; // the modeled allocation function being called
+
+ CallAllocationExpr() {
+ target = getTarget() and
+ // realloc(ptr, 0) only frees the pointer
+ not (
+ exists(target.getReallocPtrArg()) and
+ getArgument(target.getSizeArg()).getValue().toInt() = 0
+ ) and
+ // these are modelled directly (and more accurately), avoid duplication
+ not exists(NewOrNewArrayExpr new | new.getAllocatorCall() = this)
+ }
+
+ override Expr getSizeExpr() {
+ exists(Expr sizeExpr | sizeExpr = getArgument(target.getSizeArg()) |
+ if exists(target.getSizeMult())
+ then result = sizeExpr // explicit multiplier argument: use the size argument as is
+ else
+ exists(Expr lengthExpr | // otherwise strip a `* sizeof(...)` factor if there is one
+ deconstructSizeExpr(sizeExpr, lengthExpr, _) and
+ result = lengthExpr
+ )
+ )
+ }
+
+ override int getSizeMult() {
+ // malloc with multiplier argument that is a constant
+ result = getArgument(target.getSizeMult()).getValue().toInt()
+ or
+ // malloc with no multiplier argument
+ not exists(target.getSizeMult()) and
+ deconstructSizeExpr(getArgument(target.getSizeArg()), _, result)
+ }
+
+ override int getSizeBytes() { result = getSizeExpr().getValue().toInt() * getSizeMult() } // needs a constant-valued size expression
+
+ override Expr getReallocPtr() { result = getArgument(target.getReallocPtrArg()) }
+
+ override Type getAllocatedElementType() {
+ result =
+ this.getFullyConverted().getType().stripTopLevelSpecifiers().(PointerType).getBaseType() and
+ not result instanceof VoidType // no result when the converted type is `void *`
+ }
+
+ override predicate requiresDealloc() { target.requiresDealloc() }
+}
+
+/**
+ * An allocation expression that is a `new` expression.
+ */
+private class NewAllocationExpr extends AllocationExpr, NewExpr {
+ NewAllocationExpr() { this instanceof NewExpr }
+
+ override int getSizeBytes() { result = getAllocatedType().getSize() }
+
+ override Type getAllocatedElementType() { result = getAllocatedType() }
+
+ override predicate requiresDealloc() { not exists(getPlacementPointer()) } // holds only when there is no placement pointer
+}
+
+/**
+ * An allocation expression that is a `new []` expression.
+ */
+private class NewArrayAllocationExpr extends AllocationExpr, NewArrayExpr {
+ NewArrayAllocationExpr() { this instanceof NewArrayExpr }
+
+ override Expr getSizeExpr() {
+ // new array expr with variable size
+ result = getExtent()
+ }
+
+ override int getSizeMult() {
+ // new array expr with variable size
+ exists(getExtent()) and
+ result = getAllocatedElementType().getSize()
+ }
+
+ override Type getAllocatedElementType() { result = NewArrayExpr.super.getAllocatedElementType() } // picks the `NewArrayExpr` definition explicitly
+
+ override int getSizeBytes() { result = getAllocatedType().getSize() } // presumably the size of the whole array type; TODO confirm
+
+ override predicate requiresDealloc() { not exists(getPlacementPointer()) } // holds only when there is no placement pointer
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Deallocation.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Deallocation.qll
new file mode 100644
index 00000000000..6bd2916b733
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Deallocation.qll
@@ -0,0 +1,89 @@
+/**
+ * Provides implementation classes modeling various methods of deallocation
+ * (`free`, `delete` etc). See `semmle.code.cpp.models.interfaces.Deallocation`
+ * for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.Deallocation
+
+/**
+ * A deallocation function such as `free`.
+ */
+private class StandardDeallocationFunction extends DeallocationFunction {
+ int freedArg; // index of the argument that is freed
+
+ StandardDeallocationFunction() {
+ hasGlobalOrStdOrBslName([
+ // --- C library allocation
+ "free", "realloc"
+ ]) and
+ freedArg = 0
+ or
+ hasGlobalName([
+ // --- OpenSSL memory allocation
+ "CRYPTO_free", "CRYPTO_secure_free"
+ ]) and
+ freedArg = 0
+ or
+ hasGlobalOrStdName([
+ // --- Windows Memory Management for Windows Drivers
+ "ExFreePoolWithTag", "ExDeleteTimer", "IoFreeMdl", "IoFreeWorkItem", "IoFreeErrorLogEntry",
+ "MmFreeContiguousMemory", "MmFreeContiguousMemorySpecifyCache", "MmFreeNonCachedMemory",
+ "MmFreeMappingAddress", "MmFreePagesFromMdl", "MmUnmapReservedMapping",
+ "MmUnmapLockedPages",
+ // --- Windows Global / Local legacy allocation
+ "LocalFree", "GlobalFree", "LocalReAlloc", "GlobalReAlloc",
+ // --- Windows System Services allocation
+ "VirtualFree",
+ // --- Windows COM allocation
+ "CoTaskMemFree", "CoTaskMemRealloc",
+ // --- Windows Automation
+ "SysFreeString",
+ // --- Solaris/BSD kernel memory allocator
+ "kmem_free"
+ ]) and
+ freedArg = 0
+ or
+ hasGlobalOrStdName([
+ // --- Windows Memory Management for Windows Drivers
+ "ExFreeToLookasideListEx", "ExFreeToPagedLookasideList", "ExFreeToNPagedLookasideList",
+ // --- NetBSD pool manager
+ "pool_put", "pool_cache_put"
+ ]) and
+ freedArg = 1 // presumably the entry follows the list/pool argument; TODO confirm
+ or
+ hasGlobalOrStdName(["HeapFree", "HeapReAlloc"]) and // HeapFree(heap, flags, ptr), HeapReAlloc(heap, flags, ptr, size)
+ freedArg = 2
+ }
+
+ override int getFreedArg() { result = freedArg }
+}
+
+/**
+ * A deallocation expression that is a function call, such as a call to `free`.
+ */
+private class CallDeallocationExpr extends DeallocationExpr, FunctionCall {
+ DeallocationFunction target; // the modeled deallocation function being called
+
+ CallDeallocationExpr() { target = getTarget() }
+
+ override Expr getFreedExpr() { result = getArgument(target.getFreedArg()) }
+}
+
+/**
+ * A deallocation expression that is a `delete` expression.
+ */
+private class DeleteDeallocationExpr extends DeallocationExpr, DeleteExpr {
+ DeleteDeallocationExpr() { this instanceof DeleteExpr }
+
+ override Expr getFreedExpr() { result = getExpr() }
+}
+
+/**
+ * A deallocation expression that is a `delete []` expression.
+ */
+private class DeleteArrayDeallocationExpr extends DeallocationExpr, DeleteArrayExpr {
+ DeleteArrayDeallocationExpr() { this instanceof DeleteArrayExpr }
+
+ override Expr getFreedExpr() { result = getExpr() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Fread.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Fread.qll
new file mode 100644
index 00000000000..df2d92fbc4f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Fread.qll
@@ -0,0 +1,20 @@
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.FlowSource
+
+private class Fread extends AliasFunction, RemoteFlowSourceFunction {
+ Fread() { this.hasGlobalOrStdOrBslName("fread") } // fread(ptr, size, nmemb, stream)
+
+ override predicate parameterNeverEscapes(int n) {
+ n = 0 or
+ n = 3
+ }
+
+ override predicate parameterEscapesOnlyViaReturn(int n) { none() }
+
+ override predicate parameterIsAlwaysReturned(int n) { none() }
+
+ override predicate hasRemoteFlowSource(FunctionOutput output, string description) {
+ output.isParameterDeref(0) and // the data written through parameter 0 is the source
+ description = "String read by " + this.getName()
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/GetDelim.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/GetDelim.qll
new file mode 100644
index 00000000000..e2015406346
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/GetDelim.qll
@@ -0,0 +1,41 @@
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.FlowSource
+
+/**
+ * The standard functions `getdelim`, `getwdelim` and the glibc variant `__getdelim`.
+ */
+private class GetDelimFunction extends TaintFunction, AliasFunction, SideEffectFunction,
+ RemoteFlowSourceFunction {
+ GetDelimFunction() { hasGlobalName(["getdelim", "getwdelim", "__getdelim"]) } // getdelim(lineptr, n, delimiter, stream)
+
+ override predicate hasTaintFlow(FunctionInput i, FunctionOutput o) {
+ i.isParameter(3) and o.isParameterDeref(0)
+ }
+
+ override predicate parameterNeverEscapes(int index) { index = [0, 1, 3] }
+
+ override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+ override predicate parameterIsAlwaysReturned(int index) { none() }
+
+ override predicate hasOnlySpecificReadSideEffects() { any() }
+
+ override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+ override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+ i = [0, 1] and // writes through parameters 0 and 1
+ buffer = false and
+ mustWrite = false
+ }
+
+ override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+ i = 3 and buffer = false
+ }
+
+ override predicate hasRemoteFlowSource(FunctionOutput output, string description) {
+ output.isParameterDeref(0) and
+ description = "String read by " + this.getName()
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Getenv.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Getenv.qll
new file mode 100644
index 00000000000..87e191241d2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Getenv.qll
@@ -0,0 +1,21 @@
+/**
+ * Provides an implementation class modeling the POSIX function `getenv`.
+ */
+
+import cpp
+import semmle.code.cpp.models.interfaces.FlowSource
+
+/**
+ * The POSIX function `getenv`.
+ */
+class Getenv extends LocalFlowSourceFunction {
+ Getenv() { this.hasGlobalOrStdOrBslName("getenv") }
+
+ override predicate hasLocalFlowSource(FunctionOutput output, string description) {
+ ( // both the returned pointer and what it points to are sources
+ output.isReturnValueDeref() or
+ output.isReturnValue()
+ ) and
+ description = "an environment variable"
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Gets.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Gets.qll
new file mode 100644
index 00000000000..08222c2cd6a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Gets.qll
@@ -0,0 +1,68 @@
+/**
+ * Provides implementation classes modeling `gets` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.FlowSource
+
+/**
+ * The standard functions `gets`, `fgets` and `fgetws`.
+ */
+private class GetsFunction extends DataFlowFunction, TaintFunction, ArrayFunction, AliasFunction,
+ SideEffectFunction, RemoteFlowSourceFunction {
+ GetsFunction() {
+ // gets(str)
+ // fgets(str, num, stream)
+ // fgetws(wstr, num, stream)
+ hasGlobalOrStdOrBslName(["gets", "fgets", "fgetws"])
+ }
+
+ override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameter(0) and
+ output.isReturnValue()
+ }
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameter(2) and // the stream; `gets(str)` has no such parameter
+ output.isParameterDeref(0)
+ }
+
+ override predicate parameterNeverEscapes(int index) { index = 2 }
+
+ override predicate parameterEscapesOnlyViaReturn(int index) { index = 0 }
+
+ override predicate parameterIsAlwaysReturned(int index) { index = 0 }
+
+ override predicate hasOnlySpecificReadSideEffects() { any() }
+
+ override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+ override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+ i = 0 and
+ buffer = true and
+ mustWrite = true
+ }
+
+ override predicate hasRemoteFlowSource(FunctionOutput output, string description) {
+ output.isParameterDeref(0) and
+ description = "String read by " + this.getName()
+ }
+
+ override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+ not hasName("gets") and // only the variants that take `num`
+ bufParam = 0 and
+ countParam = 1
+ }
+
+ override predicate hasArrayWithUnknownSize(int bufParam) {
+ hasName("gets") and // `gets` has no size parameter
+ bufParam = 0
+ }
+
+ override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/IdentityFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/IdentityFunction.qll
new file mode 100644
index 00000000000..60afd2b25ef
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/IdentityFunction.qll
@@ -0,0 +1,35 @@
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * The standard function templates `std::move` and `std::forward`.
+ */
+private class IdentityFunction extends DataFlowFunction, SideEffectFunction, AliasFunction,
+ FunctionTemplateInstantiation {
+ IdentityFunction() { this.hasQualifiedName("std", ["move", "forward"]) }
+
+ override predicate hasOnlySpecificReadSideEffects() { any() } // note: no specific read side effects are overridden here
+
+ override predicate hasOnlySpecificWriteSideEffects() { any() } // note: no specific write side effects are overridden here
+
+ override predicate parameterNeverEscapes(int index) { none() }
+
+ override predicate parameterEscapesOnlyViaReturn(int index) {
+ // These functions simply return the argument value.
+ index = 0
+ }
+
+ override predicate parameterIsAlwaysReturned(int index) {
+ // These functions simply return the argument value.
+ index = 0
+ }
+
+ override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+ // These functions simply return the argument value.
+ input.isParameter(0) and output.isReturnValue()
+ or
+ input.isParameterDeref(0) and output.isReturnValueDeref() // and likewise for what the argument refers to
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Inet.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Inet.qll
new file mode 100644
index 00000000000..397dca69fed
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Inet.qll
@@ -0,0 +1,144 @@
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.ArrayFunction
+
+private class InetNtoa extends TaintFunction {
+ InetNtoa() { hasGlobalName("inet_ntoa") } // inet_ntoa(in)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameter(0) and
+ output.isReturnValueDeref()
+ }
+}
+
+private class InetAton extends TaintFunction, ArrayFunction {
+ InetAton() { hasGlobalName("inet_aton") } // inet_aton(cp, inp)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameterDeref(0) and
+ output.isParameterDeref(1)
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+ override predicate hasArrayOutput(int bufParam) { bufParam = 1 }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+
+ override predicate hasArrayWithFixedSize(int bufParam, int elemCount) {
+ bufParam = 1 and
+ elemCount = 1 // the output is modeled as a single element
+ }
+}
+
+private class InetAddr extends TaintFunction, ArrayFunction, AliasFunction {
+ InetAddr() { hasGlobalName("inet_addr") } // inet_addr(cp)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameterDeref(0) and
+ output.isReturnValue()
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+
+ override predicate parameterNeverEscapes(int index) { index = 0 }
+
+ override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+ override predicate parameterIsAlwaysReturned(int index) { none() }
+}
+
+private class InetNetwork extends TaintFunction, ArrayFunction {
+ InetNetwork() { hasGlobalName("inet_network") } // inet_network(cp)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameterDeref(0) and
+ output.isReturnValue()
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+}
+
+private class InetMakeaddr extends TaintFunction {
+ InetMakeaddr() { hasGlobalName("inet_makeaddr") } // inet_makeaddr(net, host)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ ( // either argument taints the result
+ input.isParameter(0) or
+ input.isParameter(1)
+ ) and
+ output.isReturnValue()
+ }
+}
+
+private class InetLnaof extends TaintFunction {
+ InetLnaof() { hasGlobalName("inet_lnaof") } // inet_lnaof(in)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameter(0) and
+ output.isReturnValue()
+ }
+}
+
+private class InetNetof extends TaintFunction {
+ InetNetof() { hasGlobalName("inet_netof") } // inet_netof(in)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameter(0) and
+ output.isReturnValue()
+ }
+}
+
+private class InetPton extends TaintFunction, ArrayFunction {
+ InetPton() { hasGlobalName("inet_pton") } // inet_pton(af, src, dst)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ (
+ input.isParameter(0) or
+ input.isParameterDeref(1)
+ ) and
+ output.isParameterDeref(2)
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 1 }
+
+ override predicate hasArrayOutput(int bufParam) { bufParam = 2 }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 1 }
+
+ override predicate hasArrayWithUnknownSize(int bufParam) { bufParam = 2 }
+}
+
+private class Gethostbyname extends TaintFunction, ArrayFunction {
+ Gethostbyname() { hasGlobalName("gethostbyname") } // gethostbyname(name)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ input.isParameterDeref(0) and
+ output.isReturnValueDeref()
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+}
+
+private class Gethostbyaddr extends TaintFunction, ArrayFunction {
+ Gethostbyaddr() { hasGlobalName("gethostbyaddr") } // gethostbyaddr(addr, len, type)
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ (
+ input.isParameterDeref(0) or
+ input.isParameter(1) or
+ input.isParameter(2)
+ ) and
+ output.isReturnValueDeref()
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 } // NOTE(review): POSIX passes the length of `addr` in parameter 1; confirm a null terminator is the intended size model
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll
new file mode 100644
index 00000000000..24d5456293f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll
@@ -0,0 +1,348 @@
+/**
+ * Provides implementation classes modeling C++ iterators, including
+ * `std::iterator`, `std::iterator_traits`, and types meeting the
+ * `LegacyIterator` named requirement. See `semmle.code.cpp.models.Models` for
+ * usage information.
+ */
+
+import cpp
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Iterator
+
+/**
+ * An instantiation of the `std::iterator_traits` (or `bsl::iterator_traits`)
+ * template that has a member typedef named `iterator_category`.
+ */
+private class IteratorTraits extends Class {
+  IteratorTraits() {
+    // the uninstantiated template itself does not count
+    not this instanceof TemplateClass and
+    this.hasQualifiedName(["std", "bsl"], "iterator_traits") and
+    this.getAMember().(TypedefType).hasName("iterator_category")
+  }
+
+  /** Gets the first template argument, that is, the type these traits describe. */
+  Type getIteratorType() { this.getTemplateArgument(0) = result }
+}
+
+/**
+ * A type that is treated as an iterator because it is the iterator type of
+ * some `IteratorTraits` class.
+ */
+private class IteratorByTraits extends Iterator {
+  IteratorByTraits() { this = any(IteratorTraits traits).getIteratorType() }
+}
+
+/**
+ * A class, other than `iterator_traits`, that has all five iterator member typedefs.
+ */
+private class IteratorByTypedefs extends Iterator, Class {
+  IteratorByTypedefs() {
+    not this.hasQualifiedName(["std", "bsl"], "iterator_traits") and
+    forall(string name |
+      name = ["difference_type", "value_type", "pointer", "reference", "iterator_category"]
+    |
+      this.getAMember().(TypedefType).hasName(name)
+    )
+  }
+}
+
+/**
+ * The class named `iterator` in namespace `std` or `bsl`.
+ */
+private class StdIterator extends Iterator, Class {
+  StdIterator() { exists(string ns | ns = ["std", "bsl"] | this.hasQualifiedName(ns, "iterator")) }
+}
+
+/**
+ * Gets the `FunctionInput` for parameter `index` of the user-defined operator
+ * `op`, provided some call to `op` passes an argument at `index` whose
+ * explicitly converted type is an iterator or a reference to an iterator. The
+ * input is the parameter's pointee if the parameter is a reference, else its value.
+ */
+private FunctionInput getIteratorArgumentInput(Operator op, int index) {
+  exists(Expr arg, Type argType |
+    arg = op.getACallToThisFunction().getArgument(index) and
+    argType = arg.getExplicitlyConverted().getType().stripTopLevelSpecifiers() and
+    (
+      argType instanceof Iterator or
+      argType.(ReferenceType).getBaseType() instanceof Iterator
+    )
+  |
+    op.getParameter(index).getUnspecifiedType() instanceof ReferenceType and
+    result.isParameterDeref(index)
+    or
+    not op.getParameter(index).getUnspecifiedType() instanceof ReferenceType and
+    result.isParameter(index)
+  )
+}
+
+/**
+ * A non-member prefix `operator*` function whose operand is an iterator.
+ */
+private class IteratorPointerDereferenceOperator extends Operator, TaintFunction,
+  IteratorReferenceFunction {
+  FunctionInput iteratorInput;
+
+  IteratorPointerDereferenceOperator() {
+    iteratorInput = getIteratorArgumentInput(this, 0) and
+    this.hasName("operator*")
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // forward flow: the iterator operand taints the returned value
+    output.isReturnValue() and input = iteratorInput
+    or
+    // reverse flow: the object behind the return value taints the object behind parameter 0
+    output.isParameterDeref(0) and input.isReturnValueDeref()
+  }
+}
+
+/**
+ * A non-member increment (`operator++`) or decrement (`operator--`) function
+ * for an iterator type.
+ */
+private class IteratorCrementOperator extends Operator, DataFlowFunction {
+  FunctionInput iteratorInput;
+
+  IteratorCrementOperator() {
+    iteratorInput = getIteratorArgumentInput(this, 0) and
+    this.hasName(["operator++", "operator--"])
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input = iteratorInput
+    or
+    output.isReturnValueDeref() and input.isParameterDeref(0)
+  }
+}
+
+/**
+ * A non-member `operator+` function for an iterator type. The iterator may be
+ * passed as either the first or the second argument.
+ */
+private class IteratorAddOperator extends Operator, TaintFunction {
+  FunctionInput iteratorInput;
+
+  IteratorAddOperator() {
+    iteratorInput = getIteratorArgumentInput(this, [0, 1]) and
+    this.hasName("operator+")
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input = iteratorInput
+  }
+}
+
+/**
+ * A non-member `operator-` function taking an iterator and an integral second parameter.
+ */
+private class IteratorSubOperator extends Operator, TaintFunction {
+  FunctionInput iteratorInput;
+
+  IteratorSubOperator() {
+    // an integral second parameter rules out the iterator-difference form of `operator-`
+    this.getParameter(1).getUnspecifiedType() instanceof IntegralType and
+    iteratorInput = getIteratorArgumentInput(this, 0) and
+    this.hasName("operator-")
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input = iteratorInput
+  }
+}
+
+/**
+ * A non-member `operator+=` or `operator-=` function for an iterator type.
+ */
+private class IteratorAssignArithmeticOperator extends Operator, DataFlowFunction, TaintFunction {
+  IteratorAssignArithmeticOperator() {
+    exists(getIteratorArgumentInput(this, 0)) and
+    this.hasName(["operator+=", "operator-="])
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the first parameter is passed through as the return value
+    output.isReturnValue() and input.isParameter(0)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the object behind the first parameter taints the object behind the return value ...
+    output.isReturnValueDeref() and input.isParameterDeref(0)
+    or
+    // ... and, in reverse, the object behind the return value taints that first object
+    output.isParameterDeref(0) and input.isReturnValueDeref()
+    or
+    // the object behind the second parameter taints the object behind the first
+    output.isParameterDeref(0) and input.isParameterDeref(1)
+  }
+}
+
+/**
+ * A prefix `operator*` member function for an iterator type.
+ */
+class IteratorPointerDereferenceMemberOperator extends MemberFunction, TaintFunction,
+  IteratorReferenceFunction {
+  IteratorPointerDereferenceMemberOperator() {
+    exists(Iterator it | it = this.getClassAndName("operator*"))
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // forward flow from the qualifier object to the returned value
+    output.isReturnValue() and input.isQualifierObject()
+    or
+    // reverse flow from the object behind the return value to the qualifier object
+    output.isQualifierObject() and input.isReturnValueDeref()
+  }
+}
+
+/**
+ * An `operator++` or `operator--` member function for an iterator type.
+ */
+private class IteratorCrementMemberOperator extends MemberFunction, DataFlowFunction, TaintFunction {
+  IteratorCrementMemberOperator() {
+    exists(Iterator it | it = this.getClassAndName(["operator++", "operator--"]))
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier-to-result flow is reported as taint as well as data flow
+    output.isReturnValueDeref() and input.isQualifierObject()
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the address of the qualifier is passed through as the return value
+    output.isReturnValue() and input.isQualifierAddress()
+    or
+    // the qualifier object flows to the object behind the return value ...
+    output.isReturnValueDeref() and input.isQualifierObject()
+    or
+    // ... and the object behind the return value flows back to the qualifier object
+    output.isQualifierObject() and input.isReturnValueDeref()
+  }
+}
+
+/**
+ * A member `operator->` function for an iterator type.
+ */
+private class IteratorFieldMemberOperator extends Operator, TaintFunction {
+  IteratorFieldMemberOperator() { exists(Iterator it | it = this.getClassAndName("operator->")) }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier object taints the returned value
+    output.isReturnValue() and input.isQualifierObject()
+  }
+}
+
+/**
+ * An `operator+` or `operator-` member function of an iterator class.
+ */
+private class IteratorBinaryArithmeticMemberOperator extends MemberFunction, TaintFunction {
+  IteratorBinaryArithmeticMemberOperator() {
+    exists(Iterator it | it = this.getClassAndName(["operator+", "operator-"]))
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier object taints the returned value
+    output.isReturnValue() and input.isQualifierObject()
+  }
+}
+
+/**
+ * An `operator+=` or `operator-=` member function of an iterator class, that
+ * is, a compound assignment whose left-hand side is the qualifier.
+ */
+private class IteratorAssignArithmeticMemberOperator extends MemberFunction, DataFlowFunction,
+  TaintFunction {
+  IteratorAssignArithmeticMemberOperator() {
+    exists(Iterator it | it = this.getClassAndName(["operator+=", "operator-="]))
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the address of the qualifier is passed through as the return value
+    output.isReturnValue() and input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier object taints the object behind the return value ...
+    output.isReturnValueDeref() and input.isQualifierObject()
+    or
+    // ... and, in reverse, the object behind the return value taints the qualifier
+    output.isQualifierObject() and input.isReturnValueDeref()
+    or
+    // the object behind the first parameter taints the qualifier
+    output.isQualifierObject() and input.isParameterDeref(0)
+  }
+}
+
+/**
+ * An `operator[]` member function of an iterator class.
+ */
+private class IteratorArrayMemberOperator extends MemberFunction, TaintFunction,
+  IteratorReferenceFunction {
+  IteratorArrayMemberOperator() { exists(Iterator it | it = this.getClassAndName("operator[]")) }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier object taints the returned value
+    output.isReturnValue() and input.isQualifierObject()
+  }
+}
+
+/**
+ * An `operator=` member function of an iterator class that is neither a copy
+ * assignment operator nor a move assignment operator.
+ *
+ * Its taint model provides flow through output iterators whose `operator*`
+ * returns the iterator itself and whose `operator=` assigns to the container.
+ */
+private class IteratorAssignmentMemberOperator extends MemberFunction, TaintFunction {
+  IteratorAssignmentMemberOperator() {
+    not this instanceof MoveAssignmentOperator and
+    not this instanceof CopyAssignmentOperator and
+    exists(Iterator it | it = this.getClassAndName("operator="))
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the object behind the assigned argument taints the qualifier object
+    output.isQualifierObject() and input.isParameterDeref(0)
+  }
+}
+
+/**
+ * A member function named `begin`, `end`, or one of their `c`/`r`/`cr`-prefixed
+ * or `before_begin` relatives, whose return type is an iterator.
+ */
+private class BeginOrEndFunction extends MemberFunction, TaintFunction, GetIteratorFunction {
+  BeginOrEndFunction() {
+    this.getType().getUnspecifiedType() instanceof Iterator and
+    exists(string name | this.hasName(name) |
+      name = ["begin", "cbegin", "rbegin", "crbegin", "before_begin", "cbefore_begin"] or
+      name = ["end", "cend", "rend", "crend"]
+    )
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier object taints the returned iterator
+    output.isReturnValue() and input.isQualifierObject()
+  }
+
+  override predicate getsIterator(FunctionInput input, FunctionOutput output) {
+    // the returned value is an iterator obtained from the qualifier object
+    output.isReturnValue() and input.isQualifierObject()
+  }
+}
+
+/**
+ * The `front_inserter`, `inserter` and `back_inserter` functions in namespace
+ * `std` or `bsl`.
+ */
+private class InserterIteratorFunction extends GetIteratorFunction {
+  InserterIteratorFunction() {
+    this.hasQualifiedName(["bsl", "std"], ["back_inserter", "front_inserter", "inserter"])
+  }
+
+  override predicate getsIterator(FunctionInput input, FunctionOutput output) {
+    // the returned iterator is obtained from the object behind the first parameter
+    output.isReturnValue() and input.isParameterDeref(0)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/MemberFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/MemberFunction.qll
new file mode 100644
index 00000000000..31752b304a4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/MemberFunction.qll
@@ -0,0 +1,92 @@
+/**
+ * Provides models for C++ constructors and user-defined operators.
+ */
+
+import cpp
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Taint
+
+/**
+ * Model for C++ conversion constructors: non-`explicit` constructors with
+ * exactly one parameter that has no initializer. As of C++11 this does not
+ * correspond perfectly with the language definition of a converting
+ * constructor; it covers the constructors we are confident taint flows through.
+ */
+private class ConversionConstructorModel extends Constructor, TaintFunction {
+  ConversionConstructorModel() {
+    // parameters that have an initializer are not counted
+    not this.hasSpecifier("explicit") and
+    strictcount(Parameter p | p = this.getAParameter() and not p.hasInitializer()) = 1
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // taint flows from the first constructor argument to the constructed object,
+    // which is modeled as the return value ...
+    input.isParameter(0) and output.isReturnValue()
+    or
+    // ... and as the qualifier object
+    input.isParameter(0) and output.isQualifierObject()
+  }
+}
+
+/**
+ * Model for C++ copy constructors, which pass the copied-from object on to the
+ * constructed object.
+ */
+private class CopyConstructorModel extends CopyConstructor, DataFlowFunction {
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // data flows from the object behind the first argument to the constructed
+    // object, which is modeled as the return value ...
+    input.isParameterDeref(0) and output.isReturnValue()
+    or
+    // ... and as the qualifier object
+    input.isParameterDeref(0) and output.isQualifierObject()
+  }
+}
+
+/**
+ * Model for C++ move constructors, which pass the moved-from object on to the
+ * constructed object.
+ */
+private class MoveConstructorModel extends MoveConstructor, DataFlowFunction {
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // data flows from the object behind the first argument to the constructed
+    // object, which is modeled as the return value ...
+    input.isParameterDeref(0) and output.isReturnValue()
+    or
+    // ... and as the qualifier object
+    input.isParameterDeref(0) and output.isQualifierObject()
+  }
+}
+
+/**
+ * Model for C++ copy assignment operators.
+ */
+private class CopyAssignmentOperatorModel extends CopyAssignmentOperator, TaintFunction {
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // TODO: it would be more accurate to model copy assignment as data flow
+    input.isParameterDeref(0) and
+    (
+      // the assigned argument taints both the object itself and the object behind the result
+      output.isQualifierObject()
+      or
+      output.isReturnValueDeref()
+    )
+  }
+}
+
+/**
+ * Model for C++ move assignment operators.
+ */
+private class MoveAssignmentOperatorModel extends MoveAssignmentOperator, TaintFunction {
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // TODO: it would be more accurate to model move assignment as data flow
+    input.isParameterDeref(0) and
+    (
+      // the assigned argument taints both the object itself and the object behind the result
+      output.isQualifierObject()
+      or
+      output.isReturnValueDeref()
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Memcpy.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Memcpy.qll
new file mode 100644
index 00000000000..b7d8aed60fa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Memcpy.qll
@@ -0,0 +1,109 @@
+/**
+ * Provides implementation classes modeling `memcpy` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.Taint
+
+/**
+ * The standard functions `memcpy` and `memmove`, the related `bcopy`, `mempcpy`,
+ * `wmempcpy` and `memccpy`, and the gcc variant `__builtin___memcpy_chk`.
+ */
+private class MemcpyFunction extends ArrayFunction, DataFlowFunction, SideEffectFunction,
+  AliasFunction {
+  MemcpyFunction() {
+    // bcopy(src, dest, num)
+    // mempcpy(dest, src, num)
+    // memccpy(dest, src, c, n)
+    // __builtin___memcpy_chk(dest, src, num, remaining)
+    this.hasGlobalName(["bcopy", mempcpy(), "memccpy", "__builtin___memcpy_chk"])
+    or
+    // memcpy(dest, src, num)
+    // memmove(dest, src, num)
+    this.hasGlobalOrStdOrBslName(["memcpy", "memmove"])
+  }
+
+  /** Gets the index of the source buffer parameter: 0 for `bcopy`, otherwise 1. */
+  int getParamSrc() {
+    this.hasGlobalName("bcopy") and result = 0
+    or
+    not this.hasGlobalName("bcopy") and result = 1
+  }
+
+  /** Gets the index of the destination buffer parameter: 1 for `bcopy`, otherwise 0. */
+  int getParamDest() {
+    this.hasGlobalName("bcopy") and result = 1
+    or
+    not this.hasGlobalName("bcopy") and result = 0
+  }
+
+  /** Gets the index of the size (in bytes) parameter: 3 for `memccpy`, otherwise 2. */
+  int getParamSize() {
+    this.hasGlobalName("memccpy") and result = 3
+    or
+    not this.hasGlobalName("memccpy") and result = 2
+  }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = this.getParamSrc() }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = this.getParamDest() }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    bufParam = [this.getParamDest(), this.getParamSrc()] and
+    countParam = this.getParamSize()
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the source buffer flows to the destination buffer and to the object behind the result
+    input.isParameterDeref(this.getParamSrc()) and
+    (output.isParameterDeref(this.getParamDest()) or output.isReturnValueDeref())
+    or
+    // the destination pointer flows to the return value
+    output.isReturnValue() and input.isParameter(this.getParamDest())
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and i = this.getParamSrc()
+  }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    buffer = true and
+    i = this.getParamDest() and
+    // memccpy only writes until a given character `c` is found
+    (
+      this.hasGlobalName("memccpy") and mustWrite = false
+      or
+      not this.hasGlobalName("memccpy") and mustWrite = true
+    )
+  }
+
+  override ParameterIndex getParameterSizeIndex(ParameterIndex i) {
+    i = [this.getParamDest(), this.getParamSrc()] and
+    result = this.getParamSize()
+  }
+
+  override predicate parameterNeverEscapes(int index) {
+    index = this.getParamSrc()
+    or
+    index = this.getParamDest() and this.hasGlobalName("bcopy")
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) {
+    index = this.getParamDest() and not this.hasGlobalName("bcopy")
+  }
+
+  override predicate parameterIsAlwaysReturned(int index) {
+    index = this.getParamDest() and not this.hasGlobalName(["bcopy", mempcpy(), "memccpy"])
+  }
+}
+
+private string mempcpy() { result = "mempcpy" or result = "wmempcpy" } // the `mempcpy` variants
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Memset.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Memset.qll
new file mode 100644
index 00000000000..d646be0363d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Memset.qll
@@ -0,0 +1,61 @@
+/**
+ * Provides implementation classes modeling `memset` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * The standard function `memset` and its assorted variants.
+ */
+private class MemsetFunction extends ArrayFunction, DataFlowFunction, AliasFunction,
+  SideEffectFunction {
+  MemsetFunction() {
+    this.hasGlobalOrStdOrBslName("memset")
+    or
+    this.hasGlobalOrStdName("wmemset")
+    or
+    // gcc and clang spell the checked builtin `__builtin___memset_chk`; the old spelling is kept
+    this.hasGlobalName([
+        bzero(), "__builtin_memset", "__builtin_memset_chk", "__builtin___memset_chk"
+      ])
+  }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    input.isParameter(0) and output.isReturnValue()
+  }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    bufParam = 0 and (if this.hasGlobalName(bzero()) then countParam = 1 else countParam = 2)
+  }
+
+  override predicate parameterNeverEscapes(int index) { this.hasGlobalName(bzero()) and index = 0 }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) {
+    not this.hasGlobalName(bzero()) and index = 0
+  }
+
+  override predicate parameterIsAlwaysReturned(int index) {
+    not this.hasGlobalName(bzero()) and index = 0
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    i = 0 and buffer = true and mustWrite = true
+  }
+
+  override ParameterIndex getParameterSizeIndex(ParameterIndex i) {
+    i = 0 and (if this.hasGlobalName(bzero()) then result = 1 else result = 2)
+  }
+}
+
+private string bzero() { result = "bzero" or result = "explicit_bzero" } // the `bzero` variants
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/MySql.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/MySql.qll
new file mode 100644
index 00000000000..ca5d7020158
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/MySql.qll
@@ -0,0 +1,32 @@
+/**
+ * Provides implementation classes modeling the MySql C API.
+ * See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+private import semmle.code.cpp.models.interfaces.Sql
+private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs
+
+/**
+ * A `mysql_query`-family function of the MySQL C API; its SQL is behind parameter 1.
+ */
+private class MySqlExecutionFunction extends SqlExecutionFunction {
+  MySqlExecutionFunction() {
+    this.getName() = ["mysql_query", "mysql_real_query", "mysql_real_query_nonblocking"]
+  }
+
+  override predicate hasSqlArgument(FunctionInput input) { input.isParameterDeref(1) }
+}
+
+/**
+ * The `mysql_real_escape_string` family of functions from the MySQL C API.
+ */
+private class MySqlBarrierFunction extends SqlBarrierFunction {
+  MySqlBarrierFunction() {
+    this.getName() = ["mysql_real_escape_string", "mysql_real_escape_string_quote"]
+  }
+
+  override predicate barrierSqlArgument(FunctionInput input, FunctionOutput output) {
+    // the buffer behind parameter 2 is the barrier input, the one behind parameter 1 its output
+    output.isParameterDeref(1) and input.isParameterDeref(2)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Poll.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Poll.qll
new file mode 100644
index 00000000000..020bd6aaa51
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Poll.qll
@@ -0,0 +1,44 @@
+/**
+ * Provides implementation classes modeling `poll` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/** The function `poll` and its variants `ppoll` and `WSAPoll`. */
+private class Poll extends ArrayFunction, AliasFunction, SideEffectFunction {
+  Poll() { this.hasGlobalName("poll") or this.hasGlobalName(["ppoll", "WSAPoll"]) }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    countParam = 1 and bufParam = 0
+  }
+
+  // every parameter of the function is modeled as never escaping
+  override predicate parameterNeverEscapes(int index) { exists(this.getParameter(index)) }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and i = 0
+    or
+    // `ppoll` additionally has non-buffer read side effects on parameters 2 and 3
+    buffer = false and i = [2, 3] and this.hasGlobalName("ppoll")
+  }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    mustWrite = false and buffer = true and i = 0
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/PostgreSql.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/PostgreSql.qll
new file mode 100644
index 00000000000..595805f176f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/PostgreSql.qll
@@ -0,0 +1,94 @@
+private import semmle.code.cpp.models.interfaces.Sql
+private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs
+
+/**
+ * Holds if `function` is the name of one of the modeled `pqxx` transaction
+ * member functions and `arg` is the index of the parameter that carries its
+ * SQL statement. Class membership is checked separately by the users of this
+ * predicate.
+ */
+private predicate pqxxTransactionSqlArgument(string function, int arg) {
+  // functions whose SQL statement is parameter 0
+  function =
+    [
+      "exec", "exec0", "exec1", //
+      "exec_params", "exec_params0", "exec_params1", //
+      "query_value", //
+      "stream"
+    ] and
+  arg = 0
+  or
+  // functions whose SQL statement is parameter 1
+  function = ["exec_n", "exec_params_n"] and
+  arg = 1
+}
+
+private predicate pqxxConnectionSqlArgument(string function, int arg) {
+  arg = 1 and function = "prepare" // `prepare` carries its SQL statement in parameter 1
+}
+
+private predicate pqxxTransationClassNames(string className, string namespace) {
+  className = [
+      "basic_robusttransaction", "basic_transaction", "dbtransaction", "nontransaction",
+      "robusttransaction", "subtransaction", "transaction", "transaction_base", "work"
+    ] and
+  namespace = "pqxx" // every listed transaction class is paired with namespace `pqxx`
+}
+
+private predicate pqxxConnectionClassNames(string className, string namespace) {
+  className = ["basic_connection", "connection", "connection_base"] and
+  namespace = "pqxx" // every listed connection class is paired with namespace `pqxx`
+}
+
+private predicate pqxxEscapeArgument(string function, int arg) {
+  function = ["esc", "esc_like", "esc_raw", "quote", "quote_name", "quote_raw", "quote_table"] and
+  arg = 0 // the escaped value is always parameter 0
+}
+
+/**
+ * A member function of a `pqxx` transaction or connection class that takes an
+ * SQL statement, as listed by the `pqxx...SqlArgument` predicates.
+ */
+private class PostgreSqlExecutionFunction extends SqlExecutionFunction {
+  PostgreSqlExecutionFunction() {
+    exists(Class c | c = this.getDeclaringType() |
+      // connection `prepare` variations
+      pqxxConnectionClassNames(c.getName(), c.getNamespace().getName()) and
+      pqxxConnectionSqlArgument(this.getName(), _)
+      or
+      // transaction `exec` variations
+      pqxxTransationClassNames(c.getName(), c.getNamespace().getName()) and
+      pqxxTransactionSqlArgument(this.getName(), _)
+    )
+  }
+
+  override predicate hasSqlArgument(FunctionInput input) {
+    exists(int argIndex | input.isParameterDeref(argIndex) |
+      pqxxConnectionSqlArgument(this.getName(), argIndex)
+      or
+      pqxxTransactionSqlArgument(this.getName(), argIndex)
+    )
+  }
+}
+
+/**
+ * A member function of a `pqxx` transaction or connection class that is listed
+ * by `pqxxEscapeArgument`.
+ */
+private class PostgreSqlBarrierFunction extends SqlBarrierFunction {
+  PostgreSqlBarrierFunction() {
+    pqxxEscapeArgument(this.getName(), _) and
+    exists(Class c | c = this.getDeclaringType() |
+      pqxxConnectionClassNames(c.getName(), c.getNamespace().getName())
+      or
+      pqxxTransationClassNames(c.getName(), c.getNamespace().getName())
+    )
+  }
+
+  override predicate barrierSqlArgument(FunctionInput input, FunctionOutput output) {
+    // barrier from the object behind the listed argument to the object behind the return value
+    output.isReturnValueDeref() and
+    exists(int argIndex | pqxxEscapeArgument(this.getName(), argIndex) |
+      input.isParameterDeref(argIndex)
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll
new file mode 100644
index 00000000000..ed201a14587
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll
@@ -0,0 +1,212 @@
+/**
+ * Provides implementation classes modeling various standard formatting
+ * functions (`printf`, `snprintf` etc).
+ * See `semmle.code.cpp.models.interfaces.FormattingFunction` for usage
+ * information.
+ */
+
+import semmle.code.cpp.models.interfaces.FormattingFunction
+import semmle.code.cpp.models.interfaces.Alias
+
+/**
+ * The standard functions `printf` and `wprintf`, their `_s` variants and glib's `g_printf`.
+ */
+private class Printf extends FormattingFunction, AliasFunction {
+  Printf() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this instanceof TopLevelFunction and
+    (
+      this.hasGlobalName(["printf_s", "wprintf_s", "g_printf"]) or
+      this.hasGlobalOrStdOrBslName(["printf", "wprintf"])
+    )
+  }
+
+  override predicate isOutputGlobal() { any() }
+
+  override int getFormatParameterIndex() { result = 0 }
+
+  deprecated override predicate isWideCharDefault() { this.getName() = ["wprintf", "wprintf_s"] }
+
+  override predicate parameterNeverEscapes(int n) { n = 0 }
+
+  override predicate parameterEscapesOnlyViaReturn(int n) { none() }
+
+  override predicate parameterIsAlwaysReturned(int n) { none() }
+}
+
+/**
+ * The standard functions `fprintf` and `fwprintf`, and glib's `g_fprintf`.
+ */
+private class Fprintf extends FormattingFunction {
+  Fprintf() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this instanceof TopLevelFunction and
+    (
+      this.hasGlobalName("g_fprintf") or
+      this.hasGlobalOrStdOrBslName(["fprintf", "fwprintf"])
+    )
+  }
+
+  override int getOutputParameterIndex(boolean isStream) { isStream = true and result = 0 }
+
+  override int getFormatParameterIndex() { result = 1 }
+
+  deprecated override predicate isWideCharDefault() { this.getName() = "fwprintf" }
+}
+
+/** The standard function `sprintf` and its Microsoft, glib and gcc builtin variants. */
+private class Sprintf extends FormattingFunction {
+  Sprintf() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this instanceof TopLevelFunction and
+    (
+      this.hasGlobalName([
+          "_sprintf_l", // _sprintf_l(dst, format, locale, args...)
+          "__swprintf_l", // __swprintf_l(dst, format, locale, args...)
+          "g_strdup_printf", // g_strdup_printf(format, ...)
+          "g_sprintf", // g_sprintf(dst, format, ...)
+          "__builtin___sprintf_chk" // __builtin___sprintf_chk(dst, flag, os, format, ...)
+        ])
+      or
+      this.hasGlobalOrStdOrBslName([
+          "sprintf", // sprintf(dst, format, args...)
+          "wsprintf" // wsprintf(dst, format, args...)
+        ])
+    )
+  }
+
+  deprecated override predicate isWideCharDefault() {
+    // holds when the base type of the format parameter's pointer type has a size above 1
+    exists(PointerType formatType |
+      formatType = this.getParameter(this.getFormatParameterIndex()).getType().getUnspecifiedType()
+    |
+      formatType.getBaseType().getSize() > 1
+    )
+  }
+
+  override int getFormatParameterIndex() {
+    if this.hasName("g_strdup_printf")
+    then result = 0
+    else
+      if this.hasName("__builtin___sprintf_chk")
+      then result = 3
+      else result = 1
+  }
+
+  override int getOutputParameterIndex(boolean isStream) {
+    isStream = false and result = 0 and not this.hasName("g_strdup_printf")
+  }
+
+  override int getFirstFormatArgumentIndex() {
+    this.hasName("__builtin___sprintf_chk") and result = 4
+    or
+    not this.hasName("__builtin___sprintf_chk") and result = this.getNumberOfParameters()
+  }
+}
+
+/**
+ * Implements `Snprintf` for the standard functions `snprintf` and `swprintf`
+ * and for their Microsoft, glib and gcc builtin variants. For all of them,
+ * parameter 0 is the output buffer and parameter 1 is the size parameter.
+ */
+private class SnprintfImpl extends Snprintf {
+  SnprintfImpl() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this instanceof TopLevelFunction and
+    (
+      // Microsoft has _snprintf as well as several other variations
+      this.hasGlobalName([
+          "sprintf_s", "snprintf_s", "swprintf_s", "_snprintf", "_snprintf_s", "_snprintf_l",
+          "_snprintf_s_l", "_snwprintf", "_snwprintf_s", "_snwprintf_l", "_snwprintf_s_l",
+          "_sprintf_s_l", "_swprintf_l", "_swprintf_s_l", "g_snprintf", "wnsprintf",
+          "__builtin___snprintf_chk"
+        ])
+      or
+      this.hasGlobalOrStdOrBslName([
+          "snprintf", // C99 defines snprintf
+          "swprintf" // The s version of wide-char printf is also always the n version
+        ])
+    )
+  }
+
+  override int getOutputParameterIndex(boolean isStream) { isStream = false and result = 0 }
+
+  override int getSizeParameterIndex() { result = 1 }
+
+  override int getFormatParameterIndex() {
+    // the format sits two positions before the first format argument for `_l` names, else one
+    exists(int offset | result = this.getFirstFormatArgumentIndex() - offset |
+      if this.getName().matches("%\\_l") then offset = 2 else offset = 1
+    )
+  }
+
+  override int getFirstFormatArgumentIndex() {
+    // index 5 for the gcc builtin; for every other name, the number of declared parameters
+    // (that is, the first position after them)
+    exists(string name | name = this.getQualifiedName() |
+      if name = "__builtin___snprintf_chk"
+      then result = 5
+      else result = this.getNumberOfParameters()
+    )
+  }
+
+  deprecated override predicate isWideCharDefault() {
+    // holds when the base type of the format parameter's pointer type has a size above 1
+    exists(PointerType formatType |
+      formatType = this.getParameter(this.getFormatParameterIndex()).getType().getUnspecifiedType()
+    |
+      formatType.getBaseType().getSize() > 1
+    )
+  }
+
+  override predicate returnsFullFormatLength() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this.hasName(["snprintf", "g_snprintf", "__builtin___snprintf_chk", "snprintf_s"])
+  }
+}
+
+/** The Microsoft `StringCchPrintf` and `StringCbPrintf` functions and their variants. */
+private class StringCchPrintf extends FormattingFunction {
+  StringCchPrintf() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this instanceof TopLevelFunction and
+    this.hasGlobalName([
+        "StringCbPrintf", "StringCbPrintfEx", "StringCbPrintf_l", "StringCbPrintf_lEx",
+        "StringCchPrintf", "StringCchPrintfEx", "StringCchPrintf_l", "StringCchPrintf_lEx"
+      ])
+  }
+
+  override int getOutputParameterIndex(boolean isStream) { isStream = false and result = 0 }
+
+  override int getSizeParameterIndex() { result = 1 }
+
+  override int getFormatParameterIndex() {
+    // names ending in `Ex` take the format string at index 5, all others at index 2
+    this.getName().matches("%Ex") and result = 5
+    or
+    not this.getName().matches("%Ex") and result = 2
+  }
+
+  deprecated override predicate isWideCharDefault() {
+    exists(PointerType formatType |
+      formatType = this.getParameter(this.getFormatParameterIndex()).getType().getUnspecifiedType()
+    |
+      formatType.getBaseType().getSize() > 1
+    )
+  }
+}
+
+/**
+ * The standard function `syslog`, whose format string is parameter 1.
+ */
+private class Syslog extends FormattingFunction {
+  Syslog() {
+    not exists(this.getDefinition().getFile().getRelativePath()) and
+    this.hasGlobalName("syslog") and
+    this instanceof TopLevelFunction
+  }
+
+  override predicate isOutputGlobal() { any() }
+
+  override int getFormatParameterIndex() { result = 1 }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll
new file mode 100644
index 00000000000..d728a66463d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll
@@ -0,0 +1,208 @@
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A function on strings that is pure, i.e. evaluating it is guaranteed to be
+ * free of side effects.
+ */
+private class PureStrFunction extends AliasFunction, ArrayFunction, TaintFunction,
+  SideEffectFunction {
+  PureStrFunction() {
+    this.hasGlobalOrStdOrBslName([
+        atoi(), "strcasestr", "strchnul", "strchr", "strchrnul", "strstr", "strpbrk", "strrchr",
+        "strspn", strtol(), strrev(), strcmp(), strlwr(), strupr()
+      ])
+  }
+
+  override predicate hasArrayInput(int bufParam) {
+    this.getParameter(bufParam).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) {
+    this.getParameter(bufParam).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    (
+      output.isReturnValue()
+      or
+      output.isReturnValueDeref() and
+      this.getUnspecifiedType() instanceof PointerType
+    ) and
+    exists(ParameterIndex i |
+      // A name ending in `_l` means the last argument is a locale, which must not be a taint
+      // source; for those functions `i` is only accepted when another parameter follows it.
+      (exists(this.getParameter(i + 1)) or not this.getName().matches("%\\_l")) and
+      (
+        input.isParameterDeref(i) and
+        this.getParameter(i).getUnspecifiedType() instanceof PointerType
+        or
+        input.isParameter(i) and
+        exists(this.getParameter(i))
+      )
+    )
+  }
+
+  override predicate parameterNeverEscapes(int i) {
+    not this.parameterEscapesOnlyViaReturn(i) and
+    this.getParameter(i).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int i) {
+    this.getUnspecifiedType() instanceof PointerType and
+    i = 0
+  }
+
+  override predicate parameterIsAlwaysReturned(int i) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and
+    this.getParameter(i).getUnspecifiedType() instanceof PointerType
+  }
+}
+
+private string atoi() { result = ["atoi", "atol", "atoll", "atof"] }
+
+private string strtol() { result = ["strtol", "strtoll", "strtoul", "strtoq", "strtod", "strtof"] }
+
+private string strlwr() {
+  result = ["_strlwr", "_strlwr_l", "_wcslwr", "_wcslwr_l", "_mbslwr", "_mbslwr_l"]
+}
+
+private string strupr() {
+  result = ["_strupr", "_strupr_l", "_wcsupr", "_wcsupr_l", "_mbsupr", "_mbsupr_l"]
+}
+
+private string strrev() { result = ["_mbsrev", "_mbsrev_l", "_strrev", "_wcsrev"] }
+
+private string strcmp() {
+  // NOTE: `strcoll` is not strictly pure, because its result depends on `LC_COLLATE`
+  // (which is set by `setlocale`). It is unclear whether modeling that is worthwhile, so
+  // for now the function is treated as pure.
+  result =
+    [
+      "strcmp", "strncmp", "_stricmp", "strcoll", "strverscmp", "strcspn", "_mbsnbcmp",
+      "_mbsnbcmp_l"
+    ]
+}
+
+/**
+ * A string-length function such as `strlen`, which returns the length of the string it is given.
+ */
+private class StrLenFunction extends AliasFunction, ArrayFunction, SideEffectFunction {
+  StrLenFunction() {
+    this.hasGlobalName(["_mbslen", "_mbslen_l", "_mbstrlen", "_mbstrlen_l"])
+    or
+    this.hasGlobalOrStdOrBslName(["strlen", "strnlen", "wcslen"])
+  }
+
+  override predicate hasArrayInput(int bufParam) {
+    this.getParameter(bufParam).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) {
+    this.getParameter(bufParam).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate parameterNeverEscapes(int i) {
+    this.getParameter(i).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int i) { none() }
+
+  override predicate parameterIsAlwaysReturned(int i) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and
+    this.getParameter(i).getUnspecifiedType() instanceof PointerType
+  }
+}
+
+/**
+ * A pure function, i.e. one whose evaluation is guaranteed to have no side effects,
+ * other than the functions already modeled by `PureStrFunction` and `PureMemFunction`.
+ */
+private class PureFunction extends TaintFunction, SideEffectFunction {
+  PureFunction() { this.hasGlobalOrStdOrBslName(["abs", "labs"]) }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and
+    exists(ParameterIndex i |
+      exists(this.getParameter(i)) and
+      input.isParameter(i)
+    )
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+}
+
+/**
+ * A function on memory buffers that is pure, i.e. evaluating it is
+ * guaranteed to be free of side effects.
+ */
+private class PureMemFunction extends AliasFunction, ArrayFunction, TaintFunction,
+  SideEffectFunction {
+  PureMemFunction() {
+    this.hasGlobalName("memfrob") or
+    this.hasGlobalOrStdOrBslName([
+        "memchr", "__builtin_memchr", "memrchr", "rawmemchr", "memcmp", "__builtin_memcmp", "memmem"
+      ])
+  }
+
+  override predicate hasArrayInput(int bufParam) {
+    this.getParameter(bufParam).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    (
+      output.isReturnValue()
+      or
+      output.isReturnValueDeref() and
+      this.getUnspecifiedType() instanceof PointerType
+    ) and
+    exists(ParameterIndex i |
+      // For `memfrob`, only parameter 0 is a taint source (not the size argument).
+      (i = 0 or not this.hasGlobalName("memfrob")) and
+      (
+        input.isParameterDeref(i) and
+        this.getParameter(i).getUnspecifiedType() instanceof PointerType
+        or
+        input.isParameter(i) and
+        exists(this.getParameter(i))
+      )
+    )
+  }
+
+  override predicate parameterNeverEscapes(int i) {
+    not this.parameterEscapesOnlyViaReturn(i) and
+    this.getParameter(i).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int i) {
+    this.getUnspecifiedType() instanceof PointerType and
+    i = 0
+  }
+
+  override predicate parameterIsAlwaysReturned(int i) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and
+    this.getParameter(i).getUnspecifiedType() instanceof PointerType
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Recv.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Recv.qll
new file mode 100644
index 00000000000..0551185ba14
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Recv.qll
@@ -0,0 +1,90 @@
+/**
+ * Provides implementation classes modeling `recv` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.FlowSource
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/** The function `recv` and functions with a similar interface, such as `recvfrom` and `read`. */
+private class Recv extends AliasFunction, ArrayFunction, SideEffectFunction,
+  RemoteFlowSourceFunction {
+  Recv() {
+    this.hasGlobalName([
+        "recv", // recv(socket, dest, len, flags)
+        "recvfrom", // recvfrom(socket, dest, len, flags, from, fromlen)
+        "recvmsg", // recvmsg(socket, msg, flags)
+        "read", // read(socket, dest, len)
+        "pread", // pread(socket, dest, len, offset)
+        "readv", // readv(socket, dest, len)
+        "preadv", // preadv(socket, dest, len, offset)
+        "preadv2" // preadv2(socket, dest, len, offset, flags)
+      ])
+  }
+
+  override predicate parameterNeverEscapes(int index) {
+    exists(PointerType t | t = this.getParameter(index).getUnspecifiedType())
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    bufParam = 1 and
+    countParam = 2 and
+    not this.hasGlobalName("recvmsg")
+  }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 4 and this.hasGlobalName("recvfrom") }
+
+  override predicate hasArrayOutput(int bufParam) {
+    this.hasGlobalName("recvfrom") and bufParam = 4
+    or
+    bufParam = 1
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    this.hasGlobalName("recvmsg") and
+    i = 1 and
+    buffer = true
+    or
+    this.hasGlobalName("recvfrom") and
+    (
+      i = 5 and buffer = false
+      or
+      i = 4 and buffer = true
+    )
+  }
+
+  override ParameterIndex getParameterSizeIndex(ParameterIndex i) { result = 2 and i = 1 }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    // None of these writes is marked as guaranteed to happen.
+    mustWrite = false and
+    (
+      i = 1 and buffer = true
+      or
+      this.hasGlobalName("recvfrom") and
+      (i = 4 and buffer = true or i = 5 and buffer = false)
+    )
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasRemoteFlowSource(FunctionOutput output, string description) {
+    description = "Buffer read by " + this.getName() and
+    (
+      this.hasGlobalName("recvfrom") and output.isParameterDeref([4, 5])
+      or
+      output.isParameterDeref(1)
+    )
+  }
+
+  override predicate hasSocketInput(FunctionInput input) { input.isParameter(0) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Select.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Select.qll
new file mode 100644
index 00000000000..b2120b241b9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Select.qll
@@ -0,0 +1,40 @@
+/**
+ * Provides implementation classes modeling `select` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * The function `select` and its variant `pselect`.
+ */
+private class Select extends ArrayFunction, AliasFunction, SideEffectFunction {
+  Select() { this.hasGlobalName(["pselect", "select"]) }
+
+  override predicate hasArrayWithUnknownSize(int bufParam) { bufParam in [1 .. 3] }
+
+  override predicate hasArrayInput(int bufParam) { bufParam in [1 .. 3] }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam in [1 .. 3] }
+
+  override predicate parameterNeverEscapes(int index) { exists(this.getParameter(index)) }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    mustWrite = false and buffer = true and i in [1 .. 3]
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and i in [1 .. 5]
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Send.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Send.qll
new file mode 100644
index 00000000000..d871bad68af
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Send.qll
@@ -0,0 +1,65 @@
+/**
+ * Provides implementation classes modeling `send` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.FlowSource
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/** The function `send` and functions with a similar interface, such as `sendto` and `write`. */
+private class Send extends AliasFunction, ArrayFunction, SideEffectFunction, RemoteFlowSinkFunction {
+  Send() {
+    this.hasGlobalName([
+        "send", // send(socket, buf, len, flags)
+        "sendto", // sendto(socket, buf, len, flags, to, tolen)
+        "sendmsg", // sendmsg(socket, msg, flags)
+        "write", // write(socket, buf, len)
+        "writev", // writev(socket, buf, len)
+        "pwritev", // pwritev(socket, buf, len, offset)
+        "pwritev2" // pwritev2(socket, buf, len, offset, flags)
+      ])
+  }
+
+  override predicate parameterNeverEscapes(int index) {
+    exists(PointerType t | t = this.getParameter(index).getUnspecifiedType())
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    bufParam = 1 and
+    countParam = 2 and
+    not this.hasGlobalName("sendmsg")
+  }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 1 }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    none()
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    this.hasGlobalName("sendto") and buffer = false and i = 4
+    or
+    this.hasGlobalName("sendmsg") and buffer = true and i = 1
+    or
+    buffer = true and i = 1
+  }
+
+  override ParameterIndex getParameterSizeIndex(ParameterIndex i) { result = 2 and i = 1 }
+
+  override predicate hasRemoteFlowSink(FunctionInput input, string description) {
+    description = "Buffer sent by " + this.getName() and input.isParameterDeref(1)
+  }
+
+  override predicate hasSocketInput(FunctionInput input) { input.isParameter(0) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/SmartPointer.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/SmartPointer.qll
new file mode 100644
index 00000000000..e249a164061
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/SmartPointer.qll
@@ -0,0 +1,167 @@
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.PointerWrapper
+
+/**
+ * The smart pointer class templates `shared_ptr`, `weak_ptr` and `unique_ptr` in `std` or `bsl`.
+ */
+private class SmartPtr extends Class, PointerWrapper {
+  SmartPtr() { this.hasQualifiedName(["bsl", "std"], ["unique_ptr", "shared_ptr", "weak_ptr"]) }
+
+  override MemberFunction getAnUnwrapperFunction() {
+    this = result.getClassAndName(["get", "operator->"])
+    or
+    this = result.(OverloadedPointerDereferenceFunction).getDeclaringType()
+  }
+
+  override predicate pointsToConst() { this.getTemplateArgument(0).(Type).isConst() }
+}
+
+/**
+ * A member function through which the address wrapped by a `PointerWrapper` is returned,
+ * either as a pointer or as a reference.
+ *
+ * Examples:
+ * - `std::unique_ptr::get()`
+ * - `std::shared_ptr::operator->()`
+ * - `std::weak_ptr::operator*()`
+ */
+private class PointerUnwrapperFunction extends MemberFunction, TaintFunction, DataFlowFunction,
+  SideEffectFunction, AliasFunction {
+  PointerUnwrapperFunction() {
+    this = any(PointerWrapper wrapper).getAnUnwrapperFunction()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input.isQualifierObject()
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    // The only read is from `*this`.
+    buffer = false and i = -1
+  }
+
+  override predicate parameterNeverEscapes(int index) { index = -1 }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate hasAddressFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input.isQualifierObject()
+  }
+}
+
+/**
+ * The `make_shared` and `make_unique` function templates in `std` or `bsl`.
+ */
+private class MakeUniqueOrShared extends TaintFunction {
+  MakeUniqueOrShared() { this.hasQualifiedName(["std", "bsl"], ["make_unique", "make_shared"]) }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and
+    // The array-allocating specializations of `std::make_shared` and `std::make_unique` only
+    // take a size argument, and taint should not be propagated through that, so skip them.
+    not this.isArray() and
+    exists(int argIndex | argIndex = [0 .. this.getNumberOfParameters() - 1] |
+      input.isParameter(argIndex)
+      or
+      input.isParameterDeref(argIndex)
+    )
+  }
+
+  /**
+   * Holds if this function returns a `shared_ptr<T>` (or `unique_ptr<T>`) where `T` is an
+   * array type (i.e., `U[]` for some type `U`).
+   */
+  predicate isArray() {
+    exists(ArrayType arr | arr = this.getTemplateArgument(0).(Type).getUnderlyingType())
+  }
+}
+
+/**
+ * A member function that sets the value held by a smart pointer.
+ *
+ * This is a constructor, an assignment operator, or the named member function `reset()`.
+ */
+private class SmartPtrSetterFunction extends MemberFunction, AliasFunction, SideEffectFunction {
+  SmartPtrSetterFunction() {
+    not this.isStatic() and
+    this.getDeclaringType() instanceof SmartPtr and
+    (
+      this.hasName("reset")
+      or
+      this.hasName("operator=")
+      or
+      this instanceof Constructor
+    )
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { none() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { none() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    buffer = false and
+    mustWrite = true and
+    (
+      i = -1 // the destination smart pointer itself is always written
+      or
+      // An input smart pointer taken by rvalue reference is always overwritten as well.
+      this.getPointerInput().isParameterDeref(i) and
+      this.getParameter(i).getUnspecifiedType() instanceof RValueReferenceType
+    )
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = false and
+    (
+      i = -1 and not this instanceof Constructor
+      or
+      this.getPointerInput().isParameterDeref(i)
+    )
+  }
+
+  override predicate parameterNeverEscapes(int index) { index = -1 }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate hasAddressFlow(FunctionInput input, FunctionOutput output) {
+    // `operator=` always returns a reference to `*this`.
+    this.hasName("operator=") and
+    input.isQualifierAddress() and
+    output.isReturnValue()
+    or
+    output.isQualifierObject() and
+    input = this.getPointerInput()
+  }
+
+  private FunctionInput getPointerInput() {
+    exists(Parameter firstParam | firstParam = this.getParameter(0) |
+      (
+        firstParam.getUnspecifiedType().(ReferenceType).getBaseType() instanceof SmartPtr and
+        if this.getParameter(1).getUnspecifiedType() instanceof PointerType
+        then
+          // A `std::shared_ptr` constructor that wraps a raw pointer whose ownership is
+          // controlled by an unrelated smart pointer. The raw pointer in the second
+          // parameter is propagated, rather than the smart pointer in the first
+          // parameter.
+          result.isParameter(1)
+        else result.isParameterDeref(0)
+        or
+        // A function that takes ownership of a raw pointer.
+        firstParam.getUnspecifiedType() instanceof PointerType and
+        result.isParameter(0)
+      )
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/SqLite3.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/SqLite3.qll
new file mode 100644
index 00000000000..d65df9a27ed
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/SqLite3.qll
@@ -0,0 +1,21 @@
+/**
+ * Provides implementation classes modeling the SQLite C API.
+ * See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+private import semmle.code.cpp.models.interfaces.Sql
+private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs
+
+/**
+ * The SQLite C API function `sqlite3_exec` and the `sqlite3_prepare` family of functions.
+ */
+private class SqLite3ExecutionFunction extends SqlExecutionFunction {
+  SqLite3ExecutionFunction() {
+    this.hasName([
+        "sqlite3_prepare", "sqlite3_prepare_v2", "sqlite3_prepare_v3", "sqlite3_prepare16",
+        "sqlite3_prepare16_v2", "sqlite3_prepare16_v3", "sqlite3_exec"
+      ])
+  }
+
+  override predicate hasSqlArgument(FunctionInput input) { input.isParameterDeref(1) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Sscanf.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Sscanf.qll
new file mode 100644
index 00000000000..b6120abf05a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Sscanf.qll
@@ -0,0 +1,72 @@
+/**
+ * Provides implementation classes modeling `sscanf`, `fscanf` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.commons.Scanf
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * The standard functions `sscanf` and `fscanf`, together with their assorted variants.
+ */
+private class SscanfModel extends ArrayFunction, TaintFunction, AliasFunction, SideEffectFunction {
+  SscanfModel() { this instanceof Snscanf or this instanceof Fscanf or this instanceof Sscanf }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) {
+    bufParam = this.(ScanfFunction).getInputParameterIndex() and
+    not this instanceof Fscanf
+    or
+    bufParam = this.(ScanfFunction).getFormatParameterIndex()
+  }
+
+  override predicate hasArrayInput(int bufParam) { this.hasArrayWithNullTerminator(bufParam) }
+
+  private int getLengthParameterIndex() { result = this.(Snscanf).getInputLengthParameterIndex() }
+
+  private int getLocaleParameterIndex() {
+    this.getName().matches("%\\_l") and
+    (
+      result = this.getLengthParameterIndex() + 2
+      or
+      not exists(this.getLengthParameterIndex()) and result = 2
+    )
+  }
+
+  private int getArgsStartPosition() { result = this.getNumberOfParameters() }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isParameterDeref(any(int i | i >= this.getArgsStartPosition())) and
+    input.isParameterDeref(this.(ScanfFunction).getInputParameterIndex())
+  }
+
+  override predicate parameterNeverEscapes(int index) {
+    index = [0 .. max(this.getACallToThisFunction().getNumberOfArguments())]
+  }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    mustWrite = true and
+    buffer = true and
+    i >= this.getArgsStartPosition()
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    exists(ScanfFunction scanf | scanf = this |
+      i = scanf.getInputParameterIndex() or
+      i = scanf.getFormatParameterIndex() or
+      i = this.getLocaleParameterIndex()
+    ) and
+    buffer = true
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll
new file mode 100644
index 00000000000..367db1613fc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll
@@ -0,0 +1,269 @@
+/**
+ * Provides models for C++ containers `std::array`, `std::vector`, `std::deque`, `std::list` and `std::forward_list`.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Iterator
+
+/**
+ * The `array` class template, in namespace `std` or `bsl`.
+ */
+private class Array extends Class {
+  Array() { this.hasQualifiedName(["bsl", "std"], "array") }
+}
+
+/**
+ * The `deque` class template, in namespace `std` or `bsl`.
+ */
+private class Deque extends Class {
+  Deque() { this.hasQualifiedName(["bsl", "std"], "deque") }
+}
+
+/**
+ * The `forward_list` class template, in namespace `std` or `bsl`.
+ */
+private class ForwardList extends Class {
+  ForwardList() { this.hasQualifiedName(["bsl", "std"], "forward_list") }
+}
+
+/**
+ * The `list` class template, in namespace `std` or `bsl`.
+ */
+private class List extends Class {
+  List() { this.hasQualifiedName(["bsl", "std"], "list") }
+}
+
+/**
+ * The `vector` class template, in namespace `std` or `bsl`.
+ */
+private class Vector extends Class {
+  Vector() { this.hasQualifiedName(["bsl", "std"], "vector") }
+}
+
+/**
+ * Additional taint model for constructors of the standard sequence containers
+ * whose parameters refer to the container's value type (that is, the `T` in
+ * `std::vector<T>`). For example the fill constructor:
+ * ```
+ * std::vector<std::string> v(100, potentially_tainted_string);
+ * ```
+ */
+private class StdSequenceContainerConstructor extends Constructor, TaintFunction {
+  StdSequenceContainerConstructor() {
+    exists(Class container | container = this.getDeclaringType() |
+      container instanceof Vector or container instanceof Deque or
+      container instanceof List or container instanceof ForwardList
+    )
+  }
+
+  /**
+   * Gets the index of a parameter of this constructor whose type is a reference to the
+   * value type of the container.
+   */
+  int getAValueTypeParameterIndex() {
+    this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
+      this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
+  }
+
+  /**
+   * Gets the index of a parameter of this constructor that is an iterator.
+   */
+  int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // taint flows from value-type and iterator parameters to the constructed object
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue() // TODO: this is only needed for AST data flow, which treats constructors as returning the new object
+    ) and
+    (
+      input.isParameter(this.getAnIteratorParameterIndex()) or
+      input.isParameterDeref(this.getAValueTypeParameterIndex())
+    )
+  }
+}
+
+/**
+ * The `data` member function of the standard containers.
+ */
+private class StdSequenceContainerData extends TaintFunction {
+  StdSequenceContainerData() {
+    this.getClassAndName("data") instanceof Vector or
+    this.getClassAndName("data") instanceof Array
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: from what the returned value points to back to the
+    // container (qualifier), to account for writes to `data`
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // forward flow: from the container (qualifier) to the return value
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * The `push_back` and `push_front` member functions of the standard containers.
+ */
+private class StdSequenceContainerPush extends TaintFunction {
+  StdSequenceContainerPush() {
+    this.getClassAndName(["push_back", "push_front"]) instanceof List or
+    this.getClassAndName("push_front") instanceof ForwardList or
+    this.getClassAndName(["push_back", "push_front"]) instanceof Deque or
+    this.getClassAndName("push_back") instanceof Vector
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the pushed element (parameter 0) flows to the container (qualifier)
+    output.isQualifierObject() and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * The `front` and `back` member functions of the standard containers.
+ */
+private class StdSequenceContainerFrontBack extends TaintFunction {
+  StdSequenceContainerFrontBack() {
+    this.getClassAndName(["front", "back"]) instanceof Vector or
+    this.getClassAndName(["front", "back"]) instanceof List or
+    this.getClassAndName("front") instanceof ForwardList or
+    this.getClassAndName(["front", "back"]) instanceof Deque or
+    this.getClassAndName(["front", "back"]) instanceof Array
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the container (qualifier) flows to the returned reference
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * The `insert` and `insert_after` member functions of the standard containers.
+ */
+private class StdSequenceContainerInsert extends TaintFunction {
+  StdSequenceContainerInsert() {
+    this.getClassAndName("insert_after") instanceof ForwardList or
+    this.getClassAndName("insert") instanceof Vector or
+    this.getClassAndName("insert") instanceof List or
+    this.getClassAndName("insert") instanceof Deque
+  }
+
+  /**
+   * Gets the index of a parameter of this function whose type is a reference to the
+   * value type of the container.
+   */
+  int getAValueTypeParameterIndex() {
+    this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
+      this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
+  }
+
+  /**
+   * Gets the index of a parameter of this function that is an iterator.
+   */
+  int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier and the inserted parameters flow to the qualifier and the return value
+    (
+      output.isReturnValueDeref() or
+      output.isQualifierObject()
+    ) and
+    (
+      input.isParameter(this.getAnIteratorParameterIndex()) or
+      input.isParameterDeref(this.getAValueTypeParameterIndex()) or
+      input.isQualifierObject()
+    )
+  }
+}
+
+/**
+ * The `assign` member function of the standard containers.
+ */
+private class StdSequenceContainerAssign extends TaintFunction {
+  StdSequenceContainerAssign() {
+    this.getClassAndName("assign") instanceof Vector or
+    this.getClassAndName("assign") instanceof List or
+    this.getClassAndName("assign") instanceof ForwardList or
+    this.getClassAndName("assign") instanceof Deque
+  }
+
+  /**
+   * Gets the index of a parameter of this function whose type is a reference to the
+   * value type of the container.
+   */
+  int getAValueTypeParameterIndex() {
+    this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
+      this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
+  }
+
+  /**
+   * Gets the index of a parameter of this function that is an iterator.
+   */
+  int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // value-type and iterator parameters flow to the container itself (qualifier)
+    output.isQualifierObject() and
+    (
+      input.isParameter(this.getAnIteratorParameterIndex()) or
+      input.isParameterDeref(this.getAValueTypeParameterIndex())
+    )
+  }
+}
+
+/**
+ * The `at` and `operator[]` member functions of the standard containers.
+ */
+private class StdSequenceContainerAt extends TaintFunction {
+  StdSequenceContainerAt() {
+    this.getClassAndName(["at", "operator[]"]) instanceof Vector or
+    this.getClassAndName(["at", "operator[]"]) instanceof Deque or
+    this.getClassAndName(["at", "operator[]"]) instanceof Array
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: from the returned reference back to the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // forward flow: from the qualifier to the referenced return value
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * The `emplace` member function of the standard vector.
+ */
+class StdVectorEmplace extends TaintFunction {
+  StdVectorEmplace() { this.getClassAndName("emplace") instanceof Vector }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // Every parameter other than the position iterator flows to the qualifier and the return
+    // value (assuming taint flow from any constructor parameter to the constructed object).
+    (
+      output.isReturnValue() or
+      output.isQualifierObject()
+    ) and
+    input.isParameterDeref([1 .. this.getNumberOfParameters() - 1])
+  }
+}
+
+/**
+ * The `emplace_back` member function of the standard vector.
+ */
+class StdVectorEmplaceBack extends TaintFunction {
+  StdVectorEmplaceBack() { this.getClassAndName("emplace_back") instanceof Vector }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // Every parameter flows to the qualifier
+    // (assuming taint flow from any constructor parameter to the constructed object).
+    output.isQualifierObject() and
+    input.isParameterDeref([0 .. this.getNumberOfParameters() - 1])
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdMap.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdMap.qll
new file mode 100644
index 00000000000..aecd98981e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdMap.qll
@@ -0,0 +1,176 @@
+/**
+ * Provides models for C++ containers `std::map` and `std::unordered_map`.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Iterator
+
+/**
+ * A class named `map` or `unordered_map` in namespace `std` or `bsl`.
+ */
+private class MapOrUnorderedMap extends Class {
+  MapOrUnorderedMap() { this.hasQualifiedName(["bsl", "std"], ["unordered_map", "map"]) }
+}
+
+/**
+ * Extra taint model for map constructors that take iterator arguments.
+ */
+private class StdMapConstructor extends Constructor, TaintFunction {
+  StdMapConstructor() { this.getDeclaringType() instanceof MapOrUnorderedMap }
+
+  /**
+   * Gets the index of a parameter of this constructor whose unspecified type is an `Iterator`.
+   */
+  int getAnIteratorParameterIndex() {
+    exists(Parameter p | p = this.getParameter(result) | p.getUnspecifiedType() instanceof Iterator)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // any iterator-typed parameter taints the object being constructed (the qualifier)
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue() // TODO: this is only needed for AST data flow, which treats constructors as returning the new object
+    ) and
+    input.isParameterDeref(this.getAnIteratorParameterIndex())
+  }
+}
+
+/**
+ * Models the `insert` and `insert_or_assign` member functions of the standard map classes.
+ */
+private class StdMapInsert extends TaintFunction {
+  StdMapInsert() {
+    this.getClassAndName(["insert_or_assign", "insert"]) instanceof MapOrUnorderedMap
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the final parameter taints both the qualifier and the return value
+    // (where the return value is a pair, this should really flow just to the first part of it)
+    (
+      output.isReturnValue() or
+      output.isQualifierObject()
+    ) and
+    input.isParameterDeref(this.getNumberOfParameters() - 1)
+  }
+}
+
+/**
+ * Models the `emplace` and `emplace_hint` member functions of the standard map classes.
+ */
+private class StdMapEmplace extends TaintFunction {
+  StdMapEmplace() { this.getClassAndName(["emplace_hint", "emplace"]) instanceof MapOrUnorderedMap }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the return value
+    output.isReturnValue() and
+    input.isQualifierObject()
+    or
+    // the last parameter (either the value part used to construct a pair, or a
+    // pair to be copied / moved) taints the qualifier and the return value.
+    // (where the return value is a pair, this should really flow just to the first part of it)
+    (
+      output.isReturnValue() or
+      output.isQualifierObject()
+    ) and
+    input.isParameterDeref(this.getNumberOfParameters() - 1)
+  }
+}
+
+/**
+ * Models the `try_emplace` member function of the standard map classes.
+ */
+private class StdMapTryEmplace extends TaintFunction {
+  StdMapTryEmplace() { this.getClassAndName("try_emplace") instanceof MapOrUnorderedMap }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // parameters from index 1 onwards taint the qualifier and the return value, except that
+    // index 1 is skipped when this function's unspecified type is an `Iterator`
+    // (here we assume taint flow from any constructor parameter to the constructed object)
+    // (where the return value is a pair, this should really flow just to the first part of it)
+    (
+      output.isReturnValue() or
+      output.isQualifierObject()
+    ) and
+    exists(int arg | arg in [1 .. this.getNumberOfParameters() - 1] |
+      input.isParameterDeref(arg) and
+      not (
+        this.getUnspecifiedType() instanceof Iterator and arg = 1
+      )
+    )
+    or
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `merge` member function of the standard map classes.
+ */
+private class StdMapMerge extends TaintFunction {
+  StdMapMerge() { this.getClassAndName("merge") instanceof MapOrUnorderedMap }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // container1.merge(container2): container2 taints container1
+    output.isQualifierObject() and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * Models the `at` and `operator[]` member functions of the standard map classes.
+ */
+private class StdMapAt extends TaintFunction {
+  StdMapAt() { this.getClassAndName(["operator[]", "at"]) instanceof MapOrUnorderedMap }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the qualifier taints the object behind the returned reference
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `find` member function of the standard map classes (qualifier taints the result).
+ */
+private class StdMapFind extends TaintFunction {
+  StdMapFind() { this.getClassAndName("find") instanceof MapOrUnorderedMap }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `erase` member function of the standard map classes.
+ */
+private class StdMapErase extends TaintFunction {
+  StdMapErase() { this.getClassAndName("erase") instanceof MapOrUnorderedMap }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the return value, but only when that value is an iterator
+    output.isReturnValue() and
+    input.isQualifierObject() and
+    this.getType().getUnderlyingType() instanceof Iterator
+  }
+}
+
+/**
+ * Models `lower_bound`, `upper_bound` and `equal_range` on the standard map classes.
+ */
+private class StdMapEqualRange extends TaintFunction {
+  StdMapEqualRange() {
+    this.getClassAndName(["equal_range", "upper_bound", "lower_bound"]) instanceof MapOrUnorderedMap
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the return value
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdPair.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdPair.qll
new file mode 100644
index 00000000000..755f6a48520
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdPair.qll
@@ -0,0 +1,68 @@
+/**
+ * Provides models for the C++ `std::pair` class.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+
+/**
+ * A class template instantiation named `pair` in namespace `std` or `bsl`.
+ */
+private class StdPair extends ClassTemplateInstantiation {
+  StdPair() { this.hasQualifiedName(["bsl", "std"], "pair") }
+}
+
+/**
+ * DEPRECATED: This class is now called `StdPair` and is a private part of
+ * the library implementation.
+ */
+deprecated class StdPairClass = StdPair;
+
+/**
+ * A constructor of `std::pair` with exactly one parameter, where that parameter is a
+ * reference to an instantiation of `std::pair`. These constructors allow conversion
+ * between pair types when the underlying element types are convertible.
+ */
+class StdPairCopyishConstructor extends Constructor, TaintFunction {
+  StdPairCopyishConstructor() {
+    this.getNumberOfParameters() = 1 and
+    this.getDeclaringType() instanceof StdPair and
+    this.getParameter(0).getUnspecifiedType().(ReferenceType).getBaseType() instanceof StdPair
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the pair passed in taints the pair being constructed
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue()
+    ) and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * Extra taint model for constructors of `std::pair`.
+ */
+private class StdPairConstructor extends Constructor, TaintFunction {
+  StdPairConstructor() { this.getDeclaringType() instanceof StdPair }
+
+  /**
+   * Gets the index of a parameter of this constructor that is a reference to one of
+   * the template argument types of the declaring pair.
+   */
+  int getAValueTypeParameterIndex() {
+    this.getDeclaringType().getTemplateArgument(_).(Type).getUnspecifiedType() = // i.e. the `T1` or `T2` of this `std::pair`
+      this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // when parameter 1 is a reference to a value type, it taints the constructed object
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue() // TODO: this is only needed for AST data flow, which treats constructors as returning the new object
+    ) and
+    input.isParameterDeref(1) and
+    this.getAValueTypeParameterIndex() = 1
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdSet.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdSet.qll
new file mode 100644
index 00000000000..d2e9892abcb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdSet.qll
@@ -0,0 +1,129 @@
+/**
+ * Provides models for C++ containers `std::set` and `std::unordered_set`.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Iterator
+
+/**
+ * A class template instantiation named `set` or `unordered_set` in namespace `std` or `bsl`.
+ */
+private class StdSet extends ClassTemplateInstantiation {
+  StdSet() { this.hasQualifiedName(["bsl", "std"], ["unordered_set", "set"]) }
+}
+
+/**
+ * Extra taint model for set constructors that take iterator arguments.
+ */
+private class StdSetConstructor extends Constructor, TaintFunction {
+  StdSetConstructor() { this.getDeclaringType() instanceof StdSet }
+
+  /**
+   * Gets the index of a parameter of this constructor whose unspecified type is an `Iterator`.
+   */
+  int getAnIteratorParameterIndex() {
+    exists(Parameter p | p = this.getParameter(result) | p.getUnspecifiedType() instanceof Iterator)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // any iterator-typed parameter taints the object being constructed (the qualifier)
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue() // TODO: this is only needed for AST data flow, which treats constructors as returning the new object
+    ) and
+    input.isParameterDeref(this.getAnIteratorParameterIndex())
+  }
+}
+
+/**
+ * Models the `insert` member function of the standard set classes.
+ */
+private class StdSetInsert extends TaintFunction {
+  StdSetInsert() { this.getClassAndName("insert") instanceof StdSet }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the final parameter taints both the qualifier and the return value
+    // (where the return value is a pair, this should really flow just to the first part of it)
+    (
+      output.isReturnValue() or
+      output.isQualifierObject()
+    ) and
+    input.isParameterDeref(this.getNumberOfParameters() - 1)
+  }
+}
+
+/**
+ * Models the `emplace` and `emplace_hint` member functions of the standard set classes.
+ */
+private class StdSetEmplace extends TaintFunction {
+  StdSetEmplace() { this.getClassAndName(["emplace_hint", "emplace"]) instanceof StdSet }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // every parameter taints the qualifier and the return value
+    // (here we assume taint flow from any constructor parameter to the constructed object)
+    // (where the return value is a pair, this should really flow just to the first part of it)
+    (
+      output.isReturnValue() or
+      output.isQualifierObject()
+    ) and
+    input.isParameterDeref([0 .. this.getNumberOfParameters() - 1])
+    or
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `merge` member function of the standard set classes.
+ */
+private class StdSetMerge extends TaintFunction {
+  StdSetMerge() { this.getClassAndName("merge") instanceof StdSet }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // container1.merge(container2): container2 taints container1
+    output.isQualifierObject() and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * Models the `find` member function of the standard set classes (qualifier taints the result).
+ */
+private class StdSetFind extends TaintFunction {
+  StdSetFind() { this.getClassAndName("find") instanceof StdSet }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `erase` member function of the standard set classes.
+ */
+private class StdSetErase extends TaintFunction {
+  StdSetErase() { this.getClassAndName("erase") instanceof StdSet }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the return value, but only when that value is an iterator
+    output.isReturnValue() and
+    input.isQualifierObject() and
+    this.getType().getUnderlyingType() instanceof Iterator
+  }
+}
+
+/**
+ * Models `lower_bound`, `upper_bound` and `equal_range` on the standard set classes.
+ */
+private class StdSetEqualRange extends TaintFunction {
+  StdSetEqualRange() {
+    this.getClassAndName(["equal_range", "upper_bound", "lower_bound"]) instanceof StdSet
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the return value
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll
new file mode 100644
index 00000000000..73a0f6edf26
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll
@@ -0,0 +1,642 @@
+/**
+ * Provides implementation classes modeling `std::string` (and other
+ * instantiations of `std::basic_string`) and `std::ostream`. See
+ * `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Iterator
+import semmle.code.cpp.models.interfaces.DataFlow
+
+/**
+ * A class template instantiation named `basic_string` in namespace `std` or `bsl`.
+ */
+private class StdBasicString extends ClassTemplateInstantiation {
+  StdBasicString() { this.hasQualifiedName(["bsl", "std"], "basic_string") }
+}
+
+/**
+ * Extra taint model for `std::string` constructors that reference the
+ * character type of the container, or an iterator. For example, construction
+ * from iterators:
+ * ```
+ * std::string b(a.begin(), a.end());
+ * ```
+ */
+private class StdStringConstructor extends Constructor, TaintFunction {
+  StdStringConstructor() { this.getDeclaringType() instanceof StdBasicString }
+
+  /**
+   * Gets the index of a parameter of this constructor that is a string (or
+   * character).
+   */
+  int getAStringParameterIndex() {
+    exists(Type paramType | paramType = this.getParameter(result).getUnspecifiedType() |
+      // i.e. `std::basic_string::CharT`
+      paramType =
+        this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType()
+      or
+      // e.g. `std::basic_string::CharT *`
+      paramType instanceof PointerType
+      or
+      // e.g. `std::basic_string &`, avoiding `const Allocator&`
+      paramType instanceof ReferenceType and
+      not this.getDeclaringType().getTemplateArgument(2).(Type).getUnspecifiedType() =
+        paramType.(ReferenceType).getBaseType()
+    )
+  }
+
+  /**
+   * Gets the index of a parameter of this constructor whose type is an `Iterator`.
+   */
+  int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // string, character and iterator parameters taint the object being constructed
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue() // TODO: this is only needed for AST data flow, which treats constructors as returning the new object
+    ) and
+    (
+      input.isParameter(this.getAnIteratorParameterIndex()) or
+      input.isParameterDeref(this.getAStringParameterIndex())
+    )
+  }
+}
+
+/**
+ * Models the `c_str` member function of `std::string`.
+ */
+private class StdStringCStr extends TaintFunction {
+  StdStringCStr() { this.getClassAndName("c_str") instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the string itself (qualifier) taints what the return value points to
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `data` member function of `std::string`.
+ */
+private class StdStringData extends TaintFunction {
+  StdStringData() { this.getClassAndName("data") instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow (for writes to `data`): what the return value points to
+    // taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the string itself (qualifier) taints what the return value points to
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `push_back` member function of `std::string`.
+ */
+private class StdStringPush extends TaintFunction {
+  StdStringPush() { this.getClassAndName("push_back") instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the first parameter taints the qualifier
+    output.isQualifierObject() and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * Models the `front` and `back` member functions of `std::string`.
+ */
+private class StdStringFrontBack extends TaintFunction {
+  StdStringFrontBack() { this.getClassAndName(["back", "front"]) instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the object behind the returned reference
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the (non-member) `operator+` for `std::string`.
+ */
+private class StdStringPlus extends TaintFunction {
+  StdStringPlus() {
+    this.getUnspecifiedType() instanceof StdBasicString and
+    this.hasQualifiedName(["bsl", "std"], "operator+")
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // either of the first two parameters taints the return value
+    output.isReturnValue() and
+    (
+      input.isParameterDeref(1) or
+      input.isParameterDeref(0)
+    )
+  }
+}
+
+/**
+ * Models the `std::string` member functions `operator+=`, `append`, `insert`
+ * and `replace`. Each of them combines the existing string with a new
+ * string (or character) taken from one of the arguments.
+ */
+private class StdStringAppend extends TaintFunction {
+  StdStringAppend() {
+    this.getClassAndName(["replace", "insert", "append", "operator+="]) instanceof StdBasicString
+  }
+
+  /**
+   * Gets the index of a parameter of this function that is a string (or
+   * character).
+   */
+  int getAStringParameterIndex() {
+    this.getParameter(result).getUnspecifiedType() = // i.e. `std::basic_string::CharT`
+      this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() or
+    this.getParameter(result).getType() instanceof ReferenceType or // e.g. `std::basic_string &`
+    this.getParameter(result).getType() instanceof PointerType // e.g. `std::basic_string::CharT *`
+  }
+
+  /**
+   * Gets the index of a parameter of this function whose type is an `Iterator`.
+   */
+  int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow (for writes to the result): the object behind the returned
+    // reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the string itself and its string / iterator parameters taint the string (qualifier) and return value
+    (
+      output.isReturnValueDeref() or
+      output.isQualifierObject()
+    ) and
+    (
+      input.isParameter(this.getAnIteratorParameterIndex()) or
+      input.isParameterDeref(this.getAStringParameterIndex()) or
+      input.isQualifierObject()
+    )
+  }
+}
+
+/**
+ * Models the `assign` member function of `std::string`.
+ */
+private class StdStringAssign extends TaintFunction {
+  StdStringAssign() { this.getClassAndName("assign") instanceof StdBasicString }
+
+  /**
+   * Gets the index of a parameter of this function that is a string (or
+   * character).
+   */
+  int getAStringParameterIndex() {
+    this.getParameter(result).getUnspecifiedType() = // i.e. `std::basic_string::CharT`
+      this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() or
+    this.getParameter(result).getType() instanceof ReferenceType or // e.g. `std::basic_string &`
+    this.getParameter(result).getType() instanceof PointerType // e.g. `std::basic_string::CharT *`
+  }
+
+  /**
+   * Gets the index of a parameter of this function whose type is an `Iterator`.
+   */
+  int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow (for writes to the result): the object behind the returned
+    // reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // string / iterator parameters taint the string itself (qualifier) and the return value
+    (
+      output.isReturnValueDeref() or
+      output.isQualifierObject()
+    ) and
+    (
+      input.isParameter(this.getAnIteratorParameterIndex()) or
+      input.isParameterDeref(this.getAStringParameterIndex())
+    )
+  }
+}
+
+/**
+ * Models the `copy` member function of `std::string`.
+ */
+private class StdStringCopy extends TaintFunction {
+  StdStringCopy() { this.getClassAndName("copy") instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // copy(dest, num, pos): the qualifier taints what `dest` points to
+    output.isParameterDeref(0) and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `substr` member function of `std::string`.
+ */
+private class StdStringSubstr extends TaintFunction {
+  StdStringSubstr() { this.getClassAndName("substr") instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // substr(pos, num): the qualifier taints the return value
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `at` and `operator[]` member functions of `std::string`.
+ */
+private class StdStringAt extends TaintFunction {
+  StdStringAt() { this.getClassAndName(["operator[]", "at"]) instanceof StdBasicString }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the qualifier taints the object behind the returned reference
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * A class template instantiation named `basic_istream` in namespace `std` or `bsl`.
+ */
+private class StdBasicIStream extends ClassTemplateInstantiation {
+  StdBasicIStream() { this.hasQualifiedName(["bsl", "std"], "basic_istream") }
+}
+
+/**
+ * Models `operator>>` of `std::istream` when it is defined as a member function.
+ */
+private class StdIStreamIn extends DataFlowFunction, TaintFunction {
+  StdIStreamIn() { this.getClassAndName("operator>>") instanceof StdBasicIStream }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the return value is a reference to `*this`
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+    or
+    output.isReturnValue() and
+    input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the qualifier taints what the first parameter refers to
+    output.isParameterDeref(0) and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models `operator>>` of `std::istream` when it is defined as a non-member function.
+ */
+private class StdIStreamInNonMember extends DataFlowFunction, TaintFunction {
+  StdIStreamInNonMember() {
+    this.getUnspecifiedType().(ReferenceType).getBaseType() instanceof StdBasicIStream and
+    this.hasQualifiedName(["bsl", "std"], "operator>>")
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the first parameter flows to the return value
+    output.isReturnValueDeref() and
+    input.isParameterDeref(0)
+    or
+    output.isReturnValue() and
+    input.isParameter(0)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the first parameter
+    output.isParameterDeref(0) and
+    input.isReturnValueDeref()
+    or
+    // the first parameter taints the second parameter
+    output.isParameterDeref(1) and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * Models the `std::istream` member functions `get` and `peek` that take no parameters.
+ */
+private class StdIStreamGet extends TaintFunction {
+  StdIStreamGet() {
+    this.getNumberOfParameters() = 0 and
+    this.getClassAndName(["peek", "get"]) instanceof StdBasicIStream
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints the return value
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `std::istream` member functions `get` and `read` that take at least one parameter.
+ */
+private class StdIStreamRead extends DataFlowFunction, TaintFunction {
+  StdIStreamRead() {
+    this.getNumberOfParameters() > 0 and
+    this.getClassAndName(["read", "get"]) instanceof StdBasicIStream
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the return value is a reference to `*this`
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+    or
+    output.isReturnValue() and
+    input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the qualifier taints what the first parameter refers to
+    output.isParameterDeref(0) and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `readsome` member function of `std::istream`.
+ */
+private class StdIStreamReadSome extends TaintFunction {
+  StdIStreamReadSome() { this.getClassAndName("readsome") instanceof StdBasicIStream }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the qualifier taints what the first parameter refers to
+    output.isParameterDeref(0) and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the `putback` member function of `std::istream`.
+ */
+private class StdIStreamPutBack extends DataFlowFunction, TaintFunction {
+  StdIStreamPutBack() { this.getClassAndName("putback") instanceof StdBasicIStream }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the return value is a reference to `*this`
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+    or
+    output.isReturnValue() and
+    input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the first parameter (as a value or through a pointer) taints both the
+    // qualifier and the object behind the returned reference
+    (
+      input.isParameterDeref(0)
+      or
+      input.isParameter(0)
+    ) and
+    (
+      output.isReturnValueDeref()
+      or
+      output.isQualifierObject()
+    )
+  }
+}
+
+/**
+ * Models the `getline` member function of `std::istream`.
+ */
+private class StdIStreamGetLine extends DataFlowFunction, TaintFunction {
+  StdIStreamGetLine() { this.getClassAndName("getline") instanceof StdBasicIStream }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the return value is a reference to `*this`
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+    or
+    output.isReturnValue() and
+    input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the qualifier taints what the first parameter refers to
+    output.isParameterDeref(0) and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * Models the (non-member) function `getline` in namespace `std` or `bsl`.
+ */
+private class StdGetLine extends DataFlowFunction, TaintFunction {
+  StdGetLine() { this.hasQualifiedName(["bsl", "std"], "getline") }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the first parameter flows to the return value
+    output.isReturnValueDeref() and
+    input.isParameterDeref(0)
+    or
+    output.isReturnValue() and
+    input.isParameter(0)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the first parameter
+    output.isParameterDeref(0) and
+    input.isReturnValueDeref()
+    or
+    // the first parameter taints the second parameter
+    output.isParameterDeref(1) and
+    input.isParameterDeref(0)
+  }
+}
+
+/**
+ * A class template instantiation named `basic_ostream` in namespace `std` or `bsl`.
+ */
+private class StdBasicOStream extends ClassTemplateInstantiation {
+  StdBasicOStream() { this.hasQualifiedName(["bsl", "std"], "basic_ostream") }
+}
+
+/**
+ * Models the `std::ostream` member functions `operator<<`, `put` and
+ * `write`.
+ */
+private class StdOStreamOut extends DataFlowFunction, TaintFunction {
+  StdOStreamOut() {
+    this.getClassAndName(["write", "put", "operator<<"]) instanceof StdBasicOStream
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the return value is a reference to `*this`
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+    or
+    output.isReturnValue() and
+    input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+    or
+    // the first parameter (as a value or through a pointer) taints both the
+    // qualifier and the object behind the returned reference
+    (
+      input.isParameterDeref(0)
+      or
+      input.isParameter(0)
+    ) and
+    (
+      output.isReturnValueDeref()
+      or
+      output.isQualifierObject()
+    )
+  }
+}
+
+/**
+ * Models `operator<<` of `std::ostream` when it is defined as a non-member function.
+ */
+private class StdOStreamOutNonMember extends DataFlowFunction, TaintFunction {
+  StdOStreamOutNonMember() {
+    this.getUnspecifiedType().(ReferenceType).getBaseType() instanceof StdBasicOStream and
+    this.hasQualifiedName(["bsl", "std"], "operator<<")
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the first parameter flows to the return value
+    output.isReturnValueDeref() and
+    input.isParameterDeref(0)
+    or
+    output.isReturnValue() and
+    input.isParameter(0)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the first parameter
+    output.isParameterDeref(0) and
+    input.isReturnValueDeref()
+    or
+    // the second parameter taints both the first parameter and the object
+    // behind the returned reference
+    input.isParameter(1) and
+    (
+      output.isReturnValueDeref() or
+      output.isParameterDeref(0)
+    )
+  }
+}
+
+/**
+ * A class template instantiation named `basic_stringstream` in namespace `std` or `bsl`.
+ */
+private class StdBasicStringStream extends ClassTemplateInstantiation {
+  StdBasicStringStream() { this.hasQualifiedName(["bsl", "std"], "basic_stringstream") }
+}
+
+/**
+ * Extra taint model for `std::stringstream` constructors that take a
+ * string input parameter.
+ */
+private class StdStringStreamConstructor extends Constructor, TaintFunction {
+  StdStringStreamConstructor() { this.getDeclaringType() instanceof StdBasicStringStream }
+
+  /**
+   * Gets the index of a parameter of this constructor that is treated as a string.
+   */
+  int getAStringParameterIndex() {
+    this.getParameter(result).getType() instanceof ReferenceType // `const std::basic_string &`
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // any string parameter taints the object being constructed
+    (
+      output.isQualifierObject()
+      or
+      output.isReturnValue() // TODO: this is only needed for AST data flow, which treats constructors as returning the new object
+    ) and
+    input.isParameterDeref(this.getAStringParameterIndex())
+  }
+}
+
+/**
+ * Models the `str` member function of `std::stringstream`.
+ */
+private class StdStringStreamStr extends TaintFunction {
+  StdStringStreamStr() { this.getClassAndName("str") instanceof StdBasicStringStream }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // the first parameter (if any) taints the qualifier
+    output.isQualifierObject() and
+    input.isParameterDeref(0)
+    or
+    // the qualifier taints the return value (if any)
+    output.isReturnValue() and
+    input.isQualifierObject()
+  }
+}
+
+/**
+ * A class template instantiation named `basic_ios` in namespace `std` or `bsl`.
+ */
+private class StdBasicIOS extends ClassTemplateInstantiation {
+  StdBasicIOS() { this.hasQualifiedName(["bsl", "std"], "basic_ios") }
+}
+
+/**
+ * A `std::` stream function that needs no model of its own, other than that
+ * it returns a reference to `*this` and so could be used in a chain.
+ */
+private class StdStreamFunction extends DataFlowFunction, TaintFunction {
+  StdStreamFunction() {
+    this.getClassAndName("copyfmt") instanceof StdBasicIOS
+    or
+    this.getClassAndName(["flush", "seekp"]) instanceof StdBasicOStream
+    or
+    this.getClassAndName(["seekg", "unget", "ignore"]) instanceof StdBasicIStream
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the return value is a reference to `*this`
+    output.isReturnValueDeref() and
+    input.isQualifierObject()
+    or
+    output.isReturnValue() and
+    input.isQualifierAddress()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // reverse flow: the object behind the returned reference taints the qualifier
+    output.isQualifierObject() and
+    input.isReturnValueDeref()
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll
new file mode 100644
index 00000000000..ee9af547582
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll
@@ -0,0 +1,93 @@
+/**
+ * Provides implementation classes modeling `strcat` and various similar functions.
+ * See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A model of the standard function `strcat` and of its wide-character,
+ * length-limited, and Microsoft variants.
+ */
+class StrcatFunction extends TaintFunction, DataFlowFunction, ArrayFunction, SideEffectFunction {
+  StrcatFunction() {
+    this.hasGlobalName([
+        "_mbscat", // _mbscat(dst, src)
+        "_mbsncat", // _mbsncat(dst, src, max_amount)
+        "_mbsncat_l", // _mbsncat_l(dst, src, max_amount, locale)
+        "_mbsnbcat", // _mbsnbcat(dest, src, count)
+        "_mbsnbcat_l" // _mbsnbcat_l(dest, src, count, locale)
+      ])
+    or
+    this.hasGlobalOrStdOrBslName([
+        "strcat", // strcat(dst, src)
+        "strncat", // strncat(dst, src, max_amount)
+        "wcscat", // wcscat(dst, src)
+        "wcsncat" // wcsncat(dst, src, max_amount)
+      ])
+  }
+
+  /**
+   * Gets the index of the parameter that is the size of the copy (in characters).
+   * The result is 2, and exists only if this function has a parameter at that index.
+   */
+  int getParamSize() { result = 2 and exists(this.getParameter(2)) }
+
+  /** Gets the index of the parameter that is the source of the copy. */
+  int getParamSrc() { result = 1 }
+
+  /** Gets the index of the parameter that is the destination to be appended to. */
+  int getParamDest() { result = 0 }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the value of the first parameter flows to the return value
+    output.isReturnValue() and
+    input.isParameter(0)
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    (
+      // the `max_amount` parameter of the length-limited variants
+      this.getName() = ["strncat", "wcsncat", "_mbsncat", "_mbsncat_l"] and
+      input.isParameter(2)
+      or
+      // the `locale` parameter
+      this.getName() = ["_mbsncat_l", "_mbsnbcat_l"] and
+      input.isParameter(3)
+      or
+      // the existing contents of the destination buffer
+      input.isParameterDeref(0)
+      or
+      input.isParameter(1)
+    ) and
+    output.isParameterDeref(0)
+  }
+
+  override predicate hasArrayInput(int param) { param = [0, 1] }
+
+  override predicate hasArrayOutput(int param) { param = 0 }
+
+  override predicate hasArrayWithNullTerminator(int param) { param = 1 }
+
+  override predicate hasArrayWithUnknownSize(int param) { param = 0 }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    // the buffer of the first parameter may be written
+    buffer = true and
+    mustWrite = false and
+    i = 0
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    // the buffers of both the first and the second parameter are read
+    buffer = true and
+    i = [0, 1]
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcpy.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcpy.qll
new file mode 100644
index 00000000000..432fbf999ef
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcpy.qll
@@ -0,0 +1,137 @@
+/**
+ * Provides implementation classes modeling `strcpy` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A model of the standard function `strcpy` and its wide, sized, and Microsoft variants.
+ */
+class StrcpyFunction extends ArrayFunction, DataFlowFunction, TaintFunction, SideEffectFunction {
+  StrcpyFunction() {
+    this.hasGlobalOrStdOrBslName([
+        "strcpy", // strcpy(dst, src)
+        "wcscpy", // wcscpy(dst, src)
+        "strncpy", // strncpy(dst, src, max_amount)
+        "wcsncpy", // wcsncpy(dst, src, max_amount)
+        "strxfrm", // strxfrm(dest, src, max_amount)
+        "wcsxfrm" // wcsxfrm(dest, src, max_amount)
+      ])
+    or
+    this.hasGlobalName([
+        "_mbscpy", // _mbscpy(dst, src)
+        "_strncpy_l", // _strncpy_l(dst, src, max_amount, locale)
+        "_wcsncpy_l", // _wcsncpy_l(dst, src, max_amount, locale)
+        "_mbsncpy", // _mbsncpy(dst, src, max_amount)
+        "_mbsncpy_l", // _mbsncpy_l(dst, src, max_amount, locale)
+        "_strxfrm_l", // _strxfrm_l(dest, src, max_amount, locale)
+        "wcsxfrm_l", // wcsxfrm_l(dest, src, max_amount, locale)
+        "_mbsnbcpy", // _mbsnbcpy(dest, src, max_amount)
+        "stpcpy", // stpcpy(dest, src)
+        "stpncpy" // stpncpy(dest, src, max_amount)
+      ])
+    or
+    // of the `_s` variants only the 3-parameter forms are modeled, which leaves out
+    // the 2-parameter template versions that find the size of a fixed size destination buffer
+    this.getNumberOfParameters() = 3 and
+    (
+      this.hasGlobalName("_mbscpy_s") // _mbscpy_s(dst, max_amount, src)
+      or
+      this.hasGlobalOrStdName([
+          "strcpy_s", // strcpy_s(dst, max_amount, src)
+          "wcscpy_s" // wcscpy_s(dst, max_amount, src)
+        ])
+    )
+  }
+
+  /** Holds if the name of this function ends in `_s`, like the `strcpy_s` variants. */
+  private predicate isSVariant() { this.getName().matches("%\\_s") }
+
+  /**
+   * Gets the index of the parameter that is the maximum size of the copy (in
+   * characters), if this function has such a parameter.
+   */
+  int getParamSize() {
+    this.isSVariant() and
+    result = 1
+    or
+    not this.isSVariant() and
+    this.getName().matches(["%ncpy%", "%nbcpy%", "%xfrm%"]) and
+    result = 2
+  }
+
+  /** Gets the index of the parameter that is the source of the copy. */
+  int getParamSrc() {
+    this.isSVariant() and result = 2
+    or
+    not this.isSVariant() and result = 1
+  }
+
+  /** Gets the index of the parameter that is the destination of the copy. */
+  int getParamDest() { result = 0 }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = this.getParamSrc() }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = this.getParamDest() }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = this.getParamSrc() }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    countParam = this.getParamSize() and
+    bufParam = this.getParamDest()
+  }
+
+  override predicate hasArrayWithUnknownSize(int bufParam) {
+    bufParam = this.getParamDest() and
+    not exists(this.getParamSize())
+  }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // without a size parameter: source buffer to destination buffer and returned buffer
+    not exists(this.getParamSize()) and
+    input.isParameterDeref(this.getParamSrc()) and
+    (
+      output.isParameterDeref(this.getParamDest()) or
+      output.isReturnValueDeref()
+    )
+    or
+    // the destination argument itself flows to the return value
+    input.isParameter(this.getParamDest()) and
+    output.isReturnValue()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // functions with a size parameter may do only a partial copy of the input
+    // buffer to the output buffer, hence taint rather than data flow
+    (
+      output.isReturnValueDeref() or
+      output.isParameterDeref(this.getParamDest())
+    ) and
+    input.isParameter(this.getParamSrc()) and
+    exists(this.getParamSize())
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    buffer = true and
+    mustWrite = false and
+    i = this.getParamDest()
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and
+    i = this.getParamSrc()
+  }
+
+  override ParameterIndex getParameterSizeIndex(ParameterIndex i) {
+    result = this.getParamSize() and
+    i = this.getParamDest()
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcrement.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcrement.qll
new file mode 100644
index 00000000000..4c335c8581e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcrement.qll
@@ -0,0 +1,49 @@
+/**
+ * Provides implementation classes modeling `_strinc`, `_strdec` and their variants.
+ * See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A model of the functions `_strinc` and `_strdec` and of their variants.
+ */
+private class Strcrement extends ArrayFunction, TaintFunction, SideEffectFunction {
+  Strcrement() {
+    this.hasGlobalName([
+        "_strinc", // _strinc(source, locale)
+        "_wcsinc", // _wcsinc(source, locale)
+        "_mbsinc", // _mbsinc(source)
+        "_mbsinc_l", // _mbsinc_l(source, locale)
+        "_strdec", // _strdec(start, source)
+        "_wcsdec", // _wcsdec(start, source)
+        "_mbsdec", // _mbsdec(start, source)
+        "_mbsdec_l" // _mbsdec_l(start, source, locale)
+      ])
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) {
+    // Every pointer-typed parameter qualifies; the intent is to leave out the locales.
+    this.getParameter(bufParam).getUnspecifiedType() instanceof PointerType
+  }
+
+  override predicate hasArrayInput(int bufParam) { this.hasArrayWithNullTerminator(bufParam) }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    exists(int index | this.hasArrayInput(index) |
+      output.isReturnValueDeref() and input.isParameterDeref(index)
+      or
+      output.isReturnValue() and input.isParameter(index)
+    )
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and this.hasArrayInput(i)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strdup.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strdup.qll
new file mode 100644
index 00000000000..51d496fc69e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strdup.qll
@@ -0,0 +1,64 @@
+/**
+ * Provides implementation classes modeling `strdup` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.Allocation
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Taint
+
+/**
+ * An allocation function in the style of `strdup`.
+ */
+private class StrdupFunction extends AllocationFunction, ArrayFunction, DataFlowFunction {
+  StrdupFunction() {
+    this.hasGlobalName([
+        // --- C library allocation
+        "strdup", // strdup(str)
+        "strdupa", // strdupa(str) - returns stack allocated buffer
+        "wcsdup", // wcsdup(str)
+        "_strdup", // _strdup(str)
+        "_wcsdup", // _wcsdup(str)
+        "_mbsdup" // _mbsdup(str)
+      ])
+  }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValueDeref() and
+    input.isParameterDeref(0)
+  }
+
+  override predicate requiresDealloc() { not this.hasGlobalName("strdupa") }
+}
+
+/**
+ * An allocation function in the style of `strndup`.
+ */
+private class StrndupFunction extends AllocationFunction, ArrayFunction, DataFlowFunction {
+  StrndupFunction() {
+    this.hasGlobalName([
+        // -- C library allocation
+        "strndup", // strndup(str, maxlen)
+        "strndupa" // strndupa(str, maxlen) -- returns stack allocated buffer
+      ])
+  }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValueDeref() and
+    (
+      input.isParameter(1) or
+      input.isParameterDeref(0)
+    )
+  }
+
+  override predicate requiresDealloc() { not this.hasGlobalName("strndupa") }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strftime.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strftime.qll
new file mode 100644
index 00000000000..0dad89e950f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strftime.qll
@@ -0,0 +1,34 @@
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.ArrayFunction
+
+/**
+ * A model of the function `strftime`.
+ */
+private class Strftime extends TaintFunction, ArrayFunction {
+  Strftime() { this.hasGlobalName("strftime") }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isParameterDeref(0) and
+    (
+      input.isParameterDeref(3) or
+      input.isParameterDeref(2) or
+      input.isParameter(1)
+    )
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 2 }
+
+  override predicate hasArrayWithFixedSize(int bufParam, int elemCount) {
+    elemCount = 1 and
+    bufParam = 3
+  }
+
+  override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+    countParam = 1 and
+    bufParam = 0
+  }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = [2, 3] }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strnextc.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strnextc.qll
new file mode 100644
index 00000000000..fc8ac17b5f6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strnextc.qll
@@ -0,0 +1,38 @@
+/**
+ * Provides implementation classes modeling `strnextc` and various similar functions.
+ * See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A model of the function `_strnextc` and of its variants.
+ */
+private class Strnextc extends TaintFunction, ArrayFunction, AliasFunction, SideEffectFunction {
+  Strnextc() { this.hasGlobalName(["_mbsnextc_l", "_mbsnextc", "_wcsnextc", "_strnextc"]) }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input.isParameterDeref(0)
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+  override predicate parameterNeverEscapes(int index) { index = 0 }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and i = 0
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strset.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strset.qll
new file mode 100644
index 00000000000..f4a80cbabac
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strset.qll
@@ -0,0 +1,62 @@
+/**
+ * Provides implementation classes modeling `strset` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A model of the function `strset` and of its assorted variants.
+ */
+private class StrsetFunction extends ArrayFunction, DataFlowFunction, AliasFunction,
+  SideEffectFunction {
+  StrsetFunction() {
+    this.hasGlobalName([
+        "strset", "_strset", "_strset_l", "_strnset", "_strnset_l", "_wcsset", "_wcsset_l",
+        "_wcsnset", "_wcsnset_l", "_mbsset", "_mbsset_l", "_mbsnset", "_mbsnset_l", "_mbsnbset",
+        "_mbsnbset_l"
+      ])
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 0 }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // flow from the input string pointer to the returned pointer
+    output.isReturnValue() and
+    input.isParameter(0)
+    or
+    // flow from the character that overwrites the string
+    input.isParameter(1) and
+    (
+      output.isParameterDeref(0)
+      or
+      output.isReturnValueDeref()
+    )
+  }
+
+  override predicate parameterNeverEscapes(int index) { none() }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { index = 0 }
+
+  override predicate parameterIsAlwaysReturned(int index) { index = 0 }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    mustWrite = true and buffer = true and i = 0
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and i = 0
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strtok.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strtok.qll
new file mode 100644
index 00000000000..f2cb6498819
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Strtok.qll
@@ -0,0 +1,88 @@
+/**
+ * Provides implementation classes modeling `strtok` and various similar
+ * functions. See `semmle.code.cpp.models.Models` for usage information.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.Taint
+
+/**
+ * A model of the standard function `strtok` and of its assorted variants.
+ */
+private class Strtok extends ArrayFunction, AliasFunction, TaintFunction, SideEffectFunction {
+  Strtok() {
+    this.hasGlobalOrStdOrBslName("strtok") or
+    this.hasGlobalName(["_mbstok_l", "_mbstok", "_wcstok_l", "wcstok", "_strtok_l", "strtok_r"])
+  }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 or bufParam = 1 }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 0 or bufParam = 1 }
+
+  override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
+
+  override predicate parameterNeverEscapes(int index) { index = 1 }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { index = 0 }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isReturnValue() and input.isParameter(0)
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { none() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { none() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    mustWrite = false and buffer = true and i = 0
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and (i = 0 or i = 1)
+  }
+}
+
+/**
+ * The function `strsep`, a variant of `strtok` that takes a `char**` parameter
+ * instead of a `char*` as the first parameter.
+ */
+private class Strsep extends ArrayFunction, AliasFunction, TaintFunction, SideEffectFunction {
+  Strsep() { this.hasGlobalName("strsep") }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 1 }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = 1 }
+
+  override predicate parameterNeverEscapes(int index) { index = 0 or index = 1 }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    // NOTE: What we really want as the input condition here is:
+    // (input.isParameterDerefDeref(0) or input.isParameterDeref(1))
+    (output.isReturnValueDeref() or output.isReturnValue()) and
+    input.isParameterDeref([0, 1])
+  }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() { any() }
+
+  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+    mustWrite = false and buffer = false and i = 0
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    i = 1 and buffer = true
+    or
+    i = 0 and buffer = false
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Swap.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Swap.qll
new file mode 100644
index 00000000000..b79f7afe5d9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/Swap.qll
@@ -0,0 +1,50 @@
+import semmle.code.cpp.models.interfaces.DataFlow
+import semmle.code.cpp.models.interfaces.Taint
+import semmle.code.cpp.models.interfaces.Alias
+
+/**
+ * The standard function `swap`, which is called like this:
+ * ```
+ * std::swap(obj1, obj2)
+ * ```
+ */
+private class Swap extends DataFlowFunction {
+  Swap() { this.hasQualifiedName(["bsl", "std"], "swap") }
+
+  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
+    // the pointee of each of the two parameters flows to the pointee of the other one
+    exists(int i | i = [0, 1] |
+      input.isParameterDeref(i) and
+      output.isParameterDeref(1 - i)
+    )
+  }
+}
+
+/**
+ * A member function named `swap` that is called like this:
+ * ```
+ * obj1.swap(obj2)
+ * ```
+ */
+private class MemberSwap extends TaintFunction, MemberFunction, AliasFunction {
+  MemberSwap() {
+    this.getNumberOfParameters() = 1 and
+    this.hasName("swap") and
+    this.getDeclaringType() =
+      this.getParameter(0).getType().(ReferenceType).getBaseType().getUnspecifiedType()
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    output.isQualifierObject() and
+    input.isParameterDeref(0)
+    or
+    output.isParameterDeref(0) and
+    input.isQualifierObject()
+  }
+
+  override predicate parameterNeverEscapes(int index) { none() }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { index = 0 }
+
+  override predicate parameterIsAlwaysReturned(int index) { index = 0 }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/System.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/System.qll
new file mode 100644
index 00000000000..02a9d0d6744
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/implementations/System.qll
@@ -0,0 +1,43 @@
+import cpp
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.CommandExecution
+
+/**
+ * A function that runs a command by means of a command interpreter.
+ */
+private class SystemFunction extends CommandExecutionFunction, ArrayFunction, AliasFunction,
+  SideEffectFunction {
+  SystemFunction() {
+    this.hasGlobalOrStdName("system") // system(command)
+    or
+    this.hasGlobalName([
+        "popen", // popen(command, mode)
+        "_popen", // _popen(command, mode), Windows variant
+        "_wpopen", // _wpopen(command, mode), Windows variant
+        "_wsystem" // _wsystem(command), Windows variant
+      ])
+  }
+
+  override predicate hasCommandArgument(FunctionInput input) { input.isParameterDeref(0) }
+
+  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = [0, 1] }
+
+  override predicate hasArrayInput(int bufParam) { bufParam = [0, 1] }
+
+  override predicate parameterNeverEscapes(int index) { index = [0, 1] }
+
+  override predicate parameterEscapesOnlyViaReturn(int index) { none() }
+
+  override predicate parameterIsAlwaysReturned(int index) { none() }
+
+  override predicate hasOnlySpecificReadSideEffects() { any() }
+
+  override predicate hasOnlySpecificWriteSideEffects() {
+    this.hasGlobalName("_wsystem") or this.hasGlobalOrStdName("system")
+  }
+
+  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
+    buffer = true and i = [0, 1]
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Alias.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Alias.qll
new file mode 100644
index 00000000000..e947a93fc90
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Alias.qll
@@ -0,0 +1,65 @@
+/**
+ * Provides an abstract class for accurate alias modeling of library
+ * functions when source code is not available. To use this QL library,
+ * create a QL class extending `AliasFunction` with a characteristic
+ * predicate that selects the function or set of functions you are modeling.
+ * Within that class, override the predicates provided by `AliasFunction`
+ * to match the flow within that function.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.Models
+
+/**
+ * A library function whose aliasing behavior is modeled.
+ */
+abstract class AliasFunction extends Function {
+  /**
+   * Holds if the function does not retain, once it has returned, the address that
+   * was passed to the parameter at the specified index.
+   *
+   * Example:
+   * ```
+   * int* g;
+   * int* func(int* p, int* q, int* r, int* s, int n) {
+   *   *s = 1;  // `s` does not escape.
+   *   g = p;  // Stored in global. `p` escapes.
+   *   if (rand()) {
+   *     return q;  // `q` escapes via the return value.
+   *   }
+   *   else {
+   *     return r + n;  // `r` escapes via the return value, even though an offset has been added.
+   *   }
+   * }
+   * ```
+   *
+   * For the function above, these terms hold:
+   * - `parameterEscapesOnlyViaReturn(1)`
+   * - `parameterEscapesOnlyViaReturn(2)`
+   * - `parameterNeverEscapes(3)`
+   */
+  abstract predicate parameterNeverEscapes(int index);
+
+  /**
+   * Holds if the only way in which the address passed to the parameter at the
+   * specified index escapes is via the return value of the function. The comment
+   * on `parameterNeverEscapes` contains an example.
+   */
+  abstract predicate parameterEscapesOnlyViaReturn(int index);
+
+  /** Holds if the function always returns the value of the parameter at the specified index. */
+  predicate parameterIsAlwaysReturned(int index) { none() }
+
+  /**
+   * Holds if the address passed in via `input` is always propagated to `output`.
+   */
+  predicate hasAddressFlow(FunctionInput input, FunctionOutput output) {
+    // By default, flow from a parameter to the return value is detected with the
+    // older `parameterIsAlwaysReturned` predicate.
+    exists(int index |
+      this.parameterIsAlwaysReturned(index) and
+      input.isParameter(index) and
+      output.isReturnValue()
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Allocation.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Allocation.qll
new file mode 100644
index 00000000000..d309791f79a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Allocation.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an abstract class for modeling functions and expressions that
+ * allocate memory, such as the standard `malloc` function. To use this QL
+ * library, create one or more QL classes extending a class here with a
+ * characteristic predicate that selects the functions or expressions you are
+ * trying to model. Within that class, override the predicates provided
+ * by the abstract class to match the specifics of those functions or
+ * expressions. Finally, add a private import statement to `Models.qll`.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.Models
+
+/**
+ * A function that allocates memory, such as `malloc`.
+ */
+abstract class AllocationFunction extends Function {
+  /**
+   * Gets the index of the argument that gives the allocation size, if any. The
+   * actual allocation size in bytes is the value of that argument, multiplied
+   * by the multiplier described by `getSizeMult()`.
+   */
+  int getSizeArg() { none() }
+
+  /**
+   * Gets the index of an argument by which the allocation size given by
+   * `getSizeArg` is multiplied, if any.
+   */
+  int getSizeMult() { none() }
+
+  /**
+   * Gets the index of the argument that is the input pointer to be reallocated,
+   * if this is a `realloc` function.
+   */
+  int getReallocPtrArg() { none() }
+
+  /**
+   * Holds if this allocation requires a corresponding deallocation of some
+   * sort (most do, but `alloca` for example does not). This base implementation
+   * always holds. Overrides answer no when it is unclear (for example a
+   * placement `new` allocation may or may not require a corresponding `delete`).
+   */
+  predicate requiresDealloc() { any() }
+}
+
+/**
+ * An expression that allocates memory, such as a call to `malloc` or a `new` expression.
+ */
+abstract class AllocationExpr extends Expr {
+  /**
+   * Gets the expression that gives the allocation size, if any. The actual
+   * allocation size in bytes is the value of that expression multiplied by the
+   * result of `getSizeMult()`.
+   */
+  Expr getSizeExpr() { none() }
+
+  /**
+   * Gets the constant, in bytes, by which the allocation size given by
+   * `getSizeExpr` is multiplied.
+   */
+  int getSizeMult() { none() }
+
+  /**
+   * Gets the size in bytes of this allocation, provided that the size is fixed
+   * and can be determined.
+   */
+  int getSizeBytes() { none() }
+
+  /**
+   * Gets the expression that is the input pointer argument to be reallocated,
+   * if this is a `realloc` function.
+   */
+  Expr getReallocPtr() { none() }
+
+  /**
+   * Gets the type of the allocated elements, provided that it can be determined.
+   */
+  Type getAllocatedElementType() { none() }
+
+  /**
+   * Holds if this allocation requires a corresponding deallocation of some
+   * sort (most do, but `alloca` for example does not). This base implementation
+   * always holds. Overrides answer no when it is unclear (for example a
+   * placement `new` allocation may or may not require a corresponding `delete`).
+   */
+  predicate requiresDealloc() { any() }
+}
+
+/**
+ * An `operator new` or `operator new[]` function. Such a function may be
+ * associated with `new` or `new[]` expressions, which are not function calls
+ * themselves, and it may also be called directly.
+ */
+class OperatorNewAllocationFunction extends AllocationFunction {
+  OperatorNewAllocationFunction() {
+    this.hasGlobalName([
+        "operator new[]", // operator new[](bytes, ...)
+        "operator new" // operator new(bytes, ...)
+      ])
+  }
+
+  override int getSizeArg() { result = 0 }
+
+  override predicate requiresDealloc() { not exists(this.getPlacementArgument()) }
+
+  /**
+   * Gets the position of the placement pointer, provided that this is a
+   * placement `operator new` function.
+   */
+  int getPlacementArgument() {
+    result = 1 and
+    this.getParameter(1).getType() instanceof VoidPointerType and
+    this.getNumberOfParameters() = 2
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/ArrayFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/ArrayFunction.qll
new file mode 100644
index 00000000000..cb4c531ebec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/ArrayFunction.qll
@@ -0,0 +1,57 @@
+/**
+ * Provides an abstract class for accurate modeling of input and output buffers
+ * in library functions when source code is not available. To use this QL
+ * library, create a QL class extending `ArrayFunction` with a characteristic
+ * predicate that selects the function or set of functions you are trying to
+ * model. Within that class, override the predicates provided by `ArrayFunction`
+ * to match the flow within that function. Finally, add a private import
+ * statement to `Models.qll`
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.Models
+
+/**
+ * A library function with input and/or output buffer parameters.
+ */
+abstract class ArrayFunction extends Function {
+ /**
+ * Holds if parameter `bufParam` is a null-terminated buffer and the
+ * null-terminator will not be written past.
+ */
+ predicate hasArrayWithNullTerminator(int bufParam) { none() }
+
+ /**
+ * Holds if parameter `bufParam` should always point to a buffer with exactly
+ * `elemCount` elements.
+ */
+ predicate hasArrayWithFixedSize(int bufParam, int elemCount) { none() }
+
+ /**
+ * Holds if parameter `bufParam` should always point to a buffer with the
+ * number of elements indicated by `countParam`.
+ */
+ predicate hasArrayWithVariableSize(int bufParam, int countParam) { none() }
+
+ /**
+ * Holds if parameter `bufParam` points to a buffer with no fixed size and no
+ * size parameter, which is not null-terminated or which is null-terminated
+ * but for which the null value may be written past. For example, the first
+ * parameters of `sprintf` and `strcat`.
+ */
+ predicate hasArrayWithUnknownSize(int bufParam) { none() }
+
+ /**
+ * Holds if parameter `bufParam` is used as an input buffer.
+ *
+ * Note that this is not mutually exclusive with `hasArrayOutput`.
+ */
+ predicate hasArrayInput(int bufParam) { none() }
+
+ /**
+ * Holds if parameter `bufParam` is used as an output buffer.
+ *
+ * Note that this is not mutually exclusive with `hasArrayInput`.
+ */
+ predicate hasArrayOutput(int bufParam) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/CommandExecution.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/CommandExecution.qll
new file mode 100644
index 00000000000..a6e32341140
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/CommandExecution.qll
@@ -0,0 +1,23 @@
+/**
+ * Provides classes for modeling functions that execute new programs by
+ * interpreting string data as shell commands. To use this QL library, create
+ * a QL class extending `CommandExecutionFunction` with a characteristic
+ * predicate that selects the function or set of functions you are modeling.
+ * Within that class, override the `hasCommandArgument` predicate to indicate
+ * which parameters are interpreted as shell commands.
+ */
+
+import cpp
+import FunctionInputsAndOutputs
+import semmle.code.cpp.models.Models
+
+/**
+ * A function, such as `exec` or `popen` that starts a new process by
+ * interpreting a string as a shell command.
+ */
+abstract class CommandExecutionFunction extends Function {
+ /**
+ * Holds if `input` is interpreted as a shell command.
+ */
+ abstract predicate hasCommandArgument(FunctionInput input);
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/DataFlow.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/DataFlow.qll
new file mode 100644
index 00000000000..b30861254dc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/DataFlow.qll
@@ -0,0 +1,29 @@
+/**
+ * Provides an abstract class for accurate dataflow modeling of library
+ * functions when source code is not available. To use this QL library,
+ * create a QL class extending `DataFlowFunction` with a characteristic
+ * predicate that selects the function or set of functions you are modeling.
+ * Within that class, override the predicates provided by `DataFlowFunction`
+ * to match the flow within that function.
+ */
+
+import semmle.code.cpp.Function
+import FunctionInputsAndOutputs
+import semmle.code.cpp.models.Models
+
+/**
+ * A library function for which a value is or may be copied from a parameter
+ * or qualifier to an output buffer, return value, or qualifier.
+ *
+ * Note that this does not include partial copying of values or partial writes
+ * to destinations; that is covered by `TaintModel.qll`.
+ */
+abstract class DataFlowFunction extends Function {
+ /**
+ * Holds if data can be copied from the argument, qualifier, or buffer
+ * represented by `input` to the return value or buffer represented by
+ * `output`
+ */
+ pragma[nomagic]
+ abstract predicate hasDataFlow(FunctionInput input, FunctionOutput output);
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Deallocation.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Deallocation.qll
new file mode 100644
index 00000000000..9c74102e99c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Deallocation.qll
@@ -0,0 +1,48 @@
+/**
+ * Provides an abstract class for modeling functions and expressions that
+ * deallocate memory, such as the standard `free` function. To use this QL
+ * library, create one or more QL classes extending a class here with a
+ * characteristic predicate that selects the functions or expressions you are
+ * trying to model. Within that class, override the predicates provided
+ * by the abstract class to match the specifics of those functions or
+ * expressions. Finally, add a private import statement to `Models.qll`.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.Models
+
+/**
+ * A deallocation function such as `free`.
+ */
+abstract class DeallocationFunction extends Function {
+ /**
+ * Gets the index of the argument that is freed by this function.
+ */
+ int getFreedArg() { none() }
+}
+
+/**
+ * A deallocation expression such as a call to `free` or a `delete` expression.
+ */
+abstract class DeallocationExpr extends Expr {
+ /**
+ * Gets the expression that is freed by this deallocation expression.
+ */
+ Expr getFreedExpr() { none() }
+}
+
+/**
+ * An `operator delete` or `operator delete[]` function that may be associated
+ * with `delete` or `delete[]` expressions. Note that `delete` and `delete[]`
+ * are not function calls, but these functions may also be called directly.
+ */
+class OperatorDeleteDeallocationFunction extends DeallocationFunction {
+ OperatorDeleteDeallocationFunction() {
+ hasGlobalName([
+ "operator delete", // operator delete(pointer, ...)
+ "operator delete[]" // operator delete[](pointer, ...)
+ ])
+ }
+
+ override int getFreedArg() { result = 0 }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FlowSource.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FlowSource.qll
new file mode 100644
index 00000000000..d454257ea86
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FlowSource.qll
@@ -0,0 +1,66 @@
+/**
+ * Provides classes for modeling functions that return data from (or send data to) potentially untrusted
+ * sources. To use this QL library, create a QL class extending `RemoteFlowSourceFunction`,
+ * `LocalFlowSourceFunction` or `RemoteFlowSinkFunction` with a characteristic predicate that
+ * selects the function or set of functions you are modeling. Within that class, override the
+ * predicates provided by the class you extended to match the flow within that function.
+ */
+
+import cpp
+import FunctionInputsAndOutputs
+import semmle.code.cpp.models.Models
+
+/**
+ * A library function that returns data that may be read from a network connection.
+ */
+abstract class RemoteFlowSourceFunction extends Function {
+ /**
+ * Holds if remote data described by `description` flows from `output` of a call to this function.
+ */
+ abstract predicate hasRemoteFlowSource(FunctionOutput output, string description);
+
+ /**
+ * Holds if remote data from this source comes from a socket described by
+ * `input`. There is no result if a socket is not specified.
+ */
+ predicate hasSocketInput(FunctionInput input) { none() }
+}
+
+/**
+ * DEPRECATED: Use `RemoteFlowSourceFunction` instead.
+ *
+ * A library function that returns data that may be read from a network connection.
+ */
+deprecated class RemoteFlowFunction = RemoteFlowSourceFunction;
+
+/**
+ * A library function that returns data that is directly controlled by a user.
+ */
+abstract class LocalFlowSourceFunction extends Function {
+ /**
+ * Holds if data described by `description` flows from `output` of a call to this function.
+ */
+ abstract predicate hasLocalFlowSource(FunctionOutput output, string description);
+}
+
+/**
+ * DEPRECATED: Use `LocalFlowSourceFunction` instead.
+ *
+ * A library function that returns data that is directly controlled by a user.
+ */
+deprecated class LocalFlowFunction = LocalFlowSourceFunction;
+
+/** A library function that sends data over a network connection. */
+abstract class RemoteFlowSinkFunction extends Function {
+ /**
+ * Holds if data described by `description` flows into `input` to a call to this function, and is then
+ * sent over a network connection.
+ */
+ abstract predicate hasRemoteFlowSink(FunctionInput input, string description);
+
+ /**
+ * Holds if data put into this sink is transmitted through a socket described
+ * by `input`. There is no result if a socket is not specified.
+ */
+ predicate hasSocketInput(FunctionInput input) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FormattingFunction.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FormattingFunction.qll
new file mode 100644
index 00000000000..9630bb13e18
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FormattingFunction.qll
@@ -0,0 +1,196 @@
+/**
+ * Provides a class for modeling `printf`-style formatting functions. To use
+ * this QL library, create a QL class extending `FormattingFunction` with a
+ * characteristic predicate that selects the function or set of functions you
+ * are modeling. Within that class, override the predicates provided by
+ * `FormattingFunction` to match the flow within that function.
+ */
+
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Taint
+
+private Type stripTopLevelSpecifiersOnly(Type t) {
+ result = stripTopLevelSpecifiersOnly(t.(SpecifiedType).getBaseType())
+ or
+ result = t and
+ not t instanceof SpecifiedType
+}
+
+/**
+ * A type that is used as a format string by any formatting function.
+ */
+Type getAFormatterWideType() {
+ exists(FormattingFunction ff |
+ result = stripTopLevelSpecifiersOnly(ff.getFormatCharType()) and
+ result.getSize() != 1
+ )
+}
+
+/**
+ * A type that is used as a format string by any formatting function, or `wchar_t` if
+ * there is none.
+ */
+private Type getAFormatterWideTypeOrDefault() {
+ result = getAFormatterWideType()
+ or
+ not exists(getAFormatterWideType()) and
+ result instanceof Wchar_t
+}
+
+/**
+ * A standard library function that uses a `printf`-like formatting string.
+ */
+abstract class FormattingFunction extends ArrayFunction, TaintFunction {
+ /** Gets the position at which the format parameter occurs. */
+ abstract int getFormatParameterIndex();
+
+ override string getAPrimaryQlClass() { result = "FormattingFunction" }
+
+ /**
+ * Holds if this `FormattingFunction` is in a context that supports
+ * Microsoft rules and extensions.
+ */
+ predicate isMicrosoft() { anyFileCompiledAsMicrosoft() }
+
+ /**
+ * Holds if the default meaning of `%s` is a `wchar_t *`, rather than
+ * a `char *` (either way, `%S` will have the opposite meaning).
+ *
+ * DEPRECATED: Use getDefaultCharType() instead.
+ */
+ deprecated predicate isWideCharDefault() { none() }
+
+ /**
+ * Gets the character type used in the format string for this function.
+ */
+ Type getFormatCharType() {
+ result =
+ stripTopLevelSpecifiersOnly(stripTopLevelSpecifiersOnly(getParameter(getFormatParameterIndex())
+ .getType()
+ .getUnderlyingType()).(PointerType).getBaseType())
+ }
+
+ /**
+ * Gets the default character type expected for `%s` by this function. Typically
+ * `char` or `wchar_t`.
+ */
+ Type getDefaultCharType() {
+ isMicrosoft() and
+ result = getFormatCharType()
+ or
+ not isMicrosoft() and
+ result instanceof PlainCharType
+ }
+
+ /**
+ * Gets the non-default character type expected for `%S` by this function. Typically
+ * `wchar_t` or `char`. On some snapshots there may be multiple results where we can't tell
+ * which is correct for a particular function.
+ */
+ Type getNonDefaultCharType() {
+ getDefaultCharType().getSize() = 1 and
+ result = getWideCharType()
+ or
+ not getDefaultCharType().getSize() = 1 and
+ result instanceof PlainCharType
+ }
+
+ /**
+ * Gets the wide character type for this function. This is usually `wchar_t`. On some
+ * snapshots there may be multiple results where we can't tell which is correct for a
+ * particular function.
+ */
+ Type getWideCharType() {
+ result = getFormatCharType() and
+ result.getSize() > 1
+ or
+ not getFormatCharType().getSize() > 1 and
+ result = getAFormatterWideTypeOrDefault() // may have more than one result
+ }
+
+ /**
+ * Gets the position at which the output parameter, if any, occurs. If
+ * `isStream` is `true`, the output parameter is a stream (that is, this
+ * function behaves like `fprintf`). If `isStream` is `false`, the output
+ * parameter is a buffer (that is, this function behaves like `sprintf`).
+ */
+ int getOutputParameterIndex(boolean isStream) { none() }
+
+ /**
+ * Gets the position at which the output parameter, if any, occurs.
+ *
+ * DEPRECATED: use `getOutputParameterIndex(boolean isStream)` instead.
+ */
+ deprecated int getOutputParameterIndex() { result = getOutputParameterIndex(_) }
+
+ /**
+ * Holds if this function outputs to a global stream such as standard output,
+ * standard error or a system log. For example `printf`.
+ */
+ predicate isOutputGlobal() { none() }
+
+ /**
+ * Gets the position of the first format argument, corresponding with
+ * the first format specifier in the format string.
+ */
+ int getFirstFormatArgumentIndex() {
+ result = getNumberOfParameters() and
+ // the formatting function either has a definition in the snapshot, or all
+ // `DeclarationEntry`s agree on the number of parameters (otherwise we don't
+ // really know the correct number)
+ (
+ hasDefinition()
+ or
+ forall(FunctionDeclarationEntry fde | fde = getADeclarationEntry() |
+ result = fde.getNumberOfParameters()
+ )
+ )
+ }
+
+ /**
+ * Gets the position of the buffer size argument, if any.
+ */
+ int getSizeParameterIndex() { none() }
+
+ override predicate hasArrayWithNullTerminator(int bufParam) {
+ bufParam = getFormatParameterIndex()
+ }
+
+ override predicate hasArrayWithVariableSize(int bufParam, int countParam) {
+ bufParam = getOutputParameterIndex(false) and
+ countParam = getSizeParameterIndex()
+ }
+
+ override predicate hasArrayWithUnknownSize(int bufParam) {
+ bufParam = getOutputParameterIndex(false) and
+ not exists(getSizeParameterIndex())
+ }
+
+ override predicate hasArrayInput(int bufParam) { bufParam = getFormatParameterIndex() }
+
+ override predicate hasArrayOutput(int bufParam) { bufParam = getOutputParameterIndex(false) }
+
+ override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+ exists(int arg |
+ (
+ arg = getFormatParameterIndex() or
+ arg >= getFirstFormatArgumentIndex()
+ ) and
+ input.isParameterDeref(arg) and
+ output.isParameterDeref(getOutputParameterIndex(_))
+ )
+ }
+}
+
+/**
+ * The standard functions `snprintf` and `swprintf`, and their
+ * Microsoft and glib variants.
+ */
+abstract class Snprintf extends FormattingFunction {
+ /**
+ * Holds if this function returns the length of the formatted string
+ * that would have been output, regardless of the amount of space
+ * in the buffer.
+ */
+ predicate returnsFullFormatLength() { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll
new file mode 100644
index 00000000000..4ab55ee5b3f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll
@@ -0,0 +1,492 @@
+/**
+ * Provides a set of QL classes for indicating dataflows through a particular
+ * parameter, return value, or qualifier, as well as flows at one level of
+ * pointer indirection.
+ */
+
+import semmle.code.cpp.Parameter
+
+private newtype TFunctionInput =
+ TInParameter(ParameterIndex i) or
+ TInParameterDeref(ParameterIndex i) or
+ TInQualifierObject() or
+ TInQualifierAddress() or
+ TInReturnValueDeref()
+
+/**
+ * An input to a function. This can be:
+ * - The value of one of the function's parameters
+ * - The value pointed to by one of function's pointer or reference parameters
+ * - The value of the function's `this` pointer
+ * - The value pointed to by the function's `this` pointer
+ */
+class FunctionInput extends TFunctionInput {
+ abstract string toString();
+
+ /**
+ * Holds if this is the input value of the parameter with index `index`.
+ *
+ * Example:
+ * ```
+ * void func(int n, char* p, float& r);
+ * ```
+ * - `isParameter(0)` holds for the `FunctionInput` that represents the value of `n` (with type
+ * `int`) on entry to the function.
+ * - `isParameter(1)` holds for the `FunctionInput` that represents the value of `p` (with type
+ * `char*`) on entry to the function.
+ * - `isParameter(2)` holds for the `FunctionInput` that represents the "value" of the reference
+ * `r` (with type `float&`) on entry to the function, _not_ the value of the referred-to
+ * `float`.
+ */
+ predicate isParameter(ParameterIndex index) { none() }
+
+ /**
+ * Holds if this is the input value of the parameter with index `index`.
+ * DEPRECATED: Use `isParameter(index)` instead.
+ */
+ deprecated final predicate isInParameter(ParameterIndex index) { isParameter(index) }
+
+ /**
+ * Holds if this is the input value pointed to by a pointer parameter to a function, or the input
+ * value referred to by a reference parameter to a function, where the parameter has index
+ * `index`.
+ *
+ * Example:
+ * ```
+ * void func(int n, char* p, float& r);
+ * ```
+ * - `isParameterDeref(1)` holds for the `FunctionInput` that represents the value of `*p` (with
+ * type `char`) on entry to the function.
+ * - `isParameterDeref(2)` holds for the `FunctionInput` that represents the value of `r` (with type
+ * `float`) on entry to the function.
+ * - There is no `FunctionInput` for which `isParameterDeref(0)` holds, because `n` is neither a
+ * pointer nor a reference.
+ */
+ predicate isParameterDeref(ParameterIndex index) { none() }
+
+ /**
+ * Holds if this is the input value pointed to by a pointer parameter to a function, or the input
+ * value referred to by a reference parameter to a function, where the parameter has index
+ * `index`.
+ * DEPRECATED: Use `isParameterDeref(index)` instead.
+ */
+ deprecated final predicate isInParameterPointer(ParameterIndex index) { isParameterDeref(index) }
+
+ /**
+ * Holds if this is the input value pointed to by the `this` pointer of an instance member
+ * function.
+ *
+ * Example:
+ * ```
+ * struct C {
+ * void mfunc(int n, char* p, float& r) const;
+ * };
+ * ```
+ * - `isQualifierObject()` holds for the `FunctionInput` that represents the value of `*this`
+ * (with type `C const`) on entry to the function.
+ */
+ predicate isQualifierObject() { none() }
+
+ /**
+ * Holds if this is the input value pointed to by the `this` pointer of an instance member
+ * function.
+ * DEPRECATED: Use `isQualifierObject()` instead.
+ */
+ deprecated final predicate isInQualifier() { isQualifierObject() }
+
+ /**
+ * Holds if this is the input value of the `this` pointer of an instance member function.
+ *
+ * Example:
+ * ```
+ * struct C {
+ * void mfunc(int n, char* p, float& r) const;
+ * };
+ * ```
+ * - `isQualifierAddress()` holds for the `FunctionInput` that represents the value of `this`
+ * (with type `C const *`) on entry to the function.
+ */
+ predicate isQualifierAddress() { none() }
+
+ /**
+ * Holds if `i >= 0` and `isParameter(i)` holds for this value, or
+ * if `i = -1` and `isQualifierAddress()` holds for this value.
+ */
+ final predicate isParameterOrQualifierAddress(ParameterIndex i) {
+ i >= 0 and this.isParameter(i)
+ or
+ i = -1 and this.isQualifierAddress()
+ }
+
+ /**
+ * Holds if this is the input value pointed to by the return value of a
+ * function, if the function returns a pointer, or the input value referred
+ * to by the return value of a function, if the function returns a reference.
+ *
+ * Example:
+ * ```
+ * char* getPointer();
+ * float& getReference();
+ * int getInt();
+ * ```
+ * - `isReturnValueDeref()` holds for the `FunctionInput` that represents the
+ * value of `*getPointer()` (with type `char`).
+ * - `isReturnValueDeref()` holds for the `FunctionInput` that represents the
+ * value of `getReference()` (with type `float`).
+ * - There is no `FunctionInput` of `getInt()` for which
+ * `isReturnValueDeref()` holds because the return type of `getInt()` is
+ * neither a pointer nor a reference.
+ *
+ * Note that data flows in through function return values are relatively
+ * rare, but they do occur when a function returns a reference to itself,
+ * part of itself, or one of its other inputs.
+ */
+ predicate isReturnValueDeref() { none() }
+
+ /**
+ * Holds if `i >= 0` and `isParameterDeref(i)` holds for this value, or
+ * if `i = -1` and `isQualifierObject()` holds for this value.
+ */
+ final predicate isParameterDerefOrQualifierObject(ParameterIndex i) {
+ i >= 0 and this.isParameterDeref(i)
+ or
+ i = -1 and this.isQualifierObject()
+ }
+}
+
+/**
+ * The input value of a parameter.
+ *
+ * Example:
+ * ```
+ * void func(int n, char* p, float& r);
+ * ```
+ * - There is an `InParameter` representing the value of `n` (with type `int`) on entry to the
+ * function.
+ * - There is an `InParameter` representing the value of `p` (with type `char*`) on entry to the
+ * function.
+ * - There is an `InParameter` representing the "value" of the reference `r` (with type `float&`) on
+ * entry to the function, _not_ the value of the referred-to `float`.
+ */
+class InParameter extends FunctionInput, TInParameter {
+ ParameterIndex index;
+
+ InParameter() { this = TInParameter(index) }
+
+ override string toString() { result = "InParameter " + index.toString() }
+
+ /** Gets the zero-based index of the parameter. */
+ ParameterIndex getIndex() { result = index }
+
+ override predicate isParameter(ParameterIndex i) { i = index }
+}
+
+/**
+ * The input value pointed to by a pointer parameter to a function, or the input value referred to
+ * by a reference parameter to a function.
+ *
+ * Example:
+ * ```
+ * void func(int n, char* p, float& r);
+ * ```
+ * - There is an `InParameterDeref` with `getIndex() = 1` that represents the value of `*p` (with
+ * type `char`) on entry to the function.
+ * - There is an `InParameterDeref` with `getIndex() = 2` that represents the value of `r` (with
+ * type `float`) on entry to the function.
+ * - There is no `InParameterDeref` representing the value of `n`, because `n` is neither a pointer
+ * nor a reference.
+ */
+class InParameterDeref extends FunctionInput, TInParameterDeref {
+ ParameterIndex index;
+
+ InParameterDeref() { this = TInParameterDeref(index) }
+
+ override string toString() { result = "InParameterDeref " + index.toString() }
+
+ /** Gets the zero-based index of the parameter. */
+ ParameterIndex getIndex() { result = index }
+
+ override predicate isParameterDeref(ParameterIndex i) { i = index }
+}
+
+/**
+ * The input value pointed to by the `this` pointer of an instance member function.
+ *
+ * Example:
+ * ```
+ * struct C {
+ * void mfunc(int n, char* p, float& r) const;
+ * };
+ * ```
+ * - `InQualifierObject` represents the value of `*this` (with type `C const`) on entry to the
+ * function.
+ */
+class InQualifierObject extends FunctionInput, TInQualifierObject {
+ override string toString() { result = "InQualifierObject" }
+
+ override predicate isQualifierObject() { any() }
+}
+
+/**
+ * The input value of the `this` pointer of an instance member function.
+ *
+ * Example:
+ * ```
+ * struct C {
+ * void mfunc(int n, char* p, float& r) const;
+ * };
+ * ```
+ * - `InQualifierAddress` represents the value of `this` (with type `C const *`) on entry to the
+ * function.
+ */
+class InQualifierAddress extends FunctionInput, TInQualifierAddress {
+ override string toString() { result = "InQualifierAddress" }
+
+ override predicate isQualifierAddress() { any() }
+}
+
+/**
+ * The input value pointed to by the return value of a function, if the
+ * function returns a pointer, or the input value referred to by the return
+ * value of a function, if the function returns a reference.
+ *
+ * Example:
+ * ```
+ * char* getPointer();
+ * float& getReference();
+ * int getInt();
+ * ```
+ * - `InReturnValueDeref` represents the value of `*getPointer()` (with type
+ * `char`).
+ * - `InReturnValueDeref` represents the value of `getReference()` (with type
+ * `float`).
+ * - `InReturnValueDeref` does not represent the return value of `getInt()`
+ * because the return type of `getInt()` is neither a pointer nor a reference.
+ *
+ * Note that data flows in through function return values are relatively
+ * rare, but they do occur when a function returns a reference to itself,
+ * part of itself, or one of its other inputs.
+ */
+class InReturnValueDeref extends FunctionInput, TInReturnValueDeref {
+ override string toString() { result = "InReturnValueDeref" }
+
+ override predicate isReturnValueDeref() { any() }
+}
+
+private newtype TFunctionOutput =
+ TOutParameterDeref(ParameterIndex i) or
+ TOutQualifierObject() or
+ TOutReturnValue() or
+ TOutReturnValueDeref()
+
+/**
+ * An output from a function. This can be:
+ * - The value pointed to by one of function's pointer or reference parameters
+ * - The value pointed to by the function's `this` pointer
+ * - The function's return value
+ * - The value pointed to by the function's return value, if the return value is a pointer or
+ * reference
+ */
+class FunctionOutput extends TFunctionOutput {
+ abstract string toString();
+
+ /**
+ * Holds if this is the output value pointed to by a pointer parameter to a function, or the
+ * output value referred to by a reference parameter to a function, where the parameter has
+ * index `index`.
+ *
+ * Example:
+ * ```
+ * void func(int n, char* p, float& r);
+ * ```
+ * - `isParameterDeref(1)` holds for the `FunctionOutput` that represents the value of `*p` (with
+ * type `char`) on return from the function.
+ * - `isParameterDeref(2)` holds for the `FunctionOutput` that represents the value of `r` (with
+ * type `float`) on return from the function.
+ * - There is no `FunctionOutput` for which `isParameterDeref(0)` holds, because `n` is neither a
+ * pointer nor a reference.
+ */
+ predicate isParameterDeref(ParameterIndex i) { none() }
+
+ /**
+ * Holds if this is the output value pointed to by a pointer parameter to a function, or the
+ * output value referred to by a reference parameter to a function, where the parameter has
+ * index `index`.
+ * DEPRECATED: Use `isParameterDeref(index)` instead.
+ */
+ deprecated final predicate isOutParameterPointer(ParameterIndex index) { isParameterDeref(index) }
+
+ /**
+ * Holds if this is the output value pointed to by the `this` pointer of an instance member
+ * function.
+ *
+ * Example:
+ * ```
+ * struct C {
+ * void mfunc(int n, char* p, float& r);
+ * };
+ * ```
+ * - `isQualifierObject()` holds for the `FunctionOutput` that represents the value of `*this`
+ * (with type `C`) on return from the function.
+ */
+ predicate isQualifierObject() { none() }
+
+ /**
+ * Holds if this is the output value pointed to by the `this` pointer of an instance member
+ * function.
+ * DEPRECATED: Use `isQualifierObject()` instead.
+ */
+ deprecated final predicate isOutQualifier() { isQualifierObject() }
+
+ /**
+ * Holds if this is the value returned by a function.
+ *
+ * Example:
+ * ```
+ * int getInt();
+ * char* getPointer();
+ * float& getReference();
+ * ```
+ * - `isReturnValue()` holds for the `FunctionOutput` that represents the value returned by
+ * `getInt()` (with type `int`).
+ * - `isReturnValue()` holds for the `FunctionOutput` that represents the value returned by
+ * `getPointer()` (with type `char*`).
+ * - `isReturnValue()` holds for the `FunctionOutput` that represents the "value" of the reference
+ * returned by `getReference()` (with type `float&`), _not_ the value of the referred-to
+ * `float`.
+ */
+ predicate isReturnValue() { none() }
+
+ /**
+ * Holds if this is the value returned by a function.
+ * DEPRECATED: Use `isReturnValue()` instead.
+ */
+ deprecated final predicate isOutReturnValue() { isReturnValue() }
+
+ /**
+ * Holds if this is the output value pointed to by the return value of a function, if the function
+ * returns a pointer, or the output value referred to by the return value of a function, if the
+ * function returns a reference.
+ *
+ * Example:
+ * ```
+ * char* getPointer();
+ * float& getReference();
+ * int getInt();
+ * ```
+ * - `isReturnValueDeref()` holds for the `FunctionOutput` that represents the value of
+ * `*getPointer()` (with type `char`).
+ * - `isReturnValueDeref()` holds for the `FunctionOutput` that represents the value of
+ * `getReference()` (with type `float`).
+ * - There is no `FunctionOutput` of `getInt()` for which `isReturnValueDeref()` holds because the
+ * return type of `getInt()` is neither a pointer nor a reference.
+ */
+ predicate isReturnValueDeref() { none() }
+
+ /**
+ * Holds if this is the output value pointed to by the return value of a function, if the function
+ * returns a pointer, or the output value referred to by the return value of a function, if the
+ * function returns a reference.
+ * DEPRECATED: Use `isReturnValueDeref()` instead.
+ */
+ deprecated final predicate isOutReturnPointer() { isReturnValueDeref() }
+
+ /**
+ * Holds if `i >= 0` and `isParameterDeref(i)` holds for this value, or
+ * if `i = -1` and `isQualifierObject()` holds for this value.
+ */
+ final predicate isParameterDerefOrQualifierObject(ParameterIndex i) {
+ i >= 0 and this.isParameterDeref(i)
+ or
+ i = -1 and this.isQualifierObject()
+ }
+}
+
+/**
+ * The output value pointed to by a pointer parameter to a function, or the output value referred to
+ * by a reference parameter to a function.
+ *
+ * Example:
+ * ```
+ * void func(int n, char* p, float& r);
+ * ```
+ * - There is an `OutParameterDeref` with `getIndex()=1` that represents the value of `*p` (with
+ * type `char`) on return from the function.
+ * - There is an `OutParameterDeref` with `getIndex()=2` that represents the value of `r` (with
+ * type `float`) on return from the function.
+ * - There is no `OutParameterDeref` representing the value of `n`, because `n` is neither a
+ * pointer nor a reference.
+ */
+class OutParameterDeref extends FunctionOutput, TOutParameterDeref {
+ ParameterIndex index;
+
+ OutParameterDeref() { this = TOutParameterDeref(index) }
+
+ override string toString() { result = "OutParameterDeref " + index.toString() }
+
+ ParameterIndex getIndex() { result = index }
+
+ override predicate isParameterDeref(ParameterIndex i) { i = index }
+}
+
+/**
+ * The output value pointed to by the `this` pointer of an instance member function.
+ *
+ * Example:
+ * ```
+ * struct C {
+ * void mfunc(int n, char* p, float& r);
+ * };
+ * ```
+ * - The `OutQualifierObject` represents the value of `*this` (with type `C`) on return from the
+ * function.
+ */
+class OutQualifierObject extends FunctionOutput, TOutQualifierObject {
+ override string toString() { result = "OutQualifierObject" }
+
+ override predicate isQualifierObject() { any() }
+}
+
+/**
+ * The value returned by a function.
+ *
+ * Example:
+ * ```
+ * int getInt();
+ * char* getPointer();
+ * float& getReference();
+ * ```
+ * - `OutReturnValue` represents the value returned by
+ * `getInt()` (with type `int`).
+ * - `OutReturnValue` represents the value returned by
+ * `getPointer()` (with type `char*`).
+ * - `OutReturnValue` represents the "value" of the reference returned by `getReference()` (with
+ * type `float&`), _not_ the value of the referred-to `float`.
+ */
+class OutReturnValue extends FunctionOutput, TOutReturnValue {
+ override string toString() { result = "OutReturnValue" }
+
+ override predicate isReturnValue() { any() }
+}
+
+/**
+ * The output value pointed to by the return value of a function, if the function returns a pointer,
+ * or the output value referred to by the return value of a function, if the function returns a
+ * reference.
+ *
+ * Example:
+ * ```
+ * char* getPointer();
+ * float& getReference();
+ * int getInt();
+ * ```
+ * - `OutReturnValueDeref` represents the value of `*getPointer()` (with type `char`).
+ * - `OutReturnValueDeref` represents the value of `getReference()` (with type `float`).
+ * - `OutReturnValueDeref` does not represent the return value of `getInt()` because the return type
+ * of `getInt()` is neither a pointer nor a reference.
+ */
+class OutReturnValueDeref extends FunctionOutput, TOutReturnValueDeref {
+ override string toString() { result = "OutReturnValueDeref" }
+
+ override predicate isReturnValueDeref() { any() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Iterator.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Iterator.qll
new file mode 100644
index 00000000000..9a260a33255
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Iterator.qll
@@ -0,0 +1,33 @@
+/**
+ * Provides an abstract class for accurate modeling of flow through output
+ * iterators. To use this QL library, create a QL class extending
+ * `IteratorReferenceFunction` with a characteristic predicate that selects the
+ * function or set of functions you are modeling. Within that class, override
+ * the predicates provided by `AliasFunction` to match the flow within that
+ * function.
+ */
+
+import cpp
+import semmle.code.cpp.models.Models
+
+/**
+ * A function which takes an iterator argument and returns a reference that can be
+ * used to write to the iterator's underlying collection. Declares no predicates.
+ */
+abstract class IteratorReferenceFunction extends Function { }
+
+/**
+ * A function which takes a container and returns an iterator over it; see `getsIterator`.
+ */
+abstract class GetIteratorFunction extends Function {
+ /**
+ * Holds if the return value or buffer represented by `output` is an iterator over the container
+ * passed in the argument, qualifier, or buffer represented by `input`.
+ */
+ abstract predicate getsIterator(FunctionInput input, FunctionOutput output);
+}
+
+/**
+ * A type which can be used as an iterator. Declares no predicates of its own.
+ */
+abstract class Iterator extends Type { }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/PointerWrapper.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/PointerWrapper.qll
new file mode 100644
index 00000000000..8948aee424b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/PointerWrapper.qll
@@ -0,0 +1,17 @@
+/** Provides classes for modeling pointer wrapper types and expressions. */
+
+private import cpp
+
+/** A class that wraps a pointer type, for example `std::unique_ptr` or `std::shared_ptr`. */
+abstract class PointerWrapper extends Class {
+ /**
+ * Gets a member function of this class that returns the wrapped pointer, if any.
+ *
+ * This includes both functions that return the wrapped pointer by value, and functions
+ * that return a reference to the pointed-to object.
+ */
+ abstract MemberFunction getAnUnwrapperFunction();
+
+ /** Holds if the type of the data that is pointed to by this pointer wrapper is `const`. */
+ abstract predicate pointsToConst();
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/SideEffect.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/SideEffect.qll
new file mode 100644
index 00000000000..3377db771a3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/SideEffect.qll
@@ -0,0 +1,55 @@
+/**
+ * Provides an abstract class for accurate dataflow modeling of library
+ * functions when source code is not available. To use this QL library,
+ * create a QL class extending `SideEffectFunction` with a characteristic
+ * predicate that selects the function or set of functions you are modeling.
+ * Within that class, override the predicates provided by `SideEffectFunction`
+ * to match the flow within that function.
+ */
+
+import semmle.code.cpp.Function
+import semmle.code.cpp.models.Models
+import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs
+
+/**
+ * Models the side effects of a library function.
+ */
+abstract class SideEffectFunction extends Function {
+ /**
+ * Holds if the function never reads from memory that was defined before entry to the function.
+ * This memory could be from global variables, or from other memory that was reachable from a
+ * pointer that was passed into the function. Input side-effects, and reads from memory that
+ * cannot be visible to the caller (for example a buffer inside an I/O library) are not modeled
+ * here.
+ */
+ abstract predicate hasOnlySpecificReadSideEffects();
+
+ /**
+ * Holds if the function never writes to memory that remains allocated after the function
+ * returns. This memory could be from global variables, or from other memory that was reachable
+ * from a pointer that was passed into the function. Output side-effects, and writes to memory
+ * that cannot be visible to the caller (for example a buffer inside an I/O library) are not
+ * modeled here.
+ */
+ abstract predicate hasOnlySpecificWriteSideEffects();
+
+ /**
+ * Holds if the value pointed to by the parameter at index `i` is written to. `buffer` is true
+ * if the write may be at an offset. `mustWrite` is true if the write is unconditional.
+ */
+ predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
+ none() // default: holds for nothing; override to model specific writes
+ }
+
+ /**
+ * Holds if the value pointed to by the parameter at index `i` is read from. `buffer` is true
+ * if the read may be at an offset. The default implementation holds for nothing.
+ */
+ predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) { none() }
+
+ /**
+ * Gets the index of the parameter that indicates the size of the buffer pointed to by the
+ * parameter at index `i`. The default implementation has no result.
+ */
+ ParameterIndex getParameterSizeIndex(ParameterIndex i) { none() }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Sql.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Sql.qll
new file mode 100644
index 00000000000..7d5111c2488
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Sql.qll
@@ -0,0 +1,30 @@
+/**
+ * Provides abstract classes for modeling functions that execute and escape SQL query strings.
+ * To extend this QL library, create a QL class extending `SqlExecutionFunction` or `SqlBarrierFunction`
+ * with a characteristic predicate that selects the function or set of functions you are modeling.
+ * Within that class, override the predicates provided by the class to match the way a
+ * parameter flows into the function and, in the case of `SqlBarrierFunction`, out of the function.
+ */
+
+private import cpp
+
+/**
+ * A function that executes an SQL query. Extend this class to model such a function.
+ */
+abstract class SqlExecutionFunction extends Function {
+ /**
+ * Holds if `input` to this function represents SQL code to be executed.
+ */
+ abstract predicate hasSqlArgument(FunctionInput input);
+}
+
+/**
+ * An abstract class that represents a function that is a barrier to an SQL query string.
+ */
+abstract class SqlBarrierFunction extends Function {
+ /**
+ * Holds if the `output` is a barrier to the SQL input `input` such that it is safe to pass to
+ * an `SqlExecutionFunction`.
+ */
+ abstract predicate barrierSqlArgument(FunctionInput input, FunctionOutput output);
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Taint.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Taint.qll
new file mode 100644
index 00000000000..05a5d9f1c28
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/models/interfaces/Taint.qll
@@ -0,0 +1,33 @@
+/**
+ * Provides an abstract class for accurate taint modeling of library
+ * functions when source code is not available. To use this QL library,
+ * create a QL class extending `TaintFunction` with a characteristic predicate
+ * that selects the function or set of functions you are modeling. Within that
+ * class, override the predicates provided by `TaintFunction` to match the flow
+ * within that function.
+ */
+
+import semmle.code.cpp.Function
+import FunctionInputsAndOutputs
+import semmle.code.cpp.models.Models
+
+/**
+ * A library function for which a taint-tracking library should propagate taint
+ * from a parameter or qualifier to an output buffer, return value, or qualifier.
+ *
+ * An expression is tainted if it could be influenced by an attacker to have
+ * an unusual value.
+ *
+ * Note that this does not include direct copying of values; that is covered by
+ * DataFlowModel.qll. If a value is sometimes copied in full, and sometimes
+ * altered (for example copying a string with `strncpy`), this is also considered
+ * data flow.
+ */
+abstract class TaintFunction extends Function {
+ /**
+ * Holds if data passed into the argument, qualifier, or buffer represented by
+ * `input` influences the return value or buffer represented by `output`.
+ */
+ pragma[nomagic]
+ abstract predicate hasTaintFlow(FunctionInput input, FunctionOutput output);
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/padding/ConsistencyCheck.ql b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/padding/ConsistencyCheck.ql
new file mode 100644
index 00000000000..ac4102e6750
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/padding/ConsistencyCheck.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Padding Consistency Check
+ * @description Performs consistency checks for the padding library. This query should have no results.
+ * @kind table
+ * @id cpp/padding-consistency-check
+ */
+
+import Padding
+
+/*
+ * Consistency-check: Find discrepancies between computed and actual size on LP64.
+ */
+
+/*
+ * from Type t, LP64 a, int padded, int bit, int real, MemberVariable v
+ * where padded = a.paddedSize(t) and bit = a.bitSize(t)
+ * and real = t.getSize() * 8 and padded != real and count(t.getSize()) = 1
+ * select t, a.paddedSize(t) as Padded, real, v, t.(PaddedType).memberSize(v, a)
+ */
+
+/*
+ * from PaddedType t, LP64 a, MemberVariable v
+ * where t instanceof Union and v = t.getAMember() and not exists(t.memberSize(v, a))
+ * select t, v, v.getType().explain()
+ */
+
+/*
+ * from PaddedType t, LP64 a, MemberVariable v
+ * where not exists(a.paddedSize(t))
+ * select t, t.fieldIndex(v) as i, v, t.memberSize(v, a) order by t, i
+ */
+
+from PaddedType t, LP64 a, int size, int waste
+where a.wastedSpace(t) != 0 and size = a.paddedSize(t) and waste = a.wastedSpace(t)
+select t, size, waste order by waste desc
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/padding/Padding.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/padding/Padding.qll
new file mode 100644
index 00000000000..7446569451d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/padding/Padding.qll
@@ -0,0 +1,676 @@
+import cpp
+
+/**
+ * Rounds `offset` up to a multiple of `alignment`.
+ * The result is the least integer `i` with `i >= offset` and
+ * `i % alignment = 0`.
+ */
+bindingset[offset, alignment]
+private int alignUp(int offset, int alignment) {
+  exists(float units | units = offset.(float) / alignment | result = units.ceil() * alignment)
+}
+
+private Type stripSpecifiers(Type t) {
+  if exists(t.getUnspecifiedType())
+  then result = t.getUnspecifiedType()
+  else result = t // no unspecified type available: keep `t` unchanged
+}
+
+/**
+ * A string that represents an architecture.
+ * An "architecture" defines the sizes of variable-sized types and
+ * the properties of alignment for fields of various
+ * types. Two are provided out-of-the-box: ILP32 and LP64,
+ * corresponding to gcc's behavior on x86 and amd64.
+ */
+abstract class Architecture extends string {
+ bindingset[this]
+ Architecture() { any() }
+
+ /** Gets the size of a pointer, in bits. */
+ abstract int pointerSize();
+
+ /** Gets the size of a `long int`, in bits. */
+ abstract int longSize();
+
+ /** Gets the size of a `long double`, in bits. */
+ abstract int longDoubleSize();
+
+ /** Gets the size of a `long long`, in bits. */
+ abstract int longLongSize();
+
+ /** Gets the size of a `wchar_t`, in bits. */
+ abstract int wideCharSize();
+
+ /** Gets the alignment boundary for doubles, in bits. */
+ abstract int doubleAlign();
+
+ /** Gets the alignment boundary for long doubles, in bits. */
+ abstract int longDoubleAlign();
+
+ /** Gets the alignment boundary for long longs, in bits. */
+ abstract int longLongAlign();
+
+ /**
+ * Holds if this architecture allows bitfields with declared types of different sizes
+ * to be packed together.
+ */
+ abstract predicate allowHeterogeneousBitfields();
+
+ /**
+ * Gets the bit size of class `cd.getBaseClass()` when used as a base class of
+ * class `cd.getDerivedClass()`.
+ */
+ abstract int baseClassSize(ClassDerivation cd);
+
+ /**
+ * Gets the bit size of type `t`. Only holds if `t` is an integral or enum type.
+ */
+ cached
+ int integralBitSize(Type t) {
+ t instanceof BoolType and result = 8
+ or
+ t instanceof CharType and result = 8
+ or
+ t instanceof WideCharType and result = wideCharSize()
+ or
+ t instanceof Char8Type and result = 8
+ or
+ t instanceof Char16Type and result = 16
+ or
+ t instanceof Char32Type and result = 32
+ or
+ t instanceof ShortType and result = 16
+ or
+ t instanceof IntType and result = 32
+ or
+ t instanceof LongType and result = longSize()
+ or
+ t instanceof LongLongType and result = longLongSize()
+ or
+ result = enumBitSize(t.(Enum))
+ or
+ result = integralBitSize(t.(SpecifiedType).getBaseType())
+ or
+ result = integralBitSize(t.(TypedefType).getBaseType())
+ }
+
+  /**
+   * Gets the bit size of enum `e`: the size of its explicit underlying type, or 32 without one.
+   */
+  int enumBitSize(Enum e) {
+    if exists(e.getExplicitUnderlyingType())
+    then result = integralBitSize(e.getExplicitUnderlyingType())
+    else result = 32
+  }
+
+  /**
+   * Gets the alignment of enum `e`: that of its explicit underlying type, or 32 without one.
+   */
+  int enumAlignment(Enum e) {
+    if exists(e.getExplicitUnderlyingType())
+    then result = alignment(e.getExplicitUnderlyingType())
+    else result = 32
+  }
+
+ /**
+ * Gets the bit size of type `t`; that is, the number of bits a value
+ * with type `t` takes up on this architecture, without any trailing
+ * padding on structs and unions.
+ */
+ cached
+ int bitSize(Type t) {
+ result = integralBitSize(t)
+ or
+ t instanceof FloatType and result = 32
+ or
+ t instanceof DoubleType and result = 64
+ or
+ t instanceof LongDoubleType and result = longDoubleSize()
+ or
+ t instanceof PointerType and result = pointerSize()
+ or
+ t instanceof ReferenceType and result = pointerSize()
+ or
+ t instanceof FunctionPointerType and result = pointerSize()
+ or
+ result = bitSize(t.(SpecifiedType).getBaseType())
+ or
+ result = bitSize(t.(TypedefType).getBaseType())
+ or
+ exists(ArrayType array | array = t |
+ result = array.getArraySize() * paddedSize(array.getBaseType())
+ )
+ or
+ result = t.(PaddedType).typeBitSize(this)
+ }
+
+ /**
+ * Gets the desired alignment boundary of type `t` as a struct field
+ * on this architecture, in bits.
+ */
+ cached
+ int alignment(Type t) {
+ t instanceof BoolType and result = 8
+ or
+ t instanceof CharType and result = 8
+ or
+ t instanceof WideCharType and result = wideCharSize()
+ or
+ t instanceof Char8Type and result = 8
+ or
+ t instanceof Char16Type and result = 16
+ or
+ t instanceof Char32Type and result = 32
+ or
+ t instanceof ShortType and result = 16
+ or
+ t instanceof IntType and result = 32
+ or
+ t instanceof FloatType and result = 32
+ or
+ t instanceof DoubleType and result = doubleAlign()
+ or
+ t instanceof LongType and result = longSize()
+ or
+ t instanceof LongDoubleType and result = longDoubleAlign()
+ or
+ t instanceof LongLongType and result = longLongAlign()
+ or
+ t instanceof PointerType and result = pointerSize()
+ or
+ t instanceof FunctionPointerType and result = pointerSize()
+ or
+ t instanceof ReferenceType and result = pointerSize()
+ or
+ result = enumAlignment(t.(Enum))
+ or
+ result = alignment(t.(SpecifiedType).getBaseType())
+ or
+ result = alignment(t.(TypedefType).getBaseType())
+ or
+ result = alignment(t.(ArrayType).getBaseType())
+ or
+ result = t.(PaddedType).typeAlignment(this)
+ }
+
+  /**
+   * Gets the size of type `t` on this architecture including trailing
+   * padding, in bits; this is the value that `sizeof` should report once
+   * specifiers have been stripped from `t`.
+   */
+  int paddedSize(Type t) {
+    exists(Type stripped | stripped = stripSpecifiers(t) |
+      result = stripped.(PaddedType).paddedSize(this)
+      or
+      not stripped instanceof PaddedType and result = bitSize(stripped)
+    )
+  }
+
+  /**
+   * Gets the number of bits of type `t` that are spent on padding, trailing
+   * padding included. Types that are not a `PaddedType` waste no space.
+   */
+  int wastedSpace(Type t) {
+    result = t.(PaddedType).wastedSpace(this)
+    or
+    not t instanceof PaddedType and result = 0
+  }
+}
+
+/**
+ * Gets an initial field of type `t`. If `t` is not a union, an initial field is
+ * either the first field declared in type `t`, or an initial field of the type
+ * of the first field declared in `t`. If `t` is a union, an initial field is
+ * either any field declared in type `t`, or an initial field of the type of any
+ * field declared in `t`.
+ */
+private Field getAnInitialField(PaddedType t) {
+ if t instanceof Union
+ then
+ // Any field of the union is an initial field
+ result = t.getAField()
+ or
+ // Initial field of the type of a field of the union
+ result = getAnInitialField(t.getAField().getUnspecifiedType().(PaddedType))
+ else
+ exists(Field firstField | t.fieldIndex(firstField) = 1 |
+ // The first field of `t`
+ result = firstField
+ or
+ // Initial field of the first field of `t`
+ result = getAnInitialField(firstField.getUnspecifiedType().(PaddedType))
+ )
+}
+
+/**
+ * Base class for architectures that follow the Itanium ABI. This includes
+ * pretty much everything except Windows, so we'll refer to this as
+ * "UnixArchitecture" to avoid any confusion due to the use of the name
+ * "Itanium".
+ */
+abstract class UnixArchitecture extends Architecture {
+ bindingset[this]
+ UnixArchitecture() { any() }
+
+ override int baseClassSize(ClassDerivation cd) {
+ if
+ not exists(cd.getBaseClass().getABaseClass*().getAField()) and
+ not exists(PaddedType fieldType |
+ fieldType = getAnInitialField(cd.getDerivedClass()).getUnspecifiedType() and
+ (
+ // Check if the type of the field is a base type of the class, or
+ // vice versa. This is an approximation of the actual rule, which is
+ // that the field type and the class must not share a common
+ // ancestor. This approximation should be sufficient for the vast
+ // majority of cases.
+ fieldType.getABaseClass*() = cd.getBaseClass() or
+ fieldType = cd.getBaseClass().getABaseClass*()
+ )
+ )
+ then
+ // No fields in this class or any base classes.
+ result = 0
+ else result = cd.getBaseClass().(PaddedType).paddedSize(this)
+ }
+
+ override int longLongSize() { result = 64 }
+
+ override int wideCharSize() { result = 32 }
+
+ override predicate allowHeterogeneousBitfields() { any() }
+}
+
+/**
+ * The ILP32 architecture has ints, longs and pointers
+ * of 32 bits.
+ */
+class ILP32 extends UnixArchitecture {
+ ILP32() { this = "ILP32" }
+
+ override int pointerSize() { result = 32 }
+
+ override int longSize() { result = 32 }
+
+ override int longDoubleSize() { result = 96 }
+
+ override int doubleAlign() { result = 32 }
+
+ override int longLongAlign() { result = 32 }
+
+ override int longDoubleAlign() { result = 32 }
+}
+
+/**
+ * The LP64 architecture has longs and pointers of 64 bits.
+ */
+class LP64 extends UnixArchitecture {
+ LP64() { this = "LP64" }
+
+ override int pointerSize() { result = 64 }
+
+ override int longSize() { result = 64 }
+
+ override int longDoubleSize() { result = 128 }
+
+ override int doubleAlign() { result = 64 }
+
+ override int longLongAlign() { result = 64 }
+
+ override int longDoubleAlign() { result = 128 }
+}
+
+/**
+ * Base class for Windows architectures.
+ */
+abstract class WindowsArchitecture extends Architecture {
+ bindingset[this]
+ WindowsArchitecture() { any() }
+
+ override int baseClassSize(ClassDerivation cd) {
+ if not exists(cd.getBaseClass().getABaseClass*().getAField())
+ then
+ // No fields in this class or any base classes.
+ result = 0
+ else result = cd.getBaseClass().(PaddedType).paddedSize(this)
+ }
+
+ override int longSize() { result = 32 }
+
+ override int longDoubleSize() { result = 64 }
+
+ override int longLongSize() { result = 64 }
+
+ override int wideCharSize() { result = 16 }
+
+ override int doubleAlign() { result = 64 }
+
+ override int longLongAlign() { result = 64 }
+
+ override int longDoubleAlign() { result = 64 }
+
+ override predicate allowHeterogeneousBitfields() { none() }
+}
+
+/**
+ * The ILP32_MS architecture has pointers of 32 bits; all other sizes and
+ * alignments (such as 64-bit long doubles) come from `WindowsArchitecture`.
+ */
+class ILP32_MS extends WindowsArchitecture {
+ ILP32_MS() { this = "ILP32_MS" }
+
+ override int pointerSize() { result = 32 }
+}
+
+/**
+ * The LLP64_MS architecture has pointers of 64 bits, but both
+ * long and int are still 32 bits.
+ */
+class LLP64_MS extends WindowsArchitecture {
+ LLP64_MS() { this = "LLP64_MS" }
+
+ override int pointerSize() { result = 64 }
+}
+
+/**
+ * A class that is subject to padding by the compiler, and hence can
+ * introduce waste. Does not include types with virtual member functions,
+ * virtual base classes, or multiple base classes. These are excluded due
+ * to the complexity of the implementation.
+ */
+class PaddedType extends Class {
+ PaddedType() {
+ // We can't talk about bit size of template types.
+ not this instanceof TemplateClass and
+ // If the class has any virtual functions, the layout will be more
+ // complicated due to the presence of a virtual function table pointer.
+ not exists(MemberFunction f | f = this.getAMemberFunction() and f.isVirtual()) and
+ not exists(ClassDerivation cd | cd = this.getADerivation() |
+ // If the class has any virtual base classes, the layout will be more
+ // complicated due to the presence of a virtual base table pointer.
+ cd.hasSpecifier("virtual")
+ or
+ // If one of the base classes was not a PaddedType, then we should not
+ // attempt to lay out the derived class, either.
+ not cd.getBaseClass() instanceof PaddedType
+ ) and
+ // Support only single inheritance for now. If multiple inheritance is
+ // supported, be sure to fix up the calls to getABaseClass*() to correctly
+ // handle the presence of multiple base class subobjects with the same type.
+ not exists(ClassDerivation cd | cd = this.getDerivation(1))
+ }
+
+ /**
+ * Holds if, for each architecture, a single padded size is
+ * calculated for this type.
+ * This is normally the case, but sometimes the same type can be
+ * defined in different compilations with different sizes, normally
+ * due to use of the preprocessor in its definition.
+ */
+ predicate isPrecise() { forex(Architecture arch | 1 = strictcount(arch.paddedSize(this))) }
+
+ /**
+ * Gets the padded size of this type on architecture `arch`, in bits.
+ * This is its `bitSize`, rounded up to the next multiple of its
+ * `alignment`.
+ */
+ int paddedSize(Architecture arch) {
+ // Struct padding is weird: It needs to be such that struct arrays
+ // can be allocated contiguously. That means that the trailing padding
+ // has to bring the alignment up to the smallest common multiple of
+ // the alignment values of all fields. In practice, since valid
+ // alignment values are 1, 2, 4, 8 and 16, this means "the largest
+ // alignment value".
+ // If the class is empty, the size is rounded up to one byte.
+ result = alignUp(arch.bitSize(this), arch.alignment(this)).maximum(8)
+ }
+
+ /**
+ * Gets the number of bits wasted by padding at the end of this
+ * struct.
+ */
+ int trailingPadding(Architecture arch) { result = paddedSize(arch) - arch.bitSize(this) }
+
+ /**
+ * Gets the number of bits wasted in this struct definition; that is,
+ * the waste between fields plus any waste from trailing padding.
+ * Only the space wasted directly in this type is counted, not space
+ * wasted in nested structs. Note that for unions, the wasted space
+ * is simply the amount of trailing padding, as other fields are not
+ * laid out one after another, and hence there is no padding between
+ * them.
+ */
+ int wastedSpace(Architecture arch) { result = arch.paddedSize(this) - dataSize(arch) }
+
+ /**
+ * Gets the total size of all fields declared in this class, not including any
+ * padding between fields.
+ */
+ private int fieldDataSize(Architecture arch) {
+ if this instanceof Union
+ then result = max(Field f | f = this.getAMember() | fieldSize(f, arch))
+ else result = sum(Field f | f = this.getAMember() | fieldSize(f, arch))
+ }
+
+ /**
+ * Gets the data size of this type on architecture `arch`; that is,
+ * the number of bits taken up by data, rather than any kind of
+ * padding. Padding of fields that have struct type is
+ * considered "data" for the purposes of this definition, since
+ * removing it requires reorganizing other parts of the code.
+ */
+ int dataSize(Architecture arch) {
+ result = sum(PaddedType c | c = this.getABaseClass*() | c.fieldDataSize(arch))
+ }
+
+ /**
+ * Gets the optimal size of this type on architecture `arch`, that is,
+ * the sum of the sizes of all fields, ignoring padding
+ * between them, but adding any trailing padding required to align
+ * the type properly. This is a lower bound on the actual size that
+ * can be achieved just by reordering fields, and without
+ * reorganizing member structs' field layouts.
+ */
+ int optimalSize(Architecture arch) {
+ result = alignUp(dataSize(arch), arch.alignment(this)).maximum(8)
+ }
+
+ /**
+ * Gets the bit size of this type on architecture `arch`, that is, the
+ * size its fields and required padding take up, without including
+ * any trailing padding that is necessary.
+ */
+ int typeBitSize(Architecture arch) {
+ if this instanceof Union
+ then
+ // A correct implementation for unions would be:
+ // ```
+ // result = max(fieldSize(_, arch))
+ // ```
+ // but that uses a recursive aggregate, which isn't supported in
+ // QL. We therefore use this slightly more complex implementation
+ // instead.
+ result = biggestFieldSizeUpTo(lastFieldIndex(), arch)
+ else
+ // If we're not a union type, the size is the end offset of the
+ // last field, which includes any padding between fields.
+ result = fieldEnd(lastFieldIndex(), arch)
+ }
+
+ /**
+ * Gets the alignment, in bits, of the entire struct/union type for
+ * architecture `arch`.
+ */
+ language[monotonicAggregates]
+ int typeAlignment(Architecture arch) {
+ // The alignment of the type is the largest alignment of any of its fields,
+ // including fields from base class subobjects.
+ result =
+ max(PaddedType c |
+ c = this.getABaseClass*()
+ |
+ c.biggestAlignmentUpTo(c.lastFieldIndex(), arch)
+ )
+ }
+
+ /**
+ * Gets the largest size, in bits, of the size of a field with
+ * (1-based) index less than or equal to `index` on architecture
+ * `arch`.
+ */
+ int biggestFieldSizeUpTo(int index, Architecture arch) {
+ if index = 0
+ then result = 0
+ else
+ exists(Field f, int fSize | index = fieldIndex(f) and fSize = fieldSize(f, arch) |
+ result = fSize.maximum(biggestFieldSizeUpTo(index - 1, arch))
+ )
+ }
+
+ /**
+ * Gets the largest alignment boundary, in bits, required by a field
+ * with (1-based) index less than or equal to `index` on architecture
+ * `arch`.
+ */
+ int biggestAlignmentUpTo(int index, Architecture arch) {
+ if index = 0
+ then result = 1 // Minimum possible alignment
+ else
+ exists(Field f, int fAlign | index = fieldIndex(f) and fAlign = arch.alignment(f.getType()) |
+ result = fAlign.maximum(biggestAlignmentUpTo(index - 1, arch))
+ )
+ }
+
+ /**
+ * Gets the 1-based index for each field.
+ */
+ int fieldIndex(Field f) {
+ memberIndex(f) = rank[result](Field field, int index | memberIndex(field) = index | index)
+ }
+
+ private int memberIndex(Field f) { result = min(int i | getCanonicalMember(i) = f) }
+
+  /**
+   * Gets the 1-based index of the last field of this type, or 0 if the type
+   * has no last field.
+   */
+  int lastFieldIndex() {
+    result = fieldIndex(lastField())
+    or
+    // Field indices start at 1, so 0 signals that there are no fields.
+    not exists(lastField()) and result = 0
+  }
+
+  /** Gets the size, in bits, of field `f` on architecture `arch`. */
+  int fieldSize(Field f, Architecture arch) {
+    exists(fieldIndex(f)) and
+    (
+      // A bitfield takes exactly its declared number of bits.
+      result = f.(BitField).getNumBits()
+      or
+      not f instanceof BitField and result = arch.paddedSize(f.getType())
+    )
+  }
+
+ /** Gets the last field of this type. */
+ Field lastField() { fieldIndex(result) = max(Field other | | fieldIndex(other)) }
+
+  /** Gets the bit offset at which this class' base class subobject ends. */
+  int baseClassEnd(Architecture arch) {
+    exists(getABaseClass()) and result = arch.baseClassSize(getADerivation())
+    or
+    // Without base classes the result is zero.
+    not exists(getABaseClass()) and result = 0
+  }
+
+ /** Gets the bitfield at field index `index`, if that field is a bitfield. */
+ private BitField bitFieldAt(int index) { fieldIndex(result) = index }
+
+ /**
+ * Gets the 0-based offset, in bits, of the first free bit after
+ * field `f` (which is the `index`th field of
+ * this type), taking padding into account, on architecture `arch`.
+ */
+ int fieldEnd(int index, Architecture arch) {
+ if index = 0
+ then
+ // Base case: No fields seen yet, so return the offset of the end of the
+ // base class subobjects.
+ result = baseClassEnd(arch)
+ else
+ exists(Field f | index = fieldIndex(f) |
+ exists(int fSize | fSize = fieldSize(f, arch) |
+ // Recursive case: Take previous field's end point, pad and add
+ // this field's size
+ exists(int firstFree | firstFree = fieldEnd(index - 1, arch) |
+ if f instanceof BitField
+ then
+ // Bitfield packing:
+ // (1) A struct containing a bitfield with declared type T (e.g. T bf : 7) will be aligned as if it
+ // contained an actual field of type T. Thus, a struct containing a bitfield 'unsigned int bf : 8'
+ // will have an alignment of at least alignof(unsigned int), even though the bitfield was only 8 bits.
+ // (2) If a bitfield with declared type T would straddle a sizeof(T) boundary, padding is inserted
+ // before the bitfield to align it on an alignof(T) boundary. Note the subtle distinction between alignof
+ // and sizeof. This matters for 32-bit Linux, where sizeof(long long) == 8, but alignof(long long) == 4.
+ // (3) [MSVC only!] If a bitfield with declared type T immediately follows another bitfield with declared type P,
+ // and sizeof(P) != sizeof(T), padding will be inserted to align the new bitfield to a boundary of
+ // max(alignof(P), alignof(T)).
+ exists(int nextSizeofBoundary, int nextAlignofBoundary |
+ nextSizeofBoundary = alignUp(firstFree, arch.bitSize(f.getType())) and
+ nextAlignofBoundary = alignUp(firstFree, arch.alignment(f.getType()))
+ |
+ if arch.allowHeterogeneousBitfields()
+ then (
+ if nextSizeofBoundary < (firstFree + fSize)
+ then
+ // Straddles a sizeof(T) boundary, so pad for alignment.
+ result = nextAlignofBoundary + fSize
+ else
+ // No additional restrictions, so just pack it in with no padding.
+ result = firstFree + fSize
+ ) else (
+ if exists(bitFieldAt(index - 1))
+ then
+ exists(BitField previousBitField | previousBitField = bitFieldAt(index - 1) |
+ // Previous field was a bitfield.
+ if
+ nextSizeofBoundary >= (firstFree + fSize) and
+ arch.integralBitSize(previousBitField.getType()) =
+ arch.integralBitSize(f.getType())
+ then
+ // The new bitfield can be stored in the same allocation unit as the previous one,
+ // so we can avoid padding.
+ result = firstFree + fSize
+ else
+ // Either we switched types, or we would overlap a sizeof(T) boundary, so we have to insert padding.
+ // Note that we have to align to max(alignof(T), alignof(P)), where P is the type of the previous
+ // bitfield. Without the alignof(P) term, we'll get the wrong layout for:
+ // struct S {
+ // unsigned int x : 7;
+ // unsigned short y : 1;
+ // };
+ // If we only aligned to sizeof(T), we'd align 'y' to a 2-byte boundary. This is incorrect. The allocation
+ // unit that started with 'x' has to consume an entire unsigned int (4 bytes).
+ result =
+ max(int boundary |
+ boundary = nextAlignofBoundary or
+ boundary =
+ alignUp(firstFree, arch.alignment(previousBitField.getType()))
+ |
+ boundary
+ ) + fSize
+ )
+ else
+ // Previous field was not a bitfield. Align up to a sizeof(T) boundary.
+ // NOTE(review): confirm sizeof(T), not alignof(T), is the intended boundary here.
+ result = nextSizeofBoundary + fSize
+ )
+ )
+ else
+ // Normal case: Pad as necessary, then add the field.
+ result = alignUp(firstFree, arch.alignment(f.getType())) + fSize
+ )
+ )
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/pointsto/CallGraph.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/pointsto/CallGraph.qll
new file mode 100644
index 00000000000..b6b8fd6e60e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/pointsto/CallGraph.qll
@@ -0,0 +1,77 @@
+import cpp
+import semmle.code.cpp.pointsto.PointsTo
+
+// A TargetPointsToExpr is an expression whose points-to information
+// helps build an accurate call graph: the callee expression of a
+// function-pointer call, the qualifier of a call, or a `delete` operand.
+class TargetPointsToExpr extends PointsToExpr {
+ override predicate interesting() {
+ exists(ExprCall ec | ec.getExpr() = this)
+ or
+ exists(Call c | c.getQualifier() = this)
+ or
+ exists(DeleteExpr d | d.getExpr() = this)
+ }
+
+ // Gets a virtual function that is an element of the points-to set given by `resolveToSet()`.
+ VirtualFunction resolve() { pointstosets(this.resolveToSet(), unresolveElement(result)) }
+
+ int resolveToSet() {
+ exists(int cset, VirtualFunction static |
+ this.interesting() and
+ parentSetFor(cset, underlyingElement(this)) and
+ static = this.staticTarget() and
+ childrenByElement(cset, static, result)
+ )
+ }
+
+ VirtualFunction staticTarget() {
+ exists(Function f, DeleteExpr d |
+ f.calls(result, d) and
+ d.getExpr() = this
+ )
+ or
+ exists(Function f, FunctionCall c |
+ f.calls(result, c) and
+ c.getQualifier() = this
+ )
+ }
+}
+
+predicate resolvedCall(Call call, Function called) {
+ call.(FunctionCall).getTarget() = called
+ or
+ call.(DestructorCall).getTarget() = called
+ or
+ exists(ExprCall ec, TargetPointsToExpr pte |
+ ec = call and ec.getExpr() = pte and pte.pointsTo() = called
+ )
+ or
+ exists(TargetPointsToExpr pte |
+ call.getQualifier() = pte and
+ pte.resolve() = called
+ )
+}
+
+predicate ptrCalls(Function f, Function g) {
+ exists(ExprCall ec |
+ ec.getEnclosingFunction() = f and
+ ec.getExpr().(TargetPointsToExpr).pointsTo() = g
+ )
+}
+
+predicate virtualCalls(Function f, VirtualFunction g) {
+ exists(DeleteExpr d, TargetPointsToExpr ptexpr, VirtualFunction static |
+ f.calls(static, d) and
+ d.getExpr() = ptexpr and
+ ptexpr.resolve() = g
+ )
+ or
+ exists(Call c, TargetPointsToExpr ptexpr, VirtualFunction static |
+ f.calls(static, c) and
+ c.getQualifier() = ptexpr and
+ ptexpr.resolve() = g
+ )
+}
+
+predicate allCalls(Function f, Function g) { f.calls(g) or ptrCalls(f, g) or virtualCalls(f, g) }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/pointsto/PointsTo.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/pointsto/PointsTo.qll
new file mode 100644
index 00000000000..a9802fad4b7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/pointsto/PointsTo.qll
@@ -0,0 +1,767 @@
+/**
+ * Provides classes and predicates implementing a points-to analysis
+ * based on Steensgaard's algorithm, extended to support fields.
+ *
+ * A pointer set can be represented in one of two ways: an expression, or
+ * the combination of an expression and a label. In the former case,
+ * the expression represents the values the expression might evaluate to.
+ * In the latter case, the (expr, label) pair is called a "compound", and it
+ * represents a field of the value with the name of the given label. The label
+ * can be either a string or another element.
+ *
+ * The various "flow" predicates (`flow`, `flowToCompound`, etc.) represent
+ * direct flow from a source set to a destination set. The various "pointer"
+ * predicates (`pointer`, `pointerFromCompound`, etc.) indicate that one set
+ * contains values pointing to the locations represented by the other set.
+ *
+ * The individual flow and pointer predicates only hold tuples describing
+ * one step of flow; they do not include transitive closures. The
+ * `pointstoinfo` predicate determines the transitively implied points-to
+ * information by collapsing pointers into equivalence classes. These
+ * equivalence classes are called "points-to sets".
+ */
+
+import semmle.code.cpp.commons.File
+import semmle.code.cpp.exprs.Expr
+
+/**
+ * Holds if `actual` is the override of `resolved` for a value of type
+ * `dynamic`.
+ */
+predicate resolve(Class dynamic, VirtualFunction resolved, VirtualFunction actual) {
+ if resolved.getAnOverridingFunction*().getDeclaringType() = dynamic
+ then
+ actual = resolved.getAnOverridingFunction*() and
+ dynamic = actual.getDeclaringType()
+ else resolve(dynamic.getABaseClass(), resolved, actual)
+}
+
+/**
+ * Holds if `e` is evaluated just for its location. This includes
+ * expressions that are used in a reference expression (`&foo`),
+ * expressions that are used on the left side of an assignment,
+ * and some non-expression types such as `Initializer`.
+ *
+ * For expressions, this is similar to, but different than,
+ * `e.(Expr).isLValue()`, which holds if `e` *has* an address.
+ *
+ * This relation pervasively influences the interpretation of
+ * expressions throughout this module. An element evaluated for its
+ * lvalue is treated as evaluating to its location, not its value.
+ */
+predicate lvalue(Element e) {
+ exists(AssignExpr assign | assign.getLValue().getFullyConverted() = e)
+ or
+ exists(AddressOfExpr addof | e = addof.getOperand().getFullyConverted())
+ or
+ exists(FieldAccess fa |
+ fa.getQualifier().getFullyConverted() = e and
+ not pointerValue(e)
+ )
+ or
+ exists(Call c |
+ c.getQualifier().getFullyConverted() = e and
+ not pointerValue(e)
+ )
+ or
+ e.(Expr).getConversion() instanceof ArrayToPointerConversion
+ or
+ exists(ParenthesisExpr paren |
+ exprconv(unresolveElement(e), unresolveElement(paren)) and lvalue(paren)
+ )
+ or
+ exists(Cast c | lvalue(c) and e.(Expr).getConversion() = c)
+ or
+ exists(ReferenceToExpr toref | e.(Expr).getConversion() = toref)
+ or
+ // If f is a function-pointer, then the following two
+ // calls are equivalent: f() and (*f)()
+ exists(PointerDereferenceExpr deref |
+ e = deref and
+ deref.getUnderlyingType() instanceof FunctionPointerType
+ )
+ or
+ exists(Variable v |
+ e = v.getInitializer() and
+ v.getType().getUnderlyingType() instanceof Struct
+ )
+ or
+ exists(Variable v |
+ e = v.getInitializer() and
+ v.getType().getUnderlyingType() instanceof ArrayType
+ )
+ or
+ e instanceof AggregateLiteral
+}
+
+/**
+ * Gets an access for the value of `p` on line `line`.
+ */
+private VariableAccess param_rvalue_access_line(Parameter p, int line) {
+ p.getAnAccess() = result and
+ not lvalue(result) and
+ result.getLocation().getStartLine() = line
+}
+
+/**
+ * Gets an access for the value of `p`.
+ *
+ * The choice is arbitrary, and it doesn't matter if this returns more
+ * than one access, but we try to have few results to cut down the
+ * number of flow edges.
+ */
+private VariableAccess pick_rvalue_access(Parameter p) {
+ result = min(int line | | param_rvalue_access_line(p, line) order by line)
+}
+
+/**
+ * Holds if there is an access for the value of `p`.
+ *
+ * Usually we can just add a flow edge from a function argument to a
+ * value access of the corresponding parameter. If all accesses to the
+ * parameter are lvalues, however, we have to add a pointer edge from
+ * the parameter to the function argument. This is less precise, because
+ * it can equate more points-to sets.
+ */
+private predicate has_rvalue_access(Parameter p) {
+ exists(VariableAccess a | a = p.getAnAccess() | not lvalue(a))
+}
+
+/**
+ * Holds if `e` has a pointer type.
+ */
+predicate pointerValue(Expr e) {
+ exists(Type t |
+ t = e.getType().getUnderlyingType() and
+ (
+ t instanceof PointerType or
+ t instanceof ArrayType or
+ t instanceof ReferenceType
+ )
+ )
+}
+
+private predicate pointerEntity(@element src, @element dest) {
+ pointer(mkElement(src), mkElement(dest))
+}
+
+/**
+ * The source is a pointer to the destination.
+ */
+predicate pointer(Element src, Element dest) {
+ exists(Variable v |
+ not lvalue(dest) and
+ src = v and
+ (dest = v.getAnAccess() or dest = v.getInitializer())
+ )
+ or
+ exists(AssignExpr assign |
+ not lvalue(assign) and
+ src = assign.getLValue().getFullyConverted() and
+ dest = assign
+ )
+ or
+ exists(AssignExpr assign |
+ src = assign.getLValue().getFullyConverted() and
+ dest = assign.getRValue().getFullyConverted()
+ )
+ or
+ exists(FunctionCall c, Function f, Parameter p, int i |
+ p = f.getParameter(i) and
+ not has_rvalue_access(p) and
+ dest = c.getArgument(i).getFullyConverted() and
+ not f.isVirtual() and
+ src = p.getAnAccess() and
+ c.getTarget() = f
+ )
+ or
+ exists(PointerDereferenceExpr deref |
+ not lvalue(deref) and
+ src = deref.getOperand().getFullyConverted() and
+ dest = deref
+ )
+ or
+ exists(ArrayExpr ae |
+ not lvalue(dest) and
+ dest = ae and
+ src = ae.getArrayBase().getFullyConverted() and
+ pointerValue(src)
+ )
+ or
+ exists(ArrayExpr ae |
+ not lvalue(dest) and
+ dest = ae and
+ src = ae.getArrayOffset().getFullyConverted() and
+ pointerValue(src)
+ )
+ or
+ exists(ReferenceDereferenceExpr deref |
+ not lvalue(deref) and
+ dest = deref and
+ exprconv(unresolveElement(src), unresolveElement(deref))
+ )
+ or
+ exists(AggregateLiteral agg |
+ not lvalue(dest) and
+ agg.getType().getUnderlyingType() instanceof ArrayType and
+ src = agg and
+ dest = agg.getAChild().getFullyConverted()
+ )
+ or
+ // field points to constructor field initializer
+ exists(ConstructorFieldInit cfi |
+ dest = cfi and
+ src = cfi.getTarget() and
+ not lvalue(dest)
+ )
+ //
+ // add more cases here
+ //
+}
+
+private predicate flowEntity(@element src, @element dest) { flow(mkElement(src), mkElement(dest)) }
+
+/**
+ * The value held in the source flows to the value held in the destination.
+ */
+predicate flow(Element src, Element dest) {
+ exists(Variable v |
+ lvalue(dest) and
+ src = v and
+ (dest = v.getAnAccess() or dest = v.getInitializer())
+ )
+ or
+ exists(FunctionAccess fa | src = fa.getTarget() and dest = fa)
+ or
+ exists(AssignExpr assign |
+ lvalue(assign) and
+ src = assign.getLValue().getFullyConverted() and
+ dest = assign
+ )
+ or
+ exists(AddressOfExpr addof |
+ dest = addof and
+ src = addof.getOperand().getFullyConverted()
+ )
+ or
+ exists(FunctionCall c, Function f, int i |
+ not lvalue(dest) and
+ src = c.getArgument(i).getFullyConverted() and
+ not f.isVirtual() and
+ dest = pick_rvalue_access(f.getParameter(i)) and
+ c.getTarget() = f
+ )
+ or
+ exists(FunctionCall c, Function f, int i |
+ src = c.getArgument(i).getFullyConverted() and
+ not f.isVirtual() and
+ c.getTarget() = f and
+ i >= f.getNumberOfParameters() and
+ varArgRead(f, dest)
+ )
+ or
+ exists(FunctionCall c, Function f, ReturnStmt r |
+ c.getTarget() = f and
+ not f.isVirtual() and
+ r.getEnclosingFunction() = f and
+ src = r.getExpr().getFullyConverted() and
+ dest = c
+ )
+ or
+ exists(PointerDereferenceExpr deref |
+ lvalue(deref) and
+ src = deref.getAChild().getFullyConverted() and
+ dest = deref
+ )
+ or
+ exists(Variable v |
+ dest = v.getInitializer() and
+ src = v.getInitializer().getExpr().getFullyConverted()
+ )
+ or
+ exists(ArrayExpr ae |
+ lvalue(dest) and
+ dest = ae and
+ src = ae.getArrayBase().getFullyConverted() and
+ pointerValue(src)
+ )
+ or
+ exists(ArrayExpr ae |
+ lvalue(dest) and
+ dest = ae and
+ src = ae.getArrayOffset().getFullyConverted() and
+ pointerValue(src)
+ )
+ or
+ exists(Expr arg, BinaryArithmeticOperation binop |
+ dest = binop and
+ src = arg and
+ pointerValue(binop) and
+ pointerValue(arg) and
+ (
+ arg = binop.getLeftOperand().getFullyConverted() or
+ arg = binop.getRightOperand().getFullyConverted()
+ )
+ )
+ or
+ exists(Cast c | src = c.getExpr() and dest = c)
+ or
+ exists(ReferenceToExpr toref |
+ exprconv(unresolveElement(src), unresolveElement(toref)) and dest = toref
+ )
+ or
+ exists(ReferenceDereferenceExpr deref |
+ lvalue(deref) and
+ dest = deref and
+ exprconv(unresolveElement(src), unresolveElement(deref))
+ )
+ or
+ exists(ArrayToPointerConversion conv |
+ exprconv(unresolveElement(src), unresolveElement(conv)) and dest = conv
+ )
+ or
+ exists(ParenthesisExpr paren |
+ // these can appear on the LHS of an assignment
+ exprconv(unresolveElement(src), unresolveElement(paren)) and dest = paren
+ or
+ exprconv(unresolveElement(dest), unresolveElement(paren)) and src = paren
+ )
+ or
+ exists(ConditionalExpr cond |
+ dest = cond and
+ (
+ src = cond.getThen().getFullyConverted() or
+ src = cond.getElse().getFullyConverted()
+ )
+ )
+ or
+ exists(IncrementOperation inc |
+ dest = inc and
+ src = inc.getOperand().getFullyConverted()
+ )
+ or
+ exists(DecrementOperation dec |
+ dest = dec and
+ src = dec.getOperand().getFullyConverted()
+ )
+ or
+ exists(CommaExpr comma |
+ dest = comma and
+ src = comma.getRightOperand().getFullyConverted()
+ )
+ or
+ exists(ParenthesisExpr paren |
+ dest = paren and exprconv(unresolveElement(src), unresolveElement(paren))
+ )
+ or
+ // "vtable" for new-expressions
+ exists(NewExpr new | src = new and dest = new.getAllocatedType())
+ or
+ // "vtable" for class-typed variables
+ exists(Variable v, Class c | v.getType().getUnderlyingType() = c and src = v and dest = c)
+ or
+ exists(AggregateLiteral agg |
+ lvalue(dest) and
+ agg.getType().getUnderlyingType() instanceof ArrayType and
+ src = agg and
+ dest = agg.getAChild().getFullyConverted()
+ )
+ or
+ // contained expr -> constructor field initializer
+ exists(ConstructorFieldInit cfi |
+ src = cfi.getExpr().getFullyConverted() and
+ dest = cfi
+ )
+ //
+ // add more cases here
+ //
+}
+
+// Try to find the expression corresponding to the return value
+// of va_arg(...,...) - which is a macro.
+predicate varArgRead(Function f, Expr e) {
+ exists(Macro m, MacroInvocation mi |
+ m.getHead().matches("va\\_arg(%") and
+ mi.getMacro() = m and
+ e = mi.getAGeneratedElement() and
+ not e.getParent() = mi.getAGeneratedElement() and
+ e.getEnclosingFunction() = f
+ )
+}
+
+/**
+ * There is a flow from src to the compound (destParent, destLabel).
+ */
+predicate flowToCompound(Element destParent, string destLabel, Element src) {
+ exists(ExprCall call, int i |
+ src = call.getArgument(i).getFullyConverted() and
+ destParent = call.getExpr().getFullyConverted() and
+ if i < call.getType().(FunctionPointerType).getNumberOfParameters()
+ then destLabel = "+arg" + i.toString()
+ else destLabel = "+vararg"
+ )
+ or
+ exists(Function f, ReturnStmt ret |
+ f = ret.getEnclosingFunction() and
+ src = ret.getExpr().getFullyConverted() and
+ destLabel = "+ret" and
+ destParent = f
+ )
+ or
+ exists(AggregateLiteral agg, Struct s, int i |
+ destParent = agg and
+ lvalue(src) and
+ aggregateLiteralChild(agg, s, i, src) and
+ destLabel = s.getCanonicalMember(i).getName()
+ )
+ or
+ exists(FunctionCall c, Function f |
+ c.getTarget() = f and
+ not f.isVirtual() and
+ src = c.getQualifier().getFullyConverted() and
+ destParent = f and
+ destLabel = "+this"
+ )
+ or
+ exists(ConstructorCall c, Function f, Variable v |
+ c.getTarget() = f and
+ not f.isVirtual() and
+ v.getAnAssignedValue() = c and
+ src = v and
+ destParent = f and
+ destLabel = "+this"
+ )
+ or
+ exists(NewExpr ne, ConstructorCall c, Function f |
+ c.getTarget() = f and
+ not f.isVirtual() and
+ ne.getInitializer() = c and
+ src = ne and
+ destParent = f and
+ destLabel = "+this"
+ )
+ // in C, &s == &s.firstfield
+ // exists(FieldAccess fa, Field f |
+ // parent = fa.getQualifier().getFullyConverted() and src = parent and
+ // f = fa.getTarget() and not exists(f.previous()) and
+ // label = f.getName()
+ // )
+ //
+ // add more cases here
+ //
+}
+
+/**
+ * There is a flow from the compound (parent, label) to dest.
+ */
+predicate flowFromCompound(Element parent, string label, Element dest) {
+ exists(ExprCall call |
+ dest = call and label = "+ret" and parent = call.getExpr().getFullyConverted()
+ )
+ or
+ exists(Function f, int i |
+ dest = f.getParameter(i).getAnAccess() and
+ label = "+arg" + i.toString() and
+ parent = f
+ )
+ or
+ exists(Function f | parent = f and label = "+vararg" and varArgRead(f, dest))
+ or
+ exists(FieldAccess fa |
+ dest = fa and
+ parent = fa.getQualifier().getFullyConverted() and
+ label = fa.getTarget().getName() and
+ lvalue(dest)
+ )
+ or
+ exists(ThisExpr thisexpr |
+ dest = thisexpr and
+ label = "+this" and
+ parent = thisexpr.getEnclosingFunction()
+ )
+ //
+ // add more cases here
+ //
+}
+
+/**
+ * The values stored in src point to the compounds (destParent, destLabel).
+ */
+predicate pointerToCompound(Element destParent, string destLabel, Element src) {
+ none()
+ //
+ // add more cases here
+ //
+}
+
+/**
+ * The type of agg is s, and the expression initializing the ith member
+ * of s is child.
+ */
+pragma[noopt]
+predicate aggregateLiteralChild(AggregateLiteral agg, Struct s, int i, Expr child) {
+ // Single-step form of `s = agg.getType().getUnderlyingType()`, written out under `pragma[noopt]`.
+ exists(Type t |
+ t = agg.getType() and
+ agg instanceof AggregateLiteral and
+ s = t.getUnderlyingType() and
+ s instanceof Struct
+ ) and
+ exists(Expr beforeConversion |
+ beforeConversion = agg.getChild(i) and
+ child = beforeConversion.getFullyConverted()
+ )
+}
+
+/**
+ * The compound (parent, label) holds pointers to dest.
+ */
+predicate pointerFromCompound(Element parent, string label, Element dest) {
+ exists(FieldAccess fa |
+ dest = fa and
+ parent = fa.getQualifier().getFullyConverted() and
+ label = fa.getTarget().getName() and
+ not lvalue(dest)
+ )
+ or
+ exists(AggregateLiteral agg, Struct s, int i |
+ parent = agg and
+ not lvalue(dest) and
+ aggregateLiteralChild(agg, s, i, dest) and
+ label = s.getCanonicalMember(i).getName()
+ )
+ //
+ // add more cases here
+ //
+}
+
+predicate virtualArg(Expr receiver, VirtualFunction called, string arglabel, Expr arg) {
+ exists(FunctionCall c, int i |
+ receiver = c.getQualifier().getFullyConverted() and
+ called = c.getTarget() and
+ called.isVirtual() and
+ arg = c.getArgument(i) and
+ i >= 0 and
+ if i < called.getNumberOfParameters()
+ then arglabel = "+arg" + i.toString()
+ else arglabel = "+vararg"
+ )
+}
+
+predicate virtualThis(Expr receiver, VirtualFunction called, string thislabel, Expr thisexpr) {
+ exists(FunctionCall c |
+ receiver = c.getQualifier().getFullyConverted() and
+ called = c.getTarget() and
+ thislabel = "+this" and
+ called.isVirtual() and
+ thisexpr = receiver
+ )
+}
+
+predicate virtualRet(Expr receiver, VirtualFunction called, string retlabel, FunctionCall c) {
+ receiver = c.getQualifier().getFullyConverted() and
+ called = c.getTarget() and
+ called.isVirtual() and
+ retlabel = "+ret"
+}
+
+private predicate compoundEdgeEntity(
+ @element parent, @element element, string label, @element other, int kind
+) {
+ compoundEdge(mkElement(parent), mkElement(element), label, mkElement(other), kind)
+}
+
+/**
+ * This relation combines all pointer and flow relations that
+ * go to or from a compound set.
+ *
+ * The "kind" of each tuple determines what relation the other four elements
+ * indicate. Only kinds 0-3, 5, 8 and 9 are produced here; the others are assumed by analogy (TODO confirm):
+ *
+ * 0 - flow from (parent, label) to other
+ * 1 - flow from other to (parent, label)
+ * 2 - pointer from (parent, label) to other
+ * 3 - pointer from other to (parent, label)
+ *
+ * 4 - flow from (parent, element) to other
+ * 5 - flow from other to (parent, element)
+ * 6 - flow from (parent, element) to other
+ * 7 - flow from other to (parent, element)
+ *
+ * 8 - flow from ((parent, element), label) to other
+ * 9 - flow from other to ((parent, element), label)
+ * 10 - pointer from ((parent, element), label) to other
+ * 11 - pointer from other to ((parent, element), label)
+ */
+predicate compoundEdge(Element parent, Element element, string label, Element other, int kind) {
+ flowFromCompound(parent, label, other) and element = parent and kind = 0
+ or
+ flowToCompound(parent, label, other) and element = parent and kind = 1
+ or
+ pointerFromCompound(parent, label, other) and element = parent and kind = 2
+ or
+ pointerToCompound(parent, label, other) and element = parent and kind = 3
+ or
+ resolve(parent, element, other) and label = "" and kind = 5
+ or
+ virtualRet(parent, element, label, other) and kind = 8
+ or
+ virtualArg(parent, element, label, other) and kind = 9
+ or
+ virtualThis(parent, element, label, other) and kind = 9
+}
+
+/**
+ * A summary of the points-to information for the program, computed by
+ * collapsing the various flow and pointer relations using the Java
+ * class PointsToCalculator. This relation combines several kinds of information;
+ * the different kinds are filtered out by several relations further
+ * in the file: pointstosets, setflow, children, childrenByElement,
+ * parentSetFor.
+ *
+ * The information represented by each tuple in the relation depends on
+ * the "label" element.
+ *
+ * If the label is the empty string, then the tuple describes membership of
+ * element "elem" in points-to set "ptset", and that children of the element
+ * are children of set "parent".
+ *
+ * If the label is "--flow--", then the tuple describes flow from the "parent"
+ * points-to set to the "ptset" points-to set.
+ *
+ * If the label is "--element--", then the tuple declares that the set "ptset" is
+ * a child of "parent", where the label of the child is "elem".
+ *
+ * In any other case, the tuple declares that set "ptset" is a child of
+ * "parent", where the label is "label".
+ */
+cached
+predicate pointstoinfo(int parent, @element elem, string label, int ptset) =
+ collapse(flowEntity/2, pointerEntity/2, compoundEdgeEntity/5, locationEntity/1)(parent, elem,
+ label, ptset)
+
+/**
+ * Which elements are in which points-to sets.
+ */
+cached
+predicate pointstosets(int ptset, @element elem) { pointstoinfo(_, elem, "", ptset) }
+
+/**
+ * The points-to set src flows to the points-to set dest.
+ * This relation is not transitively closed.
+ */
+predicate setflow(int src, int dest) { pointstoinfo(src, _, "--flow--", dest) }
+
+/**
+ * The points-to set parentset, when dereferenced using the
+ * given label, gives values in the points-to set childset.
+ */
+predicate children(int parentset, string label, int childset) {
+ pointstoinfo(parentset, _, label, childset) and
+ label != "" and
+ label != "--element--" and
+ label != "--flow--"
+}
+
+/**
+ * The same as children(), except that the label is an element.
+ */
+predicate childrenByElement(int parentset, Element label, int childset) {
+ pointstoinfo(parentset, unresolveElement(label), "--element--", childset)
+}
+
+/**
+ * The ID of the parent set for the given expression. Children
+ * of the given element should be looked up with children() and
+ * childrenByElement() using this ID.
+ */
+pragma[noopt]
+predicate parentSetFor(int cset, @element expr) {
+ exists(string s | s = "" and pointstoinfo(cset, expr, s, _))
+}
+
+private predicate locationEntity(@element location) { location(mkElement(location)) }
+
+/**
+ * Things that are elements of points-to sets.
+ */
+predicate location(Element location) {
+ location instanceof Variable or
+ location instanceof Function or
+ isAllocationExpr(location) or
+ fopenCall(location) or
+ allocateDescriptorCall(location)
+}
+
+/**
+ * A call to the Unix system function socket(2).
+ */
+predicate allocateDescriptorCall(FunctionCall fc) {
+ exists(string name |
+ name = "socket" and
+ fc.getTarget().hasGlobalName(name)
+ )
+}
+
+/**
+ * A points-to set that contains at least one interesting element, or
+ * flows to one that does.
+ */
+private int interestingSet() {
+ exists(PointsToExpr e |
+ e.interesting() and
+ pointstosets(result, unresolveElement(e))
+ )
+ or
+ setflow(result, interestingSet())
+}
+
+/**
+ * The elements that are either in the given points-to set, or
+ * which flow into it from another set. The results are restricted
+ * to sets which are interesting.
+ */
+cached
+predicate setlocations(int set, @element location) {
+ set = interestingSet() and
+ (
+ location(mkElement(location)) and pointstosets(set, location)
+ or
+ exists(int middle | setlocations(middle, location) and setflow(middle, set))
+ )
+}
+
+class PointsToExpr extends Expr {
+ /**
+ * This predicate is empty by default. It should be overridden and defined to
+ * include just those expressions for which points-to information is desired.
+ */
+ predicate interesting() { none() }
+
+ pragma[noopt]
+ Element pointsTo() {
+ this.interesting() and
+ exists(int set, @element thisEntity, @element resultEntity |
+ thisEntity = underlyingElement(this) and
+ pointstosets(set, thisEntity) and
+ setlocations(set, resultEntity) and
+ resultEntity = localUnresolveElement(result)
+ )
+ }
+
+ float confidence() { result = 1.0 / count(this.pointsTo()) }
+}
+
+// This is used above in a `pragma[noopt]` context, which prevents its
+// customary inlining. We materialise it explicitly here.
+private @element localUnresolveElement(Element e) { result = unresolveElement(e) }
+
+/**
+ * Holds if anything points to an element, that is, is equivalent to:
+ * ```
+ * exists(PointsToExpr e | e.pointsTo() = elem)
+ * ```
+ */
+predicate anythingPointsTo(Element elem) {
+ location(elem) and pointstosets(interestingSet(), unresolveElement(elem))
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/NanAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/NanAnalysis.qll
new file mode 100644
index 00000000000..d3042a4edc9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/NanAnalysis.qll
@@ -0,0 +1,44 @@
+/**
+ * Provides classes and predicates for recognizing floating point expressions which cannot be NaN.
+ */
+
+import cpp
+private import semmle.code.cpp.rangeanalysis.RangeSSA
+
+/**
+ * Holds if `guard` won't return the value `polarity` when either
+ * operand is NaN.
+ */
+predicate nanExcludingComparison(ComparisonOperation guard, boolean polarity) {
+ polarity = true and
+ (
+ guard instanceof LTExpr or
+ guard instanceof LEExpr or
+ guard instanceof GTExpr or
+ guard instanceof GEExpr or
+ guard instanceof EQExpr
+ )
+ or
+ polarity = false and
+ guard instanceof NEExpr
+}
+
+/**
+ * Holds if `v` is a use of an SSA definition in `def` which cannot be NaN,
+ * by virtue of the guard in `def`.
+ */
+private predicate excludesNan(RangeSsaDefinition def, VariableAccess v) {
+ exists(VariableAccess inCond, ComparisonOperation guard, boolean branch, StackVariable lsv |
+ def.isGuardPhi(lsv, inCond, guard, branch) and
+ v = def.getAUse(lsv) and
+ guard.getAnOperand() = inCond and
+ nanExcludingComparison(guard, branch)
+ )
+}
+
+/**
+ * A variable access which cannot be NaN.
+ */
+class NonNanVariableAccess extends VariableAccess {
+ NonNanVariableAccess() { excludesNan(_, this) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/PointlessComparison.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/PointlessComparison.qll
new file mode 100644
index 00000000000..47289c7552b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/PointlessComparison.qll
@@ -0,0 +1,174 @@
+/**
+ * Provides utility predicates used by the pointless comparison queries.
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+/** Gets the lower bound of the fully converted expression. */
+private float lowerBoundFC(Expr expr) { result = lowerBound(expr.getFullyConverted()) }
+
+/** Gets the upper bound of the fully converted expression. */
+private float upperBoundFC(Expr expr) { result = upperBound(expr.getFullyConverted()) }
+
+/**
+ * Describes which side of a pointless comparison is known to be smaller.
+ */
+newtype SmallSide =
+ /**
+ * Represents that the left side of a pointless comparison is known to be smaller.
+ */
+ LeftIsSmaller() or
+ /**
+ * Represents that the right side of a pointless comparison is known to be smaller.
+ */
+ RightIsSmaller()
+
+/**
+ * Holds if `cmp` is a comparison operation in which the left hand
+ * argument (which is at most `left`) is always strictly less than the
+ * right hand argument (which is at least `right`), and `ss` is
+ * `LeftIsSmaller`.
+ *
+ * Note that the comparison operation could be any binary comparison
+ * operator, for example,`==`, `>`, or `<=`.
+ */
+private predicate alwaysLT(ComparisonOperation cmp, float left, float right, SmallSide ss) {
+ ss = LeftIsSmaller() and
+ left = upperBoundFC(cmp.getLeftOperand()) and
+ right = lowerBoundFC(cmp.getRightOperand()) and
+ left < right
+}
+
+/**
+ * Holds if `cmp` is a comparison operation in which the left hand
+ * argument (which is at most `left`) is always less than or equal to
+ * the right hand argument (which is at least `right`), and `ss` is
+ * `LeftIsSmaller`.
+ *
+ * Note that the comparison operation could be any binary comparison
+ * operator, for example,`==`, `>`, or `<=`.
+ */
+private predicate alwaysLE(ComparisonOperation cmp, float left, float right, SmallSide ss) {
+ ss = LeftIsSmaller() and
+ left = upperBoundFC(cmp.getLeftOperand()) and
+ right = lowerBoundFC(cmp.getRightOperand()) and
+ left <= right and
+ // Range analysis is not able to precisely represent large 64 bit numbers,
+ // because it stores the range as a `float`, which only has a 53 bit mantissa.
+ // For example, the number `2^64-1` is rounded to `2^64`. This means that we
+ // cannot trust the result if the numbers are large. Note: there is only
+ // a risk of a rounding error causing an incorrect result if `left == right`.
+ // If `left` is strictly less than `right` then there is enough of a gap
+ // that we don't need to worry about rounding errors.
+ left.ulp() <= 1
+}
+
+/**
+ * Holds if `cmp` is a comparison operation in which the left hand
+ * argument (which is at least `left`) is always strictly greater than
+ * the right hand argument (which is at most `right`), and `ss` is
+ * `RightIsSmaller`.
+ *
+ * Note that the comparison operation could be any binary comparison
+ * operator, for example,`==`, `>`, or `<=`.
+ */
+private predicate alwaysGT(ComparisonOperation cmp, float left, float right, SmallSide ss) {
+ ss = RightIsSmaller() and
+ left = lowerBoundFC(cmp.getLeftOperand()) and
+ right = upperBoundFC(cmp.getRightOperand()) and
+ left > right
+}
+
+/**
+ * Holds if `cmp` is a comparison operation in which the left hand
+ * argument (which is at least `left`) is always greater than or equal
+ * to the right hand argument (which is at most `right`), and `ss` is
+ * `RightIsSmaller`.
+ *
+ * Note that the comparison operation could be any binary comparison
+ * operator, for example,`==`, `>`, or `<=`.
+ */
+private predicate alwaysGE(ComparisonOperation cmp, float left, float right, SmallSide ss) {
+ ss = RightIsSmaller() and
+ left = lowerBoundFC(cmp.getLeftOperand()) and
+ right = upperBoundFC(cmp.getRightOperand()) and
+ left >= right and
+ // Range analysis is not able to precisely represent large 64 bit numbers,
+ // because it stores the range as a `float`, which only has a 53 bit mantissa.
+ // For example, the number 2^64-1 is rounded to 2^64. This means that we
+ // cannot trust the result if the numbers are large. Note: there is only
+ // a risk of a rounding error causing an incorrect result if `left == right`.
+ // If `left` is strictly greater than `right` then there is enough of a gap
+ // that we don't need to worry about rounding errors.
+ left.ulp() <= 1
+}
+
+/**
+ * Holds if `cmp` is a comparison operation that always has the
+ * result `value`, and either
+ * * `ss` is `LeftIsSmaller`, and the left hand argument is always at
+ * most `left`, the right hand argument at least `right`, and `left`
+ * is less than or equal to `right`; or
+ * * `ss` is `RightIsSmaller`, and the left hand argument is always at
+ * least `left`, the right hand argument at most `right`, and `left`
+ * is greater than or equal to `right`.
+ *
+ * For example, if the comparison `x < y` is always true because
+ * `x <= 3` and `5 <= y` then
+ * `pointlessComparison(x < y, 3, 5, true, LeftIsSmaller)` holds.
+ *
+ * Similarly, if the comparison `x < y` is always false because `x >= 9`
+ * and `7 >= y` then
+ * `pointlessComparison(x < y, 9, 7, false, RightIsSmaller)` holds.
+ */
+predicate pointlessComparison(
+ ComparisonOperation cmp, float left, float right, boolean value, SmallSide ss
+) {
+ alwaysLT(cmp.(LTExpr), left, right, ss) and value = true
+ or
+ alwaysLE(cmp.(LEExpr), left, right, ss) and value = true
+ or
+ alwaysGT(cmp.(GTExpr), left, right, ss) and value = true
+ or
+ alwaysGE(cmp.(GEExpr), left, right, ss) and value = true
+ or
+ alwaysLT(cmp.(NEExpr), left, right, ss) and value = true
+ or
+ alwaysGT(cmp.(NEExpr), left, right, ss) and value = true
+ or
+ alwaysGE(cmp.(LTExpr), left, right, ss) and value = false
+ or
+ alwaysGT(cmp.(LEExpr), left, right, ss) and value = false
+ or
+ alwaysLE(cmp.(GTExpr), left, right, ss) and value = false
+ or
+ alwaysLT(cmp.(GEExpr), left, right, ss) and value = false
+ or
+ alwaysLT(cmp.(EQExpr), left, right, ss) and value = false
+ or
+ alwaysGT(cmp.(EQExpr), left, right, ss) and value = false
+}
+
+/**
+ * Holds if `cmp` is a pointless comparison (see `pointlessComparison`
+ * above) and `cmp` occurs in reachable code. The reason for excluding
+ * expressions that occur in unreachable code is that range analysis
+ * sometimes can deduce impossible ranges for them. For example:
+ *
+ * if (10 < x) {
+ * if (x < 5) {
+ * // Unreachable code
+ * return x; // x has an empty range: 10 < x && x < 5
+ * }
+ * }
+ */
+predicate reachablePointlessComparison(
+ ComparisonOperation cmp, float left, float right, boolean value, SmallSide ss
+) {
+ pointlessComparison(cmp, left, right, value, ss) and
+ // Reachable according to control flow analysis.
+ reachable(cmp) and
+ // Reachable according to range analysis.
+ not exprWithEmptyRange(cmp.getAChild+())
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/RangeAnalysisUtils.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/RangeAnalysisUtils.qll
new file mode 100644
index 00000000000..410a39716dc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/RangeAnalysisUtils.qll
@@ -0,0 +1,478 @@
+import cpp
+
+/**
+ * Describes whether a relation is 'strict' (that is, a `<` or `>`
+ * relation) or 'non-strict' (a `<=` or `>=` relation).
+ */
+newtype RelationStrictness =
+ /**
+ * Represents that a relation is 'strict' (that is, a `<` or `>` relation).
+ */
+ Strict() or
+ /**
+ * Represents that a relation is 'non-strict' (that is, a `<=` or `>=` relation).
+ */
+ Nonstrict()
+
+/**
+ * Describes the direction of a relation: 'greater' (that is, a `>` or `>=`
+ * relation) or 'lesser' (a `<` or `<=` relation).
+ */
+newtype RelationDirection =
+ /**
+ * Represents that a relation is 'greater' (that is, a `>` or `>=` relation).
+ */
+ Greater() or
+ /**
+ * Represents that a relation is 'lesser' (that is, a `<` or `<=` relation).
+ */
+ Lesser()
+
+private RelationStrictness negateStrictness(RelationStrictness strict) {
+  // Gets the opposite strictness: Strict <-> Nonstrict.
+  strict = Nonstrict() and result = Strict() or
+  strict = Strict() and result = Nonstrict()
+}
+
+private RelationDirection negateDirection(RelationDirection dir) {
+  // Gets the opposite direction: Greater <-> Lesser.
+  dir = Lesser() and result = Greater() or
+  dir = Greater() and result = Lesser()
+}
+
+/**
+ * Gets `true` if `dir` is `Greater` (a `>` or `>=` relation) and `false` if it is `Lesser`.
+ */
+boolean directionIsGreater(RelationDirection dir) {
+  dir = Lesser() and result = false
+  or
+  dir = Greater() and result = true
+}
+
+/**
+ * Gets `true` if `dir` is `Lesser` (a `<` or `<=` relation) and `false` if it is `Greater`.
+ */
+boolean directionIsLesser(RelationDirection dir) {
+  dir = Lesser() and result = true
+  or
+  dir = Greater() and result = false
+}
+
+/**
+ * Holds if `rel` is one of the relational operations `<`, `<=`, `>` or
+ * `>=`, `lhs` and `rhs` are its fully-converted left and right operands,
+ * and `dir` and `strict` classify the operator.
+ *
+ * For example, if `rel` is `x < 5` then
+ * `relOp(rel, x, 5, Lesser(), Strict())` holds.
+ */
+private predicate relOp(
+  RelationalOperation rel, Expr lhs, Expr rhs, RelationDirection dir, RelationStrictness strict
+) {
+  (
+    rel instanceof GEExpr and dir = Greater() and strict = Nonstrict()
+    or
+    rel instanceof GTExpr and dir = Greater() and strict = Strict()
+    or
+    rel instanceof LEExpr and dir = Lesser() and strict = Nonstrict()
+    or
+    rel instanceof LTExpr and dir = Lesser() and strict = Strict()
+  ) and
+  rhs = rel.getRightOperand().getFullyConverted() and
+  lhs = rel.getLeftOperand().getFullyConverted()
+}
+
+/**
+ * Holds if `rel` is a relational operation (`<`, `>`, `<=` or `>=`)
+ * with fully-converted children `a` and `b`, described by `dir` and `strict`.
+ *
+ * The operands may be given either as written or reversed, in which case
+ * the direction is flipped and the strictness kept; for example, if `rel`
+ * is `x < 5` then both `relOpWithSwap(rel, x, 5, Lesser(), Strict())` and
+ * `relOpWithSwap(rel, 5, x, Greater(), Strict())` hold.
+ */
+predicate relOpWithSwap(
+  RelationalOperation rel, Expr a, Expr b, RelationDirection dir, RelationStrictness strict
+) {
+  relOp(rel, b, a, negateDirection(dir), strict) or
+  relOp(rel, a, b, dir, strict)
+}
+
+/**
+ * Holds if `rel` is a relational operation (`<`, `>`, `<=` or `>=`)
+ * with fully-converted children `a` and `b`, and the relation described by
+ * `dir` and `strict` holds between `a` and `b` when `rel` evaluates to `branch`.
+ *
+ * This allows for the relation to be either as written, or with its
+ * arguments reversed; for example, if `rel` is `x < 5` then
+ * `relOpWithSwapAndNegate(rel, x, 5, Lesser(), Strict(), true)`,
+ * `relOpWithSwapAndNegate(rel, 5, x, Greater(), Strict(), true)`,
+ * `relOpWithSwapAndNegate(rel, x, 5, Greater(), Nonstrict(), false)` and
+ * `relOpWithSwapAndNegate(rel, 5, x, Lesser(), Nonstrict(), false)` hold.
+ */
+predicate relOpWithSwapAndNegate(
+  RelationalOperation rel, Expr a, Expr b, RelationDirection dir, RelationStrictness strict,
+  boolean branch
+) {
+  branch = false and
+  relOpWithSwap(rel, a, b, negateDirection(dir), negateStrictness(strict))
+  or
+  branch = true and relOpWithSwap(rel, a, b, dir, strict)
+}
+
+/**
+ * Holds if `cmp` is an equality operation (`==` or `!=`) whose
+ * fully-converted left and right operands are `lhs` and `rhs`, and `isEQ`
+ * is true if `cmp` is an `==` operation and false if it is `!=`.
+ *
+ * For example, if `cmp` is `x == 5` then
+ * `eqOp(cmp, x, 5, true)` holds.
+ */
+private predicate eqOp(EqualityOperation cmp, Expr lhs, Expr rhs, boolean isEQ) {
+  (
+    cmp instanceof NEExpr and isEQ = false
+    or
+    cmp instanceof EQExpr and isEQ = true
+  ) and
+  rhs = cmp.getRightOperand().getFullyConverted() and
+  lhs = cmp.getLeftOperand().getFullyConverted()
+}
+
+/**
+ * Holds if `cmp` is an equality operation (`==` or `!=`) with fully-converted
+ * operands `a` and `b`, and `isEQ` is true if `cmp` is an `==` operation and
+ * false if it is an `!=` operation.
+ *
+ * The operands may be given either as written or reversed; for example,
+ * if `cmp` is `x == 5` then both
+ * `eqOpWithSwap(cmp, x, 5, true)` and
+ * `eqOpWithSwap(cmp, 5, x, true)` hold.
+ */
+private predicate eqOpWithSwap(EqualityOperation cmp, Expr a, Expr b, boolean isEQ) {
+  eqOp(cmp, b, a, isEQ) or
+  eqOp(cmp, a, b, isEQ)
+}
+
+/**
+ * Holds if `cmp` is an equality operation (`==` or `!=`) with fully-converted
+ * children `a` and `b`, and `branch` is the outcome of `cmp` on which `a`
+ * and `b` compare equal (if `isEQ` is true) or unequal (if `isEQ` is false).
+ *
+ * This allows for the comparison to be either as written, or with its
+ * arguments reversed; for example, if `cmp` is `x == 5` then
+ * `eqOpWithSwapAndNegate(cmp, x, 5, true, true)`,
+ * `eqOpWithSwapAndNegate(cmp, 5, x, true, true)`,
+ * `eqOpWithSwapAndNegate(cmp, x, 5, false, false)` and
+ * `eqOpWithSwapAndNegate(cmp, 5, x, false, false)` hold.
+ */
+predicate eqOpWithSwapAndNegate(EqualityOperation cmp, Expr a, Expr b, boolean isEQ, boolean branch) {
+  isEQ = false and eqOpWithSwap(cmp, a, b, branch.booleanNot())
+  or
+  isEQ = true and eqOpWithSwap(cmp, a, b, branch)
+}
+
+/**
+ * Holds if `cmp` is an unconverted conversion of `a` to a Boolean that
+ * evaluates to `isEQ` iff `a` is 0.
+ *
+ * Note that `a` can be `cmp` itself or a conversion thereof.
+ */
+private predicate eqZero(Expr cmp, Expr a, boolean isEQ) {
+ // The `!a` expression tests `a` equal to zero when `a` is a number converted
+ // to a Boolean.
+ isEQ = true and
+ exists(Expr notOperand | notOperand = cmp.(NotExpr).getOperand().getFullyConverted() |
+ // In C++ code there will be a BoolConversion in `!myInt`
+ a = notOperand.(BoolConversion).getExpr()
+ or
+ // In C code there is no conversion since there was no bool type before C99
+ a = notOperand and
+ not a instanceof BoolConversion // avoid overlap with the case above
+ )
+ or
+ // The `(bool)a` expression tests `a` NOT equal to zero when `a` is a number
+ // converted to a Boolean. To avoid overlap with the case above, this case
+ // excludes conversions that are right below a `!`.
+ isEQ = false and
+ linearAccess(cmp, _, _, _) and
+ // This test for `isCondition` implies that `cmp` is unconverted and that the
+ // parent of `cmp` is not a `NotExpr` -- the CFG doesn't do branching from
+ // inside `NotExpr`.
+ cmp.isCondition() and
+ // The GNU two-operand conditional expression is not supported for the
+ // purpose of guards, but the value of the conditional expression itself is
+ // modeled in the range analysis.
+ not exists(ConditionalExpr cond | cmp = cond.getCondition() and cond.isTwoOperand()) and
+ (
+ // In C++ code there will be a BoolConversion in `if (myInt)`
+ a = cmp.getFullyConverted().(BoolConversion).getExpr()
+ or
+ // In C code there is no conversion since there was no bool type before C99
+ a = cmp.getFullyConverted() and
+ not a instanceof BoolConversion // avoid overlap with the case above
+ )
+}
+
+/**
+ * Holds if `branch` of `cmp` is taken when `a` compares `isEQ` to zero.
+ *
+ * Note that `a` can be `cmp` itself or a conversion thereof.
+ */
+predicate eqZeroWithNegate(Expr cmp, Expr a, boolean isEQ, boolean branch) {
+  // `eqZero(cmp, a, b)` says that `cmp` evaluates to `b` iff `a` is zero.
+  // So when `cmp` evaluates to the opposite of `b`, `a` is known to be
+  // _not equal_ to zero on that branch.
+  isEQ = false and eqZero(cmp, a, branch.booleanNot())
+  or
+  // Conversely, when `cmp` evaluates to `b` itself, `a` is known to be
+  // _equal_ to zero on that branch. Together the two cases cover both
+  // outcomes of `cmp`.
+  isEQ = true and eqZero(cmp, a, branch)
+}
+
+/**
+ * Holds if `expr` is equivalent to `p*v + q`, where `p` is a non-zero
+ * number. The associativity, commutativity and distributivity of
+ * arithmetic operations are taken into account.
+ */
+predicate linearAccess(Expr expr, VariableAccess v, float p, float q) {
+  linearAccessImpl(expr, v, p, q) and
+  // Written as two strict inequalities so that both 0 and NaN are excluded.
+  (p > 0 or p < 0)
+}
+
+/**
+ * Holds if `expr` is equivalent to `p*v + q`. Unlike `linearAccess`, this
+ * does not itself require `p` to be non-zero. Takes into account the
+ * associativity, commutativity and distributivity of arithmetic operations.
+ */
+private predicate linearAccessImpl(Expr expr, VariableAccess v, float p, float q) {
+ // Base cases: `expr` is `v` itself, or a `ReferenceDereferenceExpr` of `v`.
+ expr = v and p = 1.0 and q = 0.0
+ or
+ expr.(ReferenceDereferenceExpr).getExpr() = v and p = 1.0 and q = 0.0
+ or
+ // a+(p*v+b) == p*v + (a+b)
+ exists(AddExpr addExpr, float a, float b |
+ addExpr.getLeftOperand().isConstant() and
+ a = addExpr.getLeftOperand().getFullyConverted().getValue().toFloat() and
+ linearAccess(addExpr.getRightOperand(), v, p, b) and
+ expr = addExpr and
+ q = a + b
+ )
+ or
+ // (p*v+a)+b == p*v + (a+b)
+ exists(AddExpr addExpr, float a, float b |
+ addExpr.getRightOperand().isConstant() and
+ b = addExpr.getRightOperand().getFullyConverted().getValue().toFloat() and
+ linearAccess(addExpr.getLeftOperand(), v, p, a) and
+ expr = addExpr and
+ q = a + b
+ )
+ or
+ // a-(m*v+b) == -m*v + (a-b)
+ exists(SubExpr subExpr, float a, float b, float m |
+ subExpr.getLeftOperand().isConstant() and
+ a = subExpr.getLeftOperand().getFullyConverted().getValue().toFloat() and
+ linearAccess(subExpr.getRightOperand(), v, m, b) and
+ expr = subExpr and
+ p = -m and
+ q = a - b
+ )
+ or
+ // (p*v+a)-b == p*v + (a-b)
+ exists(SubExpr subExpr, float a, float b |
+ subExpr.getRightOperand().isConstant() and
+ b = subExpr.getRightOperand().getFullyConverted().getValue().toFloat() and
+ linearAccess(subExpr.getLeftOperand(), v, p, a) and
+ expr = subExpr and
+ q = a - b
+ )
+ or
+ // +(p*v+q) == p*v + q
+ exists(UnaryPlusExpr unaryPlusExpr |
+ linearAccess(unaryPlusExpr.getOperand().getFullyConverted(), v, p, q) and
+ expr = unaryPlusExpr
+ )
+ or
+ // (larger_type)(p*v+q) == p*v + q
+ exists(Cast cast, ArithmeticType sourceType, ArithmeticType targetType |
+ linearAccess(cast.getExpr(), v, p, q) and
+ sourceType = cast.getExpr().getUnspecifiedType() and
+ targetType = cast.getUnspecifiedType() and
+ // This allows conversion between signed and unsigned, which is technically
+ // lossy but common enough that we'll just have to assume the user knows
+ // what they're doing.
+ targetType.getSize() >= sourceType.getSize() and
+ expr = cast
+ )
+ or
+ // (p*v+q) == p*v + q
+ exists(ParenthesisExpr paren |
+ linearAccess(paren.getExpr(), v, p, q) and
+ expr = paren
+ )
+ or
+ // -(a*v+b) == -a*v + (-b)
+ exists(UnaryMinusExpr unaryMinusExpr, float a, float b |
+ linearAccess(unaryMinusExpr.getOperand().getFullyConverted(), v, a, b) and
+ expr = unaryMinusExpr and
+ p = -a and
+ q = -b
+ )
+ or
+ // m*(a*v+b) == (m*a)*v + (m*b)
+ exists(MulExpr mulExpr, float a, float b, float m |
+ mulExpr.getLeftOperand().isConstant() and
+ m = mulExpr.getLeftOperand().getFullyConverted().getValue().toFloat() and
+ linearAccess(mulExpr.getRightOperand(), v, a, b) and
+ expr = mulExpr and
+ p = m * a and
+ q = m * b
+ )
+ or
+ // (a*v+b)*m == (m*a)*v + (m*b)
+ exists(MulExpr mulExpr, float a, float b, float m |
+ mulExpr.getRightOperand().isConstant() and
+ m = mulExpr.getRightOperand().getFullyConverted().getValue().toFloat() and
+ linearAccess(mulExpr.getLeftOperand(), v, a, b) and
+ expr = mulExpr and
+ p = m * a and
+ q = m * b
+ )
+}
+
+/**
+ * Holds if `guard` is a comparison operation (`<`, `<=`, `>`, `>=`,
+ * `==` or `!=`), one of its arguments is equivalent (up to
+ * associativity, commutativity and distributivity of the simple
+ * arithmetic operations) to `p*v + q` (for some `p` and `q`),
+ * `direction` describes whether `guard` gives an upper or lower bound
+ * on `v`, and `branch` indicates which control-flow branch this
+ * bound is valid on.
+ *
+ * For example, if `guard` is `2*v + 3 > 10` then
+ * `cmpWithLinearBound(guard, v, Greater(), true)` and
+ * `cmpWithLinearBound(guard, v, Lesser(), false)` hold.
+ * If `guard` is `4 - v > 5` then
+ * `cmpWithLinearBound(guard, v, Lesser(), true)` and
+ * `cmpWithLinearBound(guard, v, Greater(), false)` hold.
+ *
+ * A more sophisticated predicate, such as `boundFromGuard`, is needed
+ * to compute an actual bound for `v`. This predicate can be used if
+ * you just want to check whether a variable is bounded, or to restrict
+ * a more expensive analysis to just guards that bound a variable.
+ */
+predicate cmpWithLinearBound(
+  ComparisonOperation guard, VariableAccess v,
+  RelationDirection direction, // Is this a lower or an upper bound?
+  boolean branch // Which control-flow branch is this bound valid on?
+) {
+  exists(Expr linearSide, float coeff, RelationDirection guardDir |
+    relOpWithSwapAndNegate(guard, linearSide, _, guardDir, _, branch) and
+    linearAccess(linearSide, v, coeff, _)
+  |
+    // A negative coefficient reverses the direction of the bound on `v`.
+    coeff < 0 and direction = negateDirection(guardDir)
+    or
+    coeff > 0 and direction = guardDir
+  )
+  or
+  exists(Expr linearSide |
+    eqOpWithSwap(guard, linearSide, _, branch) and
+    linearAccess(linearSide, v, _, _)
+  )
+}
+
+/**
+ * Holds if `lb` and `ub` are the lower and upper bounds of the unspecified
+ * type `t`.
+ *
+ * For example, if `t` is a signed 32-bit type then this holds when `lb` is
+ * `-2^31` and `ub` is `2^31 - 1`.
+ */
+private predicate typeBounds(ArithmeticType t, float lb, float ub) {
+  exists(IntegralType integralType, float limit |
+    integralType = t and limit = 2.pow(8 * integralType.getSize())
+  |
+    // `limit` is 2^(8 * size); `bool` is special-cased to the range [0, 1].
+    integralType instanceof BoolType and lb = 0 and ub = 1
+    or
+    not integralType instanceof BoolType and
+    (
+      integralType.isSigned() and lb = -(limit / 2) and ub = (limit / 2) - 1
+      or
+      not integralType.isSigned() and lb = 0 and ub = limit - 1
+    )
+  )
+  or
+  // This covers all floating point types. The range is (-Inf, +Inf).
+  t instanceof FloatingPointType and lb = -(1.0 / 0.0) and ub = 1.0 / 0.0
+}
+
+private Type stripReference(Type t) {
+  result = t.(ReferenceType).getBaseType() or not t instanceof ReferenceType and result = t
+}
+
+/** Gets the type that range analysis uses for the `StackVariable` `v`: its unspecified type, with a reference type replaced by its base type. */
+Type getVariableRangeType(StackVariable v) { stripReference(v.getUnspecifiedType()) = result }
+
+/**
+ * Gets the lower bound for the unspecified type `t`, looking through a
+ * reference type to its base type.
+ *
+ * For example, if `t` is a signed 32-bit type then the result is `-2^31`.
+ */
+float typeLowerBound(Type t) { exists(Type base | base = stripReference(t) | typeBounds(base, result, _)) }
+
+/**
+ * Gets the upper bound for the unspecified type `t`, looking through a
+ * reference type to its base type.
+ *
+ * For example, if `t` is a signed 32-bit type then the result is `2^31 - 1`.
+ */
+float typeUpperBound(Type t) { exists(Type base | base = stripReference(t) | typeBounds(base, _, result)) }
+
+/**
+ * Gets the smallest value that `expr` could represent, judging only by
+ * its unspecified type.
+ *
+ * For example, if `expr` has a signed 32-bit type then the result is
+ * `-2^31`.
+ *
+ * Note: Due to the way casts are represented, rather than calling
+ * `exprMinVal(expr)` you will normally want to call
+ * `exprMinVal(expr.getFullyConverted())`.
+ */
+float exprMinVal(Expr expr) { exists(Type t | t = expr.getUnspecifiedType() | result = typeLowerBound(t)) }
+
+/**
+ * Gets the largest value that `expr` could represent, judging only by
+ * its unspecified type.
+ *
+ * For example, if `expr` has a signed 32-bit type then the result is
+ * `2^31 - 1`.
+ *
+ * Note: Due to the way casts are represented, rather than calling
+ * `exprMaxVal(expr)` you will normally want to call
+ * `exprMaxVal(expr.getFullyConverted())`.
+ */
+float exprMaxVal(Expr expr) { exists(Type t | t = expr.getUnspecifiedType() | result = typeUpperBound(t)) }
+
+/**
+ * Gets the smallest value that the variable `v` could represent, judging
+ * only by its unspecified type.
+ *
+ * For example, if `v` has a signed 32-bit type then the result is
+ * `-2^31`.
+ */
+float varMinVal(Variable v) { exists(Type t | t = v.getUnspecifiedType() | result = typeLowerBound(t)) }
+
+/**
+ * Gets the largest value that the variable `v` could represent, judging
+ * only by its unspecified type.
+ *
+ * For example, if `v` has a signed 32-bit type then the result is
+ * `2^31 - 1`.
+ */
+float varMaxVal(Variable v) { exists(Type t | t = v.getUnspecifiedType() | result = typeUpperBound(t)) }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/RangeSSA.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/RangeSSA.qll
new file mode 100644
index 00000000000..bc66d9b2dd0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/RangeSSA.qll
@@ -0,0 +1,177 @@
+/**
+ * This library is a clone of semmle.code.cpp.controlflow.SSA, with
+ * only one difference: extra phi definitions are added after
+ * guards. For example:
+ * ```
+ * x = f();
+ * if (x < 10) {
+ * // Block 1
+ * ...
+ * } else {
+ * // Block 2
+ * ...
+ * }
+ * ```
+ * In standard SSA, basic blocks 1 and 2 do not need phi definitions
+ * for `x`, because they are dominated by the definition of `x` on the
+ * first line. In RangeSSA, however, we add phi definitions for `x` at
+ * the beginning of blocks 1 and 2. This is useful for range analysis
+ * because it enables us to deduce a more accurate range for `x` in the
+ * two branches of the if-statement.
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.Dominance
+import semmle.code.cpp.controlflow.SSAUtils
+private import RangeAnalysisUtils
+
+/**
+ * The SSA logic comes in two versions: the standard SSA and range-analysis RangeSSA.
+ * This class provides the range-analysis SSA logic.
+ */
+library class RangeSSA extends SSAHelper {
+ RangeSSA() { this = 1 }
+
+ /**
+ * Adds a phi node for `v` at `b` when `b` is a successor, on some branch, of a guard on an access to `v` (see `guard_defn`).
+ */
+ override predicate custom_phi_node(StackVariable v, BasicBlock b) {
+ guard_defn(v.getAnAccess(), _, b, _)
+ }
+}
+
+private predicate guard_defn(VariableAccess v, Expr guard, BasicBlock b, boolean branch) {
+  // `guard` constrains `v` on `branch`, and `b` is the successor block taken on that branch.
+  guardSuccessor(guard, branch, b) and guardCondition(guard, v, branch)
+}
+
+private predicate guardCondition(Expr guard, VariableAccess v, boolean branch) {
+  exists(Expr linearSide | linearAccess(linearSide, v, _, _) |
+    eqZeroWithNegate(guard, linearSide, _, branch) or
+    eqOpWithSwapAndNegate(guard, linearSide, _, _, branch) or
+    relOpWithSwapAndNegate(guard, linearSide, _, _, _, branch)
+  )
+}
+
+private predicate guardSuccessor(Expr guard, boolean branch, BasicBlock succ) {
+  succ = guard.getAFalseSuccessor() and branch = false
+  or
+  succ = guard.getATrueSuccessor() and branch = true
+}
+
+/**
+ * A definition of one or more SSA variables, including phi node
+ * definitions. An SSA variable is effectively the pair of a definition
+ * and the (non-SSA) variable that it defines. Note that definitions and
+ * uses can be coincident, due to the presence of parameter definitions
+ * and phi nodes.
+ */
+class RangeSsaDefinition extends ControlFlowNodeBase {
+  RangeSsaDefinition() { any(RangeSSA ssa).ssa_defn(_, this, _, _) }
+
+  /**
+   * Gets a variable corresponding to an SSA `StackVariable` defined by
+   * this definition.
+   */
+  StackVariable getAVariable() { any(RangeSSA ssa).ssa_defn(result, this, _, _) }
+
+  /**
+   * Gets a string representation of the SSA variable represented by the
+   * pair `(this, v)`.
+   */
+  string toString(StackVariable v) { result = any(RangeSSA ssa).toString(this, v) }
+
+  /** Gets a use of the SSA variable represented by the pair `(this, v)`. */
+  VariableAccess getAUse(StackVariable v) { result = any(RangeSSA ssa).getAUse(this, v) }
+
+  /** Gets the control flow node for this definition. */
+  ControlFlowNode getDefinition() { this = result }
+
+  /** Gets the basic block containing this definition. */
+  BasicBlock getBasicBlock() { result.contains(this.getDefinition()) }
+
+  /** Holds if this definition is a phi node for variable `v`. */
+  predicate isPhiNode(StackVariable v) { any(RangeSSA ssa).phi_node(v, this.(BasicBlock)) }
+
+  /**
+   * DEPRECATED: Use isGuardPhi/4 instead
+   * Holds if this definition is a phi node corresponding to the guard
+   * `guard`, for the variable access `va` on the branch `branch`.
+   */
+  deprecated predicate isGuardPhi(VariableAccess va, Expr guard, boolean branch) {
+    guard_defn(va, guard, this, branch)
+  }
+
+  /**
+   * Holds if this definition is a phi node corresponding to the guard `guard`
+   * on branch `branch`, where `va` is the guarded access to the variable `v`.
+   */
+  predicate isGuardPhi(StackVariable v, VariableAccess va, Expr guard, boolean branch) {
+    va.getTarget() = v and
+    guard_defn(va, guard, this, branch)
+  }
+
+  /** Gets the primary location of this definition. */
+  Location getLocation() { this.(ControlFlowNode).getLocation() = result }
+
+  /** Holds if this definition is from the parameter `p`, that is, it is the entry point of `p`'s function. */
+  predicate definedByParameter(Parameter p) { p.getFunction().getEntryPoint() = this }
+
+  /** Gets a definition of `v` that is a phi input for this basic block. */
+  RangeSsaDefinition getAPhiInput(StackVariable v) {
+    exists(BasicBlock predBlock |
+      this.isPhiNode(v) and
+      this.(BasicBlock).getAPredecessor() = predBlock and
+      result.reachesEndOfBB(v, predBlock) and
+      // Suppose we have a CFG like this:
+      //
+      //   1: x_0 = ...;
+      //   2: if (...) {
+      //   3:   if (x_0 > 1) {
+      //   4:     x_1 = phi(x_0);
+      //   5:   }
+      //   6: }
+      //   7: x_2 = phi(x_0, x_1);
+      //
+      // The phi nodes on lines 4 and 7 are both guard phi nodes,
+      // because they have an incoming edge from the condition on
+      // line 3. Definition x_0 on line 1 should be considered a
+      // phi-input on line 7, but not on line 4. This is because
+      // the only CFG path from line 1 to line 4 goes through the
+      // condition on line 3, but there is a path from line 1 to
+      // line 7 which does not go through the condition. The logic
+      // below excludes definitions which can only reach guard phi
+      // nodes by going through the corresponding guard.
+      not exists(VariableAccess guardedAccess |
+        this.isGuardPhi(v, guardedAccess, _, _) and
+        predBlock.contains(guardedAccess)
+      )
+    )
+  }
+
+  /** Gets the expression assigned to the variable `v` by this definition. */
+  Expr getDefiningValue(StackVariable v) {
+    exists(ControlFlowNode defNode | defNode = this.getDefinition() |
+      exists(AssignOperation assignOp |
+        assignOp = defNode and
+        v.getAnAccess() = assignOp.getLValue() and
+        result = assignOp
+      )
+      or
+      exists(AssignExpr assignExpr |
+        assignExpr = defNode and
+        v.getAnAccess() = assignExpr.getLValue() and
+        result = assignExpr.getRValue()
+      )
+      or
+      defNode = v.getInitializer().getExpr() and result = defNode
+    )
+  }
+
+  /**
+   * Holds if this definition of the variable `v` reaches the end of the basic block `b`.
+   */
+  predicate reachesEndOfBB(StackVariable v, BasicBlock b) {
+    any(RangeSSA ssa).ssaDefinitionReachesEndOfBB(v, this, b)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll
new file mode 100644
index 00000000000..289187d4301
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll
@@ -0,0 +1,1819 @@
+/**
+ * Simple range analysis library. Range analysis is usually done as an
+ * abstract interpretation over the lattice of range values. (A range is a
+ * pair, containing a lower and upper bound for the value.) The problem
+ * with this approach is that the lattice is very tall, which means it can
+ * take an extremely large number of iterations to find the least fixed
+ * point. This example illustrates the problem:
+ *
+ * int count = 0;
+ * for (; p; p = p->next) {
+ * count = count+1;
+ * }
+ *
+ * The range of 'count' is initially (0,0), then (0,1) on the second
+ * iteration, (0,2) on the third iteration, and so on until we eventually
+ * reach maxInt.
+ *
+ * This library uses a crude solution to the problem described above: if
+ * the upper (or lower) bound of an expression might depend recursively on
+ * itself then we round it up (down for lower bounds) to one of a fixed set
+ * of values, such as 0, 1, 2, 256, and +Inf. This limits the height of the
+ * lattice which ensures that the analysis will terminate in a reasonable
+ * amount of time. This solution is similar to the abstract interpretation
+ * technique known as 'widening', but it is less precise because we are
+ * unable to inspect the bounds from the previous iteration of the fixed
+ * point computation. For example, widening might be able to deduce that
+ * the lower bound is -11 but we would approximate it to -16.
+ *
+ * QL does not allow us to compute an aggregate over a recursive
+ * sub-expression, so we cannot compute the minimum lower bound and maximum
+ * upper bound during the recursive phase of the query. Instead, the
+ * recursive phase computes a set of lower bounds and a set of upper bounds
+ * for each expression. We compute the minimum lower bound and maximum
+ * upper bound after the recursion is finished. This is another reason why
+ * we need to limit the number of bounds per expression, because they will
+ * all be stored until the recursive phase is finished.
+ *
+ * The ranges are represented using a pair of floating point numbers. This
+ * is simpler than using integers because floating point numbers cannot
+ * overflow and wrap. It is also convenient because we can detect overflow
+ * and negative overflow by looking for bounds that are outside the range
+ * of the type.
+ */
+
+import cpp
+private import RangeAnalysisUtils
+private import experimental.semmle.code.cpp.models.interfaces.SimpleRangeAnalysisExpr
+private import experimental.semmle.code.cpp.models.interfaces.SimpleRangeAnalysisDefinition
+import RangeSSA
+import SimpleRangeAnalysisCached
+private import NanAnalysis
+
+/**
+ * Gets a member of the fixed set of lower bounds that is used when the
+ * lower bounds of an expression are recursively defined. The inferred
+ * lower bound is rounded down to the nearest member of this set, which
+ * restricts the height of the lattice and prevents the analysis from exploding.
+ *
+ * Note: these bounds were chosen fairly arbitrarily. Feel free to add more
+ * bounds to the set if it helps on specific examples and does not make
+ * performance dramatically worse on large codebases, such as libreoffice.
+ */
+private float wideningLowerBounds(ArithmeticType t) {
+  result = -(1.0 / 0.0) or // -Inf
+  result = typeLowerBound(t) or
+  result = -65536.0 or
+  result = -32768.0 or
+  result = -256.0 or
+  result = -128.0 or
+  result = -16.0 or
+  result = -8.0 or
+  result = -2.0 or
+  result = -1.0 or
+  result = 0.0 or
+  result = 1.0 or
+  result = 2.0
+}
+
+/** Gets a member of the fixed set of upper bounds used for widening. See the comment on `wideningLowerBounds`. */
+private float wideningUpperBounds(ArithmeticType t) {
+  result = 1.0 / 0.0 or // +Inf
+  result = typeUpperBound(t) or
+  result = 65535.0 or
+  result = 32767.0 or
+  result = 255.0 or
+  result = 127.0 or
+  result = 15.0 or
+  result = 7.0 or
+  result = 2.0 or
+  result = 1.0 or
+  result = 0.0 or
+  result = -1.0 or
+  result = -2.0
+}
+
+/**
+ * Gets the value of the expression `e`, if it is a constant.
+ * This predicate also handles constant variables initialized in a different
+ * compilation unit, for which `e.getValue()` does not necessarily have a result.
+ */
+private string getValue(Expr e) {
+  // Prefer the constant value that `e.getValue()` provides, when there is one.
+  result = e.getValue()
+  or
+  /*
+   * Otherwise, it should be safe to propagate the initialization value of a
+   * variable `v` to an access of `v` if:
+   *   the type of `v` is const and not volatile, and
+   *   `v` has static storage duration (a local/global variable or a static
+   *   member variable).
+   */
+
+  not exists(e.getValue()) and
+  exists(StaticStorageDurationVariable v, VariableAccess access |
+    access = e and
+    access.getTarget() = v and
+    v.getUnderlyingType().isConst() and
+    not v.getUnderlyingType().isVolatile() and
+    result = getValue(v.getAnAssignedValue())
+  )
+}
+
+/**
+ * A bitwise `&` expression in which each operand is either unsigned or is
+ * effectively unsigned because it is a non-negative constant.
+ */
+private class UnsignedBitwiseAndExpr extends BitwiseAndExpr {
+  UnsignedBitwiseAndExpr() {
+    // Left operand: an unsigned integral type, or a constant whose value
+    // is non-negative as an `int`.
+    exists(Expr left | left = this.getLeftOperand().getFullyConverted() |
+      left.getType().getUnderlyingType().(IntegralType).isUnsigned()
+      or
+      getValue(left).toInt() >= 0
+    ) and
+    // Right operand: the same test.
+    exists(Expr right | right = this.getRightOperand().getFullyConverted() |
+      right.getType().getUnderlyingType().(IntegralType).isUnsigned()
+      or
+      getValue(right).toInt() >= 0
+    )
+  }
+}
+
+/**
+ * Gets the floor of `v` when `v.abs()` is below `2.pow(31)`, and `v` itself
+ * (unrounded) otherwise, to work around issues with large numbers.
+ */
+bindingset[v]
+float safeFloor(float v) {
+  if v.abs() < 2.pow(31)
+  then
+    // Small enough for `floor()` to be used directly.
+    result = v.floor()
+  else
+    // `floor()` doesn't work correctly on large numbers (since it returns an
+    // integer), so fall back to the unrounded number at this scale.
+    result = v
+}
+
+/** A `MulExpr` where exactly one operand is constant. */
+private class MulByConstantExpr extends MulExpr {
+  float constant;
+  Expr operand;
+
+  MulByConstantExpr() {
+    exists(Expr constOperand |
+      not exists(getValue(operand.getFullyConverted()).toFloat()) and
+      constant = getValue(constOperand.getFullyConverted()).toFloat() and
+      this.hasOperands(constOperand, operand)
+    )
+  }
+
+  /** Gets the value of the constant operand. */
+  float getConstant() { constant = result }
+
+  /** Gets the non-constant operand. */
+  Expr getOperand() { operand = result }
+}
+
+private class UnsignedMulExpr extends MulExpr {
+  UnsignedMulExpr() {
+    // Avoid overlap with `MulByConstantExpr`, which should be slightly
+    // cheaper to analyze.
+    not this instanceof MulByConstantExpr and
+    this.getType().(IntegralType).isUnsigned()
+  }
+}
+
+/**
+ * Holds if `expr` is effectively a multiplication of `operand` with the
+ * non-negative constant `positive`.
+ */
+private predicate effectivelyMultipliesByPositive(Expr expr, Expr operand, float positive) {
+  exists(MulByConstantExpr mul | mul = expr |
+    positive = mul.getConstant() and
+    positive >= 0.0 and // includes positive zero
+    operand = mul.getOperand()
+  )
+  or
+  positive = 1.0 and // these forms are treated as multiplying `operand` by one
+  (
+    operand = expr.(StmtExpr).getResultExpr() or
+    operand = expr.(CommaExpr).getRightOperand() or
+    operand = expr.(UnaryPlusExpr).getOperand()
+  )
+}
+
+/**
+ * Holds if `expr` is effectively a multiplication of `operand` with the
+ * negative constant `negative`.
+ */
+private predicate effectivelyMultipliesByNegative(Expr expr, Expr operand, float negative) {
+  negative = -1.0 and
+  operand = expr.(UnaryMinusExpr).getOperand()
+  or
+  negative < 0.0 and // NOTE(review): original comment said "includes negative zero" -- confirm that `-0.0 < 0.0` holds in QL
+  negative = expr.(MulByConstantExpr).getConstant() and
+  operand = expr.(MulByConstantExpr).getOperand()
+}
+
+private class AssignMulByConstantExpr extends AssignMulExpr {
+  float constant; // value of the fully-converted right-hand side, as a float
+
+  AssignMulByConstantExpr() { getValue(this.getRValue().getFullyConverted()).toFloat() = constant }
+
+  float getConstant() { constant = result }
+}
+
+private class AssignMulByPositiveConstantExpr extends AssignMulByConstantExpr {
+  AssignMulByPositiveConstantExpr() { this.getConstant() >= 0.0 } // `*=` by a constant that is `>= 0.0`
+}
+
+private class AssignMulByNegativeConstantExpr extends AssignMulByConstantExpr {
+  AssignMulByNegativeConstantExpr() { this.getConstant() < 0.0 } // `*=` by a constant that is `< 0.0`
+}
+
+private class UnsignedAssignMulExpr extends AssignMulExpr {
+  UnsignedAssignMulExpr() {
+    // Avoid overlap with `AssignMulByConstantExpr`, which should be
+    // slightly cheaper to analyze.
+    not this instanceof AssignMulByConstantExpr and
+    this.getType().(IntegralType).isUnsigned()
+  }
+}
+
+/** Holds if `e` belongs to the set of expressions which we know how to analyze. */
+private predicate analyzableExpr(Expr e) {
+  // Only expressions of arithmetic type are analyzable. Rather than testing
+  // the type directly, we reuse the logic already encoded in `exprMinVal`.
+  exists(exprMinVal(e)) and
+  (
+    // Expressions with a constant value.
+    exists(getValue(e).toFloat())
+    or
+    // Expressions that effectively scale a single operand by a constant.
+    effectivelyMultipliesByNegative(e, _, _)
+    or
+    effectivelyMultipliesByPositive(e, _, _)
+    or
+    // Arithmetic, bitwise and selection operators.
+    e instanceof AddExpr
+    or
+    e instanceof SubExpr
+    or
+    e instanceof RemExpr
+    or
+    e instanceof UnsignedMulExpr
+    or
+    e instanceof UnsignedBitwiseAndExpr
+    or
+    e instanceof MinExpr
+    or
+    e instanceof MaxExpr
+    or
+    e instanceof ConditionalExpr
+    or
+    // `>>` by a constant
+    exists(getValue(e.(RShiftExpr).getRightOperand()))
+    or
+    // Assignments and in-place updates.
+    e instanceof AssignExpr
+    or
+    e instanceof AssignAddExpr
+    or
+    e instanceof AssignSubExpr
+    or
+    e instanceof AssignMulByConstantExpr
+    or
+    e instanceof UnsignedAssignMulExpr
+    or
+    e instanceof CrementOperation
+    or
+    // Variable accesses, provided that they have SSA information.
+    exists(RangeSsaDefinition def, StackVariable v | e = def.getAUse(v))
+    or
+    // Conversions whose child has an arithmetic type (sometimes the child is
+    // a reference type, and so does not get any bounds); checked via `exprMinVal`.
+    exists(exprMinVal(e.(Conversion).getExpr()))
+    or
+    // A modeled expression for range analysis.
+    e instanceof SimpleRangeAnalysisExpr
+  )
+}
+
+/**
+ * Holds if the definition `def` of `v` directly depends on the definition
+ * `srcDef` of `srcVar`. The transitive closure of this relation is used to
+ * detect definitions which are recursively defined, so that we can prevent
+ * the analysis from exploding.
+ *
+ * This predicate and its helpers deliberately mirror the structure of
+ * `getDefLowerBoundsImpl` and `getDefUpperBoundsImpl`. Whenever the structure
+ * of the main analysis algorithm changes, the matching change has to be made
+ * here as well.
+ */
+private predicate defDependsOnDef(
+ RangeSsaDefinition def, StackVariable v, RangeSsaDefinition srcDef, StackVariable srcVar
+) {
+ // Definitions with a defining value.
+ exists(Expr definingValue |
+ assignmentDef(def, v, definingValue) and
+ exprDependsOnDef(definingValue, srcDef, srcVar)
+ )
+ or
+ // Assignment operations with a defining value
+ exists(AssignOperation assignOp | def = assignOp and v = def.getAVariable() |
+ analyzableExpr(assignOp) and
+ exprDependsOnDef(assignOp, srcDef, srcVar)
+ )
+ or
+ // Increments and decrements depend on whatever their operand depends on.
+ exists(CrementOperation crement | def = crement and v = def.getAVariable() |
+ exprDependsOnDef(crement.getOperand(), srcDef, srcVar)
+ )
+ or
+ // Phi nodes.
+ phiDependsOnDef(def, v, srcDef, srcVar)
+ or
+ // Extensions
+ exists(SimpleRangeAnalysisDefinition modeledDef, Expr input |
+ modeledDef = def and
+ modeledDef.dependsOnExpr(v, input) and
+ exprDependsOnDef(input, srcDef, srcVar)
+ )
+}
+
+/**
+ * Holds if the expression `e` depends on the definition `srcDef` of
+ * `srcVar`. This is a helper predicate for `defDependsOnDef`; its cases
+ * follow the structure of `getLowerBoundsImpl` and `getUpperBoundsImpl`.
+ */
+private predicate exprDependsOnDef(Expr e, RangeSsaDefinition srcDef, StackVariable srcVar) {
+ // Multiplication by a negative or a positive constant
+ exists(Expr operand |
+ effectivelyMultipliesByNegative(e, operand, _) or
+ effectivelyMultipliesByPositive(e, operand, _)
+ |
+ exprDependsOnDef(operand, srcDef, srcVar)
+ )
+ or
+ exprDependsOnDef(e.(MinExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(MaxExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(ConditionalExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(AddExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(SubExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(UnsignedMulExpr).getAnOperand(), srcDef, srcVar)
+ or
+ // A plain assignment only depends on its right-hand side.
+ exprDependsOnDef(e.(AssignExpr).getRValue(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(AssignAddExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(AssignSubExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(UnsignedAssignMulExpr).getAnOperand(), srcDef, srcVar)
+ or
+ // The right-hand side is a constant, so only the left-hand side matters.
+ exprDependsOnDef(e.(AssignMulByConstantExpr).getLValue(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(CrementOperation).getOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(RemExpr).getAnOperand(), srcDef, srcVar)
+ or
+ exprDependsOnDef(e.(Conversion).getExpr(), srcDef, srcVar)
+ or
+ // unsigned `&`
+ exprDependsOnDef(e.(UnsignedBitwiseAndExpr).getAnOperand(), srcDef, srcVar)
+ or
+ // `>>` by a constant
+ exists(RShiftExpr shift | shift = e and exists(getValue(shift.getRightOperand())) |
+ exprDependsOnDef(shift.getLeftOperand(), srcDef, srcVar)
+ )
+ or
+ // A use of the definition itself
+ e = srcDef.getAUse(srcVar)
+ or
+ // A modeled expression for range analysis
+ e.(SimpleRangeAnalysisExpr).dependsOnDef(srcDef, srcVar)
+ or
+ exists(Expr child |
+ e.(SimpleRangeAnalysisExpr).dependsOnChild(child) and
+ exprDependsOnDef(child, srcDef, srcVar)
+ )
+}
+
+/**
+ * Holds if the phi definition `phi` of `v` depends on the definition
+ * `srcDef` of `srcVar`. This is a helper predicate for `defDependsOnDef`;
+ * its cases follow the structure of `getPhiLowerBounds` and
+ * `getPhiUpperBounds`.
+ */
+private predicate phiDependsOnDef(
+ RangeSsaDefinition phi, StackVariable v, RangeSsaDefinition srcDef, StackVariable srcVar
+) {
+ // An input of the phi node is a dependency for the same variable.
+ srcVar = v and
+ srcDef = phi.getAPhiInput(v)
+ or
+ // A guard phi depends on the guarded access and on the operands of the
+ // comparison that forms the guard.
+ exists(VariableAccess access, Expr guard, Expr dependency |
+ phi.isGuardPhi(v, access, guard, _) and
+ (
+ dependency = access or
+ dependency = guard.(ComparisonOperation).getAnOperand()
+ ) and
+ exprDependsOnDef(dependency, srcDef, srcVar)
+ )
+}
+
+/**
+ * Holds if the definition `def` of `v` depends on the definition `srcDef`
+ * of `srcVar` through one or more `defDependsOnDef` steps, that is, this
+ * is the transitive closure of `defDependsOnDef`.
+ */
+private predicate defDependsOnDefTransitively(
+ RangeSsaDefinition def, StackVariable v, RangeSsaDefinition srcDef, StackVariable srcVar
+) {
+ // A single step
+ defDependsOnDef(def, v, srcDef, srcVar)
+ or
+ // A single step followed by one or more further steps
+ exists(RangeSsaDefinition midDef, StackVariable midVar |
+ defDependsOnDef(def, v, midDef, midVar) and
+ defDependsOnDefTransitively(midDef, midVar, srcDef, srcVar)
+ )
+}
+
+/** Holds if the definition `def` of `v` transitively depends on itself. */
+private predicate isRecursiveDef(RangeSsaDefinition def, StackVariable v) {
+ exists(RangeSsaDefinition self, StackVariable selfVar |
+ self = def and
+ selfVar = v and
+ defDependsOnDefTransitively(def, v, self, selfVar)
+ )
+}
+
+/**
+ * Holds if `e` depends on some recursive definition. The bounds of such an
+ * expression are likely to have many candidates during the main recursion.
+ */
+private predicate isRecursiveExpr(Expr e) {
+ exists(RangeSsaDefinition def, StackVariable v |
+ isRecursiveDef(def, v) and
+ exprDependsOnDef(e, def, v)
+ )
+}
+
+/**
+ * Holds if `binop` is a binary operation that is likely to receive a
+ * quadratic (or larger) number of candidate bounds during the analysis.
+ * Two conditions have to be met for this to happen:
+ * 1. `binop` is likely to have many more candidate bounds than its operands.
+ * For instance, writing nbounds(`e`) for the number of candidate bounds
+ * of `e`, nbounds(`x + y`) is O(nbounds(`x`) * nbounds(`y`)), whereas
+ * nbounds(`b ? x : y`) is only O(nbounds(`x`) + nbounds(`y`)).
+ * 2. Both operands of `binop` are recursively determined, so each of them
+ * is likely to have a large number of candidate bounds.
+ */
+private predicate isRecursiveBinary(BinaryOperation binop) {
+ // Condition 2: both operands are recursively determined.
+ isRecursiveExpr(binop.getLeftOperand()) and
+ isRecursiveExpr(binop.getRightOperand()) and
+ // Condition 1: the operation combines the bounds of its operands.
+ (
+ binop instanceof AddExpr or
+ binop instanceof SubExpr or
+ binop instanceof UnsignedMulExpr
+ )
+}
+
+/**
+ * Holds if `def` defines `v` by giving it the value of `expr`.
+ *
+ * There are 3 kinds of `RangeSsaDefinition`:
+ *
+ * 1. Definitions with a defining value.
+ * For example: x = y+3 is a definition of x with defining value y+3.
+ *
+ * 2. Phi nodes: x3 = phi(x0,x1,x2)
+ *
+ * 3. Unanalyzable definitions.
+ * For example: a parameter is unanalyzable because we know nothing
+ * about its value.
+ *
+ * This predicate covers the definitions of the first kind.
+ */
+private predicate assignmentDef(RangeSsaDefinition def, StackVariable v, Expr expr) {
+ getVariableRangeType(v) instanceof ArithmeticType and
+ (
+ // The initializer of `v` is both the definition and the defining value.
+ expr = v.getInitializer().getExpr() and
+ def = expr
+ or
+ // A plain assignment to `v`; the defining value is the right-hand side.
+ exists(AssignExpr assign | assign = def |
+ v.getAnAccess() = assign.getLValue() and
+ expr = assign.getRValue()
+ )
+ )
+}
+
+/**
+ * Holds if `def` is a definition of `v` that can be analyzed. See the
+ * comment above `assignmentDef` for the kinds of definition.
+ */
+private predicate analyzableDef(RangeSsaDefinition def, StackVariable v) {
+ // Definitions with a defining value
+ assignmentDef(def, v, _)
+ or
+ // Analyzable assignment operations, increments and decrements
+ v = def.getAVariable() and
+ (
+ analyzableExpr(def.(AssignOperation))
+ or
+ analyzableExpr(def.(CrementOperation))
+ )
+ or
+ // Phi nodes
+ phiDependsOnDef(def, v, _, _)
+ or
+ // A modeled def for range analysis
+ exists(SimpleRangeAnalysisDefinition modeledDef | modeledDef = def |
+ modeledDef.hasRangeInformationFor(v)
+ )
+}
+
+/**
+ * Computes a normal form of `x` where -0.0 has changed to +0.0. This can be
+ * needed on the lesser side of a floating-point comparison or on both sides of
+ * a floating-point equality because QL does not follow IEEE in floating-point
+ * comparisons but instead defines -0.0 to be less than and distinct from 0.0.
+ *
+ * The normal form is obtained by adding `0.0` to `x`.
+ */
+bindingset[x]
+// NOTE(review): this relies on `-0.0 + 0.0` evaluating to `+0.0` and on every
+// other value being left unchanged by the addition - confirm before rewriting.
+private float normalizeFloatUp(float x) { result = x + 0.0 }
+
+/**
+ * Gets the sum `x + y`, rounded towards +Inf. This is the variant for the
+ * general case, in which both `x` and `y` may be large numbers.
+ */
+bindingset[x, y]
+private float addRoundingUp(float x, float y) {
+ exists(float sum | sum = x + y |
+ // If taking one summand away from the computed sum leaves less than the
+ // other summand, the sum is treated as having been rounded downwards and
+ // is replaced by the next float above it.
+ if normalizeFloatUp(sum - x) < y or normalizeFloatUp(sum - y) < x
+ then result = sum.nextUp()
+ else result = sum
+ )
+}
+
+/**
+ * Gets the sum `x + y`, rounded towards -Inf. This is the variant for the
+ * general case, in which both `x` and `y` may be large numbers.
+ */
+bindingset[x, y]
+private float addRoundingDown(float x, float y) {
+ exists(float sum | sum = x + y |
+ // If taking one summand away from the computed sum leaves more than the
+ // other summand, the sum is treated as having been rounded upwards and
+ // is replaced by the next float below it.
+ if sum - x > normalizeFloatUp(y) or sum - y > normalizeFloatUp(x)
+ then result = sum.nextDown()
+ else result = sum
+ )
+}
+
+/**
+ * Gets the sum `x + small`, rounded towards +Inf. This variant is meant
+ * for the case where `small` is a small constant.
+ */
+bindingset[x, small]
+private float addRoundingUpSmall(float x, float small) {
+ exists(float sum | sum = x + small |
+ // The sum is stepped up to the next float when removing `x` from it
+ // leaves less than `small`.
+ if sum - x < small then result = sum.nextUp() else result = sum
+ )
+}
+
+/**
+ * Gets the sum `x + small`, rounded towards -Inf. This variant is meant
+ * for the case where `small` is a small constant.
+ */
+bindingset[x, small]
+private float addRoundingDownSmall(float x, float small) {
+ exists(float sum | sum = x + small |
+ // The sum is stepped down to the next float when removing `x` from it
+ // leaves more than `small`.
+ if sum - x > small then result = sum.nextDown() else result = sum
+ )
+}
+
+/**
+ * Holds if `expr` is analyzable, does not have a constant value, and has at
+ * least one result of `getUpperBoundsImpl` that does not exceed
+ * `exprMaxVal(expr)`.
+ */
+private predicate lowerBoundableExpr(Expr expr) {
+ not exists(getValue(expr).toFloat()) and
+ analyzableExpr(expr) and
+ exists(float ub | ub = getUpperBoundsImpl(expr) | ub <= exprMaxVal(expr))
+}
+
+/**
+ * Gets the lower bounds of the expression.
+ *
+ * Most of the work of computing the lower bounds is done by
+ * `getLowerBoundsImpl`. However, the lower bounds computed by
+ * `getLowerBoundsImpl` may not be representable by the result type of the
+ * expression. For example, if `x` and `y` are of type `int32` and each
+ * have lower bound -2147483648, then `getLowerBoundsImpl` will compute a
+ * lower bound -4294967296 for the expression `x+y`, even though
+ * -4294967296 cannot be represented as an `int32`. Such unrepresentable
+ * bounds are replaced with `exprMinVal(expr)`. This predicate also adds
+ * `exprMinVal(expr)` as a lower bound if the expression might overflow
+ * positively, or if it is unanalyzable.
+ *
+ * Note: most callers should use `getFullyConvertedLowerBounds` rather than
+ * this predicate.
+ */
+private float getTruncatedLowerBounds(Expr expr) {
+ // If the expression evaluates to a constant, then there is no
+ // need to call getLowerBoundsImpl.
+ analyzableExpr(expr) and
+ result = getValue(expr).toFloat()
+ or
+ // Some of the bounds computed by getLowerBoundsImpl might
+ // overflow, so we replace invalid bounds with exprMinVal.
+ // (`normalizeFloatUp` turns a bound of -0.0 into +0.0 before it is compared.)
+ exists(float newLB | newLB = normalizeFloatUp(getLowerBoundsImpl(expr)) |
+ if exprMinVal(expr) <= newLB and newLB <= exprMaxVal(expr)
+ then
+ // Apply widening where we might get a combinatorial explosion.
+ if isRecursiveBinary(expr)
+ then
+ // Replace `newLB` by the largest widening bound for the type of
+ // `expr` that is not greater than `newLB`.
+ result =
+ max(float widenLB |
+ widenLB = wideningLowerBounds(expr.getUnspecifiedType()) and
+ not widenLB > newLB
+ )
+ else result = newLB
+ else result = exprMinVal(expr)
+ ) and
+ // This case only applies to the expressions accepted by
+ // `lowerBoundableExpr`, which excludes constants (handled above).
+ lowerBoundableExpr(expr)
+ or
+ // The expression might overflow and wrap. If so, the
+ // lower bound is exprMinVal.
+ analyzableExpr(expr) and
+ exprMightOverflowPositively(expr) and
+ // Do not report `exprMinVal(expr)` here when it is equal to the constant
+ // value of `expr`.
+ not result = getValue(expr).toFloat() and
+ result = exprMinVal(expr)
+ or
+ // The expression is not analyzable, so its lower bound is
+ // unknown. Note that the call to exprMinVal restricts the
+ // expressions to just those with arithmetic types. There is no
+ // need to return results for non-arithmetic expressions.
+ not analyzableExpr(expr) and
+ result = exprMinVal(expr)
+}
+
+/**
+ * Gets the upper bounds of the expression.
+ *
+ * Most of the work of computing the upper bounds is done by
+ * `getUpperBoundsImpl`. However, the upper bounds computed by
+ * `getUpperBoundsImpl` may not be representable by the result type of the
+ * expression. For example, if `x` and `y` are of type `int32` and each
+ * have upper bound 2147483647, then `getUpperBoundsImpl` will compute an
+ * upper bound 4294967294 for the expression `x+y`, even though 4294967294
+ * cannot be represented as an `int32`. Such unrepresentable bounds are
+ * replaced with `exprMaxVal(expr)`. This predicate also adds
+ * `exprMaxVal(expr)` as an upper bound if the expression might overflow
+ * negatively, or if it is unanalyzable.
+ *
+ * Note: most callers should use `getFullyConvertedUpperBounds` rather than
+ * this predicate.
+ */
+private float getTruncatedUpperBounds(Expr expr) {
+ if analyzableExpr(expr)
+ then
+ // If the expression evaluates to a constant, then there is no
+ // need to call getUpperBoundsImpl.
+ if exists(getValue(expr).toFloat())
+ then result = getValue(expr).toFloat()
+ else (
+ // Some of the bounds computed by `getUpperBoundsImpl`
+ // might overflow, so we replace invalid bounds with
+ // `exprMaxVal`.
+ // (`normalizeFloatUp` turns a bound of -0.0 into +0.0 before it is compared.)
+ exists(float newUB | newUB = normalizeFloatUp(getUpperBoundsImpl(expr)) |
+ if exprMinVal(expr) <= newUB and newUB <= exprMaxVal(expr)
+ then
+ // Apply widening where we might get a combinatorial explosion.
+ if isRecursiveBinary(expr)
+ then
+ // Replace `newUB` by the smallest widening bound for the type of
+ // `expr` that is not less than `newUB`.
+ result =
+ min(float widenUB |
+ widenUB = wideningUpperBounds(expr.getUnspecifiedType()) and
+ not widenUB < newUB
+ )
+ else result = newUB
+ else result = exprMaxVal(expr)
+ )
+ or
+ // The expression might overflow negatively and wrap. If so,
+ // the upper bound is `exprMaxVal`.
+ exprMightOverflowNegatively(expr) and
+ result = exprMaxVal(expr)
+ )
+ else
+ // The expression is not analyzable, so its upper bound is
+ // unknown. Note that the call to exprMaxVal restricts the
+ // expressions to just those with arithmetic types. There is no
+ // need to return results for non-arithmetic expressions.
+ result = exprMaxVal(expr)
+}
+
+/**
+ * DEPRECATED: use `exprMightOverflowNegatively` instead.
+ *
+ * Holds if the expression might overflow negatively. Overflow that is
+ * caused by a conversion is not taken into account by this predicate.
+ */
+deprecated predicate negative_overflow(Expr expr) {
+ exprMightOverflowNegatively(expr)
+}
+
+/**
+ * DEPRECATED: use `exprMightOverflowPositively` instead.
+ *
+ * Holds if the expression might overflow positively. Overflow that is
+ * caused by a conversion is not taken into account by this predicate.
+ */
+deprecated predicate positive_overflow(Expr expr) {
+ exprMightOverflowPositively(expr)
+}
+
+/**
+ * Gets the candidate lower bounds of `expr`, computed by a case analysis on
+ * the kind of expression. Truncation and overflow are not handled here; that
+ * is done in `getTruncatedLowerBounds`. If `expr` is a
+ * `SimpleRangeAnalysisExpr` then its bounds come from `getLowerBounds()` on
+ * that class and none of the other cases apply.
+ *
+ * Only to be called by `getTruncatedLowerBounds`.
+ */
+private float getLowerBoundsImpl(Expr expr) {
+ (
+ // Multiplication by a positive constant: scale the operand's lower bounds.
+ exists(Expr operand, float operandLow, float positive |
+ effectivelyMultipliesByPositive(expr, operand, positive) and
+ operandLow = getFullyConvertedLowerBounds(operand) and
+ result = positive * operandLow
+ )
+ or
+ // Multiplication by a negative constant: the lower bounds are derived from
+ // the operand's upper bounds.
+ exists(Expr operand, float operandHigh, float negative |
+ effectivelyMultipliesByNegative(expr, operand, negative) and
+ operandHigh = getFullyConvertedUpperBounds(operand) and
+ result = negative * operandHigh
+ )
+ or
+ exists(MinExpr minExpr |
+ expr = minExpr and
+ // Return the union of the lower bounds from both children.
+ result = getFullyConvertedLowerBounds(minExpr.getAnOperand())
+ )
+ or
+ exists(MaxExpr maxExpr |
+ expr = maxExpr and
+ // Compute the cross product of the bounds from both children. We are
+ // using this mathematical property:
+ //
+ // max (minimum{X}, minimum{Y})
+ // = minimum { max(x,y) | x in X, y in Y }
+ exists(float x, float y |
+ x = getFullyConvertedLowerBounds(maxExpr.getLeftOperand()) and
+ y = getFullyConvertedLowerBounds(maxExpr.getRightOperand()) and
+ if x >= y then result = x else result = y
+ )
+ )
+ or
+ // ConditionalExpr (true branch)
+ exists(ConditionalExpr condExpr |
+ expr = condExpr and
+ // Use `boolConversionUpperBound` to determine whether the condition
+ // might evaluate to `true`.
+ boolConversionUpperBound(condExpr.getCondition().getFullyConverted()) = 1 and
+ result = getFullyConvertedLowerBounds(condExpr.getThen())
+ )
+ or
+ // ConditionalExpr (false branch)
+ exists(ConditionalExpr condExpr |
+ expr = condExpr and
+ // Use `boolConversionLowerBound` to determine whether the condition
+ // might evaluate to `false`.
+ boolConversionLowerBound(condExpr.getCondition().getFullyConverted()) = 0 and
+ result = getFullyConvertedLowerBounds(condExpr.getElse())
+ )
+ or
+ // `x + y`: add the lower bounds of both operands, rounding towards -Inf.
+ exists(AddExpr addExpr, float xLow, float yLow |
+ expr = addExpr and
+ xLow = getFullyConvertedLowerBounds(addExpr.getLeftOperand()) and
+ yLow = getFullyConvertedLowerBounds(addExpr.getRightOperand()) and
+ result = addRoundingDown(xLow, yLow)
+ )
+ or
+ // `x - y`: combine the lower bounds of `x` with the negated upper bounds
+ // of `y`, rounding towards -Inf.
+ exists(SubExpr subExpr, float xLow, float yHigh |
+ expr = subExpr and
+ xLow = getFullyConvertedLowerBounds(subExpr.getLeftOperand()) and
+ yHigh = getFullyConvertedUpperBounds(subExpr.getRightOperand()) and
+ result = addRoundingDown(xLow, -yHigh)
+ )
+ or
+ // Unsigned `x * y`: multiply the lower bounds of both operands.
+ exists(UnsignedMulExpr mulExpr, float xLow, float yLow |
+ expr = mulExpr and
+ xLow = getFullyConvertedLowerBounds(mulExpr.getLeftOperand()) and
+ yLow = getFullyConvertedLowerBounds(mulExpr.getRightOperand()) and
+ result = xLow * yLow
+ )
+ or
+ // `x = y`: the bounds are those of the right-hand side.
+ exists(AssignExpr assign |
+ expr = assign and
+ result = getFullyConvertedLowerBounds(assign.getRValue())
+ )
+ or
+ // The compound assignments below are handled like the corresponding
+ // binary operations, with the lvalue as the left operand.
+ exists(AssignAddExpr addExpr, float xLow, float yLow |
+ expr = addExpr and
+ xLow = getFullyConvertedLowerBounds(addExpr.getLValue()) and
+ yLow = getFullyConvertedLowerBounds(addExpr.getRValue()) and
+ result = addRoundingDown(xLow, yLow)
+ )
+ or
+ exists(AssignSubExpr subExpr, float xLow, float yHigh |
+ expr = subExpr and
+ xLow = getFullyConvertedLowerBounds(subExpr.getLValue()) and
+ yHigh = getFullyConvertedUpperBounds(subExpr.getRValue()) and
+ result = addRoundingDown(xLow, -yHigh)
+ )
+ or
+ exists(UnsignedAssignMulExpr mulExpr, float xLow, float yLow |
+ expr = mulExpr and
+ xLow = getFullyConvertedLowerBounds(mulExpr.getLValue()) and
+ yLow = getFullyConvertedLowerBounds(mulExpr.getRValue()) and
+ result = xLow * yLow
+ )
+ or
+ exists(AssignMulByPositiveConstantExpr mulExpr, float xLow |
+ expr = mulExpr and
+ xLow = getFullyConvertedLowerBounds(mulExpr.getLValue()) and
+ result = xLow * mulExpr.getConstant()
+ )
+ or
+ // With a negative constant the lower bounds are derived from the upper
+ // bounds of the lvalue.
+ exists(AssignMulByNegativeConstantExpr mulExpr, float xHigh |
+ expr = mulExpr and
+ xHigh = getFullyConvertedUpperBounds(mulExpr.getLValue()) and
+ result = xHigh * mulExpr.getConstant()
+ )
+ or
+ // NOTE(review): this addition is not explicitly rounded towards -Inf,
+ // unlike the decrement case below - confirm that this is intended.
+ exists(PrefixIncrExpr incrExpr, float xLow |
+ expr = incrExpr and
+ xLow = getFullyConvertedLowerBounds(incrExpr.getOperand()) and
+ result = xLow + 1
+ )
+ or
+ exists(PrefixDecrExpr decrExpr, float xLow |
+ expr = decrExpr and
+ xLow = getFullyConvertedLowerBounds(decrExpr.getOperand()) and
+ result = addRoundingDownSmall(xLow, -1)
+ )
+ or
+ // `PostfixIncrExpr` and `PostfixDecrExpr` return the value of their
+ // operand. The incrementing/decrementing behavior is handled in
+ // `getDefLowerBoundsImpl`.
+ exists(PostfixIncrExpr incrExpr |
+ expr = incrExpr and
+ result = getFullyConvertedLowerBounds(incrExpr.getOperand())
+ )
+ or
+ exists(PostfixDecrExpr decrExpr |
+ expr = decrExpr and
+ result = getFullyConvertedLowerBounds(decrExpr.getOperand())
+ )
+ or
+ exists(RemExpr remExpr | expr = remExpr |
+ // If both inputs are positive then the lower bound is zero.
+ result = 0
+ or
+ // If either input could be negative then the output could be
+ // negative. If so, the lower bound of `x%y` is `-abs(y) + 1`, which is
+ // equal to `min(-y + 1,y - 1)`.
+ exists(float childLB |
+ childLB = getFullyConvertedLowerBounds(remExpr.getAnOperand()) and
+ not childLB >= 0
+ |
+ result = getFullyConvertedLowerBounds(remExpr.getRightOperand()) - 1
+ or
+ exists(float rhsUB | rhsUB = getFullyConvertedUpperBounds(remExpr.getRightOperand()) |
+ result = -rhsUB + 1
+ )
+ )
+ )
+ or
+ // If the conversion is to an arithmetic type then we just return the
+ // lower bound of the child. We do not need to handle truncation and
+ // overflow here, because that is done in `getTruncatedLowerBounds`.
+ // Conversions to `bool` need to be handled specially because they test
+ // whether the value of the expression is equal to 0.
+ exists(Conversion convExpr | expr = convExpr |
+ if convExpr.getUnspecifiedType() instanceof BoolType
+ then result = boolConversionLowerBound(convExpr.getExpr())
+ else result = getTruncatedLowerBounds(convExpr.getExpr())
+ )
+ or
+ // Use SSA to get the lower bounds for a variable use.
+ exists(RangeSsaDefinition def, StackVariable v | expr = def.getAUse(v) |
+ result = getDefLowerBounds(def, v)
+ )
+ or
+ // unsigned `&` (tighter bounds may exist)
+ exists(UnsignedBitwiseAndExpr andExpr |
+ andExpr = expr and
+ result = 0.0
+ )
+ or
+ // `>>` by a constant
+ exists(RShiftExpr rsExpr, float left, int right |
+ rsExpr = expr and
+ left = getFullyConvertedLowerBounds(rsExpr.getLeftOperand()) and
+ right = getValue(rsExpr.getRightOperand().getFullyConverted()).toInt() and
+ // The shift is modeled as a division by 2^right, passed through `safeFloor`.
+ result = safeFloor(left / 2.pow(right))
+ )
+ // Not explicitly modeled by a SimpleRangeAnalysisExpr
+ ) and
+ not expr instanceof SimpleRangeAnalysisExpr
+ or
+ // A modeled expression for range analysis
+ exists(SimpleRangeAnalysisExpr rangeAnalysisExpr |
+ rangeAnalysisExpr = expr and
+ result = rangeAnalysisExpr.getLowerBounds()
+ )
+}
+
+/**
+ * Gets the candidate upper bounds of `expr`, computed by a case analysis on
+ * the kind of expression. Truncation and overflow are not handled here; that
+ * is done in `getTruncatedUpperBounds`. If `expr` is a
+ * `SimpleRangeAnalysisExpr` then its bounds come from `getUpperBounds()` on
+ * that class and none of the other cases apply.
+ *
+ * Only to be called by `getTruncatedUpperBounds`.
+ */
+private float getUpperBoundsImpl(Expr expr) {
+ (
+ // Multiplication by a positive constant: scale the operand's upper bounds.
+ exists(Expr operand, float operandHigh, float positive |
+ effectivelyMultipliesByPositive(expr, operand, positive) and
+ operandHigh = getFullyConvertedUpperBounds(operand) and
+ result = positive * operandHigh
+ )
+ or
+ // Multiplication by a negative constant: the upper bounds are derived from
+ // the operand's lower bounds.
+ exists(Expr operand, float operandLow, float negative |
+ effectivelyMultipliesByNegative(expr, operand, negative) and
+ operandLow = getFullyConvertedLowerBounds(operand) and
+ result = negative * operandLow
+ )
+ or
+ exists(MaxExpr maxExpr |
+ expr = maxExpr and
+ // Return the union of the upper bounds from both children.
+ result = getFullyConvertedUpperBounds(maxExpr.getAnOperand())
+ )
+ or
+ exists(MinExpr minExpr |
+ expr = minExpr and
+ // Compute the cross product of the bounds from both children. We are
+ // using this mathematical property:
+ //
+ // min (maximum{X}, maximum{Y})
+ // = maximum { min(x,y) | x in X, y in Y }
+ exists(float x, float y |
+ x = getFullyConvertedUpperBounds(minExpr.getLeftOperand()) and
+ y = getFullyConvertedUpperBounds(minExpr.getRightOperand()) and
+ if x <= y then result = x else result = y
+ )
+ )
+ or
+ // ConditionalExpr (true branch)
+ exists(ConditionalExpr condExpr |
+ expr = condExpr and
+ // Use `boolConversionUpperBound` to determine whether the condition
+ // might evaluate to `true`.
+ boolConversionUpperBound(condExpr.getCondition().getFullyConverted()) = 1 and
+ result = getFullyConvertedUpperBounds(condExpr.getThen())
+ )
+ or
+ // ConditionalExpr (false branch)
+ exists(ConditionalExpr condExpr |
+ expr = condExpr and
+ // Use `boolConversionLowerBound` to determine whether the condition
+ // might evaluate to `false`.
+ boolConversionLowerBound(condExpr.getCondition().getFullyConverted()) = 0 and
+ result = getFullyConvertedUpperBounds(condExpr.getElse())
+ )
+ or
+ // `x + y`: add the upper bounds of both operands, rounding towards +Inf.
+ exists(AddExpr addExpr, float xHigh, float yHigh |
+ expr = addExpr and
+ xHigh = getFullyConvertedUpperBounds(addExpr.getLeftOperand()) and
+ yHigh = getFullyConvertedUpperBounds(addExpr.getRightOperand()) and
+ result = addRoundingUp(xHigh, yHigh)
+ )
+ or
+ // `x - y`: combine the upper bounds of `x` with the negated lower bounds
+ // of `y`, rounding towards +Inf.
+ exists(SubExpr subExpr, float xHigh, float yLow |
+ expr = subExpr and
+ xHigh = getFullyConvertedUpperBounds(subExpr.getLeftOperand()) and
+ yLow = getFullyConvertedLowerBounds(subExpr.getRightOperand()) and
+ result = addRoundingUp(xHigh, -yLow)
+ )
+ or
+ // Unsigned `x * y`: multiply the upper bounds of both operands.
+ exists(UnsignedMulExpr mulExpr, float xHigh, float yHigh |
+ expr = mulExpr and
+ xHigh = getFullyConvertedUpperBounds(mulExpr.getLeftOperand()) and
+ yHigh = getFullyConvertedUpperBounds(mulExpr.getRightOperand()) and
+ result = xHigh * yHigh
+ )
+ or
+ // `x = y`: the bounds are those of the right-hand side.
+ exists(AssignExpr assign |
+ expr = assign and
+ result = getFullyConvertedUpperBounds(assign.getRValue())
+ )
+ or
+ // The compound assignments below are handled like the corresponding
+ // binary operations, with the lvalue as the left operand.
+ exists(AssignAddExpr addExpr, float xHigh, float yHigh |
+ expr = addExpr and
+ xHigh = getFullyConvertedUpperBounds(addExpr.getLValue()) and
+ yHigh = getFullyConvertedUpperBounds(addExpr.getRValue()) and
+ result = addRoundingUp(xHigh, yHigh)
+ )
+ or
+ exists(AssignSubExpr subExpr, float xHigh, float yLow |
+ expr = subExpr and
+ xHigh = getFullyConvertedUpperBounds(subExpr.getLValue()) and
+ yLow = getFullyConvertedLowerBounds(subExpr.getRValue()) and
+ result = addRoundingUp(xHigh, -yLow)
+ )
+ or
+ exists(UnsignedAssignMulExpr mulExpr, float xHigh, float yHigh |
+ expr = mulExpr and
+ xHigh = getFullyConvertedUpperBounds(mulExpr.getLValue()) and
+ yHigh = getFullyConvertedUpperBounds(mulExpr.getRValue()) and
+ result = xHigh * yHigh
+ )
+ or
+ exists(AssignMulByPositiveConstantExpr mulExpr, float xHigh |
+ expr = mulExpr and
+ xHigh = getFullyConvertedUpperBounds(mulExpr.getLValue()) and
+ result = xHigh * mulExpr.getConstant()
+ )
+ or
+ // With a negative constant the upper bounds are derived from the lower
+ // bounds of the lvalue.
+ exists(AssignMulByNegativeConstantExpr mulExpr, float xLow |
+ expr = mulExpr and
+ xLow = getFullyConvertedLowerBounds(mulExpr.getLValue()) and
+ result = xLow * mulExpr.getConstant()
+ )
+ or
+ exists(PrefixIncrExpr incrExpr, float xHigh |
+ expr = incrExpr and
+ xHigh = getFullyConvertedUpperBounds(incrExpr.getOperand()) and
+ result = addRoundingUpSmall(xHigh, 1)
+ )
+ or
+ // NOTE(review): this subtraction is not explicitly rounded towards +Inf,
+ // unlike the increment case above - confirm that this is intended.
+ exists(PrefixDecrExpr decrExpr, float xHigh |
+ expr = decrExpr and
+ xHigh = getFullyConvertedUpperBounds(decrExpr.getOperand()) and
+ result = xHigh - 1
+ )
+ or
+ // `PostfixIncrExpr` and `PostfixDecrExpr` return the value of their operand.
+ // The incrementing/decrementing behavior is handled in
+ // `getDefUpperBoundsImpl`.
+ exists(PostfixIncrExpr incrExpr |
+ expr = incrExpr and
+ result = getFullyConvertedUpperBounds(incrExpr.getOperand())
+ )
+ or
+ exists(PostfixDecrExpr decrExpr |
+ expr = decrExpr and
+ result = getFullyConvertedUpperBounds(decrExpr.getOperand())
+ )
+ or
+ exists(RemExpr remExpr, float rhsUB |
+ expr = remExpr and
+ rhsUB = getFullyConvertedUpperBounds(remExpr.getRightOperand())
+ |
+ // The result of `x%y` is bounded by the upper bound of `y`, minus one.
+ result = rhsUB - 1
+ or
+ // If the right hand side could be negative then we need to take its
+ // absolute value. Since `abs(x) = max(-x,x)` this is equivalent to
+ // adding `-rhsLB` to the set of upper bounds.
+ exists(float rhsLB |
+ rhsLB = getFullyConvertedLowerBounds(remExpr.getRightOperand()) and
+ not rhsLB >= 0
+ |
+ result = -rhsLB + 1
+ )
+ )
+ or
+ // If the conversion is to an arithmetic type then we just return the
+ // upper bound of the child. We do not need to handle truncation and
+ // overflow here, because that is done in `getTruncatedUpperBounds`.
+ // Conversions to `bool` need to be handled specially because they test
+ // whether the value of the expression is equal to 0.
+ exists(Conversion convExpr | expr = convExpr |
+ if convExpr.getUnspecifiedType() instanceof BoolType
+ then result = boolConversionUpperBound(convExpr.getExpr())
+ else result = getTruncatedUpperBounds(convExpr.getExpr())
+ )
+ or
+ // Use SSA to get the upper bounds for a variable use.
+ exists(RangeSsaDefinition def, StackVariable v | expr = def.getAUse(v) |
+ result = getDefUpperBounds(def, v)
+ )
+ or
+ // unsigned `&` (tighter bounds may exist)
+ exists(UnsignedBitwiseAndExpr andExpr, float left, float right |
+ andExpr = expr and
+ left = getFullyConvertedUpperBounds(andExpr.getLeftOperand()) and
+ right = getFullyConvertedUpperBounds(andExpr.getRightOperand()) and
+ // The result is bounded by the smaller of the two operand bounds.
+ result = left.minimum(right)
+ )
+ or
+ // `>>` by a constant
+ exists(RShiftExpr rsExpr, float left, int right |
+ rsExpr = expr and
+ left = getFullyConvertedUpperBounds(rsExpr.getLeftOperand()) and
+ right = getValue(rsExpr.getRightOperand().getFullyConverted()).toInt() and
+ // The shift is modeled as a division by 2^right, passed through `safeFloor`.
+ result = safeFloor(left / 2.pow(right))
+ )
+ // Not explicitly modeled by a SimpleRangeAnalysisExpr
+ ) and
+ not expr instanceof SimpleRangeAnalysisExpr
+ or
+ // A modeled expression for range analysis
+ exists(SimpleRangeAnalysisExpr rangeAnalysisExpr |
+ rangeAnalysisExpr = expr and
+ result = rangeAnalysisExpr.getUpperBounds()
+ )
+}
+
+/**
+ * Holds if `expr` is the fully converted operand of a logical operation,
+ * the fully converted condition of a conditional expression, or the
+ * operand of a conversion to `bool`.
+ *
+ * This predicate exists as an optimization: it keeps
+ * `boolConversionLowerBound` and `boolConversionUpperBound` from computing
+ * results that nobody asks for. Put differently, `exprIsUsedAsBool(expr)`
+ * holds if `expr` might be passed as an argument to one of those two
+ * predicates.
+ */
+private predicate exprIsUsedAsBool(Expr expr) {
+ exists(BinaryLogicalOperation op | expr = op.getAnOperand().getFullyConverted())
+ or
+ exists(UnaryLogicalOperation op | expr = op.getOperand().getFullyConverted())
+ or
+ exists(ConditionalExpr cond | expr = cond.getCondition().getFullyConverted())
+ or
+ exists(Conversion cast |
+ expr = cast.getExpr() and
+ cast.getUnspecifiedType() instanceof BoolType
+ )
+}
+
+/**
+ * Gets the lower bound of the conversion `(bool)expr`. The result is 1 if
+ * we can prove that the value of `expr` is never 0, and 0 otherwise.
+ */
+private float boolConversionLowerBound(Expr expr) {
+ exprIsUsedAsBool(expr) and
+ (
+ // Case 1: the range for `expr` includes the value 0, so the result
+ // is 0.
+ exists(float lb | lb = getTruncatedLowerBounds(expr) | not lb > 0) and
+ exists(float ub | ub = getTruncatedUpperBounds(expr) | not ub < 0) and
+ result = 0
+ or
+ // Case 2: the range for `expr` does not include the value 0, either
+ // because it lies above 0 (2a) or because it lies below 0 (2b), so the
+ // result is 1.
+ (
+ getTruncatedLowerBounds(expr) > 0
+ or
+ getTruncatedUpperBounds(expr) < 0
+ ) and
+ result = 1
+ or
+ // Case 3: the type of `expr` is not arithmetic. For example, it might
+ // be a pointer.
+ not exists(exprMinVal(expr)) and
+ result = 0
+ )
+}
+
+/**
+ * Gets the upper bound of the conversion `(bool)expr`. The result is 0 if
+ * we can prove that the value of `expr` is always 0, and 1 otherwise.
+ */
+private float boolConversionUpperBound(Expr expr) {
+ exprIsUsedAsBool(expr) and
+ (
+ // Cases 1a and 2a: if the upper bound of the operand is <= 0, or its
+ // lower bound is >= 0, then the upper bound might be 0.
+ (
+ getTruncatedUpperBounds(expr) <= 0
+ or
+ getTruncatedLowerBounds(expr) >= 0
+ ) and
+ result = 0
+ or
+ // Cases 1b and 2b: if the upper bound of the operand is not <= 0, or
+ // its lower bound is not >= 0, then the upper bound is 1.
+ (
+ exists(float ub | ub = getTruncatedUpperBounds(expr) | not ub <= 0)
+ or
+ exists(float lb | lb = getTruncatedLowerBounds(expr) | not lb >= 0)
+ ) and
+ result = 1
+ or
+ // Case 3: the type of `expr` is not arithmetic. For example, it might
+ // be a pointer.
+ not exists(exprMaxVal(expr)) and
+ result = 1
+ )
+}
+
+/**
+ * This predicate computes the lower bounds of a phi definition. If the
+ * phi definition corresponds to a guard, then the guard is used to
+ * deduce a better lower bound (the larger of the guard-derived bound and
+ * the bound of the guarded access). For example:
+ *
+ * def: x = y % 10;
+ * guard: if (x >= 2) {
+ * block: f(x)
+ * }
+ *
+ * In this example, the lower bound of x is 0, but we can
+ * use the guard to deduce that the lower bound is 2 inside the block.
+ */
+private float getPhiLowerBounds(StackVariable v, RangeSsaDefinition phi) {
+ exists(VariableAccess access, Expr guard, boolean branch, float defLB, float guardLB |
+ phi.isGuardPhi(v, access, guard, branch) and
+ lowerBoundFromGuard(guard, access, guardLB, branch) and
+ defLB = getFullyConvertedLowerBounds(access)
+ |
+ // Keep the tighter bound, i.e. the maximum of `guardLB` and `defLB`.
+ if guardLB > defLB then result = guardLB else result = defLB
+ )
+ or
+ exists(VariableAccess access, float neConstant, float lower |
+ isNEPhi(v, phi, access, neConstant) and
+ lower = getTruncatedLowerBounds(access) and
+ if lower = neConstant then result = lower + 1 else result = lower // step past the excluded value
+ )
+ or
+ exists(VariableAccess access |
+ isUnsupportedGuardPhi(v, phi, access) and
+ result = getTruncatedLowerBounds(access) // guard gives no usable bound: reuse the access's bounds
+ )
+ or
+ result = getDefLowerBounds(phi.getAPhiInput(v), v) // bounds of each input of the phi node
+}
+
+/** Computes the upper bounds of a phi definition. See comment for `getPhiLowerBounds`, above. */
+private float getPhiUpperBounds(StackVariable v, RangeSsaDefinition phi) {
+ exists(VariableAccess access, Expr guard, boolean branch, float defUB, float guardUB |
+ phi.isGuardPhi(v, access, guard, branch) and
+ upperBoundFromGuard(guard, access, guardUB, branch) and
+ defUB = getFullyConvertedUpperBounds(access)
+ |
+ // Keep the tighter bound, i.e. the minimum of `guardUB` and `defUB`.
+ if guardUB < defUB then result = guardUB else result = defUB
+ )
+ or
+ exists(VariableAccess access, float neConstant, float upper |
+ isNEPhi(v, phi, access, neConstant) and
+ upper = getTruncatedUpperBounds(access) and
+ if upper = neConstant then result = upper - 1 else result = upper // step past the excluded value
+ )
+ or
+ exists(VariableAccess access |
+ isUnsupportedGuardPhi(v, phi, access) and
+ result = getTruncatedUpperBounds(access) // guard gives no usable bound: reuse the access's bounds
+ )
+ or
+ result = getDefUpperBounds(phi.getAPhiInput(v), v) // bounds of each input of the phi node
+}
+
+/** Gets a raw lower bound of `v` at `def`. Meant for `getDefLowerBounds`; `defMightOverflowNegatively` also reads it. */
+private float getDefLowerBoundsImpl(RangeSsaDefinition def, StackVariable v) {
+ // Definitions with a defining value.
+ exists(Expr expr | assignmentDef(def, v, expr) | result = getFullyConvertedLowerBounds(expr))
+ or
+ // Assignment operations (`AssignOperation`) whose lvalue is an access to `v`.
+ exists(AssignOperation assignOp |
+ def = assignOp and
+ assignOp.getLValue() = v.getAnAccess() and
+ result = getTruncatedLowerBounds(assignOp)
+ )
+ or
+ exists(IncrementOperation incr, float newLB |
+ def = incr and
+ incr.getOperand() = v.getAnAccess() and
+ newLB = getFullyConvertedLowerBounds(incr.getOperand()) and
+ result = newLB + 1 // increment: operand's lower bound plus one
+ )
+ or
+ exists(DecrementOperation decr, float newLB |
+ def = decr and
+ decr.getOperand() = v.getAnAccess() and
+ newLB = getFullyConvertedLowerBounds(decr.getOperand()) and
+ result = addRoundingDownSmall(newLB, -1) // decrement: operand's lower bound minus one
+ )
+ or
+ // Phi nodes.
+ result = getPhiLowerBounds(v, def)
+ or
+ // A modeled def for range analysis
+ result = def.(SimpleRangeAnalysisDefinition).getLowerBounds(v)
+ or
+ // Unanalyzable definitions.
+ unanalyzableDefBounds(def, v, result, _)
+}
+
+/** Gets a raw upper bound of `v` at `def`. Meant for `getDefUpperBounds`; `defMightOverflowPositively` also reads it. */
+private float getDefUpperBoundsImpl(RangeSsaDefinition def, StackVariable v) {
+ // Definitions with a defining value.
+ exists(Expr expr | assignmentDef(def, v, expr) | result = getFullyConvertedUpperBounds(expr))
+ or
+ // Assignment operations (`AssignOperation`) whose lvalue is an access to `v`.
+ exists(AssignOperation assignOp |
+ def = assignOp and
+ assignOp.getLValue() = v.getAnAccess() and
+ result = getTruncatedUpperBounds(assignOp)
+ )
+ or
+ exists(IncrementOperation incr, float newUB |
+ def = incr and
+ incr.getOperand() = v.getAnAccess() and
+ newUB = getFullyConvertedUpperBounds(incr.getOperand()) and
+ result = addRoundingUpSmall(newUB, 1) // increment: operand's upper bound plus one
+ )
+ or
+ exists(DecrementOperation decr, float newUB |
+ def = decr and
+ decr.getOperand() = v.getAnAccess() and
+ newUB = getFullyConvertedUpperBounds(decr.getOperand()) and
+ result = newUB - 1 // decrement: operand's upper bound minus one
+ )
+ or
+ // Phi nodes.
+ result = getPhiUpperBounds(v, def)
+ or
+ // A modeled def for range analysis
+ result = def.(SimpleRangeAnalysisDefinition).getUpperBounds(v)
+ or
+ // Unanalyzable definitions.
+ unanalyzableDefBounds(def, v, _, result)
+}
+
+/**
+ * Helper for `getDefLowerBounds` and `getDefUpperBounds`. Holds if `def`
+ * defines `v` but is not an `analyzableDef` (function parameters are one
+ * such case); `lb` and `ub` are then `varMinVal(v)` and `varMaxVal(v)`.
+ */
+private predicate unanalyzableDefBounds(RangeSsaDefinition def, StackVariable v, float lb, float ub) {
+  ub = varMaxVal(v) and
+  lb = varMinVal(v) and
+  not analyzableDef(def, v) and
+  def.getAVariable() = v
+}
+
+/**
+ * Holds if in the `branch` branch of a guard `guard` involving `v`, we know
+ * that `v` is not NaN (integral range type, a `NonNanVariableAccess`, or a
+ * NaN-excluding comparison), so range inferences about `v` are safe.
+ */
+bindingset[guard, v, branch]
+predicate nonNanGuardedVariable(Expr guard, VariableAccess v, boolean branch) {
+ getVariableRangeType(v.getTarget()) instanceof IntegralType
+ or
+ getVariableRangeType(v.getTarget()) instanceof FloatingPointType and
+ v instanceof NonNanVariableAccess
+ or
+ // The reason the following case is here is to ensure that when we say
+ // `if (x > 5) { ...then... } else { ...else... }`
+ // it is ok to conclude that `x > 5` in the `then` branch (though it is
+ // not safe to conclude that `x <= 5` in the `else` branch) even if we
+ // had no prior knowledge of `x` not being `NaN`.
+ nanExcludingComparison(guard, branch)
+}
+
+/**
+ * If the guard is a comparison of the form `p*v + q <relop> r`, then this
+ * predicate uses the bounds information for `r` to compute a lower bound
+ * for `v`.
+ */
+private predicate lowerBoundFromGuard(Expr guard, VariableAccess v, float lb, boolean branch) {
+ exists(float childLB, RelationStrictness strictness |
+ boundFromGuard(guard, v, childLB, true, strictness, branch)
+ |
+ if nonNanGuardedVariable(guard, v, branch)
+ then
+ if
+ strictness = Nonstrict() or
+ not getVariableRangeType(v.getTarget()) instanceof IntegralType
+ then lb = childLB
+ else lb = childLB + 1 // strict comparison on an integral type: tighten by one
+ else lb = varMinVal(v.getTarget()) // `v` may be NaN: fall back to the variable's minimum
+ )
+}
+
+/**
+ * If the guard is a comparison of the form `p*v + q <relop> r`, then this
+ * predicate uses the bounds information for `r` to compute an upper bound
+ * for `v`.
+ */
+private predicate upperBoundFromGuard(Expr guard, VariableAccess v, float ub, boolean branch) {
+ exists(float childUB, RelationStrictness strictness |
+ boundFromGuard(guard, v, childUB, false, strictness, branch)
+ |
+ if nonNanGuardedVariable(guard, v, branch)
+ then
+ if
+ strictness = Nonstrict() or
+ not getVariableRangeType(v.getTarget()) instanceof IntegralType
+ then ub = childUB
+ else ub = childUB - 1 // strict comparison on an integral type: tighten by one
+ else ub = varMaxVal(v.getTarget()) // `v` may be NaN: fall back to the variable's maximum
+ )
+}
+
+/**
+ * This predicate simplifies the results returned by `linearBoundFromGuard`
+ * by solving for `v`: `boundValue = (r - q) / p`. It also handles `!e` guards.
+ */
+private predicate boundFromGuard(
+ Expr guard, VariableAccess v, float boundValue, boolean isLowerBound,
+ RelationStrictness strictness, boolean branch
+) {
+ exists(float p, float q, float r, boolean isLB |
+ linearBoundFromGuard(guard, v, p, q, r, isLB, strictness, branch) and
+ boundValue = (r - q) / p
+ |
+ // If the multiplier is negative then the direction of the comparison
+ // needs to be flipped. (There is no case for `p = 0`.)
+ p > 0 and isLowerBound = isLB
+ or
+ p < 0 and isLowerBound = isLB.booleanNot()
+ )
+ or
+ // When `!e` is true, we know that `0 <= e <= 0`
+ exists(float p, float q, Expr e |
+ linearAccess(e, v, p, q) and
+ eqZeroWithNegate(guard, e, true, branch) and
+ boundValue = (0.0 - q) / p and
+ isLowerBound = [false, true] and // the same value is both a lower and an upper bound
+ strictness = Nonstrict()
+ )
+}
+
+/**
+ * This predicate finds guards of the form `p*v + q < r or p*v + q == r`
+ * and decomposes them into a tuple of values which can be used to deduce a
+ * lower or upper bound for `v`.
+ */
+private predicate linearBoundFromGuard(
+ ComparisonOperation guard, VariableAccess v, float p, float q, float boundValue,
+ boolean isLowerBound, // Is this a lower or an upper bound?
+ RelationStrictness strictness, boolean branch // Which control-flow branch is this bound valid on?
+) {
+ // For the comparison x < RHS, we create two bounds:
+ //
+ // 1. x < upperbound(RHS)
+ // 2. x >= the minimum value of RHS's fully converted type (`exprTypeBounds`)
+ //
+ exists(Expr lhs, Expr rhs, RelationDirection dir, RelationStrictness st |
+ linearAccess(lhs, v, p, q) and
+ relOpWithSwapAndNegate(guard, lhs, rhs, dir, st, branch)
+ |
+ isLowerBound = directionIsGreater(dir) and
+ strictness = st and
+ getBounds(rhs, boundValue, isLowerBound)
+ or
+ isLowerBound = directionIsLesser(dir) and
+ strictness = Nonstrict() and
+ exprTypeBounds(rhs, boundValue, isLowerBound)
+ )
+ or
+ // For x == RHS, we create the following bounds:
+ //
+ // 1. x <= upperbound(RHS)
+ // 2. x >= lowerbound(RHS)
+ //
+ exists(Expr lhs, Expr rhs |
+ linearAccess(lhs, v, p, q) and
+ eqOpWithSwapAndNegate(guard, lhs, rhs, true, branch) and
+ getBounds(rhs, boundValue, isLowerBound) and
+ strictness = Nonstrict()
+ )
+ // x != RHS and !x are handled elsewhere
+}
+
+/** Utility for `linearBoundFromGuard`: the fully converted lower (`isLowerBound = true`) or upper (`false`) bounds of `expr`. */
+private predicate getBounds(Expr expr, float boundValue, boolean isLowerBound) {
+  boundValue = getFullyConvertedUpperBounds(expr) and isLowerBound = false
+  or
+  boundValue = getFullyConvertedLowerBounds(expr) and isLowerBound = true
+}
+
+/** Utility for `linearBoundFromGuard`: `exprMinVal` (`isLowerBound = true`) or `exprMaxVal` (`false`) of the fully converted `expr`. */
+private predicate exprTypeBounds(Expr expr, float boundValue, boolean isLowerBound) {
+  boundValue = exprMaxVal(expr.getFullyConverted()) and isLowerBound = false
+  or
+  boundValue = exprMinVal(expr.getFullyConverted()) and isLowerBound = true
+}
+
+/**
+ * Holds if `(v, phi)` ensures that `access` is not equal to `neConstant`. For
+ * example, the condition `if (x + 1 != 3)` ensures that `x` is not equal to 2.
+ * Only integral and enum types are supported.
+ */
+private predicate isNEPhi(
+ Variable v, RangeSsaDefinition phi, VariableAccess access, float neConstant
+) {
+ exists(
+ ComparisonOperation cmp, boolean branch, Expr linearExpr, Expr rExpr, float p, float q, float r
+ |
+ phi.isGuardPhi(v, access, cmp, branch) and
+ eqOpWithSwapAndNegate(cmp, linearExpr, rExpr, false, branch) and
+ v.getUnspecifiedType() instanceof IntegralOrEnumType and // Float `!=` is too imprecise
+ r = getValue(rExpr).toFloat() and
+ linearAccess(linearExpr, access, p, q) and
+ neConstant = (r - q) / p // solve `p*access + q != r` for `access`
+ )
+ or
+ exists(Expr op, boolean branch, Expr linearExpr, float p, float q |
+ phi.isGuardPhi(v, access, op, branch) and
+ eqZeroWithNegate(op, linearExpr, false, branch) and
+ v.getUnspecifiedType() instanceof IntegralOrEnumType and // Float `!` is too imprecise
+ linearAccess(linearExpr, access, p, q) and
+ neConstant = (0.0 - q) / p // solve `p*access + q != 0` for `access`
+ )
+}
+
+/**
+ * Holds if `(v, phi)` constrains the value of `access` but in a way that
+ * doesn't allow this library to constrain the upper or lower bounds of
+ * `access`, that is, an inequality guard that `isNEPhi` does not cover. An
+ * example is `if (x != y)` if neither `x` nor `y` is a compile-time constant.
+ */
+private predicate isUnsupportedGuardPhi(Variable v, RangeSsaDefinition phi, VariableAccess access) {
+ exists(Expr cmp, boolean branch |
+ eqOpWithSwapAndNegate(cmp, _, _, false, branch)
+ or
+ eqZeroWithNegate(cmp, _, false, branch)
+ |
+ phi.isGuardPhi(v, access, cmp, branch) and
+ not isNEPhi(v, phi, access, _)
+ )
+}
+
+/**
+ * Gets the upper bound of the expression, if the expression is guarded.
+ * An upper bound can only be found if a guard phi node can be found, its
+ * block has exactly one predecessor, and no descendant of the guard might overflow.
+ */
+private float getGuardedUpperBound(VariableAccess guardedAccess) {
+ exists(
+ RangeSsaDefinition def, StackVariable v, VariableAccess guardVa, Expr guard, boolean branch
+ |
+ def.isGuardPhi(v, guardVa, guard, branch) and
+ // If the basic block for the variable access being examined has
+ // more than one predecessor, the guard phi node could originate
+ // from one of the predecessors. This is because the guard phi
+ // node is attached to the block at the end of the edge and not on
+ // the actual edge. It is therefore not possible to determine which
+ // edge the guard phi node belongs to. The predicate below ensures
+ // that there is one predecessor, albeit somewhat conservative.
+ exists(unique(BasicBlock b | b = def.(BasicBlock).getAPredecessor())) and
+ guardedAccess = def.getAUse(v) and
+ result = max(float ub | upperBoundFromGuard(guard, guardVa, ub, branch)) and
+ not convertedExprMightOverflow(guard.getAChild+())
+ )
+}
+
+cached
+private module SimpleRangeAnalysisCached {
+ /**
+ * Gets the lower bound of the expression.
+ *
+ * Note: expressions in C/C++ are often implicitly or explicitly cast to a
+ * different result type. Such casts can cause the value of the expression
+ * to overflow or to be truncated. This predicate computes the lower bound
+ * of the expression without including the effect of the casts. To compute
+ * the lower bound of the expression after all the casts have been applied,
+ * call `lowerBound` like this:
+ *
+ * `lowerBound(expr.getFullyConverted())`
+ */
+ cached
+ float lowerBound(Expr expr) {
+ // Combine the lower bounds returned by getTruncatedLowerBounds into a
+ // single minimum value.
+ result = min(float lb | lb = getTruncatedLowerBounds(expr) | lb)
+ }
+
+ /**
+ * Gets the upper bound of the expression.
+ *
+ * Note: expressions in C/C++ are often implicitly or explicitly cast to a
+ * different result type. Such casts can cause the value of the expression
+ * to overflow or to be truncated. This predicate computes the upper bound
+ * of the expression without including the effect of the casts. To compute
+ * the upper bound of the expression after all the casts have been applied,
+ * call `upperBound` like this:
+ *
+ * `upperBound(expr.getFullyConverted())`
+ */
+ cached
+ float upperBound(Expr expr) {
+ // Take the largest of the bounds returned by getTruncatedUpperBounds, then
+ // tighten it with getGuardedUpperBound (if any) by taking the minimum.
+ result = min([max(getTruncatedUpperBounds(expr)), getGuardedUpperBound(expr)])
+ }
+
+ /** Holds if the upper bound of `expr` may have been widened. This means the upper bound is in practice likely to be overly wide. */
+ cached
+ predicate upperBoundMayBeWidened(Expr e) {
+ isRecursiveExpr(e) and
+ // Widening is not a problem if the post-analysis in `getGuardedUpperBound` has overridden the widening.
+ // Note that the RHS of `<` may be multi-valued.
+ not getGuardedUpperBound(e) < getTruncatedUpperBounds(e)
+ }
+
+ /**
+ * Holds if `expr` has a provably empty range. For example:
+ *
+ * 10 < expr and expr < 5
+ *
+ * The range of an expression can only be empty if it can never be
+ * executed. For example:
+ *
+ * if (10 < x) {
+ * if (x < 5) {
+ * // Unreachable code
+ * return x; // x has an empty range: 10 < x && x < 5
+ * }
+ * }
+ */
+ cached
+ predicate exprWithEmptyRange(Expr expr) {
+ analyzableExpr(expr) and
+ (
+ not exists(lowerBound(expr)) or
+ not exists(upperBound(expr)) or
+ lowerBound(expr) > upperBound(expr)
+ )
+ }
+
+ /** Holds if the definition might overflow negatively. */
+ cached
+ predicate defMightOverflowNegatively(RangeSsaDefinition def, StackVariable v) {
+ getDefLowerBoundsImpl(def, v) < varMinVal(v)
+ }
+
+ /** Holds if the definition might overflow positively. */
+ cached
+ predicate defMightOverflowPositively(RangeSsaDefinition def, StackVariable v) {
+ getDefUpperBoundsImpl(def, v) > varMaxVal(v)
+ }
+
+ /**
+ * Holds if the definition might overflow (either positively or
+ * negatively).
+ */
+ cached
+ predicate defMightOverflow(RangeSsaDefinition def, StackVariable v) {
+ defMightOverflowNegatively(def, v) or
+ defMightOverflowPositively(def, v)
+ }
+
+ /**
+ * Holds if `e` is an expression where the concept of overflow makes sense.
+ * This predicate is used to select which of the unanalyzable expressions
+ * `exprMightOverflowPositively` and `exprMightOverflowNegatively` report.
+ */
+ pragma[inline]
+ private predicate exprThatCanOverflow(Expr e) {
+ e instanceof UnaryArithmeticOperation or
+ e instanceof BinaryArithmeticOperation or
+ e instanceof AssignArithmeticOperation or
+ e instanceof LShiftExpr or
+ e instanceof AssignLShiftExpr
+ }
+
+ /**
+ * Holds if the expression might overflow negatively. This predicate
+ * does not consider the possibility that the expression might overflow
+ * due to a conversion.
+ */
+ cached
+ predicate exprMightOverflowNegatively(Expr expr) {
+ getLowerBoundsImpl(expr) < exprMinVal(expr)
+ or
+ // The lower bound of the expression `x--` is the same as the lower
+ // bound of `x`, so the standard logic (above) does not work for
+ // detecting whether it might overflow.
+ getLowerBoundsImpl(expr.(PostfixDecrExpr)) = exprMinVal(expr)
+ or
+ // We can't conclude that any unanalyzable expression might overflow. This
+ // is because there are many expressions that the range analysis doesn't
+ // handle, but where the concept of overflow doesn't make sense.
+ exprThatCanOverflow(expr) and not analyzableExpr(expr)
+ }
+
+ /**
+ * Holds if the expression might overflow negatively. Conversions
+ * are also taken into account. For example the expression
+ * `(int16)(x+y)` might overflow due to the `(int16)` cast, rather than
+ * due to the addition.
+ */
+ cached
+ predicate convertedExprMightOverflowNegatively(Expr expr) {
+ exprMightOverflowNegatively(expr) or
+ convertedExprMightOverflowNegatively(expr.getConversion())
+ }
+
+ /**
+ * Holds if the expression might overflow positively. This predicate
+ * does not consider the possibility that the expression might overflow
+ * due to a conversion.
+ */
+ cached
+ predicate exprMightOverflowPositively(Expr expr) {
+ getUpperBoundsImpl(expr) > exprMaxVal(expr)
+ or
+ // The upper bound of the expression `x++` is the same as the upper
+ // bound of `x`, so the standard logic (above) does not work for
+ // detecting whether it might overflow.
+ getUpperBoundsImpl(expr.(PostfixIncrExpr)) = exprMaxVal(expr)
+ or
+ // We can't conclude that any unanalyzable expression might overflow. This
+ // is because there are many expressions that the range analysis doesn't
+ // handle, but where the concept of overflow doesn't make sense.
+ exprThatCanOverflow(expr) and not analyzableExpr(expr)
+ }
+
+ /**
+ * Holds if the expression might overflow positively. Conversions
+ * are also taken into account. For example the expression
+ * `(int16)(x+y)` might overflow due to the `(int16)` cast, rather than
+ * due to the addition.
+ */
+ cached
+ predicate convertedExprMightOverflowPositively(Expr expr) {
+ exprMightOverflowPositively(expr) or
+ convertedExprMightOverflowPositively(expr.getConversion())
+ }
+
+ /**
+ * Holds if the expression might overflow (either positively or
+ * negatively). The possibility that the expression might overflow
+ * due to an implicit or explicit cast is also considered.
+ */
+ cached
+ predicate convertedExprMightOverflow(Expr expr) {
+ convertedExprMightOverflowNegatively(expr) or
+ convertedExprMightOverflowPositively(expr)
+ }
+}
+
+/**
+ * INTERNAL: do not use. This module contains utilities for use in the
+ * experimental `SimpleRangeAnalysisExpr` module.
+ */
+module SimpleRangeAnalysisInternal {
+ /**
+ * Gets the truncated lower bounds of the fully converted expression.
+ */
+ float getFullyConvertedLowerBounds(Expr expr) {
+ result = getTruncatedLowerBounds(expr.getFullyConverted())
+ }
+
+ /**
+ * Gets the truncated upper bounds of the fully converted expression.
+ */
+ float getFullyConvertedUpperBounds(Expr expr) {
+ result = getTruncatedUpperBounds(expr.getFullyConverted())
+ }
+
+ /**
+ * Get the lower bounds for a `RangeSsaDefinition`. Most of the work is
+ * done by `getDefLowerBoundsImpl`, but this is where widening is applied
+ * to prevent the analysis from exploding due to a recursive definition.
+ */
+ float getDefLowerBounds(RangeSsaDefinition def, StackVariable v) {
+ exists(float newLB, float truncatedLB |
+ newLB = getDefLowerBoundsImpl(def, v) and
+ if varMinVal(v) <= newLB and newLB <= varMaxVal(v)
+ then truncatedLB = newLB
+ else truncatedLB = varMinVal(v) // out of the variable's range: use its minimum
+ |
+ // Widening: check whether the new lower bound is from a source which
+ // depends recursively on the current definition.
+ if isRecursiveDef(def, v)
+ then
+ // The new lower bound is from a recursive source, so we round
+ // down to one of a limited set of values to prevent the
+ // recursion from exploding.
+ result =
+ max(float widenLB |
+ widenLB = wideningLowerBounds(getVariableRangeType(v)) and
+ not widenLB > truncatedLB
+ |
+ widenLB
+ )
+ else result = truncatedLB
+ )
+ or
+ // The definition might overflow positively and wrap. If so, the lower
+ // bound is `varMinVal(v)`.
+ defMightOverflowPositively(def, v) and result = varMinVal(v)
+ }
+
+ /** Gets the upper bounds for a `RangeSsaDefinition`. See comment for `getDefLowerBounds`, above. */
+ float getDefUpperBounds(RangeSsaDefinition def, StackVariable v) {
+ exists(float newUB, float truncatedUB |
+ newUB = getDefUpperBoundsImpl(def, v) and
+ if varMinVal(v) <= newUB and newUB <= varMaxVal(v)
+ then truncatedUB = newUB
+ else truncatedUB = varMaxVal(v) // out of the variable's range: use its maximum
+ |
+ // Widening: check whether the new upper bound is from a source which
+ // depends recursively on the current definition.
+ if isRecursiveDef(def, v)
+ then
+ // The new upper bound is from a recursive source, so we round
+ // up to one of a fixed set of values to prevent the recursion
+ // from exploding.
+ result =
+ min(float widenUB |
+ widenUB = wideningUpperBounds(getVariableRangeType(v)) and
+ not widenUB < truncatedUB
+ |
+ widenUB
+ )
+ else result = truncatedUB
+ )
+ or
+ // The definition might overflow negatively and wrap. If so, the upper
+ // bound is `varMaxVal(v)`.
+ defMightOverflowNegatively(def, v) and result = varMaxVal(v)
+ }
+}
+
+private import SimpleRangeAnalysisInternal
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/BufferAccess.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/BufferAccess.qll
new file mode 100644
index 00000000000..faeb859506d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/BufferAccess.qll
@@ -0,0 +1,326 @@
+import cpp
+
+/**
+ * Gets the size of the type that pointer type `t` points to, but at least 1 (so void types count as size 1).
+ */
+int getPointedSize(Type t) {
+  exists(PointerType ptr | ptr = t.getUnspecifiedType() | result = ptr.getBaseType().getSize().maximum(1))
+}
+
+/**
+ * An operation that reads data from or writes data to a buffer.
+ *
+ * See the `BufferWrite` class for an explanation of how `BufferAccess` and
+ * `BufferWrite` differ.
+ */
+abstract class BufferAccess extends Expr {
+ abstract string getName(); // name of the operation, e.g. the called function's name
+
+ /**
+ * Gets the expression that denotes the buffer, along with a textual label
+ * for it and an access type.
+ *
+ * accessType:
+ * - 1 = buffer range [0, getSize) is accessed entirely.
+ * - 2 = buffer range [0, getSize) may be accessed partially or entirely.
+ * - 3 = buffer is accessed at offset getSize - 1.
+ */
+ abstract Expr getBuffer(string bufferDesc, int accessType);
+
+ abstract int getSize(); // size of the access; see `accessType` above for how it is interpreted
+}
+
+/**
+ * A call to `memcpy` or one of its relatives, all taking `(dest, src, num)`:
+ *   memcpy, wmemcpy, memmove, wmemmove, mempcpy, wmempcpy,
+ *   RtlCopyMemoryNonTemporal
+ *
+ * Both buffers are reported with access type 1, i.e. the range
+ * `[0, getSize())` is accessed entirely. The size is the value of `num`
+ * (as given by `getValue()`) multiplied by the pointed-to size of the
+ * type of the target's first parameter.
+ */
+class MemcpyBA extends BufferAccess {
+  MemcpyBA() {
+    exists(string fname | fname = this.(FunctionCall).getTarget().getName() |
+      fname =
+        ["memcpy", "wmemcpy", "memmove", "wmemmove", "mempcpy", "wmempcpy",
+          "RtlCopyMemoryNonTemporal"]
+    )
+  }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 1 and
+    bufferDesc = "source buffer" and
+    result = this.(FunctionCall).getArgument(1)
+    or
+    accessType = 1 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `bcopy(src, dest, num)`. Note the argument order: the source
+ * comes first. Both buffers are accessed entirely (access type 1).
+ */
+class BCopyBA extends BufferAccess {
+  BCopyBA() { "bcopy" = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 1 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(1)
+    or
+    accessType = 1 and
+    bufferDesc = "source buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `strncpy(dest, src, num)`. Both buffers may be accessed
+ * partially or entirely (access type 2).
+ */
+class StrncpyBA extends BufferAccess {
+  StrncpyBA() { "strncpy" = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 2 and
+    bufferDesc = "source buffer" and
+    result = this.(FunctionCall).getArgument(1)
+    or
+    accessType = 2 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `memccpy(dest, src, c, n)`. Both buffers may be accessed
+ * partially or entirely (access type 2); the count is argument 3.
+ */
+class MemccpyBA extends BufferAccess {
+  MemccpyBA() { "memccpy" = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 2 and
+    bufferDesc = "source buffer" and
+    result = this.(FunctionCall).getArgument(1)
+    or
+    accessType = 2 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(3).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `memcmp` or a similar comparison function:
+ *   memcmp(buffer1, buffer2, num)
+ *   wmemcmp(buffer1, buffer2, num)
+ *   _memicmp(buffer1, buffer2, count)
+ *   _memicmp_l(buffer1, buffer2, count, locale)
+ */
+class MemcmpBA extends BufferAccess {
+  MemcmpBA() {
+    ["memcmp", "wmemcmp", "_memicmp", "_memicmp_l"] = this.(FunctionCall).getTarget().getName()
+  }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 2 and
+    bufferDesc = "second buffer" and
+    result = this.(FunctionCall).getArgument(1)
+    or
+    accessType = 2 and
+    bufferDesc = "first buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `swab(src, dest, num)` or `_swab(src, dest, num)`.
+ * The source comes first; both buffers are accessed entirely
+ * (access type 1).
+ */
+class SwabBA extends BufferAccess {
+  SwabBA() { ["swab", "_swab"] = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 1 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(1)
+    or
+    accessType = 1 and
+    bufferDesc = "source buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `memset(dest, value, num)` or `wmemset(dest, value, num)`.
+ * Only the destination buffer is reported, and it is accessed
+ * entirely (access type 1).
+ */
+class MemsetBA extends BufferAccess {
+  MemsetBA() { ["memset", "wmemset"] = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 1 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `RtlSecureZeroMemory(ptr, cnt)`. The size is the value of
+ * `cnt` as is, with no scaling by a pointed-to size.
+ */
+class ZeroMemoryBA extends BufferAccess {
+  ZeroMemoryBA() { "RtlSecureZeroMemory" = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 1 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() { this.(FunctionCall).getArgument(1).getValue().toInt() = result }
+}
+
+/**
+ * A call to `memchr(buffer, value, num)` or `wmemchr(buffer, value, num)`.
+ * The searched buffer is reported as a source buffer that may be
+ * accessed partially or entirely (access type 2).
+ */
+class MemchrBA extends BufferAccess {
+  MemchrBA() { ["memchr", "wmemchr"] = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 2 and
+    bufferDesc = "source buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      getPointedSize(this.(FunctionCall).getTarget().getParameter(0).getType()) *
+        this.(FunctionCall).getArgument(2).getValue().toInt()
+  }
+}
+
+/**
+ * A call to `fread(buffer, size, number, file)`. The reported size is
+ * the product of the values of `size` and `number`.
+ */
+class FreadBA extends BufferAccess {
+  FreadBA() { "fread" = this.(FunctionCall).getTarget().getName() }
+
+  override string getName() { this.(FunctionCall).getTarget().getName() = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 2 and
+    bufferDesc = "destination buffer" and
+    result = this.(FunctionCall).getArgument(0)
+  }
+
+  override int getSize() {
+    result =
+      this.(FunctionCall).getArgument(2).getValue().toInt() *
+        this.(FunctionCall).getArgument(1).getValue().toInt()
+  }
+}
+
+/**
+ * An array access on a buffer whose offset has a value convertible to int:
+ *   buffer[ix]
+ * but not one that is a child of an address-of expression:
+ *   &buffer[ix]
+ */
+class ArrayExprBA extends BufferAccess {
+  ArrayExprBA() {
+    // exclude accesses in macro implementation of `strcmp`,
+    // which are carefully controlled but can look dangerous.
+    not exists(Macro strcmpMacro |
+      strcmpMacro.getAnInvocation().getAnExpandedElement() = this and
+      strcmpMacro.getName() = "strcmp"
+    ) and
+    not exists(AddressOfExpr addrOf | addrOf.getAChild() = this) and
+    exists(this.(ArrayExpr).getArrayOffset().getValue().toInt())
+  }
+
+  override string getName() { "array indexing" = result }
+
+  override Expr getBuffer(string bufferDesc, int accessType) {
+    accessType = 3 and
+    bufferDesc = "array" and
+    result = this.(ArrayExpr).getArrayBase()
+  }
+
+  override int getSize() {
+    // byte size of the buffer that would be required to support this
+    // access: (offset + 1) elements of the accessed element's size
+    result =
+      this.(ArrayExpr).getType().getSize() *
+        (this.(ArrayExpr).getArrayOffset().getValue().toInt() + 1)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/BufferWrite.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/BufferWrite.qll
new file mode 100644
index 00000000000..e5d892eb4cd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/BufferWrite.qll
@@ -0,0 +1,479 @@
+/**
+ * @name CWE-120
+ * @description Buffer Copy without Checking Size of Input ('Classic Buffer Overflow').
+ * @kind problem
+ * @problem.severity recommendation
+ */
+
+import cpp
+import semmle.code.cpp.commons.Alloc
+import semmle.code.cpp.commons.Buffer
+import semmle.code.cpp.commons.Scanf
+import semmle.code.cpp.models.implementations.Strcat
+import semmle.code.cpp.models.implementations.Strcpy
+
+/*
+ * --- BufferWrite framework ---
+ */
+
+/**
+ * An operation that writes a variable amount of data to a buffer
+ * (strcpy, strncat, sprintf etc).
+ *
+ * Note that there are two related class frameworks:
+ *  - BufferWrite provides detailed coverage of null-terminated
+ *    buffer write operations.
+ *  - BufferAccess provides general coverage of buffer read and write
+ *    operations whose size is either not data-dependent, or has an upper
+ *    bound which is not data-dependent.
+ * The two frameworks overlap in places, for example both cover
+ * the write performed by a 'strncpy'.
+ */
+abstract class BufferWrite extends Expr {
+  /*
+   * --- derived classes override these ---
+   */
+
+  /**
+   * Gets the (unspecified) type of the buffer this operation works
+   * with (for example `char *`).
+   */
+  abstract Type getBufferType();
+
+  /**
+   * Gets a data source of this operation (e.g. the source string or
+   * format string; the data is not necessarily copied as-is).
+   */
+  Expr getASource() { none() }
+
+  /**
+   * Gets the destination buffer of this operation.
+   */
+  abstract Expr getDest();
+
+  /**
+   * Holds if the operation has an explicit parameter that limits the amount
+   * of data written (e.g. `strncpy` does, whereas `strcpy` does not). This
+   * is not the same as `exists(getExplicitLimit())`, because the limit may
+   * exist even though its value is unknown.
+   */
+  predicate hasExplicitLimit() { none() }
+
+  /**
+   * Gets the explicit limit of bytes copied by this operation, if it exists
+   * and its value can be determined.
+   */
+  int getExplicitLimit() { none() }
+
+  /**
+   * Gets an upper bound to the amount of data that's being written (if one
+   * can be found).
+   */
+  int getMaxData() { none() }
+
+  /**
+   * Gets an upper bound to the amount of data that's being written (if one
+   * can be found), except that float to string conversions are assumed to be
+   * much smaller (8 bytes) than their true maximum length. This can be
+   * helpful in determining the cause of a buffer overflow issue.
+   */
+  int getMaxDataLimited() { result = this.getMaxData() }
+
+  /**
+   * Gets the size, in bytes, of a single character of the type this
+   * operation works with (the base type of the buffer type).
+   */
+  int getCharSize() {
+    result = this.getBufferType().(ArrayType).getBaseType().getSize() or
+    result = this.getBufferType().(PointerType).getBaseType().getSize()
+  }
+
+  /**
+   * Gets a description of this buffer write.
+   */
+  string getBWDesc() { result = this.toString() }
+}
+
+/**
+ * A `BufferWrite` that is also a `FunctionCall` (this covers most buffer writes).
+ */
+abstract class BufferWriteCall extends BufferWrite, FunctionCall { }
+
+/*
+ * --- BufferWrite classes ---
+ */
+
+/**
+ * A call to a variant of `strcpy`, as modeled by `StrcpyFunction`.
+ */
+class StrCopyBW extends BufferWriteCall {
+  StrcpyFunction f;
+
+  StrCopyBW() { f = this.getTarget() and f instanceof TopLevelFunction }
+
+  /**
+   * Gets the index of the parameter that bounds the size of the copy (in characters).
+   */
+  int getParamSize() { result = f.getParamSize() }
+
+  /**
+   * Gets the index of the parameter that the copy reads from.
+   */
+  int getParamSrc() { result = f.getParamSrc() }
+
+  override Type getBufferType() {
+    result = this.getTarget().getParameter(this.getParamSrc()).getUnspecifiedType()
+  }
+
+  override Expr getASource() { result = this.getArgument(this.getParamSrc()) }
+
+  override Expr getDest() { result = this.getArgument(f.getParamDest()) }
+
+  override predicate hasExplicitLimit() { exists(this.getParamSize()) }
+
+  override int getExplicitLimit() {
+    result = this.getCharSize() * this.getArgument(this.getParamSize()).getValue().toInt()
+  }
+
+  override int getMaxData() {
+    result = this.getCharSize() * this.getArgument(this.getParamSrc()).(AnalysedString).getMaxLength()
+  }
+}
+
+/**
+ * A call to a variant of `strcat`, as modeled by `StrcatFunction`.
+ */
+class StrCatBW extends BufferWriteCall {
+  StrcatFunction f;
+
+  StrCatBW() { f = this.getTarget() and f instanceof TopLevelFunction }
+
+  /**
+   * Gets the index of the parameter that bounds the size of the copy (in characters).
+   */
+  int getParamSize() { result = f.getParamSize() }
+
+  /**
+   * Gets the index of the parameter that the copy reads from.
+   */
+  int getParamSrc() { result = f.getParamSrc() }
+
+  override Type getBufferType() {
+    result = this.getTarget().getParameter(this.getParamSrc()).getUnspecifiedType()
+  }
+
+  override Expr getASource() { result = this.getArgument(this.getParamSrc()) }
+
+  override Expr getDest() { result = this.getArgument(f.getParamDest()) }
+
+  override predicate hasExplicitLimit() { exists(this.getParamSize()) }
+
+  override int getExplicitLimit() {
+    result = this.getCharSize() * this.getArgument(this.getParamSize()).getValue().toInt()
+  }
+
+  override int getMaxData() {
+    result = this.getCharSize() * this.getArgument(this.getParamSrc()).(AnalysedString).getMaxLength()
+  }
+}
+
+/**
+ * A call to a variant of `sprintf`, that is, a formatting function that
+ * writes to a destination buffer without an explicit size limit.
+ */
+class SprintfBW extends BufferWriteCall {
+  FormattingFunction f;
+
+  SprintfBW() {
+    exists(string name | f = this.getTarget().(TopLevelFunction) and name = f.getName() |
+      name =
+        [
+          // C sprintf variants:
+          "sprintf", // sprintf(dst, format, args...)
+          "vsprintf", // vsprintf(dst, format, va_list)
+          "wsprintf", // wsprintf(dst, format, args...)
+          "vwsprintf", // vwsprintf(dst, format, va_list)
+          // Microsoft sprintf variants:
+          "_sprintf_l", // _sprintf_l(dst, format, locale, args...)
+          "_vsprintf_l", // _vsprintf_l(dst, format, locale, va_list)
+          "__swprintf_l", // __swprintf_l(dst, format, locale, args...)
+          "__vswprintf_l" // __vswprintf_l(dst, format, locale, va_list)
+        ]
+    )
+  }
+
+  /**
+   * Gets the unspecified type of the target's format parameter.
+   */
+  override Type getBufferType() {
+    result = f.getParameter(f.getFormatParameterIndex()).getUnspecifiedType()
+  }
+
+  /**
+   * Gets the format string or one of the format arguments of this call.
+   */
+  override Expr getASource() {
+    result = this.(FormattingFunctionCall).getFormatArgument(_)
+    or
+    result = this.(FormattingFunctionCall).getFormat()
+  }
+
+  /**
+   * Gets the argument at the target's output parameter index.
+   */
+  override Expr getDest() { result = this.getArgument(f.getOutputParameterIndex(false)) }
+
+  /**
+   * Gets the maximum converted length of the format literal, scaled by
+   * the character size. Has no result if the format is not a `FormatLiteral`.
+   */
+  override int getMaxData() {
+    exists(FormatLiteral format |
+      format = this.(FormattingFunctionCall).getFormat() and
+      result = this.getCharSize() * format.getMaxConvertedLength()
+    )
+  }
+
+  /**
+   * As `getMaxData()`, but based on `getMaxConvertedLengthLimited()` of the
+   * format literal.
+   */
+  override int getMaxDataLimited() {
+    exists(FormatLiteral format |
+      format = this.(FormattingFunctionCall).getFormat() and
+      result = this.getCharSize() * format.getMaxConvertedLengthLimited()
+    )
+  }
+}
+
+/**
+ * A call to a variant of `snprintf`.
+ */
+class SnprintfBW extends BufferWriteCall {
+  SnprintfBW() {
+    exists(TopLevelFunction fn, string name | fn = this.getTarget() and name = fn.getName() |
+      name =
+        [
+          // C snprintf variants:
+          "snprintf", // snprintf(dst, max_amount, format, args...)
+          "vsnprintf", // vsnprintf(dst, max_amount, format, va_list)
+          "swprintf", // swprintf(dst, max_amount, format, args...)
+          "vswprintf", // vswprintf(dst, max_amount, format, va_list)
+          // Microsoft snprintf variants:
+          "sprintf_s", // sprintf_s(dst, max_amount, format, args...)
+          "vsprintf_s", // vsprintf_s(dst, max_amount, format, va_list)
+          "swprintf_s", // swprintf_s(dst, max_amount, format, args...)
+          "vswprintf_s" // vswprintf_s(dst, max_amount, format, va_list)
+        ]
+      or
+      // Microsoft snprintf variants with '_':
+      // _sprintf_s_l(dst, max_amount, format, locale, args...)
+      // _swprintf_l(dst, max_amount, format, locale, args...)
+      // _swprintf_s_l(dst, max_amount, format, locale, args...)
+      // _snprintf(dst, max_amount, format, args...)
+      // _snprintf_l(dst, max_amount, format, locale, args...)
+      // _snwprintf(dst, max_amount, format, args...)
+      // _snwprintf_l(buffer, max_amount, format, locale, args...)
+      // _vsprintf_s_l(dst, max_amount, format, locale, va_list)
+      // _vsprintf_p(dst, max_amount, format, va_list)
+      // _vsprintf_p_l(dst, max_amount, format, locale, va_list)
+      // _vswprintf_l(dst, max_amount, format, locale, va_list)
+      // _vswprintf_s_l(buffer, max_amount, format, locale, va_list)
+      // _vswprintf_p(dst, max_amount, format, va_list)
+      // _vswprintf_p_l(dst, max_amount, format, locale, va_list)
+      // _vsnprintf(dst, max_amount, format, va_list)
+      // _vsnprintf_l(dst, max_amount, format, locale, va_list)
+      // _vsnwprintf(dst, max_amount, format, va_list)
+      // _vsnwprintf_l(dst, max_amount, format, locale, va_list)
+      name.regexpMatch("_v?sn?w?printf(_s)?(_p)?(_l)?") and
+      not this instanceof SprintfBW
+    )
+  }
+
+  /**
+   * Gets the index of the parameter that is the size of the destination (in characters).
+   */
+  int getParamSize() { result = 1 }
+
+  /**
+   * Gets the unspecified type of the target's format parameter.
+   */
+  override Type getBufferType() {
+    exists(FormattingFunction target |
+      result = target.getParameter(target.getFormatParameterIndex()).getUnspecifiedType() and
+      target = this.getTarget()
+    )
+  }
+
+  /**
+   * Gets the format string or one of the format arguments of this call.
+   */
+  override Expr getASource() {
+    result = this.(FormattingFunctionCall).getFormatArgument(_)
+    or
+    result = this.(FormattingFunctionCall).getFormat()
+  }
+
+  /**
+   * Gets the first argument, which every variant uses as the destination.
+   */
+  override Expr getDest() { result = this.getArgument(0) }
+
+  override predicate hasExplicitLimit() { exists(this.getParamSize()) }
+
+  /**
+   * Gets the constant value of the size argument, converted to bytes.
+   */
+  override int getExplicitLimit() {
+    result = this.getCharSize() * this.getArgument(this.getParamSize()).getValue().toInt()
+  }
+
+  /**
+   * Gets the maximum converted length of the format literal, in bytes.
+   */
+  override int getMaxData() {
+    exists(FormatLiteral format |
+      format = this.(FormattingFunctionCall).getFormat() and
+      result = this.getCharSize() * format.getMaxConvertedLength()
+    )
+  }
+
+  /**
+   * As `getMaxData()`, but based on `getMaxConvertedLengthLimited()`.
+   */
+  override int getMaxDataLimited() {
+    exists(FormatLiteral format |
+      format = this.(FormattingFunctionCall).getFormat() and
+      result = this.getCharSize() * format.getMaxConvertedLengthLimited()
+    )
+  }
+}
+
+/**
+ * A call to `gets`, `fgets` or `fgetws`.
+ */
+class GetsBW extends BufferWriteCall {
+  GetsBW() {
+    this.getTarget().(TopLevelFunction).getName() =
+      [
+        "gets", // gets(dst)
+        "fgets", // fgets(dst, max_amount, src_stream)
+        "fgetws" // fgetws(dst, max_amount, src_stream)
+      ]
+  }
+
+  /**
+   * Gets the index of the parameter that is the maximum number of characters to be read.
+   */
+  int getParamSize() { result = 1 and exists(this.getArgument(result)) }
+
+  override Type getBufferType() { result = this.getTarget().getParameter(0).getUnspecifiedType() }
+
+  override Expr getASource() {
+    // `fgets`-style calls read from the stream passed as the third argument
+    result = this.getArgument(2)
+    or
+    // the source is input inside the 'gets' call itself
+    not exists(this.getArgument(2)) and result = this
+  }
+
+  override Expr getDest() { result = this.getArgument(0) }
+
+  override predicate hasExplicitLimit() { exists(this.getParamSize()) }
+
+  override int getExplicitLimit() {
+    result = this.getCharSize() * this.getArgument(this.getParamSize()).getValue().toInt()
+  }
+}
+
+/**
+ * A string that is written by a `scanf`-like function.
+ */
+class ScanfBW extends BufferWrite {
+  ScanfBW() {
+    exists(ScanfFunctionCall call, ScanfFormatLiteral format, int argIndex, int firstVararg |
+      format = call.getFormat() and
+      firstVararg = call.getTarget().getNumberOfParameters() and
+      this = call.getArgument(argIndex) and
+      argIndex >= firstVararg and
+      format.getConversionChar(argIndex - firstVararg) = "s"
+    )
+  }
+
+  /**
+   * Gets the index of the parameter that is the first format argument.
+   */
+  int getParamArgs() {
+    exists(FunctionCall call |
+      result = call.getTarget().getNumberOfParameters() and
+      this = call.getArgument(_)
+    )
+  }
+
+  override Type getBufferType() {
+    exists(ScanfFunction target, ScanfFunctionCall call |
+      target = call.getTarget() and
+      this = call.getArgument(_) and
+      result = target.getParameter(target.getFormatParameterIndex()).getUnspecifiedType()
+    )
+  }
+
+  override Expr getASource() {
+    exists(ScanfFunctionCall call | this = call.getArgument(_) |
+      // the format string is always an input
+      result = call.getFormat()
+      or
+      // so is the explicit input argument, if the call has one
+      result = call.getArgument(call.getInputParameterIndex())
+      or
+      // if there's no explicit input, the argument itself is the input
+      not exists(call.getInputParameterIndex()) and
+      result = this
+    )
+  }
+
+  override Expr getDest() { result = this }
+
+  override int getMaxData() {
+    exists(ScanfFunctionCall call, ScanfFormatLiteral format, int argIndex |
+      format = call.getFormat() and
+      this = call.getArgument(argIndex) and
+      result = (format.getMaxConvertedLength(argIndex - this.getParamArgs()) + 1) * this.getCharSize() // +1 is for the terminating null
+    )
+  }
+
+  override string getBWDesc() {
+    exists(FunctionCall call |
+      result = call.getTarget().getName() + " string argument" and
+      this = call.getArgument(_)
+    )
+  }
+}
+
+/**
+ * Gets the largest integer value among the detected definitions of `PATH_MAX`.
+ */
+private int path_max() {
+  result = max(int size | size = any(Macro m | m.getName() = "PATH_MAX").getBody().toInt())
+}
+
+/**
+ * A call to `realpath`, which may write up to `PATH_MAX` bytes.
+ */
+class RealpathBW extends BufferWriteCall {
+  RealpathBW() {
+    this.getTarget().hasGlobalName("realpath") and // realpath(path, resolved_path);
+    exists(path_max()) // Ignore realpath() calls if PATH_MAX cannot be determined
+  }
+
+  override Type getBufferType() { result = this.getTarget().getParameter(0).getUnspecifiedType() }
+
+  override Expr getDest() { result = this.getArgument(1) }
+
+  override Expr getASource() { result = this.getArgument(0) }
+
+  override int getMaxData() {
+    this = this and // Suppress a compiler warning
+    result = path_max()
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/CommandExecution.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/CommandExecution.qll
new file mode 100644
index 00000000000..f8f7c6c476f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/CommandExecution.qll
@@ -0,0 +1,198 @@
+/** Provides definitions related to execution of commands */
+
+import cpp
+import semmle.code.cpp.security.FunctionWithWrappers
+import semmle.code.cpp.models.interfaces.SideEffect
+import semmle.code.cpp.models.interfaces.Alias
+import semmle.code.cpp.models.interfaces.CommandExecution
+
+/**
+ * A function that runs a command using a command interpreter.
+ */
+class SystemFunction extends FunctionWithWrappers instanceof CommandExecutionFunction {
+  override predicate interestingArg(int arg) {
+    exists(FunctionInput commandInput |
+      this.(CommandExecutionFunction).hasCommandArgument(commandInput)
+    |
+      commandInput.isParameterOrQualifierAddress(arg)
+      or
+      commandInput.isParameterDerefOrQualifierObject(arg)
+    )
+  }
+}
+
+/**
+ * A call to a function for running a command via varargs. Note that, at the
+ * time of writing, FunctionWithWrappers doesn't really support varargs
+ * arguments, because it requires a finite version of interestingArg().
+ */
+class VarargsExecFunctionCall extends FunctionCall {
+  VarargsExecFunctionCall() {
+    getTarget().hasGlobalName("execl") or
+    getTarget().hasGlobalName("execle") or
+    getTarget().hasGlobalName("execlp") or
+    // Windows
+    getTarget().hasGlobalName("_execl") or
+    getTarget().hasGlobalName("_execle") or
+    getTarget().hasGlobalName("_execlp") or
+    getTarget().hasGlobalName("_execlpe") or
+    getTarget().hasGlobalName("_spawnl") or
+    getTarget().hasGlobalName("_spawnle") or
+    getTarget().hasGlobalName("_spawnlp") or
+    getTarget().hasGlobalName("_spawnlpe") or
+    getTarget().hasGlobalName("_wexecl") or
+    getTarget().hasGlobalName("_wexecle") or
+    getTarget().hasGlobalName("_wexeclp") or
+    getTarget().hasGlobalName("_wexeclpe") or
+    getTarget().hasGlobalName("_wspawnl") or
+    getTarget().hasGlobalName("_wspawnle") or
+    getTarget().hasGlobalName("_wspawnlp") or
+    getTarget().hasGlobalName("_wspawnlpe")
+  }
+
+  /** Whether the last argument to the function is an environment pointer */
+  predicate hasEnvironmentArgument() {
+    // every `e`-suffixed variant takes `envp` as its final argument, including
+    // the `_spawn` family: `_spawnle(mode, cmdname, arg0, ..., NULL, envp)`
+    getTarget()
+        .hasGlobalName(["execle", "_execle", "_execlpe", "_wexecle", "_wexeclpe", "_spawnle",
+            "_spawnlpe", "_wspawnle", "_wspawnlpe"])
+  }
+
+  /**
+   * The arguments passed to the command. The 0th such argument is conventionally
+   * the name of the command. A trailing environment pointer is not included.
+   */
+  Expr getCommandArgument(int idx) {
+    exists(int underlyingIdx |
+      result = getArgument(underlyingIdx) and
+      underlyingIdx > getCommandIdx() and
+      (
+        underlyingIdx < getNumberOfArguments() - 1 or
+        not hasEnvironmentArgument()
+      ) and
+      idx = underlyingIdx - getCommandIdx() - 1
+    )
+  }
+
+  /** The expression denoting the program to execute */
+  Expr getCommand() { result = getArgument(getCommandIdx()) }
+
+  /**
+   * The index of the command. The spawn variants start with a mode, whereas
+   * all the other ones start with the command.
+   */
+  private int getCommandIdx() {
+    if
+      getTarget().getName().matches("\\_spawn%") or
+      getTarget().getName().matches("\\_wspawn%")
+    then result = 1
+    else result = 0
+  }
+}
+
+/**
+ * A call to a function for running a command using an array of arguments.
+ * Note that FunctionWithWrappers does not support tracking multiple
+ * interesting arguments all the way to the call site.
+ */
+class ArrayExecFunctionCall extends FunctionCall {
+  ArrayExecFunctionCall() {
+    exists(string name | this.getTarget().hasGlobalName(name) |
+      name = ["execv", "execvp", "execvpe", "execve", "fexecve"]
+      or
+      // Windows variants
+      name =
+        [
+          "_execv", "_execve",
+          "_execvp", "_execvpe",
+          "_spawnv", "_spawnve",
+          "_spawnvp", "_spawnvpe",
+          "_wexecv", "_wexecve",
+          "_wexecvp", "_wexecvpe",
+          "_wspawnv", "_wspawnve",
+          "_wspawnvp", "_wspawnvpe"
+        ]
+    )
+  }
+
+  /**
+   * The argument with the array of command arguments, which is the
+   * argument directly after the command.
+   */
+  Expr getArrayArgument() { result = this.getArgument(this.getCommandIdx() + 1) }
+
+  /**
+   * The expression denoting the program to execute, which is the
+   * argument at the index of the command.
+   */
+  Expr getCommand() { result = this.getArgument(this.getCommandIdx()) }
+
+  /**
+   * The index of the command. The spawn variants start with a mode, whereas
+   * all the other ones start with the command.
+   */
+  private int getCommandIdx() {
+    if
+      this.getTarget().getName().matches("\\_wspawn%") or
+      this.getTarget().getName().matches("\\_spawn%")
+    then result = 1
+    else result = 0
+  }
+}
+
+/**
+ * The name of a shell and the flag used to preface a command that should be parsed. Public
+ * for testing purposes.
+ */
+predicate shellCommandPreface(string cmd, string flag) {
+  // Bourne-style shells take the command after `-c`
+  flag = "-c" and
+  cmd = ["sh", "/bin/sh", "bash", "/bin/bash"]
+  or
+  // the Windows command interpreter takes it after `/c` or `/C`; the
+  // `%WINDIR%` spelling is used in Juliet tests
+  flag = ["/c", "/C"] and
+  cmd = ["cmd", "cmd.exe", "CMD", "CMD.EXE", "%WINDIR%\\system32\\cmd.exe"]
+}
+
+/**
+ * Holds if `command` is used as a command, or component of a command,
+ * that will be executed by a general-purpose command interpreter
+ * such as sh or cmd.exe.
+ */
+predicate shellCommand(Expr command, string callChain) {
+  // A call to a function like system()
+  any(SystemFunction systemFunction).outermostWrapperFunctionCall(command, callChain)
+  or
+  // A call to a function like execl(), passing "sh", then "-c", and then a command.
+  exists(
+    VarargsExecFunctionCall call, StringLiteral interpreter, StringLiteral flag,
+    int commandIdx
+  |
+    interpreter = call.getCommand() and
+    flag = call.getCommandArgument(1) and
+    shellCommandPreface(interpreter.getValue(), flag.getValue()) and
+    // the command is any argument after the flag
+    commandIdx > 1 and
+    command = call.getCommandArgument(commandIdx) and
+    callChain = call.getTarget().getName()
+  )
+  or
+  // A call to a function like execv(), where the array being passed is
+  // initialized to an array literal
+  exists(
+    ArrayExecFunctionCall call, StringLiteral interpreter, Variable arrayVariable,
+    AggregateLiteral arrayInitializer, StringLiteral flag, int idx
+  |
+    interpreter = call.getCommand() and
+    call.getArrayArgument() = arrayVariable.getAnAccess() and
+    arrayInitializer = arrayVariable.getInitializer().getExpr() and
+    flag = arrayInitializer.getChild(1) and
+    shellCommandPreface(interpreter.getValue(), flag.getValue()) and
+    // the command is any array element after the flag
+    idx > 1 and
+    command = arrayInitializer.getChild(idx) and
+    callChain = call.getTarget().getName()
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Encryption.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Encryption.qll
new file mode 100644
index 00000000000..55ef606483c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Encryption.qll
@@ -0,0 +1,114 @@
+/**
+ * Provides predicates relating to encryption in C and C++.
+ */
+
+import cpp
+
+/**
+ * Gets the name, in upper case, of an algorithm that is known to be insecure.
+ */
+string getAnInsecureAlgorithmName() {
+ result =
+ [
+ "DES", "RC2", "RC4", "RC5", "ARCFOUR", // ARCFOUR is a variant of RC4
+ "3DES", "DES3" // triple DES; it also appears separated, e.g. "TRIPLE-DES", which will be matched as "DES".
+ ]
+}
+
+/**
+ * Gets the name of an algorithm that is known to be secure. NOTE(review): "Blowfish" is the only entry that is not upper case - confirm this is intended.
+ */
+string getASecureAlgorithmName() {
+ result = ["RSA", "SHA256", "CCM", "GCM", "AES", "Blowfish", "ECIES"]
+}
+
+/**
+ * Gets the name of a hash algorithm that is insecure if it is being used for
+ * encryption (although it is hard to tell when that is the case).
+ */
+string getAnInsecureHashAlgorithmName() { result = ["SHA1", "MD5"] }
+
+/**
+ * Gets the regular expression used for matching strings that look like they
+ * contain an algorithm that is known to be insecure.
+ *
+ * Consider using `isInsecureEncryption` rather than accessing this regular
+ * expression directly.
+ */
+string getInsecureAlgorithmRegex() {
+  exists(string upperNames, string lowerNames |
+    upperNames = strictconcat(getAnInsecureAlgorithmName(), "|") and
+    lowerNames = strictconcat(getAnInsecureAlgorithmName().toLowerCase(), "|")
+  |
+    result =
+      // upper case: the name must not be adjacent to another upper case letter or digit
+      "(^|.*[^A-Z0-9])(" + upperNames + ")([^A-Z0-9].*|$)" + "|" +
+        // lower case: to avoid being confused by camelCase, require a preceding
+        // non-alphanumeric character or two preceding upper case letters
+        "(^|.*[A-Z]{2}|.*[^a-zA-Z0-9])(" + lowerNames + ")([^a-z0-9].*|$)"
+  )
+}
+
+/**
+ * Holds if `name` looks like it might be related to operations with an
+ * insecure encryption algorithm, that is, it matches `getInsecureAlgorithmRegex()`.
+ */
+bindingset[name]
+predicate isInsecureEncryption(string name) { name.regexpMatch(getInsecureAlgorithmRegex()) }
+
+/**
+ * Holds if there is additional evidence that `name` looks like it might be
+ * related to operations with an encryption algorithm, besides the name of a
+ * specific algorithm. This can be used in conjunction with
+ * `isInsecureEncryption` to produce a stronger heuristic.
+ */
+bindingset[name]
+predicate isEncryptionAdditionalEvidence(string name) {
+ name.toUpperCase().matches("%" + ["CRYPT", "CODE", "CODING", "CBC", "KEY", "CIPHER", "MAC"] + "%")
+}
+
+/**
+ * Gets a regular expression for matching strings that look like they
+ * contain an algorithm that is known to be secure.
+ */
+string getSecureAlgorithmRegex() {
+  exists(string listedNames, string lowerNames |
+    listedNames = strictconcat(getASecureAlgorithmName(), "|") and
+    lowerNames = strictconcat(getASecureAlgorithmName().toLowerCase(), "|")
+  |
+    result =
+      // as listed: the name must not be adjacent to an upper case letter or digit
+      "(^|.*[^A-Z0-9])(" + listedNames + ")([^A-Z0-9].*|$)" + "|" +
+        // lower case: to avoid being confused by camelCase, require a preceding
+        // non-alphanumeric character or two preceding upper case letters
+        "(^|.*[A-Z]{2}|.*[^a-zA-Z0-9])(" + lowerNames + ")([^a-z0-9].*|$)"
+  )
+}
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getAnInsecureAlgorithmName()`
+ * instead; this predicate simply forwards to it.
+ */
+deprecated string algorithmBlacklist() { result = getAnInsecureAlgorithmName() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use
+ * `getAnInsecureHashAlgorithmName()` instead; this predicate simply forwards to it.
+ */
+deprecated string hashAlgorithmBlacklist() { result = getAnInsecureHashAlgorithmName() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getInsecureAlgorithmRegex()` instead; this predicate simply forwards to it.
+ */
+deprecated string algorithmBlacklistRegex() { result = getInsecureAlgorithmRegex() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getASecureAlgorithmName()`
+ * instead; this predicate simply forwards to it.
+ */
+deprecated string algorithmWhitelist() { result = getASecureAlgorithmName() }
+
+/**
+ * DEPRECATED: Terminology has been updated. Use `getSecureAlgorithmRegex()` instead; this predicate simply forwards to it.
+ */
+deprecated string algorithmWhitelistRegex() { result = getSecureAlgorithmRegex() }
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FileWrite.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FileWrite.qll
new file mode 100644
index 00000000000..7c3d893b471
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FileWrite.qll
@@ -0,0 +1,178 @@
+/**
+ * Provides classes for modeling writing of data to files through various standard mechanisms such as `fprintf`, `fwrite` and `operator<<`.
+ */
+
+import cpp
+
+/**
+ * An expression, such as a function call, that writes to a file.
+ */
+class FileWrite extends Expr {
+ FileWrite() { fileWrite(this, _, _) }
+
+ /**
+ * Gets a source expression of this write, that is, an expression whose value is written.
+ */
+ Expr getASource() { fileWrite(this, result, _) }
+
+ /**
+ * Gets the expression for the object being written to.
+ */
+ Expr getDest() { fileWrite(this, _, result) }
+
+ /**
+ * Gets the conversion character for the write of `source`, if it exists and is known. For example in the following code the write of `value1` has conversion character `"s"`, whereas the write of `value2` has no conversion specifier.
+ * ```
+ * fprintf(file, "%s", value1);
+ * stream << value2;
+ * ```
+ */
+ string getSourceConvChar(Expr source) { fileWriteWithConvChar(this, source, result) }
+}
+
+/**
+ * A `std::basic_ostream` class, or a type that can be used as one: a type
+ * derived from, referring to, or resolving to such a class.
+ */
+class BasicOStreamClass extends Type {
+  BasicOStreamClass() {
+    exists(Class c | c = this | c.getName().matches("basic\\_ostream%"))
+    or
+    this.(Class).getABaseClass() instanceof BasicOStreamClass
+    or
+    this.(ReferenceType).getBaseType() instanceof BasicOStreamClass
+    or
+    this.getUnspecifiedType() instanceof BasicOStreamClass
+  }
+}
+
+/**
+ * A call to a member function whose qualifier is a `BasicOStreamClass`
+ * object, or a call to a non-member function whose first argument is one.
+ */
+class BasicOStreamCall extends FunctionCall {
+  BasicOStreamCall() {
+    if this.getTarget() instanceof MemberFunction
+    then this.getQualifier().getType() instanceof BasicOStreamClass
+    else this.getArgument(0).getType() instanceof BasicOStreamClass
+  }
+}
+
+/**
+ * Output by a function that can be chained, such as `operator<<`.
+ */
+abstract class ChainedOutputCall extends BasicOStreamCall {
+  /**
+   * Gets the source expression of this output.
+   */
+  abstract Expr getSource();
+
+  /**
+   * Gets the immediate destination expression of this output.
+   */
+  abstract Expr getDest();
+
+  /**
+   * Gets the destination at the far left-hand end of the output chain.
+   */
+  Expr getEndDest() {
+    // stop at the first destination that is not itself a chained output
+    result = this.getDest() and
+    not result instanceof ChainedOutputCall
+    or
+    // otherwise continue with the destination's own destination
+    result = this.getDest().(ChainedOutputCall).getEndDest()
+  }
+}
+
+/**
+ * A call to `operator<<` on an output stream.
+ */
+class OperatorLShiftCall extends ChainedOutputCall {
+  OperatorLShiftCall() { this.getTarget().(Operator).hasName("operator<<") }
+
+  override Expr getSource() {
+    this.getTarget() instanceof MemberFunction and result = this.getArgument(0)
+    or
+    not this.getTarget() instanceof MemberFunction and result = this.getArgument(1)
+  }
+
+  override Expr getDest() {
+    this.getTarget() instanceof MemberFunction and result = this.getQualifier()
+    or
+    not this.getTarget() instanceof MemberFunction and result = this.getArgument(0)
+  }
+}
+
+/**
+ * A call to a member function named 'put' on an output stream.
+ */
+class PutFunctionCall extends ChainedOutputCall {
+  PutFunctionCall() { exists(MemberFunction put | put = this.getTarget() | put.hasName("put")) }
+
+  override Expr getSource() { result = this.getArgument(0) }
+
+  override Expr getDest() { result = this.getQualifier() }
+}
+
+/**
+ * A call to a member function named 'write' on an output stream.
+ */
+class WriteFunctionCall extends ChainedOutputCall {
+  WriteFunctionCall() { exists(MemberFunction w | w = this.getTarget() | w.hasName("write")) }
+
+  override Expr getSource() { result = this.getArgument(0) }
+
+  override Expr getDest() { result = this.getQualifier() }
+}
+
+/**
+ * Holds if `out` is a call to `operator<<` or a similar function that outputs `source`, and whose output chain eventually starts at the file stream `dest`.
+ */
+private predicate fileStreamChain(ChainedOutputCall out, Expr source, Expr dest) {
+  dest = out.getEndDest() and
+  dest.getUnderlyingType().(Class).getSimpleName() = ["basic_ofstream", "basic_fstream"] and
+  source = out.getSource()
+}
+
+/**
+ * Holds if the call `write` is a write to file 'dest' from 'source'.
+ */
+private predicate fileWrite(Call write, Expr source, Expr dest) {
+  // file stream using '<<', 'put' or 'write'
+  fileStreamChain(write, source, dest)
+  or
+  exists(Function target, int srcIdx, int destIdx |
+    target = write.getTarget() and source = write.getArgument(srcIdx) and dest = write.getArgument(destIdx)
+  |
+    // fprintf
+    srcIdx >= target.(FormattingFunction).getFormatParameterIndex() and
+    destIdx = target.(FormattingFunction).getOutputParameterIndex(true)
+    or
+    // named functions
+    srcIdx = 0 and
+    exists(string name | target.hasGlobalOrStdName(name) |
+      name = "fwrite" and destIdx = 3
+      or
+      name = ["fputs", "fputws", "fputc", "fputwc", "putc", "putwc", "putw"] and
+      destIdx = 1
+    )
+  )
+}
+
+/**
+ * Holds if the function call `ffc` is a write to a file from 'source' with
+ * conversion character 'conv'. Does not hold if there isn't a conversion
+ * character, or if it is unknown (for example the format string is not a
+ * constant).
+ */
+private predicate fileWriteWithConvChar(FormattingFunctionCall ffc, Expr source, string conv) {
+  // fprintf
+  exists(FormattingFunction target, int argIdx |
+    target = ffc.getTarget() and
+    exists(target.getOutputParameterIndex(true))
+  |
+    source = ffc.getFormatArgument(argIdx) and
+    conv = ffc.getFormat().(FormatLiteral).getConversionChar(argIdx)
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FlowSources.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FlowSources.qll
new file mode 100644
index 00000000000..b080651951f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FlowSources.qll
@@ -0,0 +1,128 @@
+/**
+ * Provides classes representing various flow sources for taint tracking.
+ */
+
+import cpp
+import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.IR
+import semmle.code.cpp.models.interfaces.FlowSource
+
+/** A data flow node that is a source of user input, whether local or remote. */
+abstract class FlowSource extends DataFlow::Node {
+ /** Gets a string that describes the type of this flow source. */
+ abstract string getSourceType();
+}
+
+/** A data flow source of remote user input. Being abstract, it is populated by its subclasses. */
+abstract class RemoteFlowSource extends FlowSource { }
+
+/** A data flow source of local user input. Being abstract, it is populated by its subclasses. */
+abstract class LocalFlowSource extends FlowSource { }
+
+/**
+ * A remote flow source that is a call to a function modeled by `RemoteFlowSourceFunction`,
+ * where the model's output is the return value or the return value dereference.
+ */
+private class RemoteReturnSource extends RemoteFlowSource {
+  string sourceType;
+
+  RemoteReturnSource() {
+    exists(RemoteFlowSourceFunction func, CallInstruction instr, FunctionOutput output |
+      this.asInstruction() = instr and
+      instr.getStaticCallTarget() = func and
+      func.hasRemoteFlowSource(output, sourceType) and
+      (output.isReturnValue() or output.isReturnValueDeref())
+    )
+  }
+
+  override string getSourceType() { result = sourceType }
+}
+
+/** A remote flow source that is a write side effect of a call to a `RemoteFlowSourceFunction`. */
+private class RemoteParameterSource extends RemoteFlowSource {
+  string sourceType;
+  RemoteParameterSource() {
+    exists(RemoteFlowSourceFunction func, WriteSideEffectInstruction instr, FunctionOutput output |
+      this.asInstruction() = instr and
+      instr.getPrimaryInstruction().(CallInstruction).getStaticCallTarget() = func and
+      func.hasRemoteFlowSource(output, sourceType) and
+      output.isParameterDerefOrQualifierObject(instr.getIndex())
+    )
+  }
+
+  override string getSourceType() { result = sourceType }
+}
+
+/**
+ * A local flow source that is a call to a function modeled by `LocalFlowSourceFunction`,
+ * where the model's output is the return value or the return value dereference.
+ */
+private class LocalReturnSource extends LocalFlowSource {
+  string sourceType;
+
+  LocalReturnSource() {
+    exists(LocalFlowSourceFunction func, CallInstruction instr, FunctionOutput output |
+      this.asInstruction() = instr and
+      instr.getStaticCallTarget() = func and
+      func.hasLocalFlowSource(output, sourceType) and
+      (output.isReturnValue() or output.isReturnValueDeref())
+    )
+  }
+
+  override string getSourceType() { result = sourceType }
+}
+
+/** A local flow source that is a write side effect of a call to a `LocalFlowSourceFunction`. */
+private class LocalParameterSource extends LocalFlowSource {
+  string sourceType;
+  LocalParameterSource() {
+    exists(LocalFlowSourceFunction func, WriteSideEffectInstruction instr, FunctionOutput output |
+      this.asInstruction() = instr and
+      instr.getPrimaryInstruction().(CallInstruction).getStaticCallTarget() = func and
+      func.hasLocalFlowSource(output, sourceType) and
+      output.isParameterDerefOrQualifierObject(instr.getIndex())
+    )
+  }
+
+  override string getSourceType() { result = sourceType }
+}
+
+/** An access to a parameter named `argv` of a global function named `main`. */
+private class ArgvSource extends LocalFlowSource {
+  ArgvSource() {
+    exists(Parameter argv | this.asExpr() = argv.getAnAccess() |
+      argv.getFunction().hasGlobalName("main") and
+      argv.hasName("argv")
+    )
+  }
+
+  override string getSourceType() { result = "a command-line argument" }
+}
+
+/** A remote data flow sink. Being abstract, it is populated by its subclasses. */
+abstract class RemoteFlowSink extends DataFlow::Node {
+ /** Gets a string that describes the type of this flow sink. */
+ abstract string getSinkType();
+}
+
+/** A remote flow sink: an operand of a call to a function modeled by `RemoteFlowSinkFunction`. */
+private class RemoteParameterSink extends RemoteFlowSink {
+  string sinkType;
+
+  RemoteParameterSink() {
+    exists(RemoteFlowSinkFunction func, FunctionInput input, CallInstruction call, int index |
+      func.hasRemoteFlowSink(input, sinkType) and call.getStaticCallTarget() = func
+    |
+      exists(ReadSideEffectInstruction read |
+        call = read.getPrimaryInstruction() and read.getIndex() = index and
+        this.asOperand() = read.getSideEffectOperand() and
+        input.isParameterDerefOrQualifierObject(index)
+      )
+      or
+      input.isParameterOrQualifierAddress(index) and
+      this.asOperand() = call.getArgumentOperand(index)
+    )
+  }
+
+  override string getSinkType() { result = sinkType }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FunctionWithWrappers.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FunctionWithWrappers.qll
new file mode 100644
index 00000000000..654e9d92451
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/FunctionWithWrappers.qll
@@ -0,0 +1,179 @@
+/**
+ * Provides predicates for identifying functions that wrap other functions,
+ * passing the same arguments from the outer call into the inner call. In the
+ * following example `MyMalloc` wraps a call to `malloc`, passing in the `size`
+ * parameter:
+ * ```
+ * void *MyMalloc(size_t size)
+ * {
+ * void *ptr = malloc(size);
+ *
+ * // ... additional logic?
+ *
+ * return ptr;
+ * }
+ * ```
+ */
+
+import cpp
+import PrintfLike
+private import semmle.code.cpp.ir.dataflow.ResolveCall
+
+bindingset[index]
+private string toCause(Function func, int index) {
+  // use the parameter's name when it has one, and its position otherwise
+  if exists(func.getParameter(index).getName())
+  then result = func.getQualifiedName() + "(" + func.getParameter(index).getName() + ")"
+  else result = func.getQualifiedName() + "(arg " + index + ")"
+}
+
+/**
+ * Holds if `source` contains a call to `target` in which the argument at index
+ * `targetParamIndex` is an access to the parameter of `source` at index
+ * `sourceParamIndex`, and that parameter has no assigned value.
+ */
+private predicate wrapperFunctionStep(
+  Function source, int sourceParamIndex, Function target, int targetParamIndex
+) {
+  source.hasDefinition() and
+  not source.isVirtual() and
+  not target.isVirtual() and
+  exists(Parameter sourceParam, Call call |
+    // 'sourceParam' is the parameter of 'source' at 'sourceParamIndex', never assigned to
+    sourceParam = source.getParameter(sourceParamIndex) and
+    not exists(sourceParam.getAnAssignedValue()) and
+    // 'call' is enclosed in 'source' and resolves to 'target'
+    call.getEnclosingFunction() = source and
+    resolveCall(call) = target and
+    // the argument of 'call' at 'targetParamIndex' is an access to 'sourceParam'
+    call.getArgument(targetParamIndex) = sourceParam.getAnAccess()
+  )
+}
+
+/**
+ * An abstract class for representing functions that may have wrapper functions.
+ * Wrapper functions propagate an argument (without any evident changes) to this function
+ * through one or more steps in a call chain.
+ *
+ * The design motivation is to report a violation at the location of the argument
+ * in a call to the wrapper function rather than the function being wrapped, since
+ * that is usually the more appropriate place to fix the violation.
+ *
+ * Subclasses should override the characteristic predicate and 'interestingArg'.
+ */
+abstract class FunctionWithWrappers extends Function {
+  /**
+   * Holds if `arg` is an argument index that is relevant for wrapper function detection.
+   */
+  predicate interestingArg(int arg) { none() }
+
+  /**
+   * Holds if 'func' is a (possibly nested) wrapper function that feeds a parameter at the given index
+   * through to an interesting parameter of 'this' function at the given call chain 'depth', where
+   * 'callChain' describes every step of the chain. The call chain depth is limited to 4.
+   */
+  private predicate wrapperFunctionLimitedDepth(
+    Function func, int paramIndex, string callChain, int depth
+  ) {
+    // base case
+    func = this and
+    this.interestingArg(paramIndex) and
+    callChain = toCause(func, paramIndex) and
+    depth = 0
+    or
+    // recursive step
+    exists(Function target, int targetParamIndex, string targetCause, int targetDepth |
+      this.wrapperFunctionLimitedDepth(target, targetParamIndex, targetCause, targetDepth) and
+      targetDepth < 4 and
+      wrapperFunctionStep(func, paramIndex, target, targetParamIndex) and
+      callChain = toCause(func, paramIndex) + ", which calls " + targetCause and
+      depth = targetDepth + 1
+    )
+  }
+
+  /**
+   * Holds if 'func' is a (possibly nested) wrapper function that feeds a parameter at the given index
+   * through to an interesting parameter of 'this' function, with no limit on the call chain depth.
+   *
+   * The 'cause' gives the name of 'this' interesting function and its relevant parameter
+   * at the end of the call chain.
+   */
+  private predicate wrapperFunctionAnyDepth(Function func, int paramIndex, string cause) {
+    // base case
+    func = this and
+    this.interestingArg(paramIndex) and
+    cause = toCause(func, paramIndex)
+    or
+    // recursive step
+    exists(Function target, int targetParamIndex |
+      this.wrapperFunctionAnyDepth(target, targetParamIndex, cause) and
+      wrapperFunctionStep(func, paramIndex, target, targetParamIndex)
+    )
+  }
+
+  /**
+   * Gets a description for 'func', a (possibly nested) wrapper function that feeds a parameter at
+   * the given index through to an interesting parameter of 'this' function.
+   *
+   * The result names 'func' and its parameter, followed by 'this' interesting function and its
+   * relevant parameter at the end of the call chain.
+   *
+   * If there is more than one possible end of the chain, the lexicographically smallest is used.
+   */
+  pragma[nomagic]
+  private string wrapperFunctionAnyDepthUnique(Function func, int paramIndex) {
+    result =
+      toCause(func, paramIndex) + ", which ends up calling " +
+        min(string targetCause | this.wrapperFunctionAnyDepth(func, paramIndex, targetCause))
+  }
+
+  /**
+   * Holds if 'func' is a (possibly nested) wrapper function that feeds a parameter at the given index
+   * through to an interesting parameter of 'this' function.
+   *
+   * If there exists a call chain with depth at most 4, the 'cause' reports the smallest call chain.
+   * Otherwise, the 'cause' merely reports 'func' together with the name of 'this' interesting
+   * function and its relevant parameter at the end of the call chain.
+   *
+   * If there is more than one possible 'cause', a unique one is picked (by lexicographic order).
+   */
+  predicate wrapperFunction(Function func, int paramIndex, string cause) {
+    cause =
+      min(string callChain, int depth |
+        this.wrapperFunctionLimitedDepth(func, paramIndex, callChain, depth) and
+        depth = min(int d | this.wrapperFunctionLimitedDepth(func, paramIndex, _, d) | d)
+      |
+        callChain
+      )
+    or
+    not this.wrapperFunctionLimitedDepth(func, paramIndex, _, _) and
+    cause = this.wrapperFunctionAnyDepthUnique(func, paramIndex)
+  }
+
+  /**
+   * Holds if 'arg' is an argument in a call to an outermost wrapper function of 'this' function; 'callChain' is the cause reported by 'wrapperFunction'.
+   */
+  predicate outermostWrapperFunctionCall(Expr arg, string callChain) {
+    exists(Function targetFunc, Call call, int argIndex |
+      targetFunc = resolveCall(call) and
+      this.wrapperFunction(targetFunc, argIndex, callChain) and
+      (
+        exists(Function sourceFunc | sourceFunc = call.getEnclosingFunction() |
+          not wrapperFunctionStep(sourceFunc, _, targetFunc, argIndex)
+        )
+        or
+        not exists(call.getEnclosingFunction())
+      ) and
+      arg = call.getArgument(argIndex)
+    )
+  }
+}
+
+/**
+ * A `printf`-like formatting function (per `printfLikeFunction`) whose interesting argument is its format argument index.
+ */
+class PrintfLikeFunction extends FunctionWithWrappers {
+ PrintfLikeFunction() { printfLikeFunction(this, _) }
+
+ override predicate interestingArg(int arg) { printfLikeFunction(this, arg) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/OutputWrite.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/OutputWrite.qll
new file mode 100644
index 00000000000..9ed22aa970f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/OutputWrite.qll
@@ -0,0 +1,91 @@
+/**
+ * Provides classes for modeling output to standard output / standard error through various mechanisms such as `printf`, `puts` and `operator<<`.
+ */
+
+import cpp
+import FileWrite
+
+/**
+ * An expression, such as a function call, that writes to standard output or standard error.
+ */
+class OutputWrite extends Expr {
+ OutputWrite() { outputWrite(this, _) }
+
+ /**
+ * Gets a source expression for this output, that is, an expression that `outputWrite` pairs with this write.
+ */
+ Expr getASource() { outputWrite(this, result) }
+}
+
+/**
+ * Holds if `v` has the name of one of the standard output or standard error
+ * stream variables.
+ */
+private predicate outputVariable(Variable v) {
+  exists(string name | v.hasName(name) |
+    // standard output
+    name = ["cout", "wcout"]
+    or
+    // standard error
+    name = ["cerr", "clog", "wcerr", "wclog"]
+  )
+}
+
+/**
+ * Holds if `out` is an output chain ending in an access to a standard output
+ * or standard error variable.
+ */
+private predicate outputExpr(ChainedOutputCall out) {
+  exists(VariableAccess end | end = out.getEndDest() | outputVariable(end.getTarget()))
+}
+
+/**
+ * Holds if `e` represents standard output or standard error as a file: either
+ * an access to a global variable whose string representation, or an expression
+ * whose root-cause macro's name, is `stdout` or `stderr`.
+ */
+private predicate outputFile(Expr e) {
+  exists(string name | name = ["stdout", "stderr"] |
+    e.(VariableAccess).getTarget().(GlobalVariable).toString() = name
+    or
+    e.findRootCause().(Macro).getName() = name
+  )
+}
+
+/**
+ * Holds if `write` is a write to standard output or standard error and `source` is an expression that it outputs.
+ */
+private predicate outputWrite(Expr write, Expr source) {
+  // output to stdout, stderr as a file (using FileWrite.qll logic)
+  exists(FileWrite fileWrite |
+    fileWrite = write and
+    source = fileWrite.getASource() and
+    outputFile(fileWrite.getDest())
+  )
+  or
+  // output to standard output / standard error using operator<<, put or write
+  source = write.(ChainedOutputCall).getSource() and
+  outputExpr(write)
+  or
+  exists(Function callee, int idx |
+    callee = write.(Call).getTarget() and source = write.(Call).getArgument(idx)
+  |
+    // puts, putchar
+    idx = 0 and
+    (
+      callee.hasGlobalOrStdName("puts") or
+      callee.hasGlobalOrStdName("putchar")
+    )
+    or
+    // printf / syslog
+    idx >= callee.(FormattingFunction).getFormatParameterIndex() and
+    callee.(FormattingFunction).isOutputGlobal()
+    or
+    // wrapped output call (recursive case)
+    exists(Call innerCall, Expr innerSource |
+      innerCall.getEnclosingFunction() = callee and
+      outputWrite(innerCall, innerSource) and
+      parameterUsePair(callee.getParameter(idx), innerSource)
+    )
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Overflow.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Overflow.qll
new file mode 100644
index 00000000000..50cd0d2e7a9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Overflow.qll
@@ -0,0 +1,161 @@
+/**
+ * Provides predicates for reasoning about when the value of an expression is
+ * guarded by an operation such as `<`, which confines its range.
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.Dominance
+// `GlobalValueNumbering` is only imported to prevent IR re-evaluation.
+private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+
+/**
+ * Holds if `use` occurs in the first argument of a call to `abs`, `labs`, `llabs` or `imaxabs`, and that call is guarded as by `guardedLesser(e, ...)`.
+ */
+predicate guardedAbs(Operation e, Expr use) {
+  exists(FunctionCall absCall | guardedLesser(e, absCall) |
+    absCall.getTarget().getName() = ["abs", "labs", "llabs", "imaxabs"] and
+    use = absCall.getArgument(0).getAChild*()
+  )
+}
+
+/**
+ * Gets the index of `stmt` among the nodes of basic block `block` (this is a thin
+ * layer over `BasicBlock.getNode`, kept un-inlined and intended to improve performance).
+ */
+pragma[noinline]
+private int getStmtIndexInBlock(BasicBlock block, Stmt stmt) { block.getNode(result) = stmt }
+
+/** Holds if `dominated` is at or after `dominator` in one block, or `dominator`'s block strictly dominates `dominated`'s. */
+pragma[inline]
+private predicate stmtDominates(Stmt dominator, Stmt dominated) {
+  // In (possibly) different blocks
+  bbStrictlyDominates(dominator.getBasicBlock(), dominated.getBasicBlock())
+  or
+  // In same block
+  exists(BasicBlock block, int dominatorIndex |
+    dominatorIndex = getStmtIndexInBlock(block, dominator) and
+    dominatorIndex <= getStmtIndexInBlock(block, dominated)
+  )
+}
+
+/**
+ * Holds if `use` is (part of) the lesser operand of a relational guard, and `e`
+ * is in code controlled by that guard (where the guard condition held).
+ */
+pragma[nomagic]
+predicate guardedLesser(Operation e, Expr use) {
+  guardedAbs(e, use)
+  or
+  // `use` occurs in the lesser operand of `guard`; `guard` controls `e` in one of three ways
+  exists(RelationalOperation guard | use = guard.getLesserOperand().getAChild*() |
+    exists(IfStmt ifStmt |
+      guard = ifStmt.getControllingExpr().getAChild*() and
+      stmtDominates(ifStmt.getThen(), e.getEnclosingStmt())
+    )
+    or
+    exists(Loop loop |
+      guard = loop.getControllingExpr().getAChild*() and
+      stmtDominates(loop.getStmt(), e.getEnclosingStmt())
+    )
+    or
+    exists(ConditionalExpr cond |
+      guard = cond.getCondition().getAChild*() and
+      e = cond.getThen().getAChild*()
+    )
+  )
+}
+
+/**
+ * Holds if `use` is (part of) the greater operand of a relational guard, and `e`
+ * is in code controlled by that guard (where the guard condition held).
+ */
+pragma[nomagic]
+predicate guardedGreater(Operation e, Expr use) {
+  guardedAbs(e, use)
+  or
+  // `use` occurs in the greater operand of `guard`; `guard` controls `e` in one of three ways
+  exists(RelationalOperation guard | use = guard.getGreaterOperand().getAChild*() |
+    exists(IfStmt ifStmt |
+      guard = ifStmt.getControllingExpr().getAChild*() and
+      stmtDominates(ifStmt.getThen(), e.getEnclosingStmt())
+    )
+    or
+    exists(Loop loop |
+      guard = loop.getControllingExpr().getAChild*() and
+      stmtDominates(loop.getStmt(), e.getEnclosingStmt())
+    )
+    or
+    exists(ConditionalExpr cond |
+      guard = cond.getCondition().getAChild*() and
+      e = cond.getThen().getAChild*()
+    )
+  )
+}
+
+/**
+ * Gets an access to the local scope variable `v`.
+ */
+VariableAccess varUse(LocalScopeVariable v) { result = v.getAnAccess() }
+
+/**
+ * Holds if `e` potentially overflows and `use` is an operand of `e` that is not guarded.
+ */
+predicate missingGuardAgainstOverflow(Operation e, VariableAccess use) {
+  // The other constraints in this predicate guarantee that `e` is a
+  // `BinaryArithmeticOperation`, a `UnaryArithmeticOperation` or an
+  // `AssignArithmeticOperation`, so `convertedExprMightOverflowPositively` will have a
+  // result even when `e` is not analyzable by `SimpleRangeAnalysis`.
+  convertedExprMightOverflowPositively(e) and
+  use = e.getAnOperand() and
+  exists(LocalScopeVariable v | v = use.getTarget() |
+    // overflow possible if large
+    (
+      e instanceof AddExpr
+      or
+      e instanceof AssignAddExpr
+      or
+      e instanceof IncrementOperation and v.getUnspecifiedType() instanceof IntegralType
+    ) and
+    not guardedLesser(e, varUse(v))
+    or
+    // overflow possible if large or small
+    (
+      e instanceof MulExpr or
+      e instanceof AssignMulExpr
+    ) and
+    not (guardedLesser(e, varUse(v)) and guardedGreater(e, varUse(v)))
+  )
+}
+
+/**
+ * Holds if `e` potentially underflows and `use` is an operand of `e` that is not guarded.
+ */
+predicate missingGuardAgainstUnderflow(Operation e, VariableAccess use) {
+  // The other constraints in this predicate guarantee that `e` is a
+  // `BinaryArithmeticOperation`, a `UnaryArithmeticOperation` or an
+  // `AssignArithmeticOperation`, so `convertedExprMightOverflowNegatively` will have a
+  // result even when `e` is not analyzable by `SimpleRangeAnalysis`.
+  convertedExprMightOverflowNegatively(e) and
+  use = e.getAnOperand() and
+  exists(LocalScopeVariable v | v = use.getTarget() |
+    (
+      // underflow possible if use is left operand and small
+      use = e.(SubExpr).getLeftOperand()
+      or
+      use = e.(AssignSubExpr).getLValue()
+      or
+      // underflow possible if small
+      e instanceof DecrementOperation and v.getUnspecifiedType() instanceof IntegralType
+    ) and
+    not guardedGreater(e, varUse(v))
+    or
+    // underflow possible if large or small
+    (
+      e instanceof MulExpr or
+      e instanceof AssignMulExpr
+    ) and
+    not (guardedLesser(e, varUse(v)) and guardedGreater(e, varUse(v)))
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/PrintfLike.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/PrintfLike.qll
new file mode 100644
index 00000000000..92b86a53fb2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/PrintfLike.qll
@@ -0,0 +1,28 @@
+/**
+ * Provides a predicate for identifying formatting functions like `printf`.
+ *
+ * Consider using the newer model in
+ * `semmle.code.cpp.models.interfaces.FormattingFunction` directly instead of
+ * this library.
+ */
+
+import semmle.code.cpp.commons.Printf
+import external.ExternalArtifact
+
+/**
+ * Holds if `func` is a `printf`-like formatting function whose format string
+ * is the argument at index `formatArg`.
+ */
+predicate printfLikeFunction(Function func, int formatArg) {
+  primitiveVariadicFormatter(func, _, formatArg, _)
+  or
+  not func instanceof UserDefinedFormattingFunction and
+  formatArg = func.(FormattingFunction).getFormatParameterIndex()
+  or
+  exists(ExternalData data |
+    // TODO Do this \ to / conversion in the toolchain?
+    data.getDataPath().replaceAll("\\", "/") = "cert/formatingFunction.csv" and
+    formatArg = data.getFieldAsInt(1) and
+    func.getName() = data.getField(0)
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Security.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Security.qll
new file mode 100644
index 00000000000..da808592b3e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/Security.qll
@@ -0,0 +1,206 @@
+/**
+ * Definitions related to security queries.
+ * These can be extended for specific code bases.
+ */
+
+import semmle.code.cpp.exprs.Expr
+import semmle.code.cpp.commons.Environment
+import semmle.code.cpp.security.SecurityOptions
+import semmle.code.cpp.models.interfaces.FlowSource
+import semmle.code.cpp.models.interfaces.Sql
+
+/**
+ * Extend this class to customize the security queries for
+ * a particular code base. Provide no constructor in the
+ * subclass, and override any methods that need customizing.
+ */
+class SecurityOptions extends string {
+ SecurityOptions() { this = "SecurityOptions" }
+
+ /**
+ * Holds if the function with the given name is a pure function of its
+ * arguments. The default implementation is a fixed list of names.
+ */
+ predicate isPureFunction(string name) {
+ name =
+ [
+ "abs", "atof", "atoi", "atol", "atoll", "labs", "strcasestr", "strcat", "strchnul",
+ "strchr", "strchrnul", "strcmp", "strcpy", "strcspn", "strdup", "strlen", "strncat",
+ "strncmp", "strncpy", "strndup", "strnlen", "strrchr", "strspn", "strstr", "strtod",
+ "strtof", "strtol", "strtoll", "strtoq", "strtoul"
+ ]
+ }
+
+ /**
+ * Holds if argument `arg` of a function named `function` is a SQL argument according to a `SqlExecutionFunction` model.
+ */
+ predicate sqlArgument(string function, int arg) {
+ exists(FunctionInput input, SqlExecutionFunction sql |
+ sql.hasName(function) and
+ input.isParameterDeref(arg) and
+ sql.hasSqlArgument(input)
+ )
+ }
+
+ /**
+ * Holds if argument `arg` of the call `functionCall` is filled in from user input.
+ */
+ predicate userInputArgument(FunctionCall functionCall, int arg) {
+ exists(string fname |
+ functionCall.getTarget().hasGlobalOrStdName(fname) and
+ exists(functionCall.getArgument(arg)) and
+ (
+ fname = ["fread", "fgets", "fgetws", "gets"] and arg = 0
+ or
+ fname = "scanf" and arg >= 1
+ or
+ fname = "fscanf" and arg >= 2
+ )
+ or
+ functionCall.getTarget().hasGlobalName(fname) and
+ exists(functionCall.getArgument(arg)) and
+ fname = "getaddrinfo" and
+ arg = 3
+ )
+ or
+ exists(RemoteFlowSourceFunction remote, FunctionOutput output |
+ functionCall.getTarget() = remote and
+ output.isParameterDerefOrQualifierObject(arg) and
+ remote.hasRemoteFlowSource(output, _)
+ )
+ }
+
+ /**
+ * Holds if the return value of the call `functionCall` is filled in from user input.
+ */
+ predicate userInputReturned(FunctionCall functionCall) {
+ exists(string fname |
+ functionCall.getTarget().getName() = fname and
+ (
+ fname = ["fgets", "gets"] or
+ userInputReturn(fname)
+ )
+ )
+ or
+ exists(RemoteFlowSourceFunction remote, FunctionOutput output |
+ functionCall.getTarget() = remote and
+ (output.isReturnValue() or output.isReturnValueDeref()) and
+ remote.hasRemoteFlowSource(output, _)
+ )
+ }
+
+ /**
+ * DEPRECATED: Users should override `userInputReturned()` instead.
+ *
+ * note: this predicate is not formally tagged as `deprecated` since the
+ * new `userInputReturned` uses it to provide compatibility with older
+ * custom SecurityOptions.qll files.
+ */
+ predicate userInputReturn(string function) { none() }
+
+ /**
+ * Holds if argument `arg` of a function named `function` is used for
+ * running a process or loading a library.
+ */
+ predicate isProcessOperationArgument(string function, int arg) {
+ // POSIX
+ function =
+ ["system", "popen", "execl", "execlp", "execle", "execv", "execvp", "execvpe", "dlopen"] and
+ arg = 0
+ or
+ // Windows
+ function = ["LoadLibrary", "LoadLibraryA", "LoadLibraryW"] and arg = 0
+ }
+
+ /**
+ * Holds if `expr` is directly computed from user input, with `cause`
+ * describing where the input comes from (a function name, `argv`, or an
+ * environment read). Such expressions are treated as sources of taint.
+ */
+ predicate isUserInput(Expr expr, string cause) {
+ exists(FunctionCall fc, int i |
+ this.userInputArgument(fc, i) and
+ expr = fc.getArgument(i) and
+ cause = fc.getTarget().getName()
+ )
+ or
+ exists(FunctionCall fc |
+ this.userInputReturned(fc) and
+ expr = fc and
+ cause = fc.getTarget().getName()
+ )
+ or
+ commandLineArg(expr) and cause = "argv"
+ or
+ expr.(EnvironmentRead).getSourceDescription() = cause
+ }
+
+ /**
+ * Holds if the expression raises privilege for the current session. The
+ * default definition only holds true for some example code in the test
+ * suite. This predicate must be extended for a particular code base to
+ * be useful.
+ */
+ predicate raisesPrivilege(Expr expr) {
+ exists(ReturnStmt ret | ret.getExpr() = expr |
+ ret.getEnclosingFunction().getName() = "checkPinCode" and
+ ret.getExpr().getValue() = "1"
+ )
+ or
+ exists(AssignExpr assign, Variable adminPrivileges |
+ assign = expr and
+ adminPrivileges.hasName("adminPrivileges") and
+ assign.getLValue().(Access).getTarget() = adminPrivileges and
+ not assign.getRValue().(Literal).getValue() = "0"
+ )
+ }
+}
+
+/**
+ * Holds if `e` is an access to the argv argument to main().
+ */
+private predicate commandLineArg(Expr e) {
+  // `argv/1` identifies the parameter; any access to it qualifies
+  exists(Parameter mainArgv | e = mainArgv.getAnAccess() |
+    argv(mainArgv)
+  )
+}
+
+/** Holds if `argv` is the parameter at index 1 of a global function named `main`. */
+predicate argv(Parameter argv) {
+  exists(Function mainFunc |
+    argv = mainFunc.getParameter(1) and
+    mainFunc.hasGlobalName("main")
+  )
+}
+
+/** Convenience accessor for SecurityOptions.isPureFunction */
+predicate isPureFunction(string name) { any(SecurityOptions opts).isPureFunction(name) }
+
+/** Convenience accessor for SecurityOptions.userInputArgument */
+predicate userInputArgument(FunctionCall functionCall, int arg) {
+  any(SecurityOptions opts).userInputArgument(functionCall, arg)
+}
+
+/** Convenience accessor for SecurityOptions.userInputReturned */
+predicate userInputReturned(FunctionCall functionCall) {
+  any(SecurityOptions opts).userInputReturned(functionCall)
+}
+
+/** Convenience accessor for SecurityOptions.isUserInput */
+predicate isUserInput(Expr expr, string cause) {
+  any(SecurityOptions opts).isUserInput(expr, cause)
+}
+
+/** Convenience accessor for SecurityOptions.isProcessOperationArgument */
+predicate isProcessOperationArgument(string function, int arg) {
+  any(SecurityOptions opts).isProcessOperationArgument(function, arg)
+}
+
+/** Convenience accessor for SecurityOptions.raisesPrivilege */
+predicate raisesPrivilege(Expr expr) { any(SecurityOptions opts).raisesPrivilege(expr) }
+
+/** Convenience accessor for SecurityOptions.sqlArgument */
+predicate sqlArgument(string function, int arg) {
+  any(SecurityOptions opts).sqlArgument(function, arg)
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/SecurityOptions.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/SecurityOptions.qll
new file mode 100644
index 00000000000..64babe419c3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/SecurityOptions.qll
@@ -0,0 +1,49 @@
+/**
+ * Security pack options.
+ *
+ * see https://semmle.com/wiki/display/SD/_Configuring+SecurityOptions+for+your+code+base
+ *
+ * Please note that functions for MySql and SQLite are included by default and do not
+ * require any customization here.
+ */
+
+import semmle.code.cpp.security.Security
+
+/**
+ * A subclass of `SecurityOptions` for project-specific customization. Each override keeps
+ * the default behavior and adds a `none()` placeholder to be replaced by custom rules.
+ */
+class CustomSecurityOptions extends SecurityOptions {
+ override predicate sqlArgument(string function, int arg) {
+ SecurityOptions.super.sqlArgument(function, arg)
+ or
+ // --- custom functions that access SQL code via one of their arguments:
+ // 'arg' is the 0-based index of the argument that contains an SQL string
+ // for example: (function = "MySpecialSqlFunction" and arg = 0)
+ none() // rules to match custom functions replace this line
+ }
+
+ override predicate userInputArgument(FunctionCall functionCall, int arg) {
+ SecurityOptions.super.userInputArgument(functionCall, arg)
+ or
+ exists(string fname |
+ functionCall.getTarget().hasGlobalName(fname) and
+ exists(functionCall.getArgument(arg)) and
+ // --- custom functions that return user input via one of their arguments:
+ // 'arg' is the 0-based index of the argument that is used to return user input
+ // for example: (fname = "readXmlInto" and arg = 1)
+ none() // rules to match custom functions replace this line
+ )
+ }
+
+ override predicate userInputReturned(FunctionCall functionCall) {
+ SecurityOptions.super.userInputReturned(functionCall)
+ or
+ exists(string fname |
+ functionCall.getTarget().hasGlobalName(fname) and
+ // --- custom functions that return user input via their return value:
+ // for example: fname = "xmlReadAttribute"
+ none() // rules to match custom functions replace this line
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll
new file mode 100644
index 00000000000..22e0ee71b66
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll
@@ -0,0 +1,55 @@
+/**
+ * Provides classes for heuristically identifying variables and functions that
+ * might contain or return a password or other sensitive information.
+ */
+
+import cpp
+
+/**
+ * Holds if the name `s` suggests something might contain or return a password
+ * or other sensitive information: it matches one of the suspicious patterns
+ * and none of the excluded ones.
+ */
+bindingset[s]
+private predicate suspicious(string s) {
+  // `s` matches at least one suspicious pattern ...
+  exists(string wanted |
+    wanted = ["%password%", "%passwd%", "%trusted%"] and
+    s.matches(wanted)
+  ) and
+  // ... and none of the excluded patterns
+  not exists(string excluded |
+    excluded = ["%hash%", "%crypt%", "%file%", "%path%"] and
+    s.matches(excluded)
+  )
+}
+
+/**
+ * A variable that might contain a password or other sensitive information: its lower-cased name is suspicious and its type is not integral.
+ */
+class SensitiveVariable extends Variable {
+  SensitiveVariable() {
+    suspicious(this.getName().toLowerCase()) and
+    not this.getUnspecifiedType() instanceof IntegralType
+  }
+}
+
+/**
+ * A function that might return a password or other sensitive information: its lower-cased name is suspicious and its type is not integral.
+ */
+class SensitiveFunction extends Function {
+  SensitiveFunction() {
+    suspicious(this.getName().toLowerCase()) and
+    not this.getUnspecifiedType() instanceof IntegralType
+  }
+}
+
+/**
+ * An expression whose value might be a password or other sensitive information: an access to a `SensitiveVariable` or a call to a `SensitiveFunction`.
+ */
+class SensitiveExpr extends Expr {
+  SensitiveExpr() {
+    exists(SensitiveVariable v | v = this.(VariableAccess).getTarget()) or
+    exists(SensitiveFunction f | f = this.(FunctionCall).getTarget())
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/TaintTracking.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/TaintTracking.qll
new file mode 100644
index 00000000000..65836d285ad
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/TaintTracking.qll
@@ -0,0 +1,7 @@
+/*
+ * Support for tracking tainted data through the program.
+ *
+ * Prefer to use `semmle.code.cpp.dataflow.TaintTracking` when designing new queries.
+ */
+
+import semmle.code.cpp.ir.dataflow.DefaultTaintTracking
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll
new file mode 100644
index 00000000000..00db6a18bcb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll
@@ -0,0 +1,650 @@
+/**
+ * DEPRECATED: we now use `semmle.code.cpp.ir.dataflow.DefaultTaintTracking`,
+ * which is based on the IR but designed to behave similarly to this old
+ * library.
+ *
+ * Provides the implementation of `semmle.code.cpp.security.TaintTracking`. Do
+ * not import this file directly.
+ */
+
+import cpp
+import Security
+
+/** Holds if `expr` is an assignment target, or a call argument that receives user input, a copied value, or a reference. */
+private predicate valueSource(Expr expr) {
+  expr = any(AssignExpr assign).getLValue()
+  or
+  exists(FunctionCall call, int idx |
+    expr = call.getArgument(idx) and
+    userInputArgument(call, idx)
+  )
+  or
+  exists(FunctionCall call, int idx |
+    expr = call.getArgument(idx) and
+    copyValueBetweenArguments(call.getTarget(), _, idx)
+  )
+  or
+  exists(FunctionCall call, int idx |
+    expr = call.getArgument(idx) and
+    call.getTarget().getParameter(idx).getType() instanceof ReferenceType
+  )
+}
+
+/** Holds if `expr` is a value source (see `valueSource`) or is nested, directly or indirectly, inside one. */
+private predicate insideValueSource(Expr expr) {
+  valueSource(expr)
+  or
+  insideValueSource(expr.getParent()) and
+  // Modifying `array[offset]` does not modify `offset`, so do not descend into array offsets.
+  not exists(ArrayExpr indexed | indexed = expr.getParent() and indexed.getArrayOffset() = expr)
+}
+
+/** Holds if `type` is a pointer type, or a reference type whose base type satisfies `isPointer`. */
+private predicate isPointer(Type type) {
+  isPointer(type.(ReferenceType).getBaseType()) or type instanceof PointerType
+}
+
+/**
+ * Holds if data moves between the sub-expression `src` and its enclosing expression `dest`.
+ * When the step is used on the left side of an assignment, `src` and `dest` should be swapped.
+ */
+private predicate moveToDependingOnSide(Expr src, Expr dest) {
+  exists(ParenthesisExpr paren |
+    dest = paren and
+    src = paren.getAChild()
+  )
+  or
+  exists(ArrayExpr indexing |
+    dest = indexing and
+    src = indexing.getArrayBase()
+  )
+  or
+  exists(PointerDereferenceExpr deref |
+    dest = deref and
+    src = deref.getOperand()
+  )
+  or
+  exists(AddressOfExpr addrOf |
+    dest = addrOf and
+    src = addrOf.getOperand()
+  )
+  or
+  // if var+offset is tainted, then so is var
+  exists(VariableAccess ptrAccess, BinaryOperation arith |
+    src = ptrAccess and
+    dest = arith and
+    (arith.getRightOperand() = ptrAccess or arith.getLeftOperand() = ptrAccess) and
+    ptrAccess.getTarget() instanceof LocalScopeVariable and
+    isPointer(ptrAccess.getType()) and
+    // Flow through pointer-pointer subtraction is dubious: the result should be
+    // a number bounded by the size of the pointed-to thing.
+    not arith instanceof PointerDiffExpr
+  )
+  or
+  exists(UnaryOperation unary |
+    src = unary.getAnOperand() and
+    dest = unary
+  )
+  or
+  exists(BinaryOperation binary |
+    predictable(binary.getRightOperand()) and
+    src = binary.getLeftOperand() and
+    dest = binary
+  )
+  or
+  exists(BinaryOperation binary |
+    predictable(binary.getLeftOperand()) and
+    src = binary.getRightOperand() and
+    dest = binary
+  )
+  or
+  exists(Cast conversion |
+    src = conversion.getExpr() and
+    dest = conversion
+  )
+  or
+  exists(ConditionalExpr ternary |
+    dest = ternary and
+    (
+      src = ternary.getElse() or
+      src = ternary.getThen()
+    )
+  )
+}
+
+/**
+ * Holds if a value moves from `src` to `dest` across a function boundary:
+ * - An argument of a call flows to the matching parameter of a resolved callee (`destFromArg = true`).
+ * - A returned expression flows to its enclosing function, and a function flows to the calls that
+ *   resolve to it (`destFromArg = false`).
+ * - A reference-typed parameter flows back to the matching call argument (`destFromArg = false`).
+ * `destFromArg` records whether the value at `dest` arrived through an argument; it is tracked so
+ * that one tainted argument does not taint every call to the same function (fewer false positives).
+ */
+private predicate betweenFunctionsValueMoveTo(Element src, Element dest, boolean destFromArg) {
+  not unreachable(src) and
+  not unreachable(dest) and
+  (
+    exists(Call invocation, Function callee, int idx |
+      destFromArg = true and
+      resolveCallWithParam(invocation, callee, idx, dest) and
+      src = invocation.getArgument(idx)
+    )
+    or
+    // Only move the return of the function to the function itself if the value didn't come from
+    // an argument, or else we would taint all the calls to one function if one argument is
+    // tainted somewhere
+    exists(Function enclosing, ReturnStmt returnStmt |
+      destFromArg = false and
+      dest = enclosing and
+      src = returnStmt.getExpr() and
+      enclosing = returnStmt.getEnclosingFunction()
+    )
+    or
+    exists(Call invocation, Function callee |
+      destFromArg = false and
+      dest = invocation and
+      src = callee and
+      callee = resolveCall(invocation)
+    )
+    or
+    // If a parameter of type reference is tainted inside a function, taint the argument too
+    exists(Call invocation, Function callee, int idx, Parameter param |
+      destFromArg = false and
+      dest = invocation.getArgument(idx) and
+      src = param and
+      param.getType() instanceof ReferenceType and
+      resolveCallWithParam(invocation, callee, idx, param)
+    )
+  )
+}
+
+// Holds if `call` resolves to `called` and `p` is its `i`th parameter; folded out for a proper join-order.
+// `nomagic`: magic would push a predicate down into this one and ruin the join-order.
+pragma[nomagic]
+private predicate resolveCallWithParam(Call call, Function called, int i, Parameter p) {
+  p = called.getParameter(i) and
+  called = resolveCall(call)
+}
+
+/** A local-scope, global or namespace variable for which `argv` does not hold; flow through it is allowed. */
+library class FlowVariable extends Variable {
+  FlowVariable() {
+    not argv(this) and
+    (
+      this instanceof GlobalOrNamespaceVariable or
+      this instanceof LocalScopeVariable
+    )
+  }
+}
+
+/** A variable with local scope; flow through such variables is allowed. */
+library class FlowLocalScopeVariable extends Variable {
+  FlowLocalScopeVariable() { exists(LocalScopeVariable local | this = local) }
+}
+
+private predicate insideFunctionValueMoveTo(Element src, Element dest) { // Holds if a value moves from `src` to `dest` by one of the steps listed below.
+ not unreachable(src) and
+ not unreachable(dest) and
+ (
+ // Taint all variable usages when one is tainted
+ // This function taints global variables but doesn't taint from a global variable (see globalVariableValueMoveTo)
+ exists(FlowLocalScopeVariable v |
+ src = v and
+ dest = v.getAnAccess() and
+ not insideValueSource(dest)
+ )
+ or
+ exists(FlowVariable v | // an access inside a value source flows to the variable itself
+ src = v.getAnAccess() and
+ dest = v and
+ insideValueSource(src)
+ )
+ or
+ // Taint all union usages when one is tainted
+ // This function taints global variables but doesn't taint from a global variable (see globalVariableValueMoveTo)
+ exists(FlowLocalScopeVariable v, FieldAccess a |
+ unionAccess(v, _, a) and
+ src = v and
+ dest = a and
+ not insideValueSource(dest)
+ )
+ or
+ exists(FlowVariable v, FieldAccess a | // a union field access inside a value source flows to the variable
+ unionAccess(v, _, a) and
+ src = a and
+ dest = v and
+ insideValueSource(src)
+ )
+ or
+ // If a pointer is tainted, taint the original variable
+ exists(FlowVariable p, FlowVariable v, AddressOfExpr e |
+ p.getAnAssignedValue() = e and
+ e.getOperand() = v.getAnAccess() and
+ src = p and
+ dest = v
+ )
+ or
+ // If a reference is tainted, taint the original variable
+ exists(FlowVariable r, FlowVariable v |
+ r.getType() instanceof ReferenceType and
+ r.getInitializer().getExpr() = v.getAnAccess() and
+ src = r and
+ dest = v
+ )
+ or
+ exists(Variable var | // an initializer expression flows to the variable it initializes
+ var = dest and
+ var.getInitializer().getExpr() = src
+ )
+ or
+ exists(AssignExpr ae | // the right-hand side of an assignment flows to its left-hand side
+ src = ae.getRValue() and
+ dest = ae.getLValue()
+ )
+ or
+ exists(CommaExpr comma | // the right operand of a comma expression flows to the comma expression
+ comma = dest and
+ comma.getRightOperand() = src
+ )
+ or
+ exists(FunctionCall c, int sourceArg, int destArg | // modeled copy between two arguments of one call
+ copyValueBetweenArguments(c.getTarget(), sourceArg, destArg) and
+ // Only consider copies from `printf`-like functions if the format is a string
+ (
+ exists(FormattingFunctionCall ffc, FormatLiteral format |
+ ffc = c and
+ format = ffc.getFormat() and
+ format.getConversionChar(sourceArg - ffc.getTarget().getNumberOfParameters()) = ["s", "S"]
+ )
+ or
+ not exists(FormatLiteral fl | fl = c.(FormattingFunctionCall).getFormat()) // the format is not a `FormatLiteral`
+ or
+ not c instanceof FormattingFunctionCall
+ ) and
+ src = c.getArgument(sourceArg) and
+ dest = c.getArgument(destArg)
+ )
+ or
+ exists(FunctionCall c, int sourceArg | // modeled flow from an argument to the result of the call
+ returnArgument(c.getTarget(), sourceArg) and
+ src = c.getArgument(sourceArg) and
+ dest = c
+ )
+ or
+ exists(FormattingFunctionCall formattingSend, int arg, FormatLiteral format | // argument flows to the formatting call
+ dest = formattingSend and
+ formattingSend.getArgument(arg) = src and
+ format = formattingSend.getFormat() and
+ format.getConversionChar(arg - formattingSend.getTarget().getNumberOfParameters()) =
+ ["s", "S", "@"]
+ )
+ or
+ // Expressions computed from tainted data are also tainted
+ exists(FunctionCall call | dest = call and isPureFunction(call.getTarget().getName()) |
+ call.getAnArgument() = src and
+ forall(Expr arg | arg = call.getAnArgument() | arg = src or predictable(arg)) and
+ // flow through `strlen` tends to cause dubious results, if the length is
+ // bounded.
+ not call.getTarget().getName() = "strlen"
+ )
+ or
+ exists(Element a, Element b | // structural steps; the direction is reversed when `a` is inside a value source
+ moveToDependingOnSide(a, b) and
+ if insideValueSource(a) then (src = b and dest = a) else (src = a and dest = b)
+ )
+ )
+}
+
+/**
+ * Holds if a value moves from the global or namespace variable `src` to `dest`, one of its usages.
+ * Moving a value into the global variable itself is handled by `insideFunctionValueMoveTo`.
+ */
+private predicate globalVariableValueMoveTo(GlobalOrNamespaceVariable src, Expr dest) {
+  not unreachable(dest) and
+  // Common to both cases below: `dest` must not lie inside an expression
+  // that changes the value of a variable.
+  not insideValueSource(dest) and
+  (
+    // Case 1: `dest` is a direct access to the global variable.
+    dest = src.getAnAccess()
+    or
+    // Case 2: `dest` is an access to a union field whose qualifier is an
+    // access to the global variable.
+    exists(FieldAccess unionFieldAccess |
+      dest = unionFieldAccess and
+      unionAccess(src, _, unionFieldAccess)
+    )
+  )
+}
+
+/** Holds if `a` accesses field `f`, declared in a union, and the qualifier of `a` is an access to `v`. */
+private predicate unionAccess(Variable v, Field f, FieldAccess a) {
+  a.getQualifier() = v.getAnAccess() and
+  f = a.getTarget() and f.getDeclaringType() instanceof Union
+}
+
+/** Gets the variable whose id is `id`: namespace, `::` and name for a `NamespaceVariable`, else just the name. */
+GlobalOrNamespaceVariable globalVarFromId(string id) {
+  result instanceof NamespaceVariable and id = result.getNamespace() + "::" + result.getName()
+  or not result instanceof NamespaceVariable and id = result.getName()
+}
+
+/**
+ * Holds if `var` is an operand of some relational comparison, anywhere in the program,
+ * whose operands do not include the constant `0`. This is deliberately inclusive, because
+ * there are many valid ways to write an upper bound check once its position is ignored:
+ * ```
+ * if (x < 10) { sink(x); }
+ *
+ * if (10 > y) { sink(y); }
+ *
+ * if (z > 10) { z = 10; }
+ * sink(z);
+ * ```
+ */
+private predicate hasUpperBoundsCheck(Variable var) {
+  exists(RelationalOperation comparison, VariableAccess use |
+    use.getTarget() = var and
+    comparison.getAnOperand() = use and
+    // Comparing to 0 is not an upper bound check
+    not comparison.getAnOperand().getValue() = "0"
+  )
+}
+
+cached
+private predicate taintedWithArgsAndGlobalVars( // Holds if taint from user input `src` reaches `dest`.
+ Element src, Element dest, boolean destFromArg, string globalVar
+) {
+ isUserInput(src, _) and // base case: user input taints itself
+ not unreachable(src) and
+ dest = src and
+ destFromArg = false and
+ globalVar = "" // the empty string means no global variable has been used so far
+ or
+ exists(Element other, boolean otherFromArg, string otherGlobalVar | // recursive case: one more step from `other`
+ taintedWithArgsAndGlobalVars(src, other, otherFromArg, otherGlobalVar)
+ |
+ not unreachable(dest) and
+ not hasUpperBoundsCheck(dest) and // `hasUpperBoundsCheck` takes a `Variable`, so this only excludes variables
+ (
+ // Direct flow from one expression to another.
+ betweenFunctionsValueMoveTo(other, dest, destFromArg) and
+ (destFromArg = true or otherFromArg = false) and // a value that came from an argument may only take `destFromArg = true` steps
+ globalVar = otherGlobalVar
+ or
+ insideFunctionValueMoveTo(other, dest) and
+ destFromArg = otherFromArg and
+ globalVar = otherGlobalVar
+ or
+ exists(GlobalOrNamespaceVariable v | // step out of a global variable: record its id in `globalVar`
+ v = other and
+ globalVariableValueMoveTo(v, dest) and
+ destFromArg = false and
+ v = globalVarFromId(globalVar)
+ )
+ )
+ )
+}
+
+/**
+ * Holds if `tainted` may contain data coming from the user input `source`,
+ * either directly or through a computation whose exact output users can
+ * probably control.
+ *
+ * This doesn't include data flow through global variables.
+ * If you need that you must call `taintedIncludingGlobalVars`.
+ */
+predicate tainted(Expr source, Element tainted) {
+ taintedWithArgsAndGlobalVars(source, tainted, _, "")
+}
+
+/**
+ * Holds if `tainted` may contain data coming from the user input `source`,
+ * either directly or through a computation whose exact output users can
+ * probably control.
+ *
+ * This version gives the same results as `tainted` but also includes
+ * data flow through global variables.
+ *
+ * The parameter `globalVar` is the name of the last global variable used to move the
+ * value from `source` to `tainted`; it is the empty string if no global variable was used.
+ */
+predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
+ taintedWithArgsAndGlobalVars(source, tainted, _, globalVar)
+}
+
+/**
+ * Holds if `expr` is predictable, that is, an external user can predict its
+ * value: a literal in the source code, or a unary or binary operation whose
+ * operands are all predictable.
+ */
+private predicate predictable(Expr expr) {
+  expr instanceof Literal
+  or
+  predictable(expr.(UnaryOperation).getOperand())
+  or
+  exists(BinaryOperation binary | expr = binary |
+    predictable(binary.getRightOperand()) and predictable(binary.getLeftOperand())
+  )
+}
+
+/**
+ * Gets the largest argument index used by any call to `f`: the maximum, over all
+ * calls targeting `f`, of the number of arguments minus one.
+ */
+private int maxArgIndex(Function f) {
+  result =
+    max(FunctionCall call | call.getTarget() = f | call.getNumberOfArguments() - 1)
+}
+
+/** Holds if a call to `f` copies the value of argument `sourceArg` into argument `destArg`. */
+private predicate copyValueBetweenArguments(Function f, int sourceArg, int destArg) {
+  f.hasGlobalOrStdName("memcpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalName("__builtin___memcpy_chk") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("memmove") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("strcat") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalName("_mbscat") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("wcscat") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("strncat") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalName("_mbsncat") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("wcsncat") and sourceArg = 1 and destArg = 0 // also `std::wcsncat`, like its siblings
+  or
+  f.hasGlobalOrStdName("strcpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalName("_mbscpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("wcscpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("strncpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalName("_mbsncpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalOrStdName("wcsncpy") and sourceArg = 1 and destArg = 0
+  or
+  f.hasGlobalName("inet_aton") and sourceArg = 0 and destArg = 1
+  or
+  f.hasGlobalName("inet_pton") and sourceArg = 1 and destArg = 2
+  or
+  f.hasGlobalOrStdName("strftime") and sourceArg in [2 .. maxArgIndex(f)] and destArg = 0
+  or
+  exists(FormattingFunction ff | ff = f |
+    sourceArg in [ff.getFormatParameterIndex() .. maxArgIndex(f)] and
+    destArg = ff.getOutputParameterIndex(false)
+  )
+}
+
+/** Holds if the result of a call to `f` should be tainted whenever its argument `sourceArg` is tainted. */
+private predicate returnArgument(Function f, int sourceArg) {
+  f.hasGlobalOrStdName("memcpy") and sourceArg = 0 // also `std::memcpy`, as in `copyValueBetweenArguments`
+  or
+  f.hasGlobalName("__builtin___memcpy_chk") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("memmove") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("strcat") and sourceArg = 0
+  or
+  f.hasGlobalName("_mbscat") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("wcscat") and sourceArg = 0 // was a second `wcsncat` entry, leaving `wcscat` unmodeled
+  or
+  f.hasGlobalOrStdName("strncat") and sourceArg = 0
+  or
+  f.hasGlobalName("_mbsncat") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("wcsncat") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("strcpy") and sourceArg = 0
+  or
+  f.hasGlobalName("_mbscpy") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("wcscpy") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("strncpy") and sourceArg = 0
+  or
+  f.hasGlobalName("_mbsncpy") and sourceArg = 0
+  or
+  f.hasGlobalOrStdName("wcsncpy") and sourceArg = 0
+  or
+  f.hasGlobalName("inet_ntoa") and sourceArg = 0
+  or
+  f.hasGlobalName("inet_addr") and sourceArg = 0
+  or
+  f.hasGlobalName("inet_network") and sourceArg = 0
+  or
+  f.hasGlobalName("inet_ntoa") and sourceArg = 0 // NOTE(review): repeats the `inet_ntoa` entry above; redundant but harmless
+  or
+  f.hasGlobalName("inet_makeaddr") and
+  (sourceArg = 0 or sourceArg = 1)
+  or
+  f.hasGlobalName("inet_lnaof") and sourceArg = 0
+  or
+  f.hasGlobalName("inet_netof") and sourceArg = 0
+  or
+  f.hasGlobalName("gethostbyname") and sourceArg = 0
+  or
+  f.hasGlobalName("gethostbyaddr") and sourceArg = 0
+}
+
+/**
+ * Gets a function that `call` may invoke.
+ *
+ * This is the static target of `call` and, when `call` is a
+ * `DataSensitiveCallExpr` (a call through a function pointer or to a
+ * virtual function), any target found by its `resolve()` predicate.
+ */
+Function resolveCall(Call call) {
+  exists(DataSensitiveCallExpr dynamicCall | dynamicCall = call | result = dynamicCall.resolve())
+  or
+  result = call.getTarget()
+}
+
+/** A reachable call expression whose target is determined by data flow; subclasses implement `getSrc` and `resolve`. */
+abstract library class DataSensitiveCallExpr extends Expr {
+ DataSensitiveCallExpr() { not unreachable(this) }
+
+ abstract Expr getSrc(); // the expression from which `flowsFrom` starts its backwards search
+
+ cached
+ abstract Function resolve(); // a function this call may invoke
+
+ /**
+ * Holds if `src` can flow to this call expression.
+ *
+ * Searches backwards from `getSrc()` to `src`; `allowFromArg` says whether `src` may be reached through an argument-to-parameter step.
+ */
+ predicate flowsFrom(Element src, boolean allowFromArg) {
+ src = getSrc() and allowFromArg = true
+ or
+ exists(Element other, boolean allowOtherFromArg | flowsFrom(other, allowOtherFromArg) |
+ exists(boolean otherFromArg | betweenFunctionsValueMoveToStatic(src, other, otherFromArg) |
+ otherFromArg = true and allowOtherFromArg = true and allowFromArg = true
+ or
+ otherFromArg = false and allowFromArg = false
+ )
+ or
+ insideFunctionValueMoveTo(src, other) and allowFromArg = allowOtherFromArg
+ or
+ globalVariableValueMoveTo(src, other) and allowFromArg = true
+ )
+ }
+}
+
+/** A call through a function pointer, resolved to the targets of function accesses that flow to the called expression. */
+library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall {
+  override Expr getSrc() { result = this.getExpr() }
+
+  override Function resolve() {
+    exists(FunctionAccess access | result = access.getTarget() and this.flowsFrom(access, true))
+  }
+}
+
+/** A call to a virtual function that has at least one overriding function. */
+library class DataSensitiveOverriddenFunctionCall extends DataSensitiveCallExpr, FunctionCall {
+  DataSensitiveOverriddenFunctionCall() {
+    exists(VirtualFunction target | target = this.getTarget() | exists(target.getAnOverridingFunction()))
+  }
+
+  override Expr getSrc() { result = this.getQualifier() }
+
+  override MemberFunction resolve() {
+    exists(NewExpr allocation |
+      result.overrides*(this.getTarget().(VirtualFunction)) and
+      memberFunctionFromNewExpr(allocation, result) and
+      this.flowsFrom(allocation, true)
+    )
+  }
+}
+
+private predicate memberFunctionFromNewExpr(NewExpr new, MemberFunction f) {
+  exists(Class allocated | allocated = new.getAllocatedType() | f = allocated.getAMemberFunction())
+}
+
+/** Same as `betweenFunctionsValueMoveTo`, but only `FunctionCall`s are considered, resolved via `getTarget()`. */
+private predicate betweenFunctionsValueMoveToStatic(Element src, Element dest, boolean destFromArg) {
+  not unreachable(src) and
+  not unreachable(dest) and
+  (
+    exists(FunctionCall invocation, Function callee, int idx |
+      destFromArg = true and
+      dest = callee.getParameter(idx) and
+      callee = invocation.getTarget() and
+      src = invocation.getArgument(idx)
+    )
+    or
+    // Only move the return of the function to the function itself if the value didn't come from
+    // an argument, or else we would taint all the calls to one function if one argument is
+    // tainted somewhere
+    exists(Function enclosing, ReturnStmt returnStmt |
+      destFromArg = false and
+      dest = enclosing and
+      src = returnStmt.getExpr() and
+      enclosing = returnStmt.getEnclosingFunction()
+    )
+    or
+    exists(FunctionCall invocation, Function callee |
+      destFromArg = false and
+      dest = invocation and
+      src = callee and
+      callee = invocation.getTarget()
+    )
+    or
+    // If a parameter of type reference is tainted inside a function, taint the argument too
+    exists(FunctionCall invocation, Function callee, int idx, Parameter param |
+      destFromArg = false and
+      dest = invocation.getArgument(idx) and
+      src = param and
+      param.getType() instanceof ReferenceType and
+      param = callee.getParameter(idx) and
+      callee = invocation.getTarget()
+    )
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/boostorg/asio/protocols.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/boostorg/asio/protocols.qll
new file mode 100644
index 00000000000..e113d5e5745
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/security/boostorg/asio/protocols.qll
@@ -0,0 +1,491 @@
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+
+module BoostorgAsio {
+ /**
+  * Represents the `method` enum declared in `boost::asio::ssl::context`.
+  */
+ class SslContextMethod extends Enum {
+   SslContextMethod() {
+     this.getQualifiedName().matches("boost::asio::ssl::context%") and
+     this.getName() = "method"
+   }
+
+   /**
+    * Gets an enumeration constant of this enum that names a banned protocol.
+    */
+   EnumConstant getABannedProtocolConstant() {
+     result = this.getAnEnumConstant() and
+     (
+       /// Generic SSL version 2.
+       result.hasName("sslv2")
+       or
+       /// SSL version 2 client.
+       result.hasName("sslv2_client")
+       or
+       /// SSL version 2 server.
+       result.hasName("sslv2_server")
+       or
+       /// Generic SSL version 3.
+       result.hasName("sslv3")
+       or
+       /// SSL version 3 client.
+       result.hasName("sslv3_client")
+       or
+       /// SSL version 3 server.
+       result.hasName("sslv3_server")
+       or
+       /// Generic TLS version 1.
+       result.hasName("tlsv1")
+       or
+       /// TLS version 1 client.
+       result.hasName("tlsv1_client")
+       or
+       /// TLS version 1 server.
+       result.hasName("tlsv1_server")
+       or
+       /// Generic TLS version 1.1.
+       result.hasName("tlsv11")
+       or
+       /// TLS version 1.1 client.
+       result.hasName("tlsv11_client")
+       or
+       /// TLS version 1.1 server.
+       result.hasName("tlsv11_server")
+     )
+   }
+
+   /**
+    * Gets an enumeration constant for a protocol that is approved but hard-coded
+    * (no protocol negotiation). These are exactly the TLS v1.2 constants.
+    */
+   EnumConstant getAnApprovedButHardcodedProtocolConstant() {
+     result = this.getATls12ProtocolConstant()
+   }
+
+   /**
+    * Gets an enumeration constant of this enum that names a TLS v1.2 protocol.
+    */
+   EnumConstant getATls12ProtocolConstant() {
+     result = this.getAnEnumConstant() and
+     (
+       /// Generic TLS version 1.2.
+       result.hasName("tlsv12")
+       or
+       /// TLS version 1.2 client.
+       result.hasName("tlsv12_client")
+       or
+       /// TLS version 1.2 server.
+       result.hasName("tlsv12_server")
+     )
+   }
+
+   /**
+    * Gets an enumeration constant of this enum that names a TLS v1.3 protocol.
+    */
+   EnumConstant getATls13ProtocolConstant() {
+     result = this.getAnEnumConstant() and
+     (
+       /// Generic TLS version 1.3.
+       result.hasName("tlsv13")
+       or
+       /// TLS version 1.3 client.
+       result.hasName("tlsv13_client")
+       or
+       /// TLS version 1.3 server.
+       result.hasName("tlsv13_server")
+     )
+   }
+
+   /**
+    * Gets an enumeration constant of this enum that names a generic TLS or SSL/TLS protocol.
+    */
+   EnumConstant getAGenericTlsProtocolConstant() {
+     result = this.getAnEnumConstant() and
+     (
+       /// Generic TLS
+       result.hasName("tls")
+       or
+       /// TLS client.
+       result.hasName("tls_client")
+       or
+       /// TLS server.
+       result.hasName("tls_server")
+     )
+     or
+     result = this.getASslv23ProtocolConstant()
+   }
+
+   /**
+    * Gets an enumeration constant of this enum that names a generic SSL/TLS protocol.
+    */
+   EnumConstant getASslv23ProtocolConstant() {
+     result = this.getAnEnumConstant() and
+     (
+       /// OpenSSL - SSLv23 == A TLS/SSL connection established with these methods may understand the SSLv2, SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.
+       /// Generic SSL/TLS.
+       result.hasName("sslv23")
+       or
+       /// SSL/TLS client.
+       result.hasName("sslv23_client")
+       or
+       /// SSL/TLS server.
+       result.hasName("sslv23_server")
+     )
+   }
+ }
+
+ /**
+ * Gets the value for the no_sslv2 constant, right shifted by 16 bits.
+ *
+ * Note that modern versions of OpenSSL do not support SSL v2, so this option is for backwards compatibility only.
+ */
+ int getShiftedSslOptionsNoSsl2() {
+ // SSL_OP_NO_SSLv2 was removed from modern OpenSSL versions, hence the value 0
+ result = 0
+ }
+
+ /**
+ * Gets the value for the no_sslv3 constant, right shifted by 16 bits.
+ */
+ int getShiftedSslOptionsNoSsl3() {
+ // SSL_OP_NO_SSLv3 == 0x02000000U, and 0x02000000 >> 16 == 0x0200 == 512
+ result = 512
+ }
+
+ /**
+ * Gets the value for the no_tlsv1 constant, right shifted by 16 bits.
+ */
+ int getShiftedSslOptionsNoTls1() {
+ // SSL_OP_NO_TLSv1 == 0x04000000U, and 0x04000000 >> 16 == 0x0400 == 1024
+ result = 1024
+ }
+
+ /**
+ * Gets the value for the no_tlsv1_1 constant, right shifted by 16 bits.
+ */
+ int getShiftedSslOptionsNoTls1_1() {
+ // SSL_OP_NO_TLSv1_1 == 0x10000000U, and 0x10000000 >> 16 == 0x1000 == 4096
+ result = 4096
+ }
+
+ /**
+ * Gets the value for the no_tlsv1_2 constant, right shifted by 16 bits.
+ */
+ int getShiftedSslOptionsNoTls1_2() {
+ // SSL_OP_NO_TLSv1_2 == 0x08000000U, and 0x08000000 >> 16 == 0x0800 == 2048
+ result = 2048
+ }
+
+ /**
+ * Gets the value for the no_tlsv1_3 constant, right shifted by 16 bits.
+ */
+ int getShiftedSslOptionsNoTls1_3() {
+ // SSL_OP_NO_TLSv1_3 == 0x20000000U, and 0x20000000 >> 16 == 0x2000 == 8192
+ result = 8192
+ }
+
+ /**
+  * Represents the class whose qualified name is `boost::asio::ssl::context`.
+  */
+ class SslContextClass extends Class {
+   SslContextClass() { this.getQualifiedName() = "boost::asio::ssl::context" }
+
+   /** Gets a call from source, in a file whose path does not match `%/boost/asio/%`, to a constructor of this class. */
+   ConstructorCall getAContructorCall() {
+     result.fromSource() and result = this.getAConstructor().getACallToThisFunction() and
+     not result.getLocation().getFile().toString().matches("%/boost/asio/%")
+   }
+ }
+
+ /** Represents the `boost::asio::ssl::context::set_options` member function. */
+ class SslSetOptionsFunction extends Function {
+   SslSetOptionsFunction() {
+     // Compare with `=` rather than `matches`: in a `matches` pattern the `_` of
+     // `set_options` is a single-character wildcard, so other names would be accepted too.
+     this.getQualifiedName() = "boost::asio::ssl::context::set_options"
+   }
+ }
+
+ /**
+  * Holds if `e` is a banned protocol: a literal or variable access with a listed value, or a banned constant access.
+  */
+ predicate isExprBannedBoostProtocol(Expr e) {
+   exists(Literal lit, int method | lit = e and method = lit.getValue().toInt() |
+     method = 0 or
+     method = 1 or
+     method = 2 or
+     method = 3 or
+     method = 4 or
+     method = 5 or
+     method = 6 or
+     method = 7 or
+     method = 8 or
+     method = 12 or
+     method = 13 or
+     method = 14
+   )
+   or
+   exists(VariableAccess access, int method | access = e and method = access.getValue().toInt() |
+     method = 0 or
+     method = 1 or
+     method = 2 or
+     method = 3 or
+     method = 4 or
+     method = 5 or
+     method = 6 or
+     method = 7 or
+     method = 8 or
+     method = 12 or
+     method = 13 or
+     method = 14
+   )
+   or
+   exists(SslContextMethod methodEnum, EnumConstant banned |
+     banned = methodEnum.getABannedProtocolConstant() and e = banned.getAnAccess()
+   )
+ }
+
+ /**
+  * Holds if `e` is a TLS v1.2 protocol: a literal or variable access with value 15, 16 or 17, or a TLS v1.2 constant access.
+  */
+ predicate isExprTls12BoostProtocol(Expr e) {
+   exists(Literal lit, int method | lit = e and method = lit.getValue().toInt() |
+     (
+       method = 15 or /// Generic TLS version 1.2.
+       method = 16 or /// TLS version 1.2 client.
+       method = 17 /// TLS version 1.2 server.
+     )
+   )
+   or
+   exists(VariableAccess access, int method | access = e and method = access.getValue().toInt() |
+     (
+       method = 15 or /// Generic TLS version 1.2.
+       method = 16 or /// TLS version 1.2 client.
+       method = 17 /// TLS version 1.2 server.
+     )
+   )
+   or
+   exists(SslContextMethod methodEnum, EnumConstant tls12 |
+     tls12 = methodEnum.getATls12ProtocolConstant() and e = tls12.getAnAccess()
+   )
+ }
+
+ /**
+  * Holds if `e` is a TLS v1.3 protocol (a protocol that requires Crypto Board approval): value 18, 19 or 20, or a TLS v1.3 constant access.
+  */
+ predicate isExprTls13BoostProtocol(Expr e) {
+   exists(Literal lit, int method | lit = e and method = lit.getValue().toInt() |
+     (
+       method = 18 or
+       method = 19 or
+       method = 20
+     )
+   )
+   or
+   exists(VariableAccess access, int method | access = e and method = access.getValue().toInt() |
+     (
+       method = 18 or
+       method = 19 or
+       method = 20
+     )
+   )
+   or
+   exists(SslContextMethod methodEnum, EnumConstant tls13 |
+     tls13 = methodEnum.getATls13ProtocolConstant() and e = tls13.getAnAccess()
+   )
+ }
+
+ /**
+  * Holds if `e` is a generic TLS or SSL/TLS protocol: a literal or variable access with a listed value, or a generic constant access.
+  */
+ predicate isExprTlsBoostProtocol(Expr e) {
+   exists(Literal lit, int method | lit = e and method = lit.getValue().toInt() |
+     (
+       method = 9 or /// Generic SSL/TLS.
+       method = 10 or /// SSL/TLS client.
+       method = 11 or /// SSL/TLS server.
+       method = 21 or /// Generic TLS.
+       method = 22 or /// TLS client.
+       method = 23 /// TLS server.
+     )
+   )
+   or
+   exists(VariableAccess access, int method | access = e and method = access.getValue().toInt() |
+     (
+       method = 9 or /// Generic SSL/TLS.
+       method = 10 or /// SSL/TLS client.
+       method = 11 or /// SSL/TLS server.
+       method = 21 or /// Generic TLS.
+       method = 22 or /// TLS client.
+       method = 23 /// TLS server.
+     )
+   )
+   or
+   exists(SslContextMethod methodEnum, EnumConstant generic |
+     generic = methodEnum.getAGenericTlsProtocolConstant() and e = generic.getAnAccess()
+   )
+ }
+
+ /**
+  * Holds if `e` is a generic SSL/TLS protocol: a literal or variable access with value 9, 10 or 11, or an `sslv23` constant access.
+  */
+ predicate isExprSslV23BoostProtocol(Expr e) {
+   exists(Literal lit, int method | lit = e and method = lit.getValue().toInt() |
+     (
+       method = 9 or /// Generic SSL/TLS.
+       method = 10 or /// SSL/TLS client.
+       method = 11 /// SSL/TLS server.
+     )
+   )
+   or
+   exists(VariableAccess access, int method | access = e and method = access.getValue().toInt() |
+     (
+       method = 9 or /// Generic SSL/TLS.
+       method = 10 or /// SSL/TLS client.
+       method = 11 /// SSL/TLS server.
+     )
+   )
+   or
+   exists(SslContextMethod methodEnum, EnumConstant sslv23 |
+     sslv23 = methodEnum.getASslv23ProtocolConstant() and e = sslv23.getAnAccess()
+   )
+ }
+
+ //////////////////////// Dataflow /////////////////////
+ /**
+  * Abstract base for configurations whose sink is the first argument of a
+  * `boost::asio::ssl::context` constructor call; subclasses choose the sources.
+  */
+ abstract class SslContextCallAbstractConfig extends DataFlow::Configuration {
+   bindingset[this]
+   SslContextCallAbstractConfig() { any() }
+
+   override predicate isSink(DataFlow::Node sink) {
+     exists(ConstructorCall ctorCall, SslContextClass contextClass |
+       ctorCall = contextClass.getAContructorCall() and
+       sink.asExpr() = ctorCall.getArgument(0)
+     )
+   }
+ }
+
+ /**
+  * Any expression from source, outside `%/boost/asio/%` files, that flows to the first argument of a context constructor.
+  */
+ class SslContextCallConfig extends SslContextCallAbstractConfig {
+   SslContextCallConfig() { this = "SslContextCallConfig" }
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(Expr origin | source.asExpr() = origin |
+       not origin.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       origin.fromSource()
+     )
+   }
+ }
+
+ /**
+  * A banned protocol value (see `isExprBannedBoostProtocol`) that flows to the first argument of a context constructor.
+  */
+ class SslContextCallBannedProtocolConfig extends SslContextCallAbstractConfig {
+   SslContextCallBannedProtocolConfig() { this = "SslContextCallBannedProtocolConfig" }
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(Expr origin | source.asExpr() = origin |
+       isExprBannedBoostProtocol(origin) and
+       not origin.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       origin.fromSource()
+     )
+   }
+ }
+
+ /**
+  * A TLS 1.2 protocol value (see `isExprTls12BoostProtocol`) that flows to the first argument of a context constructor.
+  */
+ class SslContextCallTls12ProtocolConfig extends SslContextCallAbstractConfig {
+   SslContextCallTls12ProtocolConfig() { this = "SslContextCallTls12ProtocolConfig" }
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(Expr origin | source.asExpr() = origin |
+       isExprTls12BoostProtocol(origin) and
+       not origin.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       origin.fromSource()
+     )
+   }
+ }
+
+ /**
+  * A TLS 1.3 protocol value (see `isExprTls13BoostProtocol`) that flows to the first argument of a context constructor.
+  */
+ class SslContextCallTls13ProtocolConfig extends SslContextCallAbstractConfig {
+   SslContextCallTls13ProtocolConfig() { this = "SslContextCallTls13ProtocolConfig" } // was the TLS 1.2 name, merging both configurations
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(Expr e | e = source.asExpr() |
+       e.fromSource() and
+       not e.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       isExprTls13BoostProtocol(e)
+     )
+   }
+ }
+
+ /**
+  * A generic TLS protocol value (see `isExprTlsBoostProtocol`) that flows to the first argument of a context constructor.
+  */
+ class SslContextCallTlsProtocolConfig extends SslContextCallAbstractConfig {
+   SslContextCallTlsProtocolConfig() { this = "SslContextCallTlsProtocolConfig" }
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(Expr origin | source.asExpr() = origin |
+       isExprTlsBoostProtocol(origin) and
+       not origin.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       origin.fromSource()
+     )
+   }
+ }
+
+ /**
+  * A context constructor call that flows to the qualifier of a call to `set_options`.
+  */
+ class SslContextFlowsToSetOptionConfig extends DataFlow::Configuration {
+   SslContextFlowsToSetOptionConfig() { this = "SslContextFlowsToSetOptionConfig" }
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(ConstructorCall ctorCall, SslContextClass contextClass |
+       ctorCall = contextClass.getAContructorCall() and
+       source.asExpr() = ctorCall
+     )
+   }
+
+   override predicate isSink(DataFlow::Node sink) {
+     exists(SslSetOptionsFunction setOptions, FunctionCall call, VariableAccess receiver |
+       receiver = sink.asExpr()
+     |
+       receiver = call.getQualifier() and
+       call = setOptions.getACallToThisFunction() and
+       exists(Variable v | receiver = v.getAnAccess())
+     )
+   }
+ }
+
+ /**
+  * An expression from source that flows to the first argument of a call to `set_options`.
+  */
+ class SslOptionConfig extends DataFlow::Configuration {
+   SslOptionConfig() { this = "SslOptionConfig" }
+
+   override predicate isSource(DataFlow::Node source) {
+     exists(Expr origin | source.asExpr() = origin |
+       not origin.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       origin.fromSource()
+     )
+   }
+
+   override predicate isSink(DataFlow::Node sink) {
+     exists(FunctionCall call, SslSetOptionsFunction setOptions |
+       not sink.getLocation().getFile().toString().matches("%/boost/asio/%") and
+       call = setOptions.getACallToThisFunction() and
+       call.getArgument(0) = sink.asExpr()
+     )
+   }
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/stmts/Block.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/stmts/Block.qll
new file mode 100644
index 00000000000..3bebc660456
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/stmts/Block.qll
@@ -0,0 +1,134 @@
+/**
+ * Provides a class to model C/C++ block statements, enclosed by `{` and `}`.
+ */
+
+import semmle.code.cpp.Element
+import semmle.code.cpp.stmts.Stmt
+
+/**
+ * A C/C++ block statement.
+ *
+ * For example, the block from `{` to `}` in the following code:
+ * ```
+ * {
+ * int a;
+ * int b = 1;
+ * a = b;
+ * }
+ * ```
+ */
+class BlockStmt extends Stmt, @stmt_block {
+ override string getAPrimaryQlClass() { result = "BlockStmt" }
+
+ /**
+ * Gets a child declaration of this block.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; a = b; }
+ * ```
+ * it would have 2 results, for the declarations of `a` and `b`.
+ */
+ Declaration getADeclaration() { result = this.getAStmt().(DeclStmt).getADeclaration() }
+
+ /**
+ * Gets a body statement of this block.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; a = b; }
+ * ```
+ * it would have 3 results, for the declarations of `a` and `b` and
+ * for the expression statement `a = b`.
+ */
+ Stmt getAStmt() { result = this.getAChild() }
+
+ /**
+ * Gets the `n`th body statement of this block, indexed from 0.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; a = b; }
+ * ```
+ * `getStmt(2)`'s result is the expression statement `a = b`.
+ */
+ Stmt getStmt(int n) { result = this.getChild(n) }
+
+ /**
+ * Gets the last body statement of this block.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; a = b; }
+ * ```
+ * the result is the expression statement `a = b`.
+ */
+ Stmt getLastStmt() { result = this.getStmt(this.getNumStmt() - 1) }
+
+ /**
+ * Gets the last body statement of this block. If this last statement
+ * is itself a block, returns the last statement of that block, and so on.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; { a = b; } }
+ * ```
+ * the result is the expression statement `a = b`.
+ */
+ Stmt getLastStmtIn() {
+ if this.getLastStmt() instanceof BlockStmt
+ then result = this.getLastStmt().(BlockStmt).getLastStmtIn()
+ else result = this.getLastStmt()
+ }
+
+ /**
+ * Gets the number of body statements in this block.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; a = b; }
+ * ```
+ * the result is 3.
+ */
+ int getNumStmt() { result = count(this.getAStmt()) }
+
+ /**
+ * Holds if the block has no statements.
+ *
+ * For example, the block
+ * ```
+ * { }
+ * ```
+ * is empty, as is the block
+ * ```
+ * {
+ * // a comment
+ * }
+ * ```
+ */
+ predicate isEmpty() { this.getNumStmt() = 0 }
+
+ /**
+ * Gets the index of the given statement within this block, indexed from 0.
+ *
+ * For example, for the block
+ * ```
+ * { int a; int b = 1; a = b; }
+ * ```
+ * if `s` is the expression statement `a = b` then `getIndexOfStmt(s)`
+ * has result 2.
+ */
+ int getIndexOfStmt(Stmt s) { this.getStmt(result) = s }
+
+ override string toString() { result = "{ ... }" }
+
+ override predicate mayBeImpure() { this.getAStmt().mayBeImpure() }
+
+ override predicate mayBeGloballyImpure() { this.getAStmt().mayBeGloballyImpure() }
+}
+
+/**
+ * DEPRECATED: Use `BlockStmt` instead. This alias keeps the old name; the
+ * class was renamed to avoid confusion with `BasicBlock`.
+ */
+deprecated class Block = BlockStmt;
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/stmts/Stmt.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/stmts/Stmt.qll
new file mode 100644
index 00000000000..ed1fb4fbb50
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/stmts/Stmt.qll
@@ -0,0 +1,2148 @@
+/**
+ * Provides a hierarchy of classes for modeling C/C++ statements.
+ */
+
+import semmle.code.cpp.Element
+private import semmle.code.cpp.Enclosing
+private import semmle.code.cpp.internal.ResolveClass
+
+/**
+ * A C/C++ statement.
+ */
+class Stmt extends StmtParent, @stmt {
+ /** Gets the `n`th child of this statement. */
+ Element getChild(int n) {
+ stmtparents(unresolveElement(result), n, underlyingElement(this)) or
+ exprparents(unresolveElement(result), n, underlyingElement(this))
+ }
+
+ /** Holds if `e` is the `n`th child of this statement. */
+ predicate hasChild(Element e, int n) { this.getChild(n) = e }
+
+ /** Gets the enclosing function of this statement, if any. */
+ Function getEnclosingFunction() { result = stmtEnclosingElement(this) }
+
+ /**
+ * Gets the nearest enclosing block of this statement with a known location, if any.
+ */
+ BlockStmt getEnclosingBlock() {
+ if
+ this.getParentStmt() instanceof BlockStmt and
+ not this.getParentStmt().(BlockStmt).getLocation() instanceof UnknownLocation
+ then result = this.getParentStmt()
+ else result = this.getParentStmt().getEnclosingBlock()
+ }
+
+ /** Gets a child of this statement. */
+ Element getAChild() { exists(int n | result = this.getChild(n)) }
+
+ /** Gets the parent of this statement, if any. */
+ StmtParent getParent() { stmtparents(underlyingElement(this), _, unresolveElement(result)) }
+
+ /** Gets the parent statement of this statement, if any. */
+ Stmt getParentStmt() { stmtparents(underlyingElement(this), _, unresolveElement(result)) }
+
+ /** Gets a child statement of this statement. */
+ Stmt getChildStmt() { result.getParentStmt() = this }
+
+ /**
+ * Gets the statement following this statement in the same block, if any.
+ *
+ * Note that this is not widely useful, because this doesn't have a result for
+ * the last statement of a block. Consider using the `ControlFlowNode` class
+ * to trace the flow of control instead.
+ */
+ Stmt getFollowingStmt() {
+ exists(BlockStmt b, int i |
+ this = b.getStmt(i) and
+ result = b.getStmt(i + 1)
+ )
+ }
+
+ override Location getLocation() { stmts(underlyingElement(this), _, result) }
+
+ /**
+ * Gets an int indicating the type of statement that this represents.
+ *
+ * DEPRECATED: use the subclasses of `Stmt` rather than relying on this predicate.
+ */
+ deprecated int getKind() { stmts(underlyingElement(this), result, _) }
+
+ override string toString() { none() }
+
+ override Function getControlFlowScope() { result = this.getEnclosingFunction() }
+
+ override Stmt getEnclosingStmt() { result = this }
+
+ /**
+ * Holds if this statement is side-effect free (a conservative
+ * approximation; that is, it may be side-effect free even if this
+ * predicate doesn't hold).
+ *
+ * This predicate cannot be overridden; override `mayBeImpure()`
+ * instead.
+ *
+ * Note that this predicate only considers whether the statement has
+ * any side-effects, such as writing to a file. Even if it holds, the
+ * statement may be impure in the sense that its behavior is affected
+ * by external factors, such as the contents of global variables.
+ */
+ final predicate isPure() { not this.mayBeImpure() }
+
+ /**
+ * Holds if it is possible that this statement is impure. If we are not
+ * sure, then it holds.
+ */
+ predicate mayBeImpure() { any() }
+
+ /**
+ * Holds if it is possible that this statement is globally impure.
+ *
+ * Similar to `mayBeImpure()`, except that `mayBeGloballyImpure()`
+ * does not consider modifications to temporary local variables to be
+ * impure. That is, if you call a function in which
+ * `mayBeGloballyImpure()` doesn't hold for any statement, then the
+ * function as a whole will have no side-effects, even if it mutates
+ * its own fresh stack variables.
+ */
+ predicate mayBeGloballyImpure() { any() }
+
+ /**
+ * Gets an attribute of this statement, for example
+ * `[[clang::fallthrough]]`.
+ */
+ Attribute getAnAttribute() { stmtattributes(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Gets a macro invocation that generates this entire statement.
+ *
+ * For example, given
+ * ```
+ * #define SOMEFUN a()
+ * #define FOO do { SOMEFUN; b(); } while (0)
+ * void f(void) {
+ * FOO;
+ * }
+ * ```
+ * this predicate would have results of `SOMEFUN` and `FOO` for the
+ * function call `a()`, and just `FOO` for the function call `b()`,
+ * the block within the 'do' statement, and the entire 'do' statement.
+ *
+ * Note that, unlike `isInMacroExpansion()` it is not necessary for
+ * the macro to generate the terminating semi-colon.
+ */
+ MacroInvocation getGeneratingMacro() { result.getAnExpandedElement() = this }
+
+ /** Holds if this statement was generated by the compiler. */
+ predicate isCompilerGenerated() { compgenerated(underlyingElement(this)) }
+}
+
+private class TStmtParent = @stmt or @expr; // union of the statement and expression database types
+
+/**
+ * An element that is the parent of a statement in the C/C++ AST.
+ *
+ * This is normally a statement, but may be an expression, namely a `StmtExpr`.
+ */
+class StmtParent extends ControlFlowNode, TStmtParent { }
+
+/**
+ * A C/C++ statement that consists of a single expression.
+ *
+ * In the snippet
+ * ```
+ * x = 1;
+ * ```
+ * the 'expression' statement wraps the assignment expression `x = 1`.
+ */
+class ExprStmt extends Stmt, @stmt_expr {
+ override string getAPrimaryQlClass() { result = "ExprStmt" }
+
+ /**
+ * Gets the expression wrapped by this statement, which is its child 0.
+ *
+ * Given
+ * ```
+ * x = 1;
+ * ```
+ * this yields an `AssignExpr`.
+ */
+ Expr getExpr() { this.hasChild(result, 0) }
+
+ override string toString() { result = "ExprStmt" }
+
+ override predicate mayBeImpure() { exists(Expr e | e = this.getExpr() | e.mayBeImpure()) }
+
+ override predicate mayBeGloballyImpure() { exists(Expr e | e = this.getExpr() | e.mayBeGloballyImpure()) }
+
+ override MacroInvocation getGeneratingMacro() {
+ // Only the expression has to come from the macro; the semicolon may not.
+ this.getExpr() = result.getAnExpandedElement()
+ }
+}
+
+private class TControlStructure = TConditionalStmt or TLoop; // union of the conditional and loop types
+
+/**
+ * A C/C++ control structure, that is, either a conditional statement
+ * (see `ConditionalStmt`) or a loop (see `Loop`).
+ */
+class ControlStructure extends Stmt, TControlStructure {
+ /**
+ * Gets the controlling expression of this control structure.
+ *
+ * This is the condition of 'if' statements and loops, and the
+ * switched expression for 'switch' statements.
+ */
+ Expr getControllingExpr() { none() } // overridden by subclasses
+
+ /** Gets a child declaration of this scope. Has no result here; subclasses such as `ForStmt` override it. */
+ Declaration getADeclaration() { none() }
+}
+
+private class TConditionalStmt = @stmt_if or @stmt_constexpr_if or @stmt_switch;
+
+/**
+ * A C/C++ conditional statement, that is, an 'if' statement, a
+ * 'constexpr if' statement, or a 'switch' statement.
+ */
+class ConditionalStmt extends ControlStructure, TConditionalStmt { }
+
+/**
+ * A C/C++ 'if' statement, such as the whole `if` construct in the
+ * snippet below:
+ * ```
+ * if (x == 1) {
+ * ...
+ * }
+ * ```
+ */
+class IfStmt extends ConditionalStmt, @stmt_if {
+ override string getAPrimaryQlClass() { result = "IfStmt" }
+
+ /**
+ * Gets the expression tested by this 'if' statement (its child 0).
+ *
+ * Given
+ * ```
+ * if (b) { x = 1; }
+ * ```
+ * this yields `b`.
+ */
+ Expr getCondition() { this.hasChild(result, 0) }
+
+ override Expr getControllingExpr() { this.getCondition() = result }
+
+ /**
+ * Gets the statement in the 'then' branch of this 'if' statement.
+ *
+ * Given
+ * ```
+ * if (b) { x = 1; }
+ * ```
+ * this yields the `BlockStmt` `{ x = 1; }`.
+ */
+ Stmt getThen() { if_then(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Gets the statement in the 'else' branch of this 'if' statement,
+ * when there is one.
+ *
+ * Given
+ * ```
+ * if (b) { x = 1; } else { x = 2; }
+ * ```
+ * this yields the `BlockStmt` `{ x = 2; }`, whereas
+ * ```
+ * if (b) { x = 1; }
+ * ```
+ * yields nothing.
+ */
+ Stmt getElse() { if_else(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Holds if this 'if' statement has an 'else' branch.
+ *
+ * It holds for
+ * ```
+ * if (b) { x = 1; } else { x = 2; }
+ * ```
+ * and does not hold for
+ * ```
+ * if (b) { x = 1; }
+ * ```
+ */
+ predicate hasElse() { exists(this.getElse()) }
+
+ override string toString() { result = "if (...) ... " }
+
+ override predicate mayBeImpure() {
+ this.getCondition().mayBeImpure()
+ or
+ exists(Stmt branch | branch = this.getThen() or branch = this.getElse() | branch.mayBeImpure())
+ }
+
+ override predicate mayBeGloballyImpure() {
+ this.getCondition().mayBeGloballyImpure()
+ or
+ exists(Stmt branch | branch = this.getThen() or branch = this.getElse() | branch.mayBeGloballyImpure())
+ }
+
+ override MacroInvocation getGeneratingMacro() {
+ this.getCondition() = result.getAnExpandedElement() and
+ result = this.getThen().getGeneratingMacro() and
+ (not this.hasElse() or result = this.getElse().getGeneratingMacro())
+ }
+}
+
+/**
+ * A C/C++ 'constexpr if' statement, such as the whole `if constexpr`
+ * construct in the snippet below:
+ * ```
+ * if constexpr (x) {
+ * ...
+ * }
+ * ```
+ */
+class ConstexprIfStmt extends ConditionalStmt, @stmt_constexpr_if {
+ override string getAPrimaryQlClass() { result = "ConstexprIfStmt" }
+
+ /**
+ * Gets the expression tested by this 'constexpr if' statement (its child 0).
+ *
+ * Given
+ * ```
+ * if constexpr (b) { x = 1; }
+ * ```
+ * this yields `b`.
+ */
+ Expr getCondition() { this.hasChild(result, 0) }
+
+ override Expr getControllingExpr() { this.getCondition() = result }
+
+ /**
+ * Gets the statement in the 'then' branch of this 'constexpr if' statement.
+ *
+ * Given
+ * ```
+ * if constexpr (b) { x = 1; }
+ * ```
+ * this yields the `BlockStmt` `{ x = 1; }`.
+ */
+ Stmt getThen() { constexpr_if_then(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Gets the statement in the 'else' branch of this 'constexpr if'
+ * statement, when there is one.
+ *
+ * Given
+ * ```
+ * if constexpr (b) { x = 1; } else { x = 2; }
+ * ```
+ * this yields the `BlockStmt` `{ x = 2; }`, whereas
+ * ```
+ * if constexpr (b) { x = 1; }
+ * ```
+ * yields nothing.
+ */
+ Stmt getElse() { constexpr_if_else(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Holds if this 'constexpr if' statement has an 'else' branch.
+ *
+ * It holds for
+ * ```
+ * if constexpr (b) { x = 1; } else { x = 2; }
+ * ```
+ * and does not hold for
+ * ```
+ * if constexpr (b) { x = 1; }
+ * ```
+ */
+ predicate hasElse() { exists(this.getElse()) }
+
+ override string toString() { result = "if constexpr (...) ... " }
+
+ override predicate mayBeImpure() {
+ this.getCondition().mayBeImpure()
+ or
+ exists(Stmt branch | branch = this.getThen() or branch = this.getElse() | branch.mayBeImpure())
+ }
+
+ override predicate mayBeGloballyImpure() {
+ this.getCondition().mayBeGloballyImpure()
+ or
+ exists(Stmt branch | branch = this.getThen() or branch = this.getElse() | branch.mayBeGloballyImpure())
+ }
+
+ override MacroInvocation getGeneratingMacro() {
+ this.getCondition() = result.getAnExpandedElement() and
+ result = this.getThen().getGeneratingMacro() and
+ (not this.hasElse() or result = this.getElse().getGeneratingMacro())
+ }
+}
+
+private class TLoop = @stmt_while or @stmt_end_test_while or @stmt_range_based_for or @stmt_for;
+
+/**
+ * A C/C++ loop, that is, a 'while' loop, a 'do' loop, a 'for' loop, or a
+ * range-based 'for' loop.
+ */
+class Loop extends ControlStructure, TLoop {
+ /** Gets the condition expression of this loop. Has no result here. */
+ Expr getCondition() { none() } // overridden in subclasses
+
+ /** Gets the body statement of this loop. Has no result here. */
+ Stmt getStmt() { none() } // overridden in subclasses
+}
+
+/**
+ * A C/C++ 'while' statement.
+ *
+ * For example, the `while` statement in the following code:
+ * ```
+ * while (b) {
+ * f();
+ * }
+ * ```
+ */
+class WhileStmt extends Loop, @stmt_while {
+ override string getAPrimaryQlClass() { result = "WhileStmt" }
+
+ override Expr getCondition() { result = this.getChild(0) }
+
+ override Expr getControllingExpr() { result = this.getCondition() }
+
+ override Stmt getStmt() { while_body(underlyingElement(this), unresolveElement(result)) }
+
+ override string toString() { result = "while (...) ..." }
+
+ override predicate mayBeImpure() {
+ this.getCondition().mayBeImpure() or
+ this.getStmt().mayBeImpure()
+ }
+
+ override predicate mayBeGloballyImpure() {
+ this.getCondition().mayBeGloballyImpure() or
+ this.getStmt().mayBeGloballyImpure()
+ }
+
+ override MacroInvocation getGeneratingMacro() {
+ result.getAnExpandedElement() = this.getCondition() and
+ this.getStmt().getGeneratingMacro() = result
+ }
+
+ /**
+ * Holds if the loop condition is provably `true`.
+ *
+ * For example, this holds for
+ * ```
+ * while(1) { ...; if(b) break; ...; }
+ * ```
+ */
+ predicate conditionAlwaysTrue() { conditionAlwaysTrue(this.getCondition()) }
+
+ /**
+ * Holds if the loop condition is provably `false`.
+ *
+ * For example, this holds for
+ * ```
+ * while(0) { ...; }
+ * ```
+ */
+ predicate conditionAlwaysFalse() { conditionAlwaysFalse(this.getCondition()) }
+
+ /**
+ * Holds if the loop condition is provably `true` upon entry,
+ * that is, at least one iteration of the loop is guaranteed.
+ *
+ * For example, with
+ * ```
+ * bool done = false;
+ * while (!done) { ... done = true; ... }
+ * ```
+ * the condition `!done` always evaluates to `true` upon entry since
+ * `done = false`, but the condition may evaluate to `false` after
+ * some iterations.
+ */
+ predicate conditionAlwaysTrueUponEntry() { loopConditionAlwaysTrueUponEntry(this, _) }
+}
+
+/**
+ * A C/C++ jump statement, such as a 'goto', 'break' or 'continue' statement.
+ */
+class JumpStmt extends Stmt, @jump {
+ override string getAPrimaryQlClass() { result = "JumpStmt" }
+
+ /** Gets the target statement of this jump statement, as recorded by the `jumpinfo` relation. */
+ Stmt getTarget() { jumpinfo(underlyingElement(this), _, unresolveElement(result)) }
+}
+
+/**
+ * A C/C++ 'goto' statement that jumps to a label.
+ *
+ * In the snippet below it is the `goto` line:
+ * ```
+ * goto someLabel;
+ * ...
+ * someLabel:
+ * ```
+ */
+class GotoStmt extends JumpStmt, @stmt_goto {
+ override string getAPrimaryQlClass() { result = "GotoStmt" }
+
+ /**
+ * Gets the non-empty label name that this 'goto' statement refers to.
+ *
+ * Given
+ * ```
+ * goto someLabel;
+ * ```
+ * this yields `"someLabel"`.
+ */
+ string getName() { not result = "" and jumpinfo(underlyingElement(this), result, _) }
+
+ /** Holds if this 'goto' statement refers to a label with a non-empty name. */
+ predicate hasName() { exists(this.getName()) }
+
+ override string toString() { result = "goto ..." }
+
+ /**
+ * Holds if this 'goto' statement jumps out of at least two loops
+ * nested inside one another.
+ *
+ * For example, for
+ * ```
+ * while(b) {
+ * while(b) {
+ * if(b) goto middle;
+ * if(b) goto end;
+ * }
+ * if(b) goto end;
+ * middle:
+ * }
+ * end:
+ * ```
+ * this holds for the second `goto`, but not the first or third.
+ */
+ predicate breaksFromNestedLoops() {
+ exists(Loop inner, Loop outer |
+ inner = this.getParentStmt+() and
+ outer = inner.getParentStmt+() and
+ this.getASuccessor().(Stmt).getParentStmt() = outer.getParentStmt+()
+ )
+ }
+
+ override predicate mayBeImpure() { none() }
+
+ override predicate mayBeGloballyImpure() { none() }
+}
+
+/**
+ * A 'goto' statement that jumps to a target computed by a
+ * non-constant expression (a non-standard extension to C/C++).
+ *
+ * In the snippet below it is the whole statement:
+ * ```
+ * goto *ptr;
+ * ```
+ */
+class ComputedGotoStmt extends Stmt, @stmt_assigned_goto {
+ /**
+ * Gets the expression that computes the target of this 'goto'
+ * statement (its child 0).
+ *
+ * Given
+ * ```
+ * goto *ptr;
+ * ```
+ * this yields `ptr`.
+ */
+ Expr getExpr() { this.hasChild(result, 0) }
+
+ override string toString() { result = "computed goto ..." }
+
+ override predicate mayBeImpure() { exists(Expr e | e = this.getExpr() | e.mayBeImpure()) }
+
+ override predicate mayBeGloballyImpure() { exists(Expr e | e = this.getExpr() | e.mayBeGloballyImpure()) }
+
+ override MacroInvocation getGeneratingMacro() {
+ // Only the expression has to come from the macro; the semicolon may not.
+ this.getExpr() = result.getAnExpandedElement()
+ }
+}
+
+/**
+ * A C/C++ 'continue' statement.
+ *
+ * In the snippet below it is the `continue` inside the loop body:
+ * ```
+ * while (x) {
+ * if (arr[x] < 0) continue;
+ * ...
+ * }
+ * ```
+ */
+class ContinueStmt extends JumpStmt, @stmt_continue {
+ /**
+ * Gets the nearest enclosing loop, which this 'continue' statement continues.
+ */
+ Stmt getContinuable() { getEnclosingContinuable(this) = result }
+
+ override string getAPrimaryQlClass() { result = "ContinueStmt" }
+
+ override string toString() { result = "continue;" }
+
+ override predicate mayBeGloballyImpure() { none() }
+
+ override predicate mayBeImpure() { none() }
+}
+
+private Stmt getEnclosingContinuable(Stmt s) { // gets the nearest `Loop` strictly enclosing `s`
+ if s.getParent().getEnclosingStmt() instanceof Loop
+ then result = s.getParent().getEnclosingStmt() // the statement around `s` is a loop: done
+ else result = getEnclosingContinuable(s.getParent().getEnclosingStmt()) // otherwise walk outwards
+}
+
+/**
+ * A C/C++ 'break' statement.
+ *
+ * In the snippet below it is the `break` inside the loop body:
+ * ```
+ * while (x) {
+ * if (arr[x] == 0) break;
+ * ...
+ * }
+ * ```
+ */
+class BreakStmt extends JumpStmt, @stmt_break {
+ /**
+ * Gets the nearest enclosing loop or 'switch' statement, which this 'break' statement exits.
+ */
+ Stmt getBreakable() { getEnclosingBreakable(this) = result }
+
+ override string getAPrimaryQlClass() { result = "BreakStmt" }
+
+ override string toString() { result = "break;" }
+
+ override predicate mayBeGloballyImpure() { none() }
+
+ override predicate mayBeImpure() { none() }
+}
+
+private Stmt getEnclosingBreakable(Stmt s) { // gets the nearest `Loop` or `SwitchStmt` strictly enclosing `s`
+ if
+ s.getParent().getEnclosingStmt() instanceof Loop or
+ s.getParent().getEnclosingStmt() instanceof SwitchStmt
+ then result = s.getParent().getEnclosingStmt() // the statement around `s` is breakable: done
+ else result = getEnclosingBreakable(s.getParent().getEnclosingStmt()) // otherwise walk outwards
+}
+
+/**
+ * A C/C++ 'label' statement.
+ *
+ * In the snippet below it is the `someLabel:` line:
+ * ```
+ * goto someLabel;
+ * ...
+ * someLabel:
+ * ```
+ */
+class LabelStmt extends Stmt, @stmt_label {
+ override string getAPrimaryQlClass() { result = "LabelStmt" }
+
+ /** Gets the non-empty name of this 'label' statement, if it has one. */
+ string getName() { not result = "" and jumpinfo(underlyingElement(this), result, _) }
+
+ /** Holds if this 'label' statement has a non-empty name. */
+ predicate isNamed() { exists(string labelName | labelName = this.getName()) }
+
+ override string toString() { result = "label ...:" }
+
+ override predicate mayBeGloballyImpure() { none() }
+
+ override predicate mayBeImpure() { none() }
+}
+
+/**
+ * A C/C++ `co_return` statement.
+ *
+ * It comes either with an expression, as in
+ * ```
+ * co_return 1+2;
+ * ```
+ * or without one, as in
+ * ```
+ * co_return;
+ * ```
+ */
+class CoReturnStmt extends Stmt, @stmt_co_return {
+ override string getAPrimaryQlClass() { result = "CoReturnStmt" }
+
+ /**
+ * Gets the operand of this `co_return` statement (its child 0).
+ *
+ * Given
+ * ```
+ * co_return 1+2;
+ * ```
+ * the operand is a function call `return_value(1+2)`, whereas for
+ * ```
+ * co_return;
+ * ```
+ * the operand is a function call `return_void()`.
+ */
+ FunctionCall getOperand() { this.hasChild(result, 0) }
+
+ /**
+ * Gets the expression of this `co_return` statement, if any. This is
+ * argument 0 of the operand call.
+ *
+ * Given
+ * ```
+ * co_return 1+2;
+ * ```
+ * this yields `1+2`, whereas nothing is yielded for
+ * ```
+ * co_return;
+ * ```
+ */
+ Expr getExpr() { exists(FunctionCall op | op = this.getOperand() | result = op.getArgument(0)) }
+
+ /**
+ * Holds if this `co_return` statement carries an expression.
+ *
+ * It holds for
+ * ```
+ * co_return 1+2;
+ * ```
+ * and does not hold for
+ * ```
+ * co_return;
+ * ```
+ */
+ predicate hasExpr() { exists(Expr e | e = this.getExpr()) }
+
+ override string toString() { result = "co_return ..." }
+}
+
+/**
+ * A C/C++ 'return' statement.
+ *
+ * It comes either with an expression, as in
+ * ```
+ * return 1+2;
+ * ```
+ * or without one, as in
+ * ```
+ * return;
+ * ```
+ */
+class ReturnStmt extends Stmt, @stmt_return {
+ override string getAPrimaryQlClass() { result = "ReturnStmt" }
+
+ /**
+ * Gets the expression of this 'return' statement (its child 0), if any.
+ *
+ * Given
+ * ```
+ * return 1+2;
+ * ```
+ * this yields `1+2`, whereas nothing is yielded for
+ * ```
+ * return;
+ * ```
+ */
+ Expr getExpr() { this.hasChild(result, 0) }
+
+ /**
+ * Holds if this 'return' statement carries an expression.
+ *
+ * It holds for
+ * ```
+ * return 1+2;
+ * ```
+ * and does not hold for
+ * ```
+ * return;
+ * ```
+ */
+ predicate hasExpr() { exists(Expr e | e = this.getExpr()) }
+
+ override string toString() { result = "return ..." }
+
+ override predicate mayBeImpure() { exists(Expr e | e = this.getExpr() | e.mayBeImpure()) }
+
+ override predicate mayBeGloballyImpure() { exists(Expr e | e = this.getExpr() | e.mayBeGloballyImpure()) }
+}
+
+/**
+ * A C/C++ 'do' statement.
+ *
+ * In the snippet below it is the whole `do` ... `while` construct:
+ * ```
+ * do {
+ * x = x + 1;
+ * } while (x < 10);
+ * ```
+ */
+class DoStmt extends Loop, @stmt_end_test_while {
+ override string getAPrimaryQlClass() { result = "DoStmt" }
+
+ override Expr getCondition() { this.hasChild(result, 0) }
+
+ override Expr getControllingExpr() { this.getCondition() = result }
+
+ override Stmt getStmt() { do_body(underlyingElement(this), unresolveElement(result)) }
+
+ override string toString() { result = "do (...) ..." }
+
+ override predicate mayBeImpure() {
+ this.getStmt().mayBeImpure() or
+ this.getCondition().mayBeImpure()
+ }
+
+ override predicate mayBeGloballyImpure() {
+ this.getStmt().mayBeGloballyImpure() or
+ this.getCondition().mayBeGloballyImpure()
+ }
+
+ override MacroInvocation getGeneratingMacro() {
+ result = this.getStmt().getGeneratingMacro() and
+ this.getCondition() = result.getAnExpandedElement()
+ }
+}
+
+/**
+ * A C++11 range-based 'for' statement.
+ *
+ * For example,
+ * ```
+ * for (int x : xs) { y += x; }
+ * ```
+ *
+ * This example would be desugared to
+ * ```
+ * {
+ * auto && __range = xs;
+ * for (auto __begin = begin_expr, __end = end_expr;
+ * __begin != __end;
+ * ++__begin) {
+ * int x = *__begin;
+ * y += x;
+ * }
+ * }
+ * ```
+ * where `begin_expr` and `end_expr` depend on the type of `xs`.
+ */
+class RangeBasedForStmt extends Loop, @stmt_range_based_for {
+ override string getAPrimaryQlClass() { result = "RangeBasedForStmt" }
+
+ /**
+ * Gets the 'body' statement of this range-based 'for' statement.
+ *
+ * For example, for
+ * ```
+ * for (int x : xs) { y += x; }
+ * ```
+ * the result is the `BlockStmt` `{ y += x; }`.
+ */
+ override Stmt getStmt() { result = this.getChild(5) }
+
+ override string toString() { result = "for(...:...) ..." }
+
+ /**
+ * Gets the variable introduced by the for-range-declaration.
+ *
+ * For example, for
+ * ```
+ * for (int x : xs) { y += x; }
+ * ```
+ * the result is `int x`.
+ */
+ LocalVariable getVariable() { result = this.getChild(4).(DeclStmt).getADeclaration() }
+
+ /**
+ * Gets the expression giving the range to iterate over.
+ *
+ * For example, for
+ * ```
+ * for (int x : xs) { y += x; }
+ * ```
+ * the result is `xs`.
+ */
+ Expr getRange() { result = this.getRangeVariable().getInitializer().getExpr() }
+
+ /** Gets the compiler-generated `__range` variable after desugaring. */
+ LocalVariable getRangeVariable() { result = this.getChild(0).(DeclStmt).getADeclaration() }
+
+ /**
+ * Gets the compiler-generated `__begin != __end` which is the
+ * condition expression of this for statement after desugaring.
+ * It will be either an `NEExpr` or a call to a user-defined
+ * `operator!=`.
+ */
+ override Expr getCondition() { result = this.getChild(2) }
+
+ override Expr getControllingExpr() { result = this.getCondition() }
+
+ /**
+ * Gets a declaration statement that declares first `__begin` and then
+ * `__end`, initializing them to the values they have before entering the
+ * desugared loop.
+ */
+ DeclStmt getBeginEndDeclaration() { result = this.getChild(1) }
+
+ /** Gets the compiler-generated `__begin` variable after desugaring. */
+ LocalVariable getBeginVariable() { result = this.getBeginEndDeclaration().getDeclaration(0) }
+
+ /** Gets the compiler-generated `__end` variable after desugaring. */
+ LocalVariable getEndVariable() { result = this.getBeginEndDeclaration().getDeclaration(1) }
+
+ /**
+ * Gets the compiler-generated `++__begin` which is the update
+ * expression of this for statement after desugaring. It will
+ * be either a `PrefixIncrExpr` or a call to a user-defined
+ * `operator++`.
+ */
+ Expr getUpdate() { result = this.getChild(3) }
+
+ /** Gets an iteration variable of this loop, which is the compiler-generated `__begin` variable. */
+ LocalVariable getAnIterationVariable() { result = this.getBeginVariable() }
+}
+
+/**
+ * A C/C++ 'for' statement.
+ *
+ * This only represents "traditional" 'for' statements and not C++11
+ * range-based 'for' statements or Objective C 'for-in' statements.
+ *
+ * For example, the `for` statement in:
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ */
+class ForStmt extends Loop, @stmt_for {
+ override string getAPrimaryQlClass() { result = "ForStmt" }
+
+ /**
+ * Gets the initialization statement of this 'for' statement.
+ *
+ * For example, for
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ * the result is `i = 0;`.
+ *
+ * Does not hold if the initialization statement is an empty statement, as in
+ * ```
+ * for (; i < 10; i++) { j++ }
+ * ```
+ */
+ Stmt getInitialization() { for_initialization(underlyingElement(this), unresolveElement(result)) }
+
+ /**
+ * Gets the condition expression of this 'for' statement.
+ *
+ * For example, for
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ * the result is `i < 10`.
+ *
+ * Does not hold if the condition expression is omitted, as in
+ * ```
+ * for (i = 0;; i++) { if (i >= 10) break; }
+ * ```
+ */
+ override Expr getCondition() { for_condition(underlyingElement(this), unresolveElement(result)) }
+
+ override Expr getControllingExpr() { result = this.getCondition() }
+
+ /**
+ * Gets the update expression of this 'for' statement.
+ *
+ * For example, for
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ * the result is `i++`.
+ *
+ * Does not hold if the update expression is omitted, as in
+ * ```
+ * for (i = 0; i < 10;) { i++; }
+ * ```
+ */
+ Expr getUpdate() { for_update(underlyingElement(this), unresolveElement(result)) }
+
+ override Stmt getStmt() { for_body(underlyingElement(this), unresolveElement(result)) }
+
+ override string toString() { result = "for(...;...;...) ..." }
+
+ /**
+ * Gets a variable that is used as an iteration variable. That is, a
+ * variables that is defined, updated or tested in the head of this
+ * for statement.
+ *
+ * This only has results that are quite certainly loop variables: for
+ * complex iterations, it may not return anything.
+ *
+ * For example, for
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ * the result is `i`.
+ */
+ pragma[noopt]
+ Variable getAnIterationVariable() {
+ this instanceof ForStmt and
+ // check that it is assigned to, incremented or decremented in the update
+ exists(Expr updateOpRoot, Expr updateOp |
+ updateOpRoot = this.getUpdate() and
+ inForUpdate(updateOpRoot, updateOp)
+ |
+ exists(CrementOperation op, VariableAccess va |
+ op = updateOp and
+ op instanceof CrementOperation and
+ op.getOperand() = va and
+ va = result.getAnAccess()
+ )
+ or
+ updateOp = result.getAnAssignedValue()
+ ) and
+ result instanceof Variable and
+ // checked or used in the condition
+ exists(Expr e, VariableAccess va |
+ va = result.getAnAccess() and
+ inForCondition(e, va) and
+ e = this.getCondition()
+ )
+ }
+
+ /**
+ * Gets a declaration from the initialization statement of this 'for'
+ * statement.
+ *
+ * For example, for
+ * ```
+ * for(int x = 0, y = 10; x != y; ++x) { sum += x; }
+ * ```
+ * the results are `x` and `y`, while for
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ * there are no results.
+ */
+ override Declaration getADeclaration() {
+ result = this.getInitialization().(DeclStmt).getADeclaration()
+ }
+
+ override predicate mayBeImpure() {
+ this.getInitialization().mayBeImpure() or
+ this.getCondition().mayBeImpure() or
+ this.getUpdate().mayBeImpure() or
+ this.getStmt().mayBeImpure()
+ }
+
+ override predicate mayBeGloballyImpure() {
+ this.getInitialization().mayBeGloballyImpure() or
+ this.getCondition().mayBeGloballyImpure() or
+ this.getUpdate().mayBeGloballyImpure() or
+ this.getStmt().mayBeGloballyImpure()
+ }
+
+ override MacroInvocation getGeneratingMacro() {
+   // the body must come from the macro; every part that exists must come from the same one
+   result = this.getStmt().getGeneratingMacro() and
+   (
+     not exists(this.getInitialization()) or
+     result = this.getInitialization().getGeneratingMacro()
+   ) and
+   (not exists(this.getCondition()) or result.getAnExpandedElement() = this.getCondition()) and
+   (not exists(this.getUpdate()) or result.getAnExpandedElement() = this.getUpdate())
+ }
+
+ /**
+  * Holds if the loop condition is provably `true`.
+  *
+  * For example, this holds for
+  * ```
+  * for(x = 0; 1; ++x) { sum += x; }
+  * ```
+  */
+ predicate conditionAlwaysTrue() { conditionAlwaysTrue(this.getCondition()) }
+
+ /**
+  * Holds if the loop condition is provably `false`.
+  *
+  * For example, this holds for
+  * ```
+  * for(x = 0; 0; ++x) { sum += x; }
+  * ```
+  */
+ predicate conditionAlwaysFalse() { conditionAlwaysFalse(this.getCondition()) }
+
+ /**
+ * Holds if the loop condition is provably `true` upon entry, that is, at
+ * least one iteration of the loop is guaranteed. The check itself is
+ * delegated to `loopConditionAlwaysTrueUponEntry`.
+ *
+ * For example, with
+ * ```
+ * for (int i = 0; i < 10; i++) { ... }
+ * ```
+ * the condition `i < 10` always evaluates to `true` upon entry since `i = 0`,
+ * but the condition will evaluate to `false` after 10 iterations.
+ */
+ predicate conditionAlwaysTrueUponEntry() { loopConditionAlwaysTrueUponEntry(this, _) }
+}
+
+/**
+ * Holds if `child` is the condition `forCondition` of a 'for' statement,
+ * or is nested (through `getParent()`) anywhere inside that condition.
+ *
+ * For example, if a program includes
+ * ```
+ * for (i = 0; i < 10; i++) { j++; }
+ * ```
+ * then this predicate will hold with `forCondition` as `i < 10`,
+ * and `child` as any of `i`, `10` and `i < 10`.
+ */
+pragma[noopt]
+private predicate inForCondition(Expr forCondition, Expr child) {
+ exists(ForStmt for |
+ forCondition = for.getCondition() and
+ child = forCondition and
+ for instanceof ForStmt
+ )
+ or
+ exists(Expr mid |
+ inForCondition(forCondition, mid) and
+ child.getParent() = mid
+ )
+}
+
+/**
+ * Holds if `child` is the update `forUpdate` of a 'for' statement, or is
+ * nested (through `getParent()`) anywhere inside that update.
+ *
+ * For example, if a program includes
+ * ```
+ * for (i = 0; i < 10; i += 1) { j++; }
+ * ```
+ * then this predicate will hold with `forUpdate` as `i += 1`,
+ * and `child` as any of `i`, `1` and `i += 1`.
+ */
+pragma[noopt]
+private predicate inForUpdate(Expr forUpdate, Expr child) {
+ exists(ForStmt for | forUpdate = for.getUpdate() and child = forUpdate)
+ or
+ exists(Expr mid | inForUpdate(forUpdate, mid) and child.getParent() = mid)
+}
+
+/** Gets the index within `b` of the `rnk`'th `SwitchCase` statement of `b`. */
+private int indexOfSwitchCaseRank(BlockStmt b, int rnk) {
+  result = rank[rnk](int idx | b.getStmt(idx) instanceof SwitchCase)
+}
+
+/**
+ * A C/C++ 'switch case' statement.
+ *
+ * For example, the `case` and `default` statements in:
+ * ```
+ * switch (i)
+ * {
+ * case 5:
+ * ...
+ * default:
+ * ...
+ * }
+ * ```
+ */
+class SwitchCase extends Stmt, @stmt_switch_case {
+  override string getAPrimaryQlClass() { result = "SwitchCase" }
+
+  /**
+   * Gets the expression of this 'switch case' statement (or the start of
+   * the range if there is a GNU case range). Does not exist for a
+   * `DefaultCase`.
+   *
+   * For example, for
+   * ```
+   * case 5:
+   * ```
+   * the result is `5`, for
+   * ```
+   * case 6 ... 7:
+   * ```
+   * the result is `6`, and there is no result for
+   * ```
+   * default:
+   * ```
+   */
+  Expr getExpr() { result = this.getChild(0) }
+
+  /**
+   * Gets the end of the range, if this is a GNU case range. Otherwise
+   * has no result.
+   *
+   * For example, for
+   * ```
+   * case 6 ... 7:
+   * ```
+   * the result is `7`, while for
+   * ```
+   * case 5:
+   * ```
+   * and
+   * ```
+   * default:
+   * ```
+   * there is no result.
+   */
+  Expr getEndExpr() { result = this.getChild(1) }
+
+  /**
+   * Gets the 'switch' statement of this 'switch case' statement.
+   *
+   * For example, with
+   * ```
+   * switch(i) {
+   *   case 5:
+   *     x = 1;
+   * }
+   * ```
+   * the result of this predicate on `case 5:` is the whole
+   * `switch(i) { ... }` statement.
+   */
+  SwitchStmt getSwitchStmt() { result.getASwitchCase() = this }
+
+  /**
+   * Gets the 0-based index of this 'switch case' statement within its
+   * 'switch' statement.
+   *
+   * For example, for
+   * ```
+   * switch(i) {
+   *   case 5:
+   *   case 6:
+   *   default:
+   * }
+   * ```
+   * the `case 5:` has result 0, `case 6:` has result 1, and `default:`
+   * has result 2.
+   */
+  int getChildNum() { switch_case(_, result, underlyingElement(this)) }
+
+  /**
+   * DEPRECATED: use `SwitchCase.getAStmt` or `ControlFlowNode.getASuccessor`
+   * rather than this predicate.
+   *
+   * Gets the `BlockStmt` statement immediately following this 'switch case'
+   * statement, if any.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     break;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; break; }
+   *   default:
+   *     { x = 3; }
+   *     x = 4;
+   *     break;
+   * }
+   * ```
+   * the `case 7:` has result `{ x = 2; break; }`, `default:` has result
+   * `{ x = 3; }`, and the others have no result.
+   */
+  deprecated BlockStmt getLabelledStmt() {
+    exists(int i, Stmt parent |
+      this = parent.getChild(i) and
+      result = parent.getChild(i + 1)
+    )
+  }
+
+  /**
+   * Gets the next `SwitchCase` belonging to the same 'switch'
+   * statement, if any.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     break;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; break; }
+   *   default:
+   *     { x = 3; }
+   *     x = 4;
+   *     break;
+   * }
+   * ```
+   * the `case 5:` has result `case 6:`, which has result `case 7:`,
+   * which has result `default:`, which has no result.
+   */
+  SwitchCase getNextSwitchCase() {
+    result.getSwitchStmt() = this.getSwitchStmt() and
+    result.getChildNum() = this.getChildNum() + 1
+  }
+
+  /**
+   * Gets the previous `SwitchCase` belonging to the same 'switch'
+   * statement, if any.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     break;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; break; }
+   *   default:
+   *     { x = 3; }
+   *     x = 4;
+   *     break;
+   * }
+   * ```
+   * the `default:` has result `case 7:`, which has result `case 6:`,
+   * which has result `case 5:`, which has no result.
+   */
+  SwitchCase getPreviousSwitchCase() { result.getNextSwitchCase() = this }
+
+  /**
+   * Gets a statement belonging under this 'switch case' statement.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     break;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; break; }
+   *   default:
+   *     { x = 3; }
+   *     x = 4;
+   *     break;
+   * }
+   * ```
+   * the `case 5:` has results `x = 1;` and `break;`, `case 6:` has no
+   * results, `case 7:` has a single result `{ x = 2; break; }`, and
+   * `default:` has results `{ x = 3; }`, `x = 4;` and `break;`.
+   */
+  Stmt getAStmt() {
+    exists(BlockStmt b, int rnk, int i |
+      b.getStmt(i) = this and
+      i = indexOfSwitchCaseRank(b, rnk)
+    |
+      pragma[only_bind_into](b).getStmt([i + 1 .. indexOfSwitchCaseRank(b, rnk + 1) - 1]) = result
+      or
+      not exists(indexOfSwitchCaseRank(b, rnk + 1)) and // last case: runs to the end of the block
+      b.getStmt([i + 1 .. b.getNumStmt() - 1]) = result
+    )
+  }
+
+  /**
+   * Gets the last statement under this 'switch case' statement. If the
+   * last statement is wrapped in one or more blocks then the result is
+   * the last statement in those blocks instead.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     break;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; break; }
+   *   default:
+   *     { x = 3; { x = 4; break; } }
+   * }
+   * ```
+   * the `case 5:` has result `break;`, the `case 6:` has no result,
+   * the `case 7:` has result `break;`, and the `default:` has result
+   * `break;`.
+   */
+  Stmt getLastStmt() {
+    exists(Stmt lastStmt |
+      lastStmt = this.getAStmt() and
+      not lastStmt.getFollowingStmt() = this.getAStmt() and
+      if lastStmt instanceof BlockStmt
+      then result = lastStmt.(BlockStmt).getLastStmtIn()
+      else result = lastStmt
+    )
+  }
+
+  /**
+   * Holds if the last statement, as determined by `getLastStmt`, under
+   * this 'switch case' statement is a 'break' statement.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     break;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; break; }
+   *   default:
+   *     { x = 3; { x = 4; break; } }
+   * }
+   * ```
+   * this holds for `case 5:`, `case 7:` and `default:`, but not for `case 6:`.
+   */
+  predicate terminatesInBreakStmt() { this.getLastStmt() instanceof BreakStmt }
+
+  /**
+   * Holds if the last statement, as determined by `getLastStmt`, under
+   * this 'switch case' statement is a 'return' statement.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     return;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; return; }
+   *   default:
+   *     { x = 3; { x = 4; return; } }
+   * }
+   * ```
+   * this holds for `case 5:`, `case 7:` and `default:`, but not for `case 6:`.
+   */
+  predicate terminatesInReturnStmt() { this.getLastStmt() instanceof ReturnStmt }
+
+  /**
+   * Holds if the last statement, as determined by `getLastStmt`, under
+   * this 'switch case' statement is a 'throw' statement.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *     x = 1;
+   *     throw 1;
+   *   case 6:
+   *   case 7:
+   *     { x = 2; throw 2; }
+   *   default:
+   *     { x = 3; { x = 4; throw 3; } }
+   * }
+   * ```
+   * this holds for `case 5:`, `case 7:` and `default:`, but not for `case 6:`.
+   */
+  predicate terminatesInThrowStmt() {
+    exists(ThrowExpr t | t.getEnclosingStmt() = this.getLastStmt())
+  }
+
+  /**
+   * Holds if this 'switch case' statement is a 'default' statement.
+   *
+   * For example, for
+   * ```
+   * switch (i) {
+   *   case 5:
+   *   case 6:
+   *   case 7:
+   *   default:
+   * }
+   * ```
+   * this holds for `default:`, but not for `case 5:`, `case 6:`,
+   * or `case 7:`.
+   */
+  predicate isDefault() { this instanceof DefaultCase }
+
+  override string toString() { result = "case ...:" }
+
+  override predicate mayBeImpure() { this.getExpr().mayBeImpure() }
+
+  override predicate mayBeGloballyImpure() { this.getExpr().mayBeGloballyImpure() }
+}
+
+/**
+ * A C/C++ 'default case' statement.
+ *
+ * For example, the `default` statement in:
+ * ```
+ * switch (i)
+ * {
+ * case 5:
+ * ...
+ * default:
+ * ...
+ * }
+ * ```
+ */
+class DefaultCase extends SwitchCase {
+  DefaultCase() { not exists(Expr label | label = this.getExpr()) }
+
+  override predicate mayBeGloballyImpure() { none() }
+
+  override predicate mayBeImpure() { none() }
+
+  override string toString() { result = "default: " }
+}
+
+/**
+ * A C/C++ 'switch' statement.
+ *
+ * For example, the `switch` statement in:
+ * ```
+ * switch (i)
+ * {
+ * case 5:
+ * ...
+ * default:
+ * ...
+ * }
+ * ```
+ */
+class SwitchStmt extends ConditionalStmt, @stmt_switch {
+  override string getAPrimaryQlClass() { result = "SwitchStmt" }
+
+  /**
+   * Gets the expression being switched on (child 0 of this statement).
+   *
+   * For example, for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   *   default:
+   *     break;
+   * }
+   * ```
+   * the result is `i`.
+   */
+  Expr getExpr() { this.getChild(0) = result }
+
+  override Expr getControllingExpr() { this.getExpr() = result }
+
+  /**
+   * Gets the statement that forms the body of this 'switch' statement.
+   *
+   * The body is nearly always a `BlockStmt`, although other statements
+   * are syntactically permitted there as well.
+   *
+   * For example, for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   *   default:
+   *     break;
+   * }
+   * ```
+   * the result is
+   * ```
+   * {
+   *   case 1:
+   *   case 2:
+   *     break;
+   *   default:
+   *     break;
+   * }
+   * ```
+   */
+  Stmt getStmt() { switch_body(underlyingElement(this), unresolveElement(result)) }
+
+  /**
+   * Gets any one of the 'switch case' statements of this 'switch' statement.
+   *
+   * For example, for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   *   default:
+   *     break;
+   * }
+   * ```
+   * the results are `case 1:`, `case 2:` and `default:`.
+   */
+  SwitchCase getASwitchCase() { switch_case(underlyingElement(this), _, unresolveElement(result)) }
+
+  /**
+   * Gets the 'switch case' statement of this 'switch' statement that is a
+   * `DefaultCase`, if there is one.
+   *
+   * For example, for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   *   default:
+   *     break;
+   * }
+   * ```
+   * the result is `default:`, but there is no result for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   * }
+   * ```
+   */
+  DefaultCase getDefaultCase() { this.getASwitchCase() = result }
+
+  /**
+   * Holds if `getDefaultCase` has a result for this 'switch' statement.
+   *
+   * For example, this holds for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   *   default:
+   *     break;
+   * }
+   * ```
+   * but not for
+   * ```
+   * switch(i) {
+   *   case 1:
+   *   case 2:
+   *     break;
+   * }
+   * ```
+   */
+  predicate hasDefaultCase() { exists(DefaultCase dflt | dflt = this.getDefaultCase()) }
+
+  override string toString() { result = "switch (...) ... " }
+
+  override predicate mayBeImpure() {
+    this.getStmt().mayBeImpure() or
+    this.getExpr().mayBeImpure()
+  }
+
+  override predicate mayBeGloballyImpure() {
+    this.getStmt().mayBeGloballyImpure() or
+    this.getExpr().mayBeGloballyImpure()
+  }
+
+  override MacroInvocation getGeneratingMacro() {
+    this.getExpr() = result.getAnExpandedElement() and
+    not exists(SwitchCase sc | sc = this.getASwitchCase() and not exists(sc.getGeneratingMacro()))
+  }
+}
+
+/**
+ * A C/C++ 'switch' statement where the controlling expression has an
+ * enum type.
+ *
+ * For example, given
+ * ```
+ * enum color { RED, GREEN, BLUE };
+ * enum color c;
+ * ```
+ * the `switch` statement in:
+ * ```
+ * switch (c) {
+ * case RED:
+ * return 1;
+ * default:
+ * return 2;
+ * }
+ * ```
+ */
+class EnumSwitch extends SwitchStmt {
+  EnumSwitch() { exists(Enum e | e = this.getExpr().getType().getUnderlyingType()) }
+
+  /**
+   * Gets a constant of the switched-on enum type whose initializer value
+   * is not the value of any case expression of this 'switch' statement.
+   *
+   * For example, with
+   * ```
+   * enum color { RED, GREEN, BLUE };
+   * enum color c;
+   * switch (c) {
+   *   case RED:
+   *     return 1;
+   *   default:
+   *     return 2;
+   * }
+   * ```
+   * there are results `GREEN` and `BLUE`.
+   */
+  EnumConstant getAMissingCase() {
+    result = this.getExpr().getUnderlyingType().(Enum).getAnEnumConstant() and
+    not exists(string handled |
+      handled = result.getInitializer().getExpr().getValue() and
+      this.matchesValue(handled)
+    )
+  }
+
+  pragma[noinline]
+  private predicate matchesValue(string value) {
+    this.getASwitchCase().getExpr().getValue() = value
+  }
+}
+
+/**
+ * A handler for a 'try' statement.
+ *
+ * This corresponds to a 'catch block' in the source. If the exception
+ * is of a type that can be handled by this 'catch block', then
+ * execution continues with the associated `CatchBlock`. Otherwise,
+ * execution continues with the next `Handler`.
+ *
+ * This has no concrete representation in the source, but makes the
+ * control flow graph easier to use. For example in the following code:
+ * ```
+ * try
+ * {
+ * f();
+ * } catch (std::exception &e) {
+ * g();
+ * }
+ * ```
+ * there is a handler that's associated with the `catch` block and controls
+ * entry to it.
+ */
+class Handler extends Stmt, @stmt_handler {
+  override string toString() { result = "<handler>" }
+
+  override string getAPrimaryQlClass() { result = "Handler" }
+
+  /**
+   * Gets the block containing the implementation of this handler.
+   */
+  CatchBlock getBlock() { result = this.getChild(0) }
+
+  /** Gets the 'try' statement corresponding to this 'catch block'. */
+  TryStmt getTryStmt() { result = this.getParent() }
+
+  /**
+   * Gets the parameter introduced by this 'catch block', if any.
+   *
+   * For example, `catch(std::exception& e)` introduces a
+   * parameter `e`, whereas `catch(...)` does not introduce a parameter.
+   */
+  Parameter getParameter() { result = this.getBlock().getParameter() }
+
+  override predicate mayBeImpure() { none() }
+
+  override predicate mayBeGloballyImpure() { none() }
+}
+
+/**
+ * DEPRECATED: Objective-C is no longer supported.
+ * The end of a 'finally' clause.
+ *
+ * This has no concrete representation in the source, but makes the
+ * control flow graph easier to use.
+ */
+deprecated class FinallyEnd extends Stmt {
+  FinallyEnd() { none() }
+
+  override predicate mayBeGloballyImpure() { none() }
+
+  override predicate mayBeImpure() { none() }
+
+  override string toString() { result = "" }
+}
+
+/**
+ * A C/C++ 'try' statement.
+ *
+ * For example, the `try` statement in the following code:
+ * ```
+ * try {
+ * f();
+ * } catch(std::exception &e) {
+ * g();
+ * }
+ * ```
+ */
+class TryStmt extends Stmt, @stmt_try_block {
+  override string toString() { result = "try { ... }" }
+
+  override string getAPrimaryQlClass() { result = "TryStmt" }
+
+  /**
+   * Gets the statement guarded by this 'try' statement (its child 0).
+   *
+   * For example, for
+   * ```
+   * try { f(); } catch (...) { g(); }
+   * ```
+   * the result is `{ f(); }`.
+   */
+  Stmt getStmt() { this.getChild(0) = result }
+
+  /**
+   * Gets the block of the `Handler` found at child `n + 1` of this 'try' statement.
+   *
+   * For example, for
+   * ```
+   * try { f(); } catch (...) { g(); }
+   * ```
+   * the result of `getCatchClause(0)` is `{ g(); }`.
+   */
+  CatchBlock getCatchClause(int n) { exists(Handler h | h = this.getChild(n + 1) | result = h.getBlock()) }
+
+  /**
+   * Gets any of the 'catch block's of this 'try' statement.
+   *
+   * For example, for
+   * ```
+   * try { f(); } catch (...) { g(); }
+   * ```
+   * the result is `{ g(); }`.
+   */
+  CatchBlock getACatchClause() { this.getCatchClause(_) = result }
+
+  /**
+   * Gets how many 'catch block's this 'try' statement has.
+   *
+   * For example, for
+   * ```
+   * try { f(); } catch (...) { g(); }
+   * ```
+   * the result is 1.
+   */
+  int getNumberOfCatchClauses() { result = count(CatchBlock cb | cb = this.getACatchClause()) }
+
+  override predicate mayBeImpure() {
+    this.getACatchClause().mayBeImpure() or
+    this.getStmt().mayBeImpure()
+  }
+
+  override predicate mayBeGloballyImpure() {
+    this.getACatchClause().mayBeGloballyImpure() or
+    this.getStmt().mayBeGloballyImpure()
+  }
+}
+
+/**
+ * A C++ 'function try' statement.
+ *
+ * This is a 'try' statement wrapped around an entire function body,
+ * for example the `try` statement in the following code:
+ * ```
+ * void foo() try {
+ * f();
+ * } catch(...) {
+ * g();
+ * }
+ * ```
+ */
+class FunctionTryStmt extends TryStmt {
+  override string getAPrimaryQlClass() { result = "FunctionTryStmt" }
+  // Characterized by the absence of any enclosing block.
+  FunctionTryStmt() { not exists(this.getEnclosingBlock()) }
+}
+
+/**
+ * A 'catch block', for example the second and third blocks in the following
+ * code:
+ * ```
+ * try {
+ * f();
+ * } catch(std::exception &e) {
+ * g();
+ * } catch(...) {
+ * h();
+ * }
+ * ```
+ */
+class CatchBlock extends BlockStmt {
+  CatchBlock() { ishandler(underlyingElement(this)) }
+
+  override string getAPrimaryQlClass() { result = "CatchBlock" }
+
+  /**
+   * Gets the parameter declared by this 'catch block', if there is one.
+   *
+   * For example, `catch(std::exception& e)` introduces a parameter
+   * `e`, whereas `catch(...)` does not introduce a parameter.
+   */
+  Parameter getParameter() { this = result.getCatchBlock() }
+
+  /** Gets the 'try' statement that has this block as one of its catch clauses. */
+  TryStmt getTryStmt() { this = result.getACatchClause() }
+}
+
+/**
+ * A C++ 'catch-any block', for example the third block in the following code:
+ * ```
+ * try {
+ * f();
+ * } catch(std::exception &e) {
+ * g();
+ * } catch(...) {
+ * h();
+ * }
+ * ```
+ */
+class CatchAnyBlock extends CatchBlock {
+  CatchAnyBlock() { not exists(Parameter p | p = this.getParameter()) }
+
+  override string getAPrimaryQlClass() { result = "CatchAnyBlock" }
+}
+
+/**
+ * A structured exception handling 'try' statement, that is, a
+ * `__try __except` or `__try __finally` statement. This is a Microsoft
+ * C/C++ extension.
+ */
+class MicrosoftTryStmt extends Stmt, @stmt_microsoft_try {
+  /** Gets the statement guarded by `__try`, that is, child 0 of this statement. */
+  Stmt getStmt() { this.getChild(0) = result }
+}
+
+/**
+ * A structured exception handling 'try except' statement, for example the
+ * `__try` statement in the following code:
+ * ```
+ * __try
+ * {
+ * f();
+ * } __except(myExceptionFilter()) {
+ * g();
+ * }
+ * ```
+ * This is a Microsoft C/C++ extension.
+ */
+class MicrosoftTryExceptStmt extends MicrosoftTryStmt {
+  MicrosoftTryExceptStmt() { this.getChild(1) instanceof Expr }
+
+  override string toString() { result = "__try { ... } __except( ... ) { ... }" }
+
+  /** Gets the expression guarding the `__except` statement. */
+  Expr getCondition() { result = this.getChild(1) }
+
+  /** Gets the `__except` statement (usually a `BlockStmt`). */
+  Stmt getExcept() { result = this.getChild(2) }
+
+  override string getAPrimaryQlClass() { result = "MicrosoftTryExceptStmt" }
+}
+
+/**
+ * A structured exception handling 'try finally' statement, for example the
+ * `__try` statement in the following code:
+ * ```
+ * __try
+ * {
+ * f();
+ * } __finally {
+ * g();
+ * }
+ * ```
+ * This is a Microsoft C/C++ extension.
+ */
+class MicrosoftTryFinallyStmt extends MicrosoftTryStmt {
+  MicrosoftTryFinallyStmt() { not this.getChild(1) instanceof Expr }
+
+  override string toString() { result = "__try { ... } __finally { ... }" }
+
+  /** Gets the `__finally` statement (usually a `BlockStmt`). */
+  Stmt getFinally() { result = this.getChild(1) }
+
+  override string getAPrimaryQlClass() { result = "MicrosoftTryFinallyStmt" }
+}
+
+/**
+ * A C/C++ 'declaration' statement.
+ *
+ * For example, the following statement is a declaration statement:
+ * ```
+ * int i, j;
+ * ```
+ */
+class DeclStmt extends Stmt, @stmt_decl {
+  override string getAPrimaryQlClass() { result = "DeclStmt" }
+
+  /**
+   * Gets the declaration entry at position `i` of this 'declaration' statement.
+   *
+   * For example, for
+   * ```
+   * int i, j;
+   * ```
+   * the result of `getDeclarationEntry(0)` is `i`.
+   */
+  DeclarationEntry getDeclarationEntry(int i) {
+    stmt_decl_entry_bind(underlyingElement(this), i, unresolveElement(result))
+  }
+
+  /**
+   * Gets any of the declaration entries of this 'declaration' statement.
+   *
+   * For example, for
+   * ```
+   * int i, j;
+   * ```
+   * the results are `i` and `j`.
+   */
+  DeclarationEntry getADeclarationEntry() { this.getDeclarationEntry(_) = result }
+
+  /**
+   * Gets how many distinct declarations this 'declaration' statement declares.
+   *
+   * For example, for
+   * ```
+   * int i, j;
+   * ```
+   * the result of `getNumDeclarations()` is `2`.
+   */
+  int getNumDeclarations() { result = count(Declaration d | d = this.getADeclaration()) }
+
+  /**
+   * Gets the declaration at position `i` of this 'declaration' statement.
+   *
+   * For example, for
+   * ```
+   * int i, j;
+   * ```
+   * the result of `getDeclaration(0)` is `i`.
+   */
+  Declaration getDeclaration(int i) {
+    stmt_decl_bind(underlyingElement(this), i, unresolveElement(result))
+  }
+
+  /**
+   * Gets any of the declarations of this 'declaration' statement.
+   *
+   * For example, for
+   * ```
+   * int i, j;
+   * ```
+   * the results are `i` and `j`.
+   */
+  Declaration getADeclaration() { this.getDeclaration(_) = result }
+
+  override string toString() { result = "declaration" }
+
+  override predicate mayBeImpure() {
+    exists(LocalVariable v | v = this.getADeclaration() | v.getInitializer().getExpr().mayBeImpure())
+  }
+
+  override predicate mayBeGloballyImpure() {
+    exists(LocalVariable v | v = this.getADeclaration() | v.getInitializer().getExpr().mayBeGloballyImpure())
+  }
+}
+
+/**
+ * A C/C++ 'empty' statement.
+ *
+ * For example, the following statement is an empty statement:
+ * ```
+ * ;
+ * ```
+ */
+class EmptyStmt extends Stmt, @stmt_empty {
+  override string toString() { result = ";" }
+
+  override string getAPrimaryQlClass() { result = "EmptyStmt" }
+
+  override predicate mayBeGloballyImpure() { none() }
+
+  override predicate mayBeImpure() { none() }
+}
+
+/**
+ * A C/C++ 'asm' statement.
+ *
+ * For example, the `__asm__` statement in the following code:
+ * ```
+ * __asm__("movb %bh (%eax)");
+ * ```
+ */
+class AsmStmt extends Stmt, @stmt_asm {
+  override string getAPrimaryQlClass() { result = "AsmStmt" }
+
+  override string toString() { result = "asm statement" }
+}
+
+/**
+ * A C99 statement which computes the size of a single dimension of a
+ * variable length array. For example the variable length array dimension
+ * (`x`) in the following code:
+ * ```
+ * int myArray[x];
+ * ```
+ *
+ * Each `VlaDeclStmt` is preceded by one `VlaDimensionStmt` for each
+ * variable length dimension of the array.
+ */
+class VlaDimensionStmt extends Stmt, @stmt_set_vla_size {
+  override string getAPrimaryQlClass() { result = "VlaDimensionStmt" }
+
+  override string toString() { result = "VLA dimension size" }
+
+  /** Gets the expression that gives the size of this dimension (child 0). */
+  Expr getDimensionExpr() { this.getChild(0) = result }
+}
+
+/**
+ * A C99 statement which declares a variable length array. For example
+ * the variable length array declaration in the following code:
+ * ```
+ * int myArray[x];
+ * ```
+ *
+ * Each `VlaDeclStmt` is preceded by one `VlaDimensionStmt` for each
+ * variable length dimension of the array.
+ */
+class VlaDeclStmt extends Stmt, @stmt_vla_decl {
+  override string toString() { result = "VLA declaration" }
+
+  override string getAPrimaryQlClass() { result = "VlaDeclStmt" }
+
+  /**
+   * Gets the number of consecutive `VlaDimensionStmt`s that directly precede
+   * this VLA declaration statement in its `BlockStmt` (possibly from index 0).
+   */
+  int getNumberOfVlaDimensionStmts() {
+    exists(BlockStmt b, int j |
+      this = b.getStmt(j) and
+      result =
+        j - 1 -
+          max(int i |
+            i in [-1 .. j - 1] and // -1 is a sentinel for "before the first statement of `b`"
+            not b.getStmt(i) instanceof VlaDimensionStmt
+          )
+    )
+  }
+
+  /**
+   * Gets the `i`th VLA dimension statement in this VLA
+   * declaration statement.
+   */
+  VlaDimensionStmt getVlaDimensionStmt(int i) {
+    i in [0 .. this.getNumberOfVlaDimensionStmts() - 1] and
+    exists(BlockStmt b, int j |
+      this = b.getStmt(j) and
+      result = b.getStmt(j - this.getNumberOfVlaDimensionStmts() + i)
+    )
+  }
+
+  /**
+   * Gets the type that this VLA declaration statement relates to,
+   * if any.
+   */
+  Type getType() { type_vla(unresolveElement(result), underlyingElement(this)) }
+
+  /**
+   * Gets the variable that this VLA declaration statement relates to,
+   * if any.
+   */
+  Variable getVariable() { variable_vla(unresolveElement(result), underlyingElement(this)) }
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/GlobalValueNumbering.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/GlobalValueNumbering.qll
new file mode 100644
index 00000000000..cb28edc07b9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/GlobalValueNumbering.qll
@@ -0,0 +1 @@
+import semmle.code.cpp.ir.internal.ASTValueNumbering
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/GlobalValueNumberingImpl.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/GlobalValueNumberingImpl.qll
new file mode 100644
index 00000000000..f9231e24725
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/GlobalValueNumberingImpl.qll
@@ -0,0 +1,608 @@
+/**
+ * Provides an implementation of Global Value Numbering.
+ * See https://en.wikipedia.org/wiki/Global_value_numbering
+ *
+ * The predicate `globalValueNumber` converts an expression into a `GVN`,
+ * which is an abstract type representing the value of the expression. If
+ * two expressions have the same `GVN` then they compute the same value.
+ * For example:
+ *
+ * ```
+ * void f(int x, int y) {
+ * g(x+y, x+y);
+ * }
+ * ```
+ *
+ * In this example, both arguments in the call to `g` compute the same value,
+ * so both arguments have the same `GVN`. In other words, we can find
+ * this call with the following query:
+ *
+ * ```
+ * from FunctionCall call, GVN v
+ * where v = globalValueNumber(call.getArgument(0))
+ * and v = globalValueNumber(call.getArgument(1))
+ * select call
+ * ```
+ *
+ * The analysis is conservative, so two expressions might have different
+ * `GVN`s even though they actually always compute the same value. The most
+ * common reason for this is that the analysis cannot prove that there
+ * are no side-effects that might cause the computed value to change.
+ */
+
+/*
+ * Note to developers: the correctness of this module depends on the
+ * definitions of GVN, globalValueNumber, and analyzableExpr being kept in
+ * sync with each other. If you change this module then make sure that the
+ * change is symmetric across all three.
+ */
+
+import cpp
+private import semmle.code.cpp.controlflow.SSA
+
+/**
+ * Gets a control flow node that might change the value of any global
+ * variable. This is used (via `mostRecentSideEffect`) in the implementation
+ * of `GVN_OtherVariable`, because we need to be quite conservative when
+ * we assign a value number to a global variable. For example:
+ *
+ * ```
+ * x = g+1;
+ * dosomething();
+ * y = g+1;
+ * ```
+ *
+ * It is not safe to assign the same value number to both instances
+ * of `g+1` in this example, because the call to `dosomething` might
+ * change the value of `g`.
+ */
+private ControlFlowNode nodeWithPossibleSideEffect() {
+  result instanceof AsmStmt
+  or
+  result instanceof Call
+  or
+  // A write that SSA cannot analyze is treated as a possible side-effect.
+  not result instanceof SsaDefinition and
+  (
+    result instanceof Assignment
+    or
+    result instanceof CrementOperation
+    or
+    result = any(LocalVariable v).getInitializer().getExpr()
+  )
+}
+
+/**
+ * Gets the entry point of the control flow scope that contains `node`,
+ * provided that `node` is reachable from that entry point.
+ */
+cached
+private ControlFlowNode getControlFlowEntry(ControlFlowNode node) {
+  node = result.getASuccessor*() and
+  result = node.getControlFlowScope().getEntryPoint()
+}
+
+/**
+ * Holds if `dst` is a successor of `src` in the side-effect control flow
+ * graph. This graph contains every ordinary control flow edge, plus an
+ * extra edge from the entry node to each node with a possible side-effect.
+ * Treating side effects as additional entry points lets the dominator
+ * tree be used to find the most recent side-effect.
+ */
+private predicate sideEffectCFG(ControlFlowNode src, ControlFlowNode dst) {
+  // Synthetic edge: entry point -> node that might have a side effect.
+  src = getControlFlowEntry(dst) and
+  dst = nodeWithPossibleSideEffect()
+  or
+  // Ordinary control flow edge.
+  dst = src.getASuccessor()
+}
+
+/**
+ * Holds if `dominator` is the immediate dominator of `node` in the side-effect
+ * CFG, as computed by `idominance` from `functionEntry/1` and `sideEffectCFG/2`.
+ */
+private predicate iDomEffect(ControlFlowNode dominator, ControlFlowNode node) =
+ idominance(functionEntry/1, sideEffectCFG/2)(_, dominator, node)
+
+/**
+ * Gets the most recent side effect. To be more precise, `result` is a
+ * dominator of `node` and no side-effects can occur between `result` and
+ * `node`.
+ *
+ * `sideEffectCFG` has an edge from the function entry to every node with a
+ * side-effect. This means that every node with a side-effect has the
+ * function entry as its immediate dominator. So if node `x` dominates node
+ * `y` then there can be no side effects between `x` and `y` unless `x` is
+ * the function entry. So the optimal choice for `result` has the function
+ * entry as its immediate dominator.
+ *
+ * Example:
+ *
+ * ```
+ * 000: int f(int a, int b, int *p) {
+ * 001: int r = 0;
+ * 002: if (a) {
+ * 003: if (b) {
+ * 004: sideEffect1();
+ * 005: }
+ * 006: } else {
+ * 007: sideEffect2();
+ * 008: }
+ * 009: if (a) {
+ * 010: r++; // Not a side-effect, because r is an SSA variable.
+ * 011: }
+ * 012: if (b) {
+ * 013: r++; // Not a side-effect, because r is an SSA variable.
+ * 014: }
+ * 015: return *p;
+ * 016: }
+ * ```
+ *
+ * Suppose we want to find the most recent side-effect for the dereference
+ * of `p` on line 015. The `sideEffectCFG` has an edge from the function
+ * entry (line 000) to the side effects at lines 004 and 007. Therefore,
+ * the immediate dominator tree looks like this:
+ *
+ * 000 - 001 - 002 - 003
+ * - 004
+ * - 007
+ * - 009 - 010
+ * - 012 - 013
+ * - 015
+ *
+ * The immediate dominator path to line 015 is 000 - 009 - 012 - 015.
+ * Therefore, the most recent side effect for line 015 is line 009.
+ */
+cached
+private ControlFlowNode mostRecentSideEffect(ControlFlowNode node) {
+  // `result` is an immediate child of a function entry in the side-effect
+  // dominator tree ...
+  exists(ControlFlowNode fnEntry | functionEntry(fnEntry) and iDomEffect(fnEntry, result)) and
+  // ... and it dominates `node` (reflexively and transitively).
+  iDomEffect*(result, node)
+}
+
+/** The algebraic datatype underlying `GVN`; each branch below is one kind of "global value number". */
+cached
+private newtype GVNBase =
+ GVN_IntConst(int val, Type t) { mk_IntConst(val, t, _) } or
+ GVN_FloatConst(float val, Type t) { mk_FloatConst(val, t, _) } or
+ // If the local variable does not have a defining value, then
+ // we use the SsaDefinition as its global value number.
+ GVN_UndefinedStackVariable(StackVariable x, SsaDefinition def) {
+ mk_UndefinedStackVariable(x, def, _)
+ } or
+ // Variables with no SSA information. As a crude (but safe)
+ // approximation, we use `mostRecentSideEffect` to compute a definition
+ // location for the variable. This ensures that two instances of the same
+ // global variable will only get the same value number if they are
+ // guaranteed to have the same value.
+ GVN_OtherVariable(Variable x, ControlFlowNode dominator) { mk_OtherVariable(x, dominator, _) } or
+ GVN_FieldAccess(GVN s, Field f) {
+ mk_DotFieldAccess(s, f, _) or
+ mk_PointerFieldAccess_with_deref(s, f, _) or // here `s` is a `GVN_Deref` of the pointer qualifier
+ mk_ImplicitThisFieldAccess_with_deref(s, f, _) // here `s` is a `GVN_Deref` of a `GVN_ThisExpr`
+ } or
+ // Dereference a pointer. The value might have changed since the last
+ // time the pointer was dereferenced, so we need to include a definition
+ // location. As a crude (but safe) approximation, we use
+ // `mostRecentSideEffect` to compute a definition location.
+ GVN_Deref(GVN p, ControlFlowNode dominator) {
+ mk_Deref(p, dominator, _) or
+ mk_PointerFieldAccess(p, _, dominator, _) or
+ mk_ImplicitThisFieldAccess_with_qualifier(p, _, dominator, _)
+ } or
+ GVN_ThisExpr(Function fcn) {
+ mk_ThisExpr(fcn, _) or
+ mk_ImplicitThisFieldAccess(fcn, _, _, _) // needed as the qualifier of implicit-`this` field accesses
+ } or
+ GVN_Conversion(Type t, GVN child) { mk_Conversion(t, child, _) } or
+ GVN_BinaryOp(GVN lhs, GVN rhs, string opname) { mk_BinaryOp(lhs, rhs, opname, _) } or
+ GVN_UnaryOp(GVN child, string opname) { mk_UnaryOp(child, opname, _) } or
+ GVN_ArrayAccess(GVN x, GVN i, ControlFlowNode dominator) { mk_ArrayAccess(x, i, dominator, _) } or
+ // Any expression that is not handled by the cases above is
+ // given a unique number based on the expression itself.
+ GVN_Unanalyzable(Expr e) { not analyzableExpr(e) }
+
+/**
+ * A Global Value Number. A GVN is an abstract representation of the value
+ * computed by an expression. The relationship between `Expr` and `GVN` is
+ * many-to-one: every `Expr` has exactly one `GVN`, but multiple
+ * expressions can have the same `GVN`. If two expressions have the same
+ * `GVN`, it means that they compute the same value at run time. The `GVN`
+ * is an opaque value, so you cannot deduce what the run-time value of an
+ * expression will be from its `GVN`. The only use for the `GVN` of an
+ * expression is to find other expressions that compute the same value.
+ * Use the predicate `globalValueNumber` to get the `GVN` for an `Expr`.
+ *
+ * Note: `GVN` has `toString` and `getLocation` methods, so that it can be
+ * displayed in a results list. These work by picking an arbitrary
+ * expression with this `GVN` and using its `toString` and `getLocation`
+ * methods.
+ */
+class GVN extends GVNBase {
+ GVN() { this instanceof GVNBase } // trivially true: this class already extends `GVNBase`
+
+ /** Gets an expression that has this GVN. */
+ Expr getAnExpr() { this = globalValueNumber(result) }
+
+ /** Gets the kind of the GVN: the name of its `GVNBase` branch without the `GVN_` prefix. This can be useful for debugging. */
+ string getKind() {
+ if this instanceof GVN_IntConst
+ then result = "IntConst"
+ else
+ if this instanceof GVN_FloatConst
+ then result = "FloatConst"
+ else
+ if this instanceof GVN_UndefinedStackVariable
+ then result = "UndefinedStackVariable"
+ else
+ if this instanceof GVN_OtherVariable
+ then result = "OtherVariable"
+ else
+ if this instanceof GVN_FieldAccess
+ then result = "FieldAccess"
+ else
+ if this instanceof GVN_Deref
+ then result = "Deref"
+ else
+ if this instanceof GVN_ThisExpr
+ then result = "ThisExpr"
+ else
+ if this instanceof GVN_Conversion
+ then result = "Conversion"
+ else
+ if this instanceof GVN_BinaryOp
+ then result = "BinaryOp"
+ else
+ if this instanceof GVN_UnaryOp
+ then result = "UnaryOp"
+ else
+ if this instanceof GVN_ArrayAccess
+ then result = "ArrayAccess"
+ else
+ if this instanceof GVN_Unanalyzable
+ then result = "Unanalyzable"
+ else result = "error" // fallback; all twelve `GVNBase` branches are tested above
+ }
+
+ /**
+ * Gets an example of an expression with this GVN.
+ * This is useful for things like implementing toString().
+ */
+ private Expr exampleExpr() {
+ // Pick the expression with the minimum source location string. This is
+ // just an arbitrary way to pick an expression with this `GVN`.
+ result = min(Expr e | this = globalValueNumber(e) | e order by e.getLocation().toString())
+ }
+
+ /** Gets a textual representation of this element (that of the example expression). */
+ string toString() { result = exampleExpr().toString() }
+
+ /** Gets the primary location of this element (that of the example expression). */
+ Location getLocation() { result = exampleExpr().getLocation() }
+}
+
+private predicate analyzableIntConst(Expr e) {
+  // `e` has exactly one unspecified type and exactly one integer constant value.
+  1 = strictcount(e.getUnspecifiedType()) and 1 = strictcount(e.getValue().toInt())
+}
+
+private predicate mk_IntConst(int val, Type t, Expr e) {
+  t = e.getUnspecifiedType() and
+  val = e.getValue().toInt() and
+  analyzableIntConst(e)
+}
+
+private predicate analyzableFloatConst(Expr e) {
+  // Integer constants take priority, so no expression is both kinds of constant.
+  not analyzableIntConst(e) and
+  1 = strictcount(e.getUnspecifiedType()) and 1 = strictcount(e.getValue().toFloat())
+}
+
+private predicate mk_FloatConst(float val, Type t, Expr e) {
+  t = e.getUnspecifiedType() and
+  val = e.getValue().toFloat() and
+  analyzableFloatConst(e)
+}
+
+private predicate analyzableStackVariable(VariableAccess access) {
+  not analyzableConst(access) and
+  // Exactly one SSA definition, of exactly one variable, reaches the access ...
+  1 = strictcount(SsaDefinition d | access = d.getAUse(_) | d) and
+  1 = strictcount(SsaDefinition d, Variable sv | access = d.getAUse(sv) | sv) and
+  // ... and there is at most one (fully converted) defining value for it.
+  count(SsaDefinition d, Variable sv | access = d.getAUse(sv) |
+    d.getDefiningValue(sv).getFullyConverted()
+  ) <= 1
+}
+
+// Note: this only holds when the SSA definition reaching `access` has no
+// defining value. When a defining value exists, no fresh `GVN` is needed
+// for the access, because `globalValueNumber` follows the chain and uses
+// the GVN of the defining value instead.
+private predicate mk_UndefinedStackVariable(
+  StackVariable x, SsaDefinition def, VariableAccess access
+) {
+  not exists(def.getDefiningValue(x)) and
+  def.getAUse(x) = access and
+  analyzableStackVariable(access)
+}
+
+private predicate analyzableDotFieldAccess(DotFieldAccess access) {
+  not analyzableConst(access) and
+  1 = strictcount(access.getQualifier().getFullyConverted()) and
+  1 = strictcount(access.getTarget())
+}
+
+private predicate mk_DotFieldAccess(GVN qualifier, Field target, DotFieldAccess access) {
+  qualifier = globalValueNumber(access.getQualifier().getFullyConverted()) and
+  target = access.getTarget() and
+  analyzableDotFieldAccess(access)
+}
+
+private predicate analyzablePointerFieldAccess(PointerFieldAccess access) {
+  not analyzableConst(access) and
+  1 = strictcount(access.getQualifier().getFullyConverted()) and
+  1 = strictcount(access.getTarget()) and
+  1 = strictcount(mostRecentSideEffect(access))
+}
+
+private predicate mk_PointerFieldAccess(
+  GVN qualifier, Field target, ControlFlowNode dominator, PointerFieldAccess access
+) {
+  qualifier = globalValueNumber(access.getQualifier().getFullyConverted()) and
+  target = access.getTarget() and
+  dominator = mostRecentSideEffect(access) and
+  analyzablePointerFieldAccess(access)
+}
+
+/**
+ * Holds if `new_qualifier` is the `GVN_Deref` of the qualifier of `access`:
+ * `obj->field` is treated like `(*obj).field`.
+ */
+private predicate mk_PointerFieldAccess_with_deref(
+  GVN new_qualifier, Field target, PointerFieldAccess access
+) {
+  exists(GVN ptr, ControlFlowNode dom |
+    new_qualifier = GVN_Deref(ptr, dom) and
+    mk_PointerFieldAccess(ptr, target, dom, access)
+  )
+}
+
+private predicate analyzableImplicitThisFieldAccess(ImplicitThisFieldAccess access) {
+  not analyzableConst(access) and
+  1 = strictcount(access.getEnclosingFunction()) and
+  1 = strictcount(access.getTarget()) and
+  1 = strictcount(mostRecentSideEffect(access))
+}
+
+private predicate mk_ImplicitThisFieldAccess(
+  Function fcn, Field target, ControlFlowNode dominator, ImplicitThisFieldAccess access
+) {
+  fcn = access.getEnclosingFunction() and
+  target = access.getTarget() and
+  dominator = mostRecentSideEffect(access) and
+  analyzableImplicitThisFieldAccess(access)
+}
+
+private predicate mk_ImplicitThisFieldAccess_with_qualifier(
+  GVN qualifier, Field target, ControlFlowNode dominator, ImplicitThisFieldAccess access
+) {
+  exists(Function enclosing |
+    qualifier = GVN_ThisExpr(enclosing) and
+    mk_ImplicitThisFieldAccess(enclosing, target, dominator, access)
+  )
+}
+
+private predicate mk_ImplicitThisFieldAccess_with_deref(
+  GVN new_qualifier, Field target, ImplicitThisFieldAccess access
+) {
+  exists(GVN thisGvn, ControlFlowNode dom |
+    new_qualifier = GVN_Deref(thisGvn, dom) and
+    mk_ImplicitThisFieldAccess_with_qualifier(thisGvn, target, dom, access)
+  )
+}
+
+/**
+ * Holds if `access` is a non-constant, non-field access to a variable
+ * for which there is no SSA information (for example, because the
+ * variable is global).
+ */
+private predicate analyzableOtherVariable(VariableAccess access) {
+  not analyzableConst(access) and
+  not access instanceof FieldAccess and
+  not access = any(SsaDefinition def).getAUse(_) and
+  1 = strictcount(access.getTarget()) and
+  1 = strictcount(mostRecentSideEffect(access))
+}
+
+private predicate mk_OtherVariable(Variable x, ControlFlowNode dominator, VariableAccess access) {
+  dominator = mostRecentSideEffect(access) and
+  x = access.getTarget() and
+  analyzableOtherVariable(access)
+}
+
+private predicate analyzableConversion(Conversion conv) {
+  not analyzableConst(conv) and
+  1 = strictcount(conv.getExpr()) and
+  1 = strictcount(conv.getUnspecifiedType())
+}
+
+private predicate mk_Conversion(Type t, GVN child, Conversion conv) {
+  child = globalValueNumber(conv.getExpr()) and
+  t = conv.getUnspecifiedType() and
+  analyzableConversion(conv)
+}
+
+private predicate analyzableBinaryOp(BinaryOperation op) {
+  not analyzableConst(op) and
+  op.isPure() and
+  1 = strictcount(op.getOperator()) and
+  1 = strictcount(op.getLeftOperand().getFullyConverted()) and
+  1 = strictcount(op.getRightOperand().getFullyConverted())
+}
+
+private predicate mk_BinaryOp(GVN lhs, GVN rhs, string opname, BinaryOperation op) {
+  opname = op.getOperator() and
+  lhs = globalValueNumber(op.getLeftOperand().getFullyConverted()) and
+  rhs = globalValueNumber(op.getRightOperand().getFullyConverted()) and
+  analyzableBinaryOp(op)
+}
+
+private predicate analyzableUnaryOp(UnaryOperation op) {
+  // Pointer dereferences are handled separately (see `mk_Deref`).
+  not op instanceof PointerDereferenceExpr and not analyzableConst(op) and
+  op.isPure() and
+  1 = strictcount(op.getOperator()) and
+  1 = strictcount(op.getOperand().getFullyConverted())
+}
+
+private predicate mk_UnaryOp(GVN child, string opname, UnaryOperation op) {
+  opname = op.getOperator() and
+  child = globalValueNumber(op.getOperand().getFullyConverted()) and
+  analyzableUnaryOp(op)
+}
+
+private predicate analyzableThisExpr(ThisExpr thisExpr) {
+  not analyzableConst(thisExpr) and
+  1 = strictcount(thisExpr.getEnclosingFunction())
+}
+
+private predicate mk_ThisExpr(Function fcn, ThisExpr thisExpr) {
+  fcn = thisExpr.getEnclosingFunction() and
+  analyzableThisExpr(thisExpr)
+}
+
+private predicate analyzableArrayAccess(ArrayExpr ae) {
+  not analyzableConst(ae) and
+  1 = strictcount(mostRecentSideEffect(ae)) and
+  1 = strictcount(ae.getArrayBase().getFullyConverted()) and
+  1 = strictcount(ae.getArrayOffset().getFullyConverted())
+}
+
+private predicate mk_ArrayAccess(GVN base, GVN offset, ControlFlowNode dominator, ArrayExpr ae) {
+  dominator = mostRecentSideEffect(ae) and
+  base = globalValueNumber(ae.getArrayBase().getFullyConverted()) and
+  offset = globalValueNumber(ae.getArrayOffset().getFullyConverted()) and
+  analyzableArrayAccess(ae)
+}
+
+private predicate analyzablePointerDereferenceExpr(PointerDereferenceExpr deref) {
+  not analyzableConst(deref) and
+  1 = strictcount(mostRecentSideEffect(deref)) and
+  1 = strictcount(deref.getOperand().getFullyConverted())
+}
+
+private predicate mk_Deref(GVN p, ControlFlowNode dominator, PointerDereferenceExpr deref) {
+  dominator = mostRecentSideEffect(deref) and
+  p = globalValueNumber(deref.getOperand().getFullyConverted()) and
+  analyzablePointerDereferenceExpr(deref)
+}
+
+/** Gets the global value number of expression `e` (a `GVN_Unanalyzable` unique to `e` if `e` is not analyzable). */
+cached
+GVN globalValueNumber(Expr e) {
+  exists(int intVal, Type ty |
+    result = GVN_IntConst(intVal, ty) and
+    mk_IntConst(intVal, ty, e)
+  )
+  or
+  exists(float floatVal, Type ty |
+    result = GVN_FloatConst(floatVal, ty) and
+    mk_FloatConst(floatVal, ty, e)
+  )
+  or
+  // Stack variable whose SSA definition has a defining value: reuse that value's number.
+  exists(StackVariable sv, SsaDefinition ssaDef |
+    e = ssaDef.getAUse(sv) and
+    analyzableStackVariable(e) and
+    result = globalValueNumber(ssaDef.getDefiningValue(sv).getFullyConverted())
+  )
+  or
+  // Stack variable whose SSA definition has no defining value.
+  exists(StackVariable sv, SsaDefinition ssaDef |
+    result = GVN_UndefinedStackVariable(sv, ssaDef) and
+    mk_UndefinedStackVariable(sv, ssaDef, e)
+  )
+  or
+  // Variable without SSA information.
+  exists(Variable v, ControlFlowNode dom |
+    result = GVN_OtherVariable(v, dom) and
+    mk_OtherVariable(v, dom, e)
+  )
+  or
+  exists(GVN qual, Field fld |
+    result = GVN_FieldAccess(qual, fld) and
+    mk_DotFieldAccess(qual, fld, e)
+  )
+  or
+  exists(GVN qual, Field fld |
+    result = GVN_FieldAccess(qual, fld) and
+    mk_PointerFieldAccess_with_deref(qual, fld, e)
+  )
+  or
+  exists(GVN qual, Field fld |
+    result = GVN_FieldAccess(qual, fld) and
+    mk_ImplicitThisFieldAccess_with_deref(qual, fld, e)
+  )
+  or
+  exists(Function enclosing |
+    result = GVN_ThisExpr(enclosing) and
+    mk_ThisExpr(enclosing, e)
+  )
+  or
+  exists(Type ty, GVN operand |
+    result = GVN_Conversion(ty, operand) and
+    mk_Conversion(ty, operand, e)
+  )
+  or
+  exists(GVN left, GVN right, string operator |
+    result = GVN_BinaryOp(left, right, operator) and
+    mk_BinaryOp(left, right, operator, e)
+  )
+  or
+  exists(GVN operand, string operator |
+    result = GVN_UnaryOp(operand, operator) and
+    mk_UnaryOp(operand, operator, e)
+  )
+  or
+  exists(GVN base, GVN offset, ControlFlowNode dom |
+    result = GVN_ArrayAccess(base, offset, dom) and
+    mk_ArrayAccess(base, offset, dom, e)
+  )
+  or
+  exists(GVN pointer, ControlFlowNode dom |
+    result = GVN_Deref(pointer, dom) and
+    mk_Deref(pointer, dom, e)
+  )
+  or
+  result = GVN_Unanalyzable(e) and not analyzableExpr(e)
+}
+
+private predicate analyzableConst(Expr e) {
+  analyzableFloatConst(e) or
+  analyzableIntConst(e)
+}
+
+/**
+ * Holds if `e` is one of the expression kinds that `globalValueNumber`
+ * handles explicitly. Every other expression still gets a global value
+ * number, but a unique one (`GVN_Unanalyzable`) that is not shared with
+ * any other expression.
+ */
+private predicate analyzableExpr(Expr e) {
+  analyzableConst(e) or
+  analyzableThisExpr(e) or
+  analyzableStackVariable(e) or
+  analyzableOtherVariable(e) or
+  analyzableDotFieldAccess(e) or
+  analyzablePointerFieldAccess(e) or
+  analyzableImplicitThisFieldAccess(e) or
+  analyzablePointerDereferenceExpr(e) or
+  analyzableArrayAccess(e) or
+  analyzableConversion(e) or
+  analyzableUnaryOp(e) or
+  analyzableBinaryOp(e)
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/HashCons.qll b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/HashCons.qll
new file mode 100644
index 00000000000..c16389ce9bf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/code/cpp/valuenumbering/HashCons.qll
@@ -0,0 +1,1121 @@
+/**
+ * Provides an implementation of Hash consing.
+ * See https://en.wikipedia.org/wiki/Hash_consing
+ *
+ * The predicate `hashCons` converts an expression into a `HashCons`, which is an
+ * abstract type representing the hash-cons of the expression. If two
+ * expressions have the same `HashCons` then they are structurally equal.
+ *
+ * Important note: this library ignores the possibility that the value of
+ * an expression might change between one occurrence and the next. For
+ * example:
+ *
+ * ```
+ * x = a+b;
+ * a++;
+ * y = a+b;
+ * ```
+ *
+ * In this example, both copies of the expression `a+b` will hash-cons to
+ * the same value, even though the value of `a` has changed. This is the
+ * intended behavior of this library. If you care about the value of the
+ * expression being the same, then you should use the GlobalValueNumbering
+ * library instead.
+ *
+ * To determine if the expression `x` is structurally equal to the
+ * expression `y`, use the library like this:
+ *
+ * ```
+ * hashCons(x) = hashCons(y)
+ * ```
+ */
+
+/*
+ * Note to developers: the correctness of this module depends on the
+ * definitions of HC, hashCons, and analyzableExpr being kept in sync with
+ * each other. If you change this module then make sure that the change is
+ * symmetric across all three.
+ */
+
+import cpp
+
+/** The algebraic datatype underlying `HashCons`; each branch below is one kind of hash-cons. */
+cached
+private newtype HCBase =
+ HC_IntLiteral(int val, Type t) { mk_IntLiteral(val, t, _) } or
+ HC_EnumConstantAccess(EnumConstant val, Type t) { mk_EnumConstantAccess(val, t, _) } or
+ HC_FloatLiteral(float val, Type t) { mk_FloatLiteral(val, t, _) } or
+ HC_StringLiteral(string val, Type t) { mk_StringLiteral(val, t, _) } or
+ HC_Nullptr() { mk_Nullptr(_) } or
+ HC_Variable(Variable x) { mk_Variable(x, _) } or
+ HC_FieldAccess(HashCons s, Field f) { mk_DotFieldAccess(s, f, _) } or
+ HC_Deref(HashCons p) { mk_Deref(p, _) } or
+ HC_PointerFieldAccess(HashCons qual, Field target) { mk_PointerFieldAccess(qual, target, _) } or
+ HC_ThisExpr(Function fcn) { mk_ThisExpr(fcn, _) } or
+ HC_ImplicitThisFieldAccess(Function fcn, Field target) {
+ mk_ImplicitThisFieldAccess(fcn, target, _)
+ } or
+ HC_Conversion(Type t, HashCons child) { mk_Conversion(t, child, _) } or
+ HC_BinaryOp(HashCons lhs, HashCons rhs, string opname) { mk_BinaryOp(lhs, rhs, opname, _) } or
+ HC_UnaryOp(HashCons child, string opname) { mk_UnaryOp(child, opname, _) } or
+ HC_ArrayAccess(HashCons x, HashCons i) { mk_ArrayAccess(x, i, _) } or
+ HC_NonmemberFunctionCall(Function fcn, HC_Args args) { mk_NonmemberFunctionCall(fcn, args, _) } or
+ HC_ExprCall(HashCons hc, HC_Args args) { mk_ExprCall(hc, args, _) } or
+ HC_MemberFunctionCall(Function trg, HashCons qual, HC_Args args) {
+ mk_MemberFunctionCall(trg, qual, args, _)
+ } or
+ // Hack to get around argument 0 of allocator calls being an error expression
+ HC_AllocatorArgZero(Type t) { mk_AllocatorArgZero(t, _) } or
+ HC_NewExpr(Type t, HC_Alloc alloc, HC_Init init) { mk_NewExpr(t, alloc, init, _) } or
+ HC_NewArrayExpr(Type t, HC_Alloc alloc, HC_Extent extent, HC_Init init) {
+ mk_NewArrayExpr(t, alloc, extent, init, _)
+ } or
+ HC_SizeofType(Type t) { mk_SizeofType(t, _) } or
+ HC_SizeofExpr(HashCons child) { mk_SizeofExpr(child, _) } or
+ HC_AlignofType(Type t) { mk_AlignofType(t, _) } or
+ HC_AlignofExpr(HashCons child) { mk_AlignofExpr(child, _) } or
+ HC_UuidofOperator(Type t) { mk_UuidofOperator(t, _) } or
+ HC_TypeidType(Type t) { mk_TypeidType(t, _) } or
+ HC_TypeidExpr(HashCons child) { mk_TypeidExpr(child, _) } or
+ HC_ClassAggregateLiteral(Class c, HC_Fields hcf) { mk_ClassAggregateLiteral(c, hcf, _) } or
+ HC_ArrayAggregateLiteral(Type t, HC_Array hca) { mk_ArrayAggregateLiteral(t, hca, _) } or
+ HC_DeleteExpr(HashCons child) { mk_DeleteExpr(child, _) } or
+ HC_DeleteArrayExpr(HashCons child) { mk_DeleteArrayExpr(child, _) } or
+ HC_ThrowExpr(HashCons child) { mk_ThrowExpr(child, _) } or
+ HC_ReThrowExpr() or // no parameters and no body: this branch has a single value
+ HC_ConditionalExpr(HashCons cond, HashCons trueHC, HashCons falseHC) {
+ mk_ConditionalExpr(cond, trueHC, falseHC, _)
+ } or
+ HC_NoExceptExpr(HashCons child) { mk_NoExceptExpr(child, _) } or
+ // Any expression that is not handled by the cases above is
+ // given a unique number based on the expression itself.
+ HC_Unanalyzable(Expr e) { not analyzableExpr(e, _) }
+
+/** Used to implement the optional initializer on `new` expressions: either `HC_NoInit` or an `HC_HasInit` wrapping a `HashCons`. */
+private newtype HC_Init =
+ HC_NoInit() or
+ HC_HasInit(HashCons hc) { mk_HasInit(hc, _) }
+
+/**
+ * Used to implement the optional allocator call on `new` expressions: either `HC_NoAlloc` or an `HC_HasAlloc` wrapping a `HashCons`.
+ */
+private newtype HC_Alloc =
+ HC_NoAlloc() or
+ HC_HasAlloc(HashCons hc) { mk_HasAlloc(hc, _) }
+
+/**
+ * Used to implement the optional extent expression on `new[]` expressions: either `HC_NoExtent` or an `HC_HasExtent` wrapping a `HashCons`.
+ */
+private newtype HC_Extent =
+ HC_NoExtent() or
+ HC_HasExtent(HashCons hc) { mk_HasExtent(hc, _) }
+
+/** A hash-consed argument list: either `HC_EmptyArgs` or an `HC_ArgCons` cell admitted by `mk_ArgCons`. */
+private newtype HC_Args =
+  HC_EmptyArgs() or
+  HC_ArgCons(HashCons hc, int i, HC_Args list) { mk_ArgCons(hc, i, list, _) }
+
+/**
+ * Used to implement hash-consing of struct initializers.
+ */
+private newtype HC_Fields =
+  HC_EmptyFields(Class c) { c = any(ClassAggregateLiteral cal).getUnspecifiedType() } or
+  HC_FieldCons(Class c, int i, Field f, HashCons hc, HC_Fields hcf) {
+    mk_FieldCons(c, i, f, hc, hcf, _)
+  }
+
+private newtype HC_Array =
+  HC_EmptyArray(Type t) { t = any(ArrayAggregateLiteral aal).getUnspecifiedType() } or
+  HC_ArrayCons(Type t, int i, HashCons hc, HC_Array hca) { mk_ArrayCons(t, i, hc, hca, _) }
+
+/**
+ * HashCons is the hash-cons of an expression. The relationship between `Expr`
+ * and `HC` is many-to-one: every `Expr` has exactly one `HC`, but multiple
+ * expressions can have the same `HC`. If two expressions have the same
+ * `HC`, it means that they are structurally equal. The `HC` is an opaque
+ * value. The only use for the `HC` of an expression is to find other
+ * expressions that are structurally equal to it. Use the predicate
+ * `hashCons` to get the `HC` for an `Expr`.
+ *
+ * Note: `HC` has `toString` and `getLocation` methods, so that it can be
+ * displayed in a results list. These work by picking an arbitrary
+ * expression with this `HC` and using its `toString` and `getLocation`
+ * methods.
+ */
+class HashCons extends HCBase {
+  /** Gets an expression that has this HC. */
+  Expr getAnExpr() { this = hashCons(result) }
+
+  /**
+   * Gets the kind of the HC: one name per `HCBase` branch. This can be useful for debugging.
+   */
+  string getKind() {
+    if this instanceof HC_IntLiteral
+    then result = "IntLiteral"
+    else
+    if this instanceof HC_EnumConstantAccess
+    then result = "EnumConstantAccess"
+    else
+    if this instanceof HC_FloatLiteral
+    then result = "FloatLiteral"
+    else
+    if this instanceof HC_StringLiteral
+    then result = "StringLiteral"
+    else
+    if this instanceof HC_Nullptr
+    then result = "Nullptr"
+    else
+    if this instanceof HC_Variable
+    then result = "Variable"
+    else
+    if this instanceof HC_FieldAccess
+    then result = "FieldAccess"
+    else
+    if this instanceof HC_Deref
+    then result = "Deref"
+    else
+    if this instanceof HC_ThisExpr
+    then result = "ThisExpr"
+    else
+    if this instanceof HC_Conversion
+    then result = "Conversion"
+    else
+    if this instanceof HC_BinaryOp
+    then result = "BinaryOp"
+    else
+    if this instanceof HC_UnaryOp
+    then result = "UnaryOp"
+    else
+    if this instanceof HC_ArrayAccess
+    then result = "ArrayAccess"
+    else
+    if this instanceof HC_Unanalyzable
+    then result = "Unanalyzable"
+    else
+    if this instanceof HC_NonmemberFunctionCall
+    then result = "NonmemberFunctionCall"
+    else
+    if this instanceof HC_MemberFunctionCall
+    then result = "MemberFunctionCall"
+    else
+    if this instanceof HC_NewExpr
+    then result = "NewExpr"
+    else
+    if this instanceof HC_NewArrayExpr
+    then result = "NewArrayExpr"
+    else
+    if this instanceof HC_SizeofType
+    then result = "SizeofTypeOperator"
+    else
+    if this instanceof HC_SizeofExpr
+    then result = "SizeofExprOperator"
+    else
+    if this instanceof HC_AlignofType
+    then result = "AlignofTypeOperator"
+    else
+    if this instanceof HC_AlignofExpr
+    then result = "AlignofExprOperator"
+    else
+    if this instanceof HC_UuidofOperator
+    then result = "UuidofOperator"
+    else
+    if this instanceof HC_TypeidType
+    then result = "TypeidType"
+    else
+    if this instanceof HC_TypeidExpr
+    then result = "TypeidExpr"
+    else
+    if this instanceof HC_ArrayAggregateLiteral
+    then result = "ArrayAggregateLiteral"
+    else
+    if this instanceof HC_ClassAggregateLiteral
+    then result = "ClassAggregateLiteral"
+    else
+    if this instanceof HC_DeleteExpr
+    then result = "DeleteExpr"
+    else
+    if this instanceof HC_DeleteArrayExpr
+    then result = "DeleteArrayExpr"
+    else
+    if this instanceof HC_ThrowExpr
+    then result = "ThrowExpr"
+    else
+    if this instanceof HC_ReThrowExpr
+    then result = "ReThrowExpr"
+    else
+    if this instanceof HC_ExprCall
+    then result = "ExprCall"
+    else
+    if this instanceof HC_ConditionalExpr
+    then result = "ConditionalExpr"
+    else
+    if this instanceof HC_NoExceptExpr
+    then result = "NoExceptExpr"
+    else
+    if this instanceof HC_AllocatorArgZero
+    then result = "AllocatorArgZero"
+    else
+    if this instanceof HC_PointerFieldAccess
+    then result = "PointerFieldAccess"
+    else
+    if this instanceof HC_ImplicitThisFieldAccess
+    then result = "ImplicitThisFieldAccess"
+    else result = "error"
+  }
+
+  /**
+   * Gets an example of an expression with this HC.
+   * This is useful for things like implementing toString().
+   */
+  private Expr exampleExpr() {
+    // Pick the expression with the minimum source location string. This is
+    // just an arbitrary way to pick an expression with this `HC`.
+    result =
+      min(Expr e |
+        this = hashCons(e)
+      |
+        e
+        order by
+          exampleLocationString(e.getLocation()), e.getLocation().getStartColumn(),
+          e.getLocation().getEndLine(), e.getLocation().getEndColumn()
+      )
+  }
+
+  /** Gets a textual representation of this element. */
+  string toString() { result = exampleExpr().toString() }
+
+  /** Gets the primary location of this element. */
+  Location getLocation() { result = exampleExpr().getLocation() }
+}
+
+/**
+ * Gets a sort key for `l`: "~" for an unknown location, otherwise the absolute path of its
+ * file, so that unknown locations sort after known ones when picking an example expression.
+ */
+private string exampleLocationString(Location l) {
+  (l instanceof UnknownLocation and result = "~") or
+  (not l instanceof UnknownLocation and result = l.getFile().getAbsolutePath())
+}
+
+private predicate analyzableIntLiteral(Literal e) {
+  e.getUnspecifiedType() instanceof IntegralType and
+  1 = strictcount(e.getUnspecifiedType()) and
+  1 = strictcount(e.getValue().toInt())
+}
+
+private predicate mk_IntLiteral(int val, Type t, Expr e) {
+  t = e.getUnspecifiedType() and
+  val = e.getValue().toInt() and
+  analyzableIntLiteral(e)
+}
+
+private predicate analyzableEnumConstantAccess(EnumConstantAccess e) {
+  e.getUnspecifiedType() instanceof Enum and
+  1 = strictcount(e.getUnspecifiedType()) and
+  1 = strictcount(e.getValue().toInt())
+}
+
+private predicate mk_EnumConstantAccess(EnumConstant val, Type t, Expr e) {
+  t = e.getUnspecifiedType() and
+  val = e.(EnumConstantAccess).getTarget() and
+  analyzableEnumConstantAccess(e)
+}
+
+/** Holds if `e` has exactly one `toFloat()` value and one unspecified type, a `FloatingPointType`. */
+private predicate analyzableFloatLiteral(Literal e) {
+  e.getUnspecifiedType() instanceof FloatingPointType and
+  strictcount(e.getUnspecifiedType()) = 1 and strictcount(e.getValue().toFloat()) = 1
+}
+
+/** Holds if `e` is an analyzable floating-point literal with value `val` and unspecified type `t`. */
+private predicate mk_FloatLiteral(float val, Type t, Expr e) {
+  analyzableFloatLiteral(e) and
+  t = e.getUnspecifiedType() and val = e.getValue().toFloat()
+}
+
+/** Holds if `e` is a null value of type `NullPointerType` with exactly one unspecified type. */
+private predicate analyzableNullptr(NullValue e) {
+  e.getType() instanceof NullPointerType and strictcount(e.getUnspecifiedType()) = 1
+}
+
+private predicate mk_Nullptr(Expr e) { analyzableNullptr(e) } // carries no data beyond analyzability
+
+/** Holds if `e` has exactly one value and one unspecified type, an array with `CharType` base. */
+private predicate analyzableStringLiteral(Literal e) {
+  e.getUnspecifiedType().(ArrayType).getBaseType() instanceof CharType and
+  strictcount(e.getUnspecifiedType()) = 1 and strictcount(e.getValue()) = 1
+}
+
+/** Holds if `e` is an analyzable string literal with value `val` and unspecified type `t`. */
+private predicate mk_StringLiteral(string val, Type t, Expr e) {
+  analyzableStringLiteral(e) and
+  t = e.getUnspecifiedType() and val = e.getValue() and
+  t.(ArrayType).getBaseType() instanceof CharType
+}
+
+/** Holds if `access` has exactly one target and exactly one fully converted qualifier. */
+private predicate analyzableDotFieldAccess(DotFieldAccess access) {
+  strictcount(access.getQualifier().getFullyConverted()) = 1 and strictcount(access.getTarget()) = 1
+}
+
+/** Holds if `access` is an analyzable access to `target` whose qualifier hashes to `qualifier`. */
+private predicate mk_DotFieldAccess(HashCons qualifier, Field target, DotFieldAccess access) {
+  analyzableDotFieldAccess(access) and
+  qualifier = hashCons(access.getQualifier().getFullyConverted()) and target = access.getTarget()
+}
+
+/** Holds if `access` has exactly one target and exactly one fully converted qualifier. */
+private predicate analyzablePointerFieldAccess(PointerFieldAccess access) {
+  strictcount(access.getQualifier().getFullyConverted()) = 1 and strictcount(access.getTarget()) = 1
+}
+
+/** Holds if `access` is an analyzable access to `target` whose qualifier hashes to `qualifier`. */
+private predicate mk_PointerFieldAccess(HashCons qualifier, Field target, PointerFieldAccess access) {
+  analyzablePointerFieldAccess(access) and
+  qualifier = hashCons(access.getQualifier().getFullyConverted()) and target = access.getTarget()
+}
+
+/** Holds if `access` has exactly one target and exactly one enclosing function. */
+private predicate analyzableImplicitThisFieldAccess(ImplicitThisFieldAccess access) {
+  strictcount(access.getEnclosingFunction()) = 1 and strictcount(access.getTarget()) = 1
+}
+
+/** Holds if `access` is an analyzable implicit-`this` access to `target` inside function `fcn`. */
+private predicate mk_ImplicitThisFieldAccess(
+  Function fcn, Field target, ImplicitThisFieldAccess access
+) {
+  analyzableImplicitThisFieldAccess(access) and
+  fcn = access.getEnclosingFunction() and target = access.getTarget()
+}
+
+/** Holds if `access` is not a `FieldAccess` and has exactly one target. */
+private predicate analyzableVariable(VariableAccess access) {
+  strictcount(access.getTarget()) = 1 and not access instanceof FieldAccess
+}
+
+/** Holds if `access` is an analyzable access to variable `x`. */
+private predicate mk_Variable(Variable x, VariableAccess access) {
+  x = access.getTarget() and analyzableVariable(access)
+}
+
+/** Holds if `conv` has exactly one unspecified type and exactly one converted expression. */
+private predicate analyzableConversion(Conversion conv) {
+  strictcount(conv.getExpr()) = 1 and strictcount(conv.getUnspecifiedType()) = 1
+}
+
+/** Holds if `conv` is an analyzable conversion to type `t` of an expression hashing to `child`. */
+private predicate mk_Conversion(Type t, HashCons child, Conversion conv) {
+  analyzableConversion(conv) and
+  child = hashCons(conv.getExpr()) and t = conv.getUnspecifiedType()
+}
+
+/** Holds if `op` has exactly one operator and one fully converted form for each operand. */
+private predicate analyzableBinaryOp(BinaryOperation op) {
+  strictcount(op.getOperator()) = 1 and strictcount(op.getLeftOperand().getFullyConverted()) = 1 and
+  strictcount(op.getRightOperand().getFullyConverted()) = 1
+}
+
+/** Holds if `op` is an analyzable `opname` operation whose operands hash to `lhs` and `rhs`. */
+private predicate mk_BinaryOp(HashCons lhs, HashCons rhs, string opname, BinaryOperation op) {
+  analyzableBinaryOp(op) and opname = op.getOperator() and
+  lhs = hashCons(op.getLeftOperand().getFullyConverted()) and
+  rhs = hashCons(op.getRightOperand().getFullyConverted())
+}
+
+/** Holds if `op` is not a pointer dereference and has one operator and one converted operand. */
+private predicate analyzableUnaryOp(UnaryOperation op) {
+  not op instanceof PointerDereferenceExpr and strictcount(op.getOperator()) = 1 and
+  strictcount(op.getOperand().getFullyConverted()) = 1
+}
+
+/** Holds if `op` is an analyzable unary `opname` operation on an operand hashing to `child`. */
+private predicate mk_UnaryOp(HashCons child, string opname, UnaryOperation op) {
+  analyzableUnaryOp(op) and opname = op.getOperator() and
+  child = hashCons(op.getOperand().getFullyConverted())
+}
+
+/** Holds if `thisExpr` has exactly one enclosing function. */
+private predicate analyzableThisExpr(ThisExpr thisExpr) {
+  1 = strictcount(thisExpr.getEnclosingFunction())
+}
+
+private predicate mk_ThisExpr(Function fcn, ThisExpr thisExpr) {
+  fcn = thisExpr.getEnclosingFunction() and analyzableThisExpr(thisExpr)
+}
+
+private predicate analyzableArrayAccess(ArrayExpr ae) {
+  strictcount(ae.getArrayOffset().getFullyConverted()) = 1 and
+  strictcount(ae.getArrayBase().getFullyConverted()) = 1
+}
+
+/** Holds if `ae` is an analyzable array access whose base and offset hash to `base` and `offset`. */
+private predicate mk_ArrayAccess(HashCons base, HashCons offset, ArrayExpr ae) {
+  analyzableArrayAccess(ae) and base = hashCons(ae.getArrayBase().getFullyConverted()) and
+  offset = hashCons(ae.getArrayOffset().getFullyConverted())
+}
+
+/** Holds if the operand of `deref` has exactly one fully converted form. */
+private predicate analyzablePointerDereferenceExpr(PointerDereferenceExpr deref) {
+  1 = strictcount(deref.getOperand().getFullyConverted())
+}
+
+private predicate mk_Deref(HashCons p, PointerDereferenceExpr deref) {
+  p = hashCons(deref.getOperand().getFullyConverted()) and analyzablePointerDereferenceExpr(deref)
+}
+
+/** Holds if `fc` has no qualifier, exactly one target, and one converted form per argument. */
+private predicate analyzableNonmemberFunctionCall(FunctionCall fc) {
+  not exists(fc.getQualifier()) and strictcount(fc.getTarget()) = 1 and
+  forall(int i | i in [0 .. fc.getNumberOfArguments() - 1] |
+    strictcount(fc.getArgument(i).getFullyConverted()) = 1
+  )
+}
+
+/** Holds if `fc` is an analyzable unqualified call to `fcn` whose arguments hash to `args`. */
+private predicate mk_NonmemberFunctionCall(Function fcn, HC_Args args, FunctionCall fc) {
+  analyzableNonmemberFunctionCall(fc) and fcn = fc.getTarget() and
+  (
+    // No arguments: the list is the empty-arguments value.
+    fc.getNumberOfArguments() = 0 and args = HC_EmptyArgs()
+    or
+    exists(HashCons last, HC_Args init |
+      mk_ArgConsInner(last, init, fc.getNumberOfArguments() - 1, args, fc)
+    )
+  )
+}
+
+/** Holds if the called expression and every argument of `ec` have one fully converted form. */
+private predicate analyzableExprCall(ExprCall ec) {
+  strictcount(ec.getExpr().getFullyConverted()) = 1 and
+  forall(int i | i in [0 .. ec.getNumberOfArguments() - 1] |
+    strictcount(ec.getArgument(i).getFullyConverted()) = 1
+  )
+}
+
+/** Holds if `ec` is an analyzable call through an expression hashing to `hc`, with arguments `args`. */
+private predicate mk_ExprCall(HashCons hc, HC_Args args, ExprCall ec) {
+  // Guarded like the sibling call predicates, so a rejected call only gets `HC_Unanalyzable`.
+  analyzableExprCall(ec) and hc.getAnExpr() = ec.getExpr() and
+  (
+    ec.getNumberOfArguments() = 0 and args = HC_EmptyArgs()
+    or
+    mk_ArgConsInner(_, _, ec.getNumberOfArguments() - 1, args, ec)
+  )
+}
+
+/** Holds if `fc` has exactly one target, one converted qualifier, and one form per argument. */
+private predicate analyzableMemberFunctionCall(FunctionCall fc) {
+  strictcount(fc.getQualifier().getFullyConverted()) = 1 and strictcount(fc.getTarget()) = 1 and
+  forall(int i | i in [0 .. fc.getNumberOfArguments() - 1] |
+    strictcount(fc.getArgument(i).getFullyConverted()) = 1
+  )
+}
+
+/** Holds if `fc` is an analyzable call to `fcn` on a qualifier hashing to `qual`, with `args`. */
+private predicate mk_MemberFunctionCall(Function fcn, HashCons qual, HC_Args args, FunctionCall fc) {
+  analyzableMemberFunctionCall(fc) and fcn = fc.getTarget() and
+  qual = hashCons(fc.getQualifier().getFullyConverted()) and
+  (
+    // No arguments: the list is the empty-arguments value.
+    fc.getNumberOfArguments() = 0 and args = HC_EmptyArgs()
+    or
+    exists(HashCons last, HC_Args init |
+      mk_ArgConsInner(last, init, fc.getNumberOfArguments() - 1, args, fc)
+    )
+  )
+}
+
+/** Holds if `c` is analyzable as an expression call, a member call, or a non-member call. */
+private predicate analyzableCall(Call c) {
+  // Each call shape has its own predicate; satisfying any one of them is enough.
+  analyzableExprCall(c) or
+  analyzableMemberFunctionCall(c) or
+  analyzableNonmemberFunctionCall(c)
+}
+
+/**
+ * Holds if `c` is an analyzable call whose argument at index `i` has hash-cons `hc`,
+ * and whose arguments before index `i` have hash-cons `list` (`HC_EmptyArgs()` when `i` is 0).
+ */
+private predicate mk_ArgCons(HashCons hc, int i, HC_Args list, Call c) {
+ analyzableCall(c) and
+ hc = hashCons(c.getArgument(i).getFullyConverted()) and
+ (
+ exists(HashCons head, HC_Args tail |
+ mk_ArgConsInner(head, tail, i - 1, list, c) and
+ i > 0
+ )
+ or
+ i = 0 and
+ list = HC_EmptyArgs()
+ )
+}
+
+// Holds if `list` is `HC_ArgCons(head, i, tail)` and `mk_ArgCons(head, i, tail, c)` holds. Split out to avoid a join ordering issue.
+pragma[noopt]
+private predicate mk_ArgConsInner(HashCons head, HC_Args tail, int i, HC_Args list, Call c) {
+ list = HC_ArgCons(head, i, tail) and
+ mk_ArgCons(head, i, tail, c)
+}
+
+/**
+ * The 0th argument of an allocator call in a new expression is always an error expression; this
+ * works around it. Holds if `e` is child 0 of the allocator call of exactly one such expression.
+ */
+private predicate analyzableAllocatorArgZero(ErrorExpr e) {
+  strictcount(NewOrNewArrayExpr new | new.getAllocatorCall().getChild(0) = e) = 1 and
+  exists(NewOrNewArrayExpr new |
+    e = new.getAllocatorCall().getChild(0) and
+    strictcount(new.getUnspecifiedType()) = 1
+  )
+}
+
+private predicate mk_AllocatorArgZero(Type t, ErrorExpr e) {
+  exists(NewOrNewArrayExpr new |
+    t = new.getUnspecifiedType() and
+    e = new.getAllocatorCall().getChild(0)
+  ) and
+  analyzableAllocatorArgZero(e)
+}
+
+private predicate mk_HasInit(HashCons hc, NewOrNewArrayExpr new) { // `hc`: the initializer's hash-cons
+  hc = hashCons(new.(NewArrayExpr).getInitializer().getFullyConverted()) or
+  hc = hashCons(new.(NewExpr).getInitializer().getFullyConverted())
+}
+
+private predicate mk_HasAlloc(HashCons hc, NewOrNewArrayExpr new) { // `hc`: the allocator call's hash-cons
+  hc = hashCons(new.(NewArrayExpr).getAllocatorCall().getFullyConverted()) or
+  hc = hashCons(new.(NewExpr).getAllocatorCall().getFullyConverted())
+}
+
+private predicate mk_HasExtent(HashCons hc, NewArrayExpr new) { // `hc`: the array extent's hash-cons
+  hc = hashCons(new.getExtent().getFullyConverted())
+}
+
+private predicate analyzableNewExpr(NewExpr new) {
+  count(new.getInitializer().getFullyConverted()) <= 1 and
+  count(new.getAllocatorCall().getFullyConverted()) <= 1 and
+  strictcount(new.getAllocatedType().getUnspecifiedType()) = 1
+}
+
+/** Holds if `new` is an analyzable `new` of type `t` with allocator `alloc` and initializer `init`. */
+private predicate mk_NewExpr(Type t, HC_Alloc alloc, HC_Init init, NewExpr new) {
+  analyzableNewExpr(new) and
+  t = new.getAllocatedType().getUnspecifiedType() and
+  // The allocator call and the initializer may each be absent; each has its own "absent" value.
+  (
+    not exists(new.getAllocatorCall().getFullyConverted()) and alloc = HC_NoAlloc()
+    or
+    alloc = HC_HasAlloc(hashCons(new.getAllocatorCall().getFullyConverted()))
+  ) and
+  (
+    not exists(new.getInitializer().getFullyConverted()) and init = HC_NoInit()
+    or
+    init = HC_HasInit(hashCons(new.getInitializer().getFullyConverted()))
+  )
+}
+
+private predicate analyzableNewArrayExpr(NewArrayExpr new) { // at most one of each optional part
+  count(new.getExtent().getFullyConverted()) <= 1 and
+  count(new.getInitializer().getFullyConverted()) <= 1 and
+  count(new.getAllocatorCall().getFullyConverted()) <= 1 and
+  strictcount(new.getAllocatedType().getUnspecifiedType()) = 1
+}
+
+/** Holds if `new` is an analyzable `new[]` of type `t` with parts `alloc`, `extent` and `init`. */
+private predicate mk_NewArrayExpr(
+  Type t, HC_Alloc alloc, HC_Extent extent, HC_Init init, NewArrayExpr new
+) {
+  analyzableNewArrayExpr(new) and
+  // Use the unspecified allocated type: it is the type `analyzableNewArrayExpr` counts, and the
+  // one `mk_NewExpr` hashes.
+  t = new.getAllocatedType().getUnspecifiedType() and
+  (
+    not exists(new.getAllocatorCall().getFullyConverted()) and alloc = HC_NoAlloc()
+    or
+    alloc = HC_HasAlloc(hashCons(new.getAllocatorCall().getFullyConverted()))
+  ) and
+  (
+    not exists(new.getInitializer().getFullyConverted()) and init = HC_NoInit()
+    or
+    init = HC_HasInit(hashCons(new.getInitializer().getFullyConverted()))
+  ) and
+  (
+    not exists(new.getExtent().getFullyConverted()) and extent = HC_NoExtent()
+    or
+    extent = HC_HasExtent(hashCons(new.getExtent().getFullyConverted()))
+  )
+}
+
+/** Holds if the children of `e` have exactly one fully converted form between them. */
+private predicate analyzableDeleteExpr(DeleteExpr e) {
+  1 = strictcount(e.getAChild().getFullyConverted())
+}
+
+private predicate mk_DeleteExpr(HashCons hc, DeleteExpr e) {
+  hc = hashCons(e.getAChild().getFullyConverted()) and analyzableDeleteExpr(e)
+}
+
+/** Holds if the children of `e` have exactly one fully converted form between them. */
+private predicate analyzableDeleteArrayExpr(DeleteArrayExpr e) {
+  1 = strictcount(e.getAChild().getFullyConverted())
+}
+
+private predicate mk_DeleteArrayExpr(HashCons hc, DeleteArrayExpr e) {
+  hc = hashCons(e.getAChild().getFullyConverted()) and analyzableDeleteArrayExpr(e)
+}
+
+/** Holds if `e` has exactly one type operand and exactly one unspecified type. */
+private predicate analyzableSizeofType(SizeofTypeOperator e) {
+  strictcount(e.getTypeOperand()) = 1 and strictcount(e.getUnspecifiedType()) = 1
+}
+
+/** Holds if `e` is an analyzable `SizeofTypeOperator` with type operand `t`. */
+private predicate mk_SizeofType(Type t, SizeofTypeOperator e) {
+  t = e.getTypeOperand() and analyzableSizeofType(e)
+}
+
+/** Holds if `e` is a `SizeofExprOperator` whose children have exactly one fully converted form. */
+private predicate analyzableSizeofExpr(Expr e) {
+  strictcount(e.getAChild().getFullyConverted()) = 1 and e instanceof SizeofExprOperator
+}
+
+/** Holds if `e` is an analyzable `SizeofExprOperator` with a child hashing to `child`. */
+private predicate mk_SizeofExpr(HashCons child, SizeofExprOperator e) {
+  child = hashCons(e.getAChild()) and analyzableSizeofExpr(e)
+}
+
+private predicate analyzableUuidofOperator(UuidofOperator e) { 1 = strictcount(e.getTypeOperand()) }
+
+/** Holds if `e` is a `UuidofOperator` with exactly one type operand, `t`. */
+private predicate mk_UuidofOperator(Type t, UuidofOperator e) {
+  t = e.getTypeOperand() and analyzableUuidofOperator(e)
+}
+
+/** Holds if `e` has no child expression and exactly one result type. */
+private predicate analyzableTypeidType(TypeidOperator e) {
+  not exists(e.getAChild()) and strictcount(e.getResultType()) = 1
+}
+
+/** Holds if `e` is an analyzable child-less `TypeidOperator` with result type `t`. */
+private predicate mk_TypeidType(Type t, TypeidOperator e) {
+  t = e.getResultType() and analyzableTypeidType(e)
+}
+
+/** Holds if `e` is a `TypeidOperator` whose children have exactly one fully converted form. */
+private predicate analyzableTypeidExpr(Expr e) {
+  strictcount(e.getAChild().getFullyConverted()) = 1 and e instanceof TypeidOperator
+}
+
+/** Holds if `e` is an analyzable `TypeidOperator` with a child hashing to `child`. */
+private predicate mk_TypeidExpr(HashCons child, TypeidOperator e) {
+  child = hashCons(e.getAChild()) and analyzableTypeidExpr(e)
+}
+
+/** Holds if `e` has exactly one type operand and exactly one unspecified type. */
+private predicate analyzableAlignofType(AlignofTypeOperator e) {
+  strictcount(e.getTypeOperand()) = 1 and strictcount(e.getUnspecifiedType()) = 1
+}
+
+/** Holds if `e` is an analyzable `AlignofTypeOperator` with type operand `t`. */
+private predicate mk_AlignofType(Type t, AlignofTypeOperator e) {
+  t = e.getTypeOperand() and analyzableAlignofType(e)
+}
+
+/** Holds if `e` has exactly one expression operand. */
+private predicate analyzableAlignofExpr(AlignofExprOperator e) {
+  1 = strictcount(e.getExprOperand())
+}
+
+private predicate mk_AlignofExpr(HashCons child, AlignofExprOperator e) {
+  child = hashCons(e.getAChild()) and analyzableAlignofExpr(e)
+}
+
+/**
+ * Gets the hash cons of field initializer expressions [0..i), where i > 0, for
+ * the class aggregate literal `cal` of type `c`. It is the `HC_FieldCons` value
+ * that `mk_FieldCons` holds for at index `i - 1`.
+ */
+HC_Fields aggInitExprsUpTo(ClassAggregateLiteral cal, Class c, int i) {
+  exists(Field field, HashCons last, HC_Fields init |
+    mk_FieldCons(c, i - 1, field, last, init, cal) and
+    result = HC_FieldCons(c, i - 1, field, last, init)
+  )
+}
+
+/**
+ * Holds if `cal` is an analyzable class aggregate literal of type `c`, `f` is the field with
+ * initialization order `i`, `hc` is the hash cons of the fully converted expression that
+ * initializes `f`, and `hcf` is the hash cons of the initializers before index `i`.
+ */
+private predicate mk_FieldCons(
+  Class c, int i, Field f, HashCons hc, HC_Fields hcf, ClassAggregateLiteral cal
+) {
+  analyzableClassAggregateLiteral(cal) and
+  c = cal.getUnspecifiedType() and
+  i = f.getInitializationOrder() and
+  hc = hashCons(cal.getFieldExpr(f).getFullyConverted()) and
+  (
+    i = 0 and hcf = HC_EmptyFields(c)
+    or
+    hcf = aggInitExprsUpTo(cal, c, i)
+  )
+}
+
+private predicate analyzableClassAggregateLiteral(ClassAggregateLiteral cal) {
+ forall(int i | exists(cal.getChild(i)) | // for every child index `i` that exists
+ strictcount(cal.getChild(i).getFullyConverted()) = 1 and // the child has one fully converted form
+ strictcount(Field f | cal.getChild(i) = cal.getFieldExpr(f)) = 1 and // it is the field expression of one field
+ strictcount(Field f, int j | // and there is exactly one (field, initialization order) pair for it
+ cal.getFieldExpr(f) = cal.getChild(i) and j = f.getInitializationOrder()
+ ) = 1
+ )
+}
+
+/** Holds if `cal` is an analyzable aggregate literal of class `c` whose initializers hash to `hcf`. */
+private predicate mk_ClassAggregateLiteral(Class c, HC_Fields hcf, ClassAggregateLiteral cal) {
+  analyzableClassAggregateLiteral(cal) and
+  c = cal.getUnspecifiedType() and
+  (
+    cal.getNumChild() = 0 and hcf = HC_EmptyFields(c)
+    or
+    hcf = aggInitExprsUpTo(cal, c, cal.getNumChild())
+  )
+}
+
+/** Holds if `aal` has exactly one unspecified type and each child has one fully converted form. */
+private predicate analyzableArrayAggregateLiteral(ArrayAggregateLiteral aal) {
+  strictcount(aal.getUnspecifiedType()) = 1 and
+  forall(int i | exists(aal.getChild(i)) | strictcount(aal.getChild(i).getFullyConverted()) = 1)
+}
+
+/**
+ * Gets the hash cons of the elements [0..i), where i > 0, of the array aggregate literal `aal` of type `t`.
+ */
+private HC_Array arrayElemsUpTo(ArrayAggregateLiteral aal, Type t, int i) {
+  exists(HC_Array init, HashCons last |
+    mk_ArrayCons(t, i - 1, last, init, aal) and
+    result = HC_ArrayCons(t, i - 1, last, init)
+  )
+}
+
+/** Holds if child `i` of the analyzable literal `aal` of type `t` hashes to `hc`, preceded by `hca`. */
+private predicate mk_ArrayCons(Type t, int i, HashCons hc, HC_Array hca, ArrayAggregateLiteral aal) {
+  analyzableArrayAggregateLiteral(aal) and
+  hc = hashCons(aal.getChild(i)) and
+  t = aal.getUnspecifiedType() and
+  (
+    i = 0 and hca = HC_EmptyArray(t)
+    or
+    hca = arrayElemsUpTo(aal, t, i)
+  )
+}
+
+private predicate mk_ArrayAggregateLiteral(Type t, HC_Array hca, ArrayAggregateLiteral aal) {
+  // Guard both branches: previously an empty literal was accepted without this check.
+  analyzableArrayAggregateLiteral(aal) and
+  t = aal.getUnspecifiedType() and
+  (
+    // Non-empty literal: the hash cons of all of its elements, as in `mk_ClassAggregateLiteral`.
+    hca = arrayElemsUpTo(aal, t, aal.getNumChild())
+    or
+    // Empty literal: the empty-array value for this type.
+    aal.getNumChild() = 0 and
+    hca = HC_EmptyArray(t)
+  )
+}
+
+/** Holds if the thrown expression of `te` has exactly one fully converted form. */
+private predicate analyzableThrowExpr(ThrowExpr te) {
+  1 = strictcount(te.getExpr().getFullyConverted())
+}
+
+private predicate mk_ThrowExpr(HashCons hc, ThrowExpr te) {
+  te.getExpr().getFullyConverted() = hc.getAnExpr() and analyzableThrowExpr(te)
+}
+
+private predicate analyzableReThrowExpr(ReThrowExpr rte) { any() } // every `ReThrowExpr` is analyzable
+
+private predicate mk_ReThrowExpr(ReThrowExpr te) { any() } // holds for every `ReThrowExpr`; carries no further data
+
+private predicate analyzableConditionalExpr(ConditionalExpr ce) {
+  strictcount(ce.getElse().getFullyConverted()) = 1 and
+  strictcount(ce.getThen().getFullyConverted()) = 1 and
+  strictcount(ce.getCondition().getFullyConverted()) = 1
+}
+
+/** Holds if `ce` is analyzable and its condition and branches hash to `cond`, `trueHc`, `falseHc`. */
+private predicate mk_ConditionalExpr(
+  HashCons cond, HashCons trueHc, HashCons falseHc, ConditionalExpr ce
+) {
+  analyzableConditionalExpr(ce) and ce.getCondition() = cond.getAnExpr() and
+  ce.getThen() = trueHc.getAnExpr() and
+  ce.getElse() = falseHc.getAnExpr()
+}
+
+/** Holds if the children of `nee` have exactly one fully converted form between them. */
+private predicate analyzableNoExceptExpr(NoExceptExpr nee) {
+  1 = strictcount(nee.getAChild().getFullyConverted())
+}
+
+private predicate mk_NoExceptExpr(HashCons child, NoExceptExpr nee) {
+  child.getAnExpr() = nee.getExpr().getFullyConverted() and analyzableNoExceptExpr(nee)
+}
+
+/** Gets the hash-cons of expression `e`: one case per `mk_*` predicate, else `HC_Unanalyzable(e)`. */
+cached
+HashCons hashCons(Expr e) {
+  exists(int val, Type t |
+    mk_IntLiteral(val, t, e) and
+    result = HC_IntLiteral(val, t)
+  )
+  or
+  exists(EnumConstant val, Type t |
+    mk_EnumConstantAccess(val, t, e) and
+    result = HC_EnumConstantAccess(val, t)
+  )
+  or
+  exists(float val, Type t |
+    mk_FloatLiteral(val, t, e) and
+    result = HC_FloatLiteral(val, t)
+  )
+  or
+  exists(string val, Type t |
+    mk_StringLiteral(val, t, e) and
+    result = HC_StringLiteral(val, t)
+  )
+  or
+  exists(Variable x |
+    mk_Variable(x, e) and
+    result = HC_Variable(x)
+  )
+  or
+  exists(HashCons qualifier, Field target |
+    mk_DotFieldAccess(qualifier, target, e) and
+    result = HC_FieldAccess(qualifier, target)
+  )
+  or
+  exists(HashCons qualifier, Field target |
+    mk_PointerFieldAccess(qualifier, target, e) and
+    result = HC_PointerFieldAccess(qualifier, target)
+  )
+  or
+  exists(Function fcn, Field target |
+    mk_ImplicitThisFieldAccess(fcn, target, e) and
+    result = HC_ImplicitThisFieldAccess(fcn, target)
+  )
+  or
+  exists(Function fcn |
+    mk_ThisExpr(fcn, e) and
+    result = HC_ThisExpr(fcn)
+  )
+  or
+  exists(Type t, HashCons child |
+    mk_Conversion(t, child, e) and
+    result = HC_Conversion(t, child)
+  )
+  or
+  exists(HashCons lhs, HashCons rhs, string opname |
+    mk_BinaryOp(lhs, rhs, opname, e) and
+    result = HC_BinaryOp(lhs, rhs, opname)
+  )
+  or
+  exists(HashCons child, string opname |
+    mk_UnaryOp(child, opname, e) and
+    result = HC_UnaryOp(child, opname)
+  )
+  or
+  exists(HashCons x, HashCons i |
+    mk_ArrayAccess(x, i, e) and
+    result = HC_ArrayAccess(x, i)
+  )
+  or
+  exists(HashCons p |
+    mk_Deref(p, e) and
+    result = HC_Deref(p)
+  )
+  or
+  exists(Function fcn, HC_Args args |
+    mk_NonmemberFunctionCall(fcn, args, e) and
+    result = HC_NonmemberFunctionCall(fcn, args)
+  )
+  or
+  exists(HashCons hc, HC_Args args |
+    mk_ExprCall(hc, args, e) and
+    result = HC_ExprCall(hc, args)
+  )
+  or
+  exists(Function fcn, HashCons qual, HC_Args args |
+    mk_MemberFunctionCall(fcn, qual, args, e) and
+    result = HC_MemberFunctionCall(fcn, qual, args)
+  )
+  or
+  // works around an extractor issue
+  exists(Type t |
+    mk_AllocatorArgZero(t, e) and
+    result = HC_AllocatorArgZero(t)
+  )
+  or
+  exists(Type t, HC_Alloc alloc, HC_Init init |
+    mk_NewExpr(t, alloc, init, e) and
+    result = HC_NewExpr(t, alloc, init)
+  )
+  or
+  exists(Type t, HC_Alloc alloc, HC_Extent extent, HC_Init init |
+    mk_NewArrayExpr(t, alloc, extent, init, e) and
+    result = HC_NewArrayExpr(t, alloc, extent, init)
+  )
+  or
+  exists(Type t |
+    mk_SizeofType(t, e) and
+    result = HC_SizeofType(t)
+  )
+  or
+  exists(HashCons child |
+    mk_SizeofExpr(child, e) and
+    result = HC_SizeofExpr(child)
+  )
+  or
+  exists(Type t |
+    mk_TypeidType(t, e) and
+    result = HC_TypeidType(t)
+  )
+  or
+  exists(HashCons child |
+    mk_TypeidExpr(child, e) and
+    result = HC_TypeidExpr(child)
+  )
+  or
+  exists(Type t |
+    mk_UuidofOperator(t, e) and
+    result = HC_UuidofOperator(t)
+  )
+  or
+  exists(Type t |
+    mk_AlignofType(t, e) and
+    result = HC_AlignofType(t)
+  )
+  or
+  exists(HashCons child |
+    mk_AlignofExpr(child, e) and
+    result = HC_AlignofExpr(child)
+  )
+  or
+  exists(Class c, HC_Fields hfc |
+    mk_ClassAggregateLiteral(c, hfc, e) and
+    result = HC_ClassAggregateLiteral(c, hfc)
+  )
+  or
+  exists(Type t, HC_Array hca |
+    mk_ArrayAggregateLiteral(t, hca, e) and
+    result = HC_ArrayAggregateLiteral(t, hca)
+  )
+  or
+  exists(HashCons child |
+    mk_DeleteExpr(child, e) and
+    result = HC_DeleteExpr(child)
+  )
+  or
+  exists(HashCons child |
+    mk_DeleteArrayExpr(child, e) and
+    result = HC_DeleteArrayExpr(child)
+  )
+  or
+  exists(HashCons child |
+    mk_ThrowExpr(child, e) and
+    result = HC_ThrowExpr(child)
+  )
+  or
+  mk_ReThrowExpr(e) and result = HC_ReThrowExpr()
+  or
+  exists(HashCons cond, HashCons thenHC, HashCons elseHC |
+    mk_ConditionalExpr(cond, thenHC, elseHC, e) and
+    result = HC_ConditionalExpr(cond, thenHC, elseHC)
+  )
+  or
+  exists(HashCons child | mk_NoExceptExpr(child, e) and result = HC_NoExceptExpr(child))
+  or
+  mk_Nullptr(e) and result = HC_Nullptr()
+  or
+  not analyzableExpr(e, _) and result = HC_Unanalyzable(e)
+}
+
+/**
+ * Holds if the expression `e` is explicitly handled by `hashCons`, where `kind`
+ * names the analyzable category it falls into (for example "IntLiteral").
+ * Expressions for which this does not hold still get a hash-cons from `hashCons`:
+ * `HC_Unanalyzable(e)`, which is built from the expression itself and is
+ * presumably not shared with any other expression.
+ */
+predicate analyzableExpr(Expr e, string kind) {
+ analyzableIntLiteral(e) and kind = "IntLiteral"
+ or
+ analyzableEnumConstantAccess(e) and kind = "EnumConstantAccess"
+ or
+ analyzableFloatLiteral(e) and kind = "FloatLiteral"
+ or
+ analyzableStringLiteral(e) and kind = "StringLiteral"
+ or
+ analyzableNullptr(e) and kind = "Nullptr"
+ or
+ analyzableDotFieldAccess(e) and kind = "DotFieldAccess"
+ or
+ analyzablePointerFieldAccess(e) and kind = "PointerFieldAccess"
+ or
+ analyzableImplicitThisFieldAccess(e) and kind = "ImplicitThisFieldAccess"
+ or
+ analyzableVariable(e) and kind = "Variable"
+ or
+ analyzableConversion(e) and kind = "Conversion"
+ or
+ analyzableBinaryOp(e) and kind = "BinaryOp"
+ or
+ analyzableUnaryOp(e) and kind = "UnaryOp"
+ or
+ analyzableThisExpr(e) and kind = "ThisExpr"
+ or
+ analyzableArrayAccess(e) and kind = "ArrayAccess"
+ or
+ analyzablePointerDereferenceExpr(e) and kind = "PointerDereferenceExpr"
+ or
+ analyzableNonmemberFunctionCall(e) and kind = "NonmemberFunctionCall"
+ or
+ analyzableMemberFunctionCall(e) and kind = "MemberFunctionCall"
+ or
+ analyzableExprCall(e) and kind = "ExprCall"
+ or
+ analyzableNewExpr(e) and kind = "NewExpr"
+ or
+ analyzableNewArrayExpr(e) and kind = "NewArrayExpr"
+ or
+ analyzableSizeofType(e) and kind = "SizeofTypeOperator"
+ or
+ analyzableSizeofExpr(e) and kind = "SizeofExprOperator"
+ or
+ analyzableAlignofType(e) and kind = "AlignofTypeOperator"
+ or
+ analyzableAlignofExpr(e) and kind = "AlignofExprOperator"
+ or
+ analyzableUuidofOperator(e) and kind = "UuidofOperator"
+ or
+ analyzableTypeidType(e) and kind = "TypeidType"
+ or
+ analyzableTypeidExpr(e) and kind = "TypeidExpr"
+ or
+ analyzableClassAggregateLiteral(e) and kind = "ClassAggregateLiteral"
+ or
+ analyzableArrayAggregateLiteral(e) and kind = "ArrayAggregateLiteral"
+ or
+ analyzableDeleteExpr(e) and kind = "DeleteExpr"
+ or
+ analyzableDeleteArrayExpr(e) and kind = "DeleteArrayExpr"
+ or
+ analyzableThrowExpr(e) and kind = "ThrowExpr"
+ or
+ analyzableReThrowExpr(e) and kind = "ReThrowExpr"
+ or
+ analyzableConditionalExpr(e) and kind = "ConditionalExpr"
+ or
+ analyzableNoExceptExpr(e) and kind = "NoExceptExpr"
+ or
+ analyzableAllocatorArgZero(e) and kind = "AllocatorArgZero"
+}
diff --git a/repo-tests/codeql/cpp/ql/lib/semmle/files/FileSystem.qll b/repo-tests/codeql/cpp/ql/lib/semmle/files/FileSystem.qll
new file mode 100644
index 00000000000..4dfb0ae670a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmle/files/FileSystem.qll
@@ -0,0 +1,3 @@
+/** Provides classes for working with files and folders. */
+
+import semmle.code.cpp.File
diff --git a/repo-tests/codeql/cpp/ql/lib/semmlecode.cpp.dbscheme b/repo-tests/codeql/cpp/ql/lib/semmlecode.cpp.dbscheme
new file mode 100644
index 00000000000..018f430097e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/semmlecode.cpp.dbscheme
@@ -0,0 +1,2136 @@
+
+/**
+ * An invocation of the compiler. Note that more than one file may be
+ * compiled per invocation. For example, this command compiles three
+ * source files:
+ *
+ * gcc -c f1.c f2.c f3.c
+ *
+ * The `id` simply identifies the invocation, while `cwd` is the working
+ * directory from which the compiler was invoked.
+ */
+compilations(
+ /**
+ * An invocation of the compiler. Note that more than one file may
+ * be compiled per invocation. For example, this command compiles
+ * three source files:
+ *
+ * gcc -c f1.c f2.c f3.c
+ */
+ unique int id : @compilation,
+ string cwd : string ref
+);
+
+/**
+ * The arguments that were passed to the extractor for a compiler
+ * invocation. If `id` is for the compiler invocation
+ *
+ * gcc -c f1.c f2.c f3.c
+ *
+ * then typically there will be rows for
+ *
+ * num | arg
+ * --- | ---
+ * 0 | *path to extractor*
+ * 1 | `--mimic`
+ * 2 | `/usr/bin/gcc`
+ * 3 | `-c`
+ * 4 | f1.c
+ * 5 | f2.c
+ * 6 | f3.c
+ */
+#keyset[id, num]
+compilation_args(
+ int id : @compilation ref,
+ int num : int ref,
+ string arg : string ref
+);
+
+/**
+ * The source files that are compiled by a compiler invocation.
+ * If `id` is for the compiler invocation
+ *
+ * gcc -c f1.c f2.c f3.c
+ *
+ * then there will be rows for
+ *
+ * num | arg
+ * --- | ---
+ * 0 | f1.c
+ * 1 | f2.c
+ * 2 | f3.c
+ *
+ * Note that even if those files `#include` headers, those headers
+ * do not appear as rows.
+ */
+#keyset[id, num]
+compilation_compiling_files(
+ int id : @compilation ref,
+ int num : int ref,
+ int file : @file ref
+);
+
+/**
+ * The time taken by the extractor for a compiler invocation.
+ *
+ * For each file `num`, there will be rows for
+ *
+ * kind | seconds
+ * ---- | ---
+ * 1 | CPU seconds used by the extractor frontend
+ * 2 | Elapsed seconds during the extractor frontend
+ * 3 | CPU seconds used by the extractor backend
+ * 4 | Elapsed seconds during the extractor backend
+ */
+#keyset[id, num, kind]
+compilation_time(
+ int id : @compilation ref,
+ int num : int ref,
+ /* kind:
+ 1 = frontend_cpu_seconds
+ 2 = frontend_elapsed_seconds
+ 3 = extractor_cpu_seconds
+ 4 = extractor_elapsed_seconds
+ */
+ int kind : int ref,
+ float seconds : float ref
+);
+
+/**
+ * An error or warning generated by the extractor.
+ * The diagnostic message `diagnostic` was generated during compiler
+ * invocation `compilation`, and is the `file_number_diagnostic_number`th
+ * message generated while extracting the `file_number`th file of that
+ * invocation.
+ */
+#keyset[compilation, file_number, file_number_diagnostic_number]
+diagnostic_for(
+ int diagnostic : @diagnostic ref,
+ int compilation : @compilation ref,
+ int file_number : int ref,
+ int file_number_diagnostic_number : int ref
+);
+
+/**
+ * If extraction was successful, then `cpu_seconds` and
+ * `elapsed_seconds` are the CPU time and elapsed time (respectively)
+ * that extraction took for compiler invocation `id`.
+ */
+compilation_finished(
+ unique int id : @compilation ref,
+ float cpu_seconds : float ref,
+ float elapsed_seconds : float ref
+);
+
+
+/**
+ * External data, loaded from CSV files during snapshot creation. See
+ * [Tutorial: Incorporating external data](https://help.semmle.com/wiki/display/SD/Tutorial%3A+Incorporating+external+data)
+ * for more information.
+ */
+externalData(
+ int id : @externalDataElement,
+ string path : string ref,
+ int column: int ref,
+ string value : string ref
+);
+
+/**
+ * The date of the snapshot.
+ */
+snapshotDate(unique date snapshotDate : date ref);
+
+/**
+ * The source location of the snapshot.
+ */
+sourceLocationPrefix(string prefix : string ref);
+
+/**
+ * Data used by the 'duplicate code' detection.
+ */
+duplicateCode(
+ unique int id : @duplication,
+ string relativePath : string ref,
+ int equivClass : int ref
+);
+
+/**
+ * Data used by the 'similar code' detection.
+ */
+similarCode(
+ unique int id : @similarity,
+ string relativePath : string ref,
+ int equivClass : int ref
+);
+
+/**
+ * Data used by the 'duplicate code' and 'similar code' detection.
+ */
+@duplication_or_similarity = @duplication | @similarity
+
+/**
+ * Data used by the 'duplicate code' and 'similar code' detection.
+ */
+#keyset[id, offset]
+tokens(
+ int id : @duplication_or_similarity ref,
+ int offset : int ref,
+ int beginLine : int ref,
+ int beginColumn : int ref,
+ int endLine : int ref,
+ int endColumn : int ref
+);
+
+/**
+ * Information about packages that provide code used during compilation.
+ * The `id` is just a unique identifier.
+ * The `namespace` is typically the name of the package manager that
+ * provided the package (e.g. "dpkg" or "yum").
+ * The `package_name` is the name of the package, and `version` is its
+ * version (as a string).
+ */
+external_packages(
+ unique int id: @external_package,
+ string namespace : string ref,
+ string package_name : string ref,
+ string version : string ref
+);
+
+/**
+ * Holds if File `fileid` was provided by package `package`.
+ */
+header_to_external_package(
+ int fileid : @file ref,
+ int package : @external_package ref
+);
+
+/*
+ * Version history
+ */
+
+svnentries( /* One row per @svnentry: its revision, author, revision date and change size. */
+ unique int id : @svnentry,
+ string revision : string ref,
+ string author : string ref,
+ date revisionDate : date ref,
+ int changeSize : int ref
+)
+
+svnaffectedfiles( /* Relates an @svnentry to a @file, with an `action` string (values not listed here). */
+ int id : @svnentry ref,
+ int file : @file ref,
+ string action : string ref
+)
+
+svnentrymsg( /* The message of an @svnentry; `id` is unique, so at most one row per entry. */
+ unique int id : @svnentry ref,
+ string message : string ref
+)
+
+svnchurn( /* Added and deleted line counts recorded for a (commit, file) pair. */
+ int commit : @svnentry ref,
+ int file : @file ref,
+ int addedLines : int ref,
+ int deletedLines : int ref
+)
+
+/*
+ * C++ dbscheme
+ */
+
+@location = @location_stmt | @location_expr | @location_default ;
+
+/**
+ * The location of an element that is not an expression or a statement.
+ * The location spans column `startColumn` of line `startLine` to
+ * column `endColumn` of line `endLine` in the container `container`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+locations_default(
+ /** The location of an element that is not an expression or a statement. */
+ unique int id: @location_default,
+ int container: @container ref,
+ int startLine: int ref,
+ int startColumn: int ref,
+ int endLine: int ref,
+ int endColumn: int ref
+);
+
+/**
+ * The location of a statement.
+ * The location spans column `startColumn` of line `startLine` to
+ * column `endColumn` of line `endLine` in the container `container`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+locations_stmt(
+ /** The location of a statement. */
+ unique int id: @location_stmt,
+ int container: @container ref,
+ int startLine: int ref,
+ int startColumn: int ref,
+ int endLine: int ref,
+ int endColumn: int ref
+);
+
+/**
+ * The location of an expression.
+ * The location spans column `startColumn` of line `startLine` to
+ * column `endColumn` of line `endLine` in the container `container`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+locations_expr(
+ /** The location of an expression. */
+ unique int id: @location_expr,
+ int container: @container ref,
+ int startLine: int ref,
+ int startColumn: int ref,
+ int endLine: int ref,
+ int endColumn: int ref
+);
+
+/** An element for which line-count information is available. */
+@sourceline = @file | @function | @variable | @enumconstant | @xmllocatable;
+
+numlines( /* Line counts for a @sourceline element: `num_lines`, `num_code` and `num_comment`. */
+ int element_id: @sourceline ref,
+ int num_lines: int ref,
+ int num_code: int ref,
+ int num_comment: int ref
+);
+
+diagnostics( /* One row per @diagnostic: severity, error tag, short and full message, and location. */
+ unique int id: @diagnostic,
+ int severity: int ref,
+ string error_tag: string ref,
+ string error_message: string ref,
+ string full_error_message: string ref,
+ int location: @location_default ref
+);
+
+/** Introduces each `@file` entity together with its name. */
+files(
+ unique int id: @file,
+ string name: string ref
+);
+
+/** Introduces each `@folder` entity together with its name. */
+folders(
+ unique int id: @folder,
+ string name: string ref
+);
+
+/** Either a `@folder` or a `@file`. */
+@container = @folder | @file;
+
+/**
+ * Relates a `@container` to its parent `@container`. The `child` column is
+ * `unique`, so each container has at most one parent.
+ */
+containerparent(
+ int parent: @container ref,
+ unique int child: @container ref
+);
+
+/**
+ * A (`kind`, `name`, `value`) annotation attached to a `@file`.
+ * NOTE(review): the meaning of the `kind` values is not defined in this part
+ * of the schema.
+ */
+fileannotations(
+ int id: @file ref,
+ int kind: int ref,
+ string name: string ref,
+ string value: string ref
+);
+
+/**
+ * Relates an `@element` to a `@macroinvocation`.
+ * NOTE(review): presumably the element lies inside the expansion of `inv`;
+ * the exact difference from `affectedbymacroexpansion` is not stated here.
+ */
+inmacroexpansion(
+ int id: @element ref,
+ int inv: @macroinvocation ref
+);
+
+/**
+ * Relates an `@element` to a `@macroinvocation`.
+ * NOTE(review): presumably the element is affected by, but not necessarily
+ * entirely inside, the expansion of `inv`; confirm against the extractor.
+ */
+affectedbymacroexpansion(
+ int id: @element ref,
+ int inv: @macroinvocation ref
+);
+
+/*
+ case @macroinvocations.kind of
+ 1 = macro expansion
+ | 2 = other macro reference
+ ;
+*/
+/**
+ * Introduces each `@macroinvocation`, giving the `@ppd_define` it refers to,
+ * its location, and its `kind` (see the list of kinds above).
+ */
+macroinvocations(
+ unique int id: @macroinvocation,
+ int macro_id: @ppd_define ref,
+ int location: @location_default ref,
+ int kind: int ref
+);
+
+/**
+ * Relates a `@macroinvocation` to a parent `@macroinvocation`. The `id`
+ * column is `unique`, so each invocation has at most one parent.
+ */
+macroparent(
+ unique int id: @macroinvocation ref,
+ int parent_id: @macroinvocation ref
+);
+
+// A macro invocation may be part of another location.
+// The way to find a constant expression that uses a macro
+// is thus to find a constant expression that has a location
+// to which a macro invocation is bound.
+// This table binds a `@macroinvocation` to such a `@location`.
+macrolocationbind(
+ int id: @macroinvocation ref,
+ int location: @location ref
+);
+
+/**
+ * The text of the argument at position `argument_index` of a
+ * `@macroinvocation`. The keyset makes (`invocation`, `argument_index`) a key.
+ * NOTE(review): per the table name this is the text before macro expansion.
+ */
+#keyset[invocation, argument_index]
+macro_argument_unexpanded(
+ int invocation: @macroinvocation ref,
+ int argument_index: int ref,
+ string text: string ref
+);
+
+/**
+ * The text of the argument at position `argument_index` of a
+ * `@macroinvocation`. The keyset makes (`invocation`, `argument_index`) a key.
+ * NOTE(review): per the table name this is the text after macro expansion.
+ */
+#keyset[invocation, argument_index]
+macro_argument_expanded(
+ int invocation: @macroinvocation ref,
+ int argument_index: int ref,
+ string text: string ref
+);
+
+/*
+ case @function.kind of
+ 1 = normal
+ | 2 = constructor
+ | 3 = destructor
+ | 4 = conversion
+ | 5 = operator
+ | 6 = builtin // GCC built-in functions, e.g. __builtin___memcpy_chk
+ ;
+*/
+/**
+ * Introduces each `@function` with its name and its `kind` (see the list of
+ * kinds above).
+ */
+functions(
+ unique int id: @function,
+ string name: string ref,
+ int kind: int ref
+);
+
+/**
+ * Relates a `@function` to an entry-point `@stmt`. The `entry_point` column is
+ * `unique`, so a statement is the entry point of at most one function.
+ */
+function_entry_point(int id: @function ref, unique int entry_point: @stmt ref);
+
+/** Relates a `@function` to its return `@type`. */
+function_return_type(int id: @function ref, int return_type: @type ref);
+
+/**
+ * If `function` is a coroutine, then this gives the
+ * std::experimental::resumable_traits instance associated with it (`traits`),
+ * and the variables representing the `handle` and `promise` for it.
+ * The `function` column is `unique`: at most one row per function.
+ */
+coroutine(
+ unique int function: @function ref,
+ int traits: @type ref,
+ int handle: @variable ref,
+ int promise: @variable ref
+);
+
+/**
+ * The `new` function used for allocating the coroutine state, if any.
+ * At most one row per `function`.
+ */
+coroutine_new(
+ unique int function: @function ref,
+ int new: @function ref
+);
+
+/**
+ * The `delete` function used for deallocating the coroutine state, if any.
+ * At most one row per `function`.
+ */
+coroutine_delete(
+ unique int function: @function ref,
+ int delete: @function ref
+);
+
+/**
+ * Marks a `@function`.
+ * NOTE(review): per the table name, the marked functions are "pure"; the
+ * exact definition of purity is not stated here.
+ */
+purefunctions(unique int id: @function ref);
+
+/** Marks a `@function`; per the table name, one that is deleted. */
+function_deleted(unique int id: @function ref);
+
+/** Marks a `@function`; per the table name, one that is defaulted. */
+function_defaulted(unique int id: @function ref);
+
+/**
+ * Relates a `@function` to a `@type` named `this_type`; at most one row per
+ * function.
+ */
+member_function_this_type(unique int id: @function ref, int this_type: @type ref);
+
+/**
+ * A declaration entry (`@fun_decl`) of a `@function`, with the declared type,
+ * name and location. The keyset makes (`id`, `type_id`) a key.
+ */
+#keyset[id, type_id]
+fun_decls(
+ int id: @fun_decl,
+ int function: @function ref,
+ int type_id: @type ref,
+ string name: string ref,
+ int location: @location_default ref
+);
+/** Marks a `@fun_decl`; per the table name, one that is a definition. */
+fun_def(unique int id: @fun_decl ref);
+/** Marks a `@fun_decl`; per the table name, one that is a specialization. */
+fun_specialized(unique int id: @fun_decl ref);
+/** Marks a `@fun_decl`; per the table name, one that is implicit. */
+fun_implicit(unique int id: @fun_decl ref);
+/** Associates a specifier, given by its `name`, with a `@fun_decl`. */
+fun_decl_specifiers(
+ int id: @fun_decl ref,
+ string name: string ref
+);
+/**
+ * The `index`'th type listed in the throw specification of a `@fun_decl`.
+ * The keyset makes (`fun_decl`, `index`) a key.
+ */
+#keyset[fun_decl, index]
+fun_decl_throws(
+ int fun_decl: @fun_decl ref,
+ int index: int ref,
+ int type_id: @type ref
+);
+/* an empty throw specification is different from none */
+fun_decl_empty_throws(unique int fun_decl: @fun_decl ref);
+/**
+ * Relates a `@fun_decl` to an `@expr` named `constant`.
+ * NOTE(review): presumably the operand of a `noexcept(...)` specification.
+ */
+fun_decl_noexcept(
+ int fun_decl: @fun_decl ref,
+ int constant: @expr ref
+);
+/**
+ * Marks a `@fun_decl`.
+ * NOTE(review): presumably one with a `noexcept` specification that has no
+ * operand.
+ */
+fun_decl_empty_noexcept(int fun_decl: @fun_decl ref);
+/**
+ * Relates a `@fun_decl` to a `@usertype` named `typedeftype_id`; at most one
+ * row per `fun_decl`.
+ */
+fun_decl_typedef_type(
+ unique int fun_decl: @fun_decl ref,
+ int typedeftype_id: @usertype ref
+);
+
+/**
+ * Binds a `@var_decl` to position `index` of a `@fun_decl`; at most one row
+ * per `@var_decl`.
+ */
+param_decl_bind(
+ unique int id: @var_decl ref,
+ int index: int ref,
+ int fun_decl: @fun_decl ref
+);
+
+/**
+ * A declaration entry (`@var_decl`) of a `@variable`, with the declared type,
+ * name and location. The keyset makes (`id`, `type_id`) a key.
+ */
+#keyset[id, type_id]
+var_decls(
+ int id: @var_decl,
+ int variable: @variable ref,
+ int type_id: @type ref,
+ string name: string ref,
+ int location: @location_default ref
+);
+/** Marks a `@var_decl`; per the table name, one that is a definition. */
+var_def(unique int id: @var_decl ref);
+/** Associates a specifier, given by its `name`, with a `@var_decl`. */
+var_decl_specifiers(
+ int id: @var_decl ref,
+ string name: string ref
+);
+
+/** A declaration entry (`@type_decl`) of a `@type`, with its location. */
+type_decls(
+ unique int id: @type_decl,
+ int type_id: @type ref,
+ int location: @location_default ref
+);
+/** Marks a `@type_decl`; per the table name, one that is a definition. */
+type_def(unique int id: @type_decl ref);
+/**
+ * Marks a `@type_decl`.
+ * NOTE(review): presumably one that appears at top level; confirm.
+ */
+type_decl_top(
+ unique int type_decl: @type_decl ref
+);
+
+/**
+ * A declaration entry (`@namespace_decl`) of a `@namespace`, with two
+ * locations: `location` and `bodylocation`.
+ */
+namespace_decls(
+ unique int id: @namespace_decl,
+ int namespace_id: @namespace ref,
+ int location: @location_default ref,
+ int bodylocation: @location_default ref
+);
+
+/**
+ * Introduces each `@using`, giving the `@element` it refers to and its
+ * location.
+ */
+usings(
+ unique int id: @using,
+ int element_id: @element ref,
+ int location: @location_default ref
+);
+
+/** The element (`parent`) which contains the `using` declaration (`child`). */
+using_container(
+ int parent: @element ref,
+ int child: @using ref
+);
+
+/**
+ * Introduces each `@static_assert`, with its condition expression, message
+ * string, location, and enclosing `@element`.
+ */
+static_asserts(
+ unique int id: @static_assert,
+ int condition : @expr ref,
+ string message : string ref,
+ int location: @location_default ref,
+ int enclosing : @element ref
+);
+
+// Each function has an ordered list of parameters.
+// A row gives the `@parameter` at position `index` of `function` (a
+// `@functionorblock`) together with its type. Both (`id`, `type_id`) and
+// (`function`, `index`, `type_id`) are keys.
+#keyset[id, type_id]
+#keyset[function, index, type_id]
+params(
+ int id: @parameter,
+ int function: @functionorblock ref,
+ int index: int ref,
+ int type_id: @type ref
+);
+
+/**
+ * Relates two `@function`s, `new` and `old`.
+ * NOTE(review): per the table and column names, `new` overrides `old`.
+ */
+overrides(int new: @function ref, int old: @function ref);
+
+/** Introduces each `@membervariable` with its type and name. */
+#keyset[id, type_id]
+membervariables(
+ int id: @membervariable,
+ int type_id: @type ref,
+ string name: string ref
+);
+
+/** Introduces each `@globalvariable` with its type and name. */
+#keyset[id, type_id]
+globalvariables(
+ int id: @globalvariable,
+ int type_id: @type ref,
+ string name: string ref
+);
+
+/** Introduces each `@localvariable` with its type and name. */
+#keyset[id, type_id]
+localvariables(
+ int id: @localvariable,
+ int type_id: @type ref,
+ string name: string ref
+);
+
+/**
+ * Relates a `@variable` to a `@type` named `derivation_type`; at most one row
+ * per variable.
+ * NOTE(review): presumably records the type derived for an `auto`-typed
+ * variable; confirm against the extractor.
+ */
+autoderivation(
+ unique int var: @variable ref,
+ int derivation_type: @type ref
+);
+
+/**
+ * Introduces each `@enumconstant`: the `@usertype` it belongs to (`parent`),
+ * its position `index` within that parent, its type, name and location.
+ */
+enumconstants(
+ unique int id: @enumconstant,
+ int parent: @usertype ref,
+ int index: int ref,
+ int type_id: @type ref,
+ string name: string ref,
+ int location: @location_default ref
+);
+
+/** Any variable: local-scope, global, or member. */
+@variable = @localscopevariable | @globalvariable | @membervariable;
+
+/** A variable with local scope: a local variable or a parameter. */
+@localscopevariable = @localvariable | @parameter;
+
+/*
+ Built-in types are the fundamental types, e.g., integral, floating, and void.
+
+ case @builtintype.kind of
+ 1 = error
+ | 2 = unknown
+ | 3 = void
+ | 4 = boolean
+ | 5 = char
+ | 6 = unsigned_char
+ | 7 = signed_char
+ | 8 = short
+ | 9 = unsigned_short
+ | 10 = signed_short
+ | 11 = int
+ | 12 = unsigned_int
+ | 13 = signed_int
+ | 14 = long
+ | 15 = unsigned_long
+ | 16 = signed_long
+ | 17 = long_long
+ | 18 = unsigned_long_long
+ | 19 = signed_long_long
+ | 20 = __int8 // Microsoft-specific
+ | 21 = __int16 // Microsoft-specific
+ | 22 = __int32 // Microsoft-specific
+ | 23 = __int64 // Microsoft-specific
+ | 24 = float
+ | 25 = double
+ | 26 = long_double
+ | 27 = _Complex_float // C99-specific
+ | 28 = _Complex_double // C99-specific
+ | 29 = _Complex_long_double // C99-specific
+ | 30 = _Imaginary_float // C99-specific
+ | 31 = _Imaginary_double // C99-specific
+ | 32 = _Imaginary_long_double // C99-specific
+ | 33 = wchar_t // Microsoft-specific
+ | 34 = decltype_nullptr // C++11
+ | 35 = __int128
+ | 36 = unsigned___int128
+ | 37 = signed___int128
+ | 38 = __float128
+ | 39 = _Complex___float128
+ | 40 = _Decimal32
+ | 41 = _Decimal64
+ | 42 = _Decimal128
+ | 43 = char16_t
+ | 44 = char32_t
+ | 45 = _Float32
+ | 46 = _Float32x
+ | 47 = _Float64
+ | 48 = _Float64x
+ | 49 = _Float128
+ | 50 = _Float128x
+ | 51 = char8_t
+ ;
+*/
+/**
+ * Introduces each `@builtintype` with its name, `kind` (see the list above),
+ * size, sign and alignment.
+ * NOTE(review): the units of `size`/`alignment` and the encoding of `sign`
+ * are not stated in this part of the schema.
+ */
+builtintypes(
+ unique int id: @builtintype,
+ string name: string ref,
+ int kind: int ref,
+ int size: int ref,
+ int sign: int ref,
+ int alignment: int ref
+);
+
+/*
+ Derived types are types that are directly derived from existing types and
+ point to, refer to, transform type data to return a new type.
+
+ case @derivedtype.kind of
+ 1 = pointer
+ | 2 = reference
+ | 3 = type_with_specifiers
+ | 4 = array
+ | 5 = gnu_vector
+ | 6 = routineptr
+ | 7 = routinereference
+ | 8 = rvalue_reference // C++11
+// ... 9 type_conforming_to_protocols deprecated
+ | 10 = block
+ ;
+*/
+/**
+ * Introduces each `@derivedtype` with its name, `kind` (see the list above),
+ * and the `@type` it is derived from (`type_id`).
+ */
+derivedtypes(
+ unique int id: @derivedtype,
+ string name: string ref,
+ int kind: int ref,
+ int type_id: @type ref
+);
+
+/**
+ * The size and alignment of a `@derivedtype`; at most one row per type.
+ * NOTE(review): per the table name this covers pointer-like derived types;
+ * units are not stated here.
+ */
+pointerishsize(unique int id: @derivedtype ref,
+ int size: int ref,
+ int alignment: int ref);
+
+/**
+ * The number of elements, size in bytes (`bytesize`) and alignment of a
+ * `@derivedtype`; at most one row per type.
+ * NOTE(review): per the table name this covers array types.
+ */
+arraysizes(
+ unique int id: @derivedtype ref,
+ int num_elements: int ref,
+ int bytesize: int ref,
+ int alignment: int ref
+);
+
+/**
+ * Relates a `@usertype` to a `@type`; at most one row per user type.
+ * NOTE(review): per the table name, `type_id` is the base type of a typedef.
+ */
+typedefbase(
+ unique int id: @usertype ref,
+ int type_id: @type ref
+);
+
+/**
+ * An instance of the C++11 `decltype` operator. For example:
+ * ```
+ * int a;
+ * decltype(1+a) b;
+ * ```
+ * Here `expr` is `1+a`.
+ *
+ * Sometimes an additional pair of parentheses around the expression
+ * would change the semantics of this decltype, e.g.
+ * ```
+ * struct A { double x; };
+ * const A* a = new A();
+ * decltype( a->x ); // type is double
+ * decltype((a->x)); // type is const double&
+ * ```
+ * (Please consult the C++11 standard for more details).
+ * `parentheses_would_change_meaning` is `true` iff that is the case.
+ *
+ * The keyset makes (`id`, `expr`) a key. The `base_type` column refers to a
+ * `@type`; NOTE(review): presumably the type the `decltype` denotes.
+ */
+#keyset[id, expr]
+decltypes(
+ int id: @decltype,
+ int expr: @expr ref,
+ int base_type: @type ref,
+ boolean parentheses_would_change_meaning: boolean ref
+);
+
+/*
+ case @usertype.kind of
+ 1 = struct
+ | 2 = class
+ | 3 = union
+ | 4 = enum
+ | 5 = typedef // classic C: typedef type name
+ | 6 = template
+ | 7 = template_parameter
+ | 8 = template_template_parameter
+ | 9 = proxy_class // a proxy class associated with a template parameter
+// ... 10 objc_class deprecated
+// ... 11 objc_protocol deprecated
+// ... 12 objc_category deprecated
+ | 13 = scoped_enum
+ | 14 = using_alias // a using name = type style typedef
+ ;
+*/
+/**
+ * Introduces each `@usertype` with its name and `kind` (see the list above).
+ */
+usertypes(
+ unique int id: @usertype,
+ string name: string ref,
+ int kind: int ref
+);
+
+/**
+ * The size and alignment of a `@usertype`; at most one row per type.
+ * NOTE(review): units are not stated here.
+ */
+usertypesize(
+ unique int id: @usertype ref,
+ int size: int ref,
+ int alignment: int ref
+);
+
+/** Marks a `@usertype`; per the table name, one that is `final`. */
+usertype_final(unique int id: @usertype ref);
+
+/**
+ * Associates a UUID string with a `@usertype`. Both columns are `unique`, so
+ * the relation is one-to-one.
+ */
+usertype_uuid(
+ unique int id: @usertype ref,
+ unique string uuid: string ref
+);
+
+/**
+ * Associates a `@declaration` with a `@mangledname` entity; at most one row
+ * per declaration.
+ */
+mangled_name(
+ unique int id: @declaration ref,
+ int mangled_name : @mangledname
+);
+
+/** Marks a `@usertype`; per the table name, a POD class. */
+is_pod_class(unique int id: @usertype ref);
+/** Marks a `@usertype`; per the table name, a standard-layout class. */
+is_standard_layout_class(unique int id: @usertype ref);
+
+/** Marks a `@usertype`; per the table name, one that is complete. */
+is_complete(unique int id: @usertype ref);
+
+/** Marks a `@usertype`; per the table name, a class template. */
+is_class_template(unique int id: @usertype ref);
+/**
+ * Relates two `@usertype`s, `to` and `from`.
+ * NOTE(review): presumably `to` is an instantiation of the template `from`.
+ */
+class_instantiation(
+ int to: @usertype ref,
+ int from: @usertype ref
+);
+/** The type argument at position `index` of the `@usertype` `type_id`. */
+class_template_argument(
+ int type_id: @usertype ref,
+ int index: int ref,
+ int arg_type: @type ref
+);
+/**
+ * The value argument (an `@expr`) at position `index` of the `@usertype`
+ * `type_id`.
+ */
+class_template_argument_value(
+ int type_id: @usertype ref,
+ int index: int ref,
+ int arg_value: @expr ref
+);
+
+/**
+ * Relates a `@usertype` to another `@usertype` named `templ_param_id`. Both
+ * columns are `unique`, so the relation is one-to-one.
+ * NOTE(review): per the names, `id` is the proxy class for a template
+ * parameter.
+ */
+is_proxy_class_for(
+ unique int id: @usertype ref,
+ unique int templ_param_id: @usertype ref
+);
+
+/**
+ * Introduces each `@type_mention`: the `@type` mentioned, the location of the
+ * mention, and a `kind` code.
+ */
+type_mentions(
+ unique int id: @type_mention,
+ int type_id: @type ref,
+ int location: @location ref,
+ // a_symbol_reference_kind from the EDG frontend. See symbol_ref.h there.
+ int kind: int ref
+);
+
+/** Marks a `@function`; per the table name, a function template. */
+is_function_template(unique int id: @function ref);
+/**
+ * Relates two `@function`s, `to` and `from`; at most one row per `to`.
+ * NOTE(review): presumably `to` is an instantiation of the template `from`.
+ */
+function_instantiation(
+ unique int to: @function ref,
+ int from: @function ref
+);
+/** The type argument at position `index` of the `@function` `function_id`. */
+function_template_argument(
+ int function_id: @function ref,
+ int index: int ref,
+ int arg_type: @type ref
+);
+/**
+ * The value argument (an `@expr`) at position `index` of the `@function`
+ * `function_id`.
+ */
+function_template_argument_value(
+ int function_id: @function ref,
+ int index: int ref,
+ int arg_value: @expr ref
+);
+
+/** Marks a `@variable`; per the table name, a variable template. */
+is_variable_template(unique int id: @variable ref);
+/**
+ * Relates two `@variable`s, `to` and `from`; at most one row per `to`.
+ * NOTE(review): presumably `to` is an instantiation of the template `from`.
+ */
+variable_instantiation(
+ unique int to: @variable ref,
+ int from: @variable ref
+);
+/** The type argument at position `index` of the `@variable` `variable_id`. */
+variable_template_argument(
+ int variable_id: @variable ref,
+ int index: int ref,
+ int arg_type: @type ref
+);
+/**
+ * The value argument (an `@expr`) at position `index` of the `@variable`
+ * `variable_id`.
+ */
+variable_template_argument_value(
+ int variable_id: @variable ref,
+ int index: int ref,
+ int arg_value: @expr ref
+);
+
+/*
+ Fixed point types
+ precision(1) = short, precision(2) = default, precision(3) = long
+ is_unsigned(1) = unsigned is_unsigned(2) = signed
+ is_fract_type(1) = declared with _Fract
+ saturating(1) = declared with _Sat
+*/
+/* TODO
+fixedpointtypes(
+ unique int id: @fixedpointtype,
+ int precision: int ref,
+ int is_unsigned: int ref,
+ int is_fract_type: int ref,
+ int saturating: int ref);
+*/
+
+/** Introduces each `@routinetype` with its return type. */
+routinetypes(
+ unique int id: @routinetype,
+ int return_type: @type ref
+);
+
+/** The type at position `index` in the argument list of a `@routinetype`. */
+routinetypeargs(
+ int routine: @routinetype ref,
+ int index: int ref,
+ int type_id: @type ref
+);
+
+/**
+ * Introduces each `@ptrtomember` with two `@type` references, `type_id` and
+ * `class_id`.
+ * NOTE(review): presumably the pointed-to member type and the class the
+ * member belongs to, respectively.
+ */
+ptrtomembers(
+ unique int id: @ptrtomember,
+ int type_id: @type ref,
+ int class_id: @type ref
+);
+
+/*
+ specifiers for types, functions, and variables
+
+ "public",
+ "protected",
+ "private",
+
+ "const",
+ "volatile",
+ "static",
+
+ "pure",
+ "virtual",
+ "sealed", // Microsoft
+ "__interface", // Microsoft
+ "inline",
+ "explicit",
+
+ "near", // near far extension
+ "far", // near far extension
+ "__ptr32", // Microsoft
+ "__ptr64", // Microsoft
+ "__sptr", // Microsoft
+ "__uptr", // Microsoft
+ "dllimport", // Microsoft
+ "dllexport", // Microsoft
+ "thread", // Microsoft
+ "naked", // Microsoft
+ "microsoft_inline", // Microsoft
+ "forceinline", // Microsoft
+ "selectany", // Microsoft
+ "nothrow", // Microsoft
+ "novtable", // Microsoft
+ "noreturn", // Microsoft
+ "noinline", // Microsoft
+ "noalias", // Microsoft
+ "restrict", // Microsoft
+*/
+
+/**
+ * Introduces each `@specifier` with its string form. Both columns are
+ * `unique`, so each string corresponds to exactly one specifier entity.
+ */
+specifiers(
+ unique int id: @specifier,
+ unique string str: string ref
+);
+
+/** Associates a `@specifier` with a `@type`. */
+typespecifiers(
+ int type_id: @type ref,
+ int spec_id: @specifier ref
+);
+
+/** Associates a `@specifier` with a `@function`. */
+funspecifiers(
+ int func_id: @function ref,
+ int spec_id: @specifier ref
+);
+
+/** Associates a `@specifier` with an `@accessible`. */
+varspecifiers(
+ int var_id: @accessible ref,
+ int spec_id: @specifier ref
+);
+
+/**
+ * Introduces each `@attribute` with its `kind` (see the `case` below), name,
+ * namespace string and location.
+ */
+attributes(
+ unique int id: @attribute,
+ int kind: int ref,
+ string name: string ref,
+ string name_space: string ref,
+ int location: @location_default ref
+);
+
+/** Maps each value of `@attribute.kind` to a subtype of `@attribute`. */
+case @attribute.kind of
+ 0 = @gnuattribute
+| 1 = @stdattribute
+| 2 = @declspec
+| 3 = @msattribute
+| 4 = @alignas
+// ... 5 @objc_propertyattribute deprecated
+;
+
+/**
+ * Introduces each `@attribute_arg`: its `kind` (see the `case` below), the
+ * `@attribute` it belongs to, its position `index`, and its location.
+ */
+attribute_args(
+ unique int id: @attribute_arg,
+ int kind: int ref,
+ int attribute: @attribute ref,
+ int index: int ref,
+ int location: @location_default ref
+);
+
+/** Maps each value of `@attribute_arg.kind` to a subtype of `@attribute_arg`. */
+case @attribute_arg.kind of
+ 0 = @attribute_arg_empty
+| 1 = @attribute_arg_token
+| 2 = @attribute_arg_constant
+| 3 = @attribute_arg_type
+;
+
+/** The string value of an `@attribute_arg`; at most one row per argument. */
+attribute_arg_value(
+ unique int arg: @attribute_arg ref,
+ string value: string ref
+);
+/** The `@type` of an `@attribute_arg`; at most one row per argument. */
+attribute_arg_type(
+ unique int arg: @attribute_arg ref,
+ int type_id: @type ref
+);
+/** The name of an `@attribute_arg`; at most one row per argument. */
+attribute_arg_name(
+ unique int arg: @attribute_arg ref,
+ string name: string ref
+);
+
+/** Associates an `@attribute` with a `@type`. */
+typeattributes(
+ int type_id: @type ref,
+ int spec_id: @attribute ref
+);
+
+/** Associates an `@attribute` with a `@function`. */
+funcattributes(
+ int func_id: @function ref,
+ int spec_id: @attribute ref
+);
+
+/** Associates an `@attribute` with an `@accessible`. */
+varattributes(
+ int var_id: @accessible ref,
+ int spec_id: @attribute ref
+);
+
+/** Associates an `@attribute` with a `@stmt`. */
+stmtattributes(
+ int stmt_id: @stmt ref,
+ int spec_id: @attribute ref
+);
+
+/**
+ * Any type: built-in, derived, user-defined, routine, pointer-to-member, or
+ * `decltype`.
+ */
+@type = @builtintype
+ | @derivedtype
+ | @usertype
+ /* TODO | @fixedpointtype */
+ | @routinetype
+ | @ptrtomember
+ | @decltype;
+
+/**
+ * Relates a `@type` to another `@type` named `unspecified_type_id`; at most
+ * one row per `type_id`.
+ * NOTE(review): presumably the type with specifiers stripped; confirm.
+ */
+unspecifiedtype(
+ unique int type_id: @type ref,
+ int unspecified_type_id: @type ref
+);
+
+/** The `@member` `child` at position `index` within the `@type` `parent`. */
+member(
+ int parent: @type ref,
+ int index: int ref,
+ int child: @member ref
+);
+
+/** An element that may appear as the `child` column of `enclosingfunction`. */
+@enclosingfunction_child = @usertype | @variable | @namespace;
+
+/**
+ * Relates an `@enclosingfunction_child` to a `@function` named `parent`; at
+ * most one row per child.
+ */
+enclosingfunction(
+ unique int child: @enclosingfunction_child ref,
+ int parent: @function ref
+);
+
+/**
+ * Introduces each `@derivation`: it relates the type `sub` to the type `super`
+ * at position `index`, and has a location.
+ */
+derivations(
+ unique int derivation: @derivation,
+ int sub: @type ref,
+ int index: int ref,
+ int super: @type ref,
+ int location: @location_default ref
+);
+
+/** Associates a `@specifier` with a `@derivation`. */
+derspecifiers(
+ int der_id: @derivation ref,
+ int spec_id: @specifier ref
+);
+
+/**
+ * Contains the byte offset of the base class subobject within the derived
+ * class. Only holds for non-virtual base classes, but see table
+ * `virtual_base_offsets` for offsets of virtual base class subobjects.
+ * There is at most one row per `@derivation`.
+ */
+direct_base_offsets(
+ unique int der_id: @derivation ref,
+ int offset: int ref
+);
+
+/**
+ * Contains the byte offset of the virtual base class subobject for class
+ * `super` within a most-derived object of class `sub`. `super` can be either a
+ * direct or indirect base class.
+ * The keyset makes (`sub`, `super`) a key.
+ */
+#keyset[sub, super]
+virtual_base_offsets(
+ int sub: @usertype ref,
+ int super: @usertype ref,
+ int offset: int ref
+);
+
+/**
+ * Introduces each `@frienddecl`, relating a `@type` (`type_id`) to a
+ * `@declaration` (`decl_id`), with a location.
+ * NOTE(review): presumably `type_id` is the class declaring `decl_id` as a
+ * friend.
+ */
+frienddecls(
+ unique int id: @frienddecl,
+ int type_id: @type ref,
+ int decl_id: @declaration ref,
+ int location: @location_default ref
+);
+
+/** Currently just an alias for `@usertype`. */
+@declaredtype = @usertype ;
+
+/**
+ * Any declaration: a function, declared type, variable, enum constant, or
+ * friend declaration.
+ */
+@declaration = @function
+ | @declaredtype
+ | @variable
+ | @enumconstant
+ | @frienddecl;
+
+/** Anything that may appear as the `child` column of the `member` table. */
+@member = @membervariable
+ | @function
+ | @declaredtype
+ | @enumconstant;
+
+/** Union of the entity kinds listed below that are treated as locatable. */
+@locatable = @diagnostic
+ | @declaration
+ | @ppd_include
+ | @ppd_define
+ | @macroinvocation
+ /*| @funcall*/
+ | @xmllocatable
+ | @attribute
+ | @attribute_arg;
+
+/** A named scope: a namespace or a user type. */
+@namedscope = @namespace | @usertype;
+
+/** The union of all entity kinds listed below, including every `@locatable`. */
+@element = @locatable
+ | @file
+ | @folder
+ | @specifier
+ | @type
+ | @expr
+ | @namespace
+ | @initialiser
+ | @stmt
+ | @derivation
+ | @comment
+ | @preprocdirect
+ | @fun_decl
+ | @var_decl
+ | @type_decl
+ | @namespace_decl
+ | @using
+ | @namequalifier
+ | @specialnamequalifyingelement
+ | @static_assert
+ | @type_mention
+ | @lambdacapture;
+
+/** The type of the `parent_id` column of `exprparents`; same as `@element`. */
+@exprparent = @element;
+
+/** Introduces each `@comment` with its text and location. */
+comments(
+ unique int id: @comment,
+ string contents: string ref,
+ int location: @location_default ref
+);
+
+/** Binds a `@comment` to an `@element`. */
+commentbinding(
+ int id: @comment ref,
+ int element: @element ref
+);
+
+/**
+ * Relates an expression `converted` to an expression `conversion`; each
+ * `conversion` appears in at most one row.
+ * NOTE(review): presumably `conversion` is the conversion applied directly to
+ * `converted`.
+ */
+exprconv(
+ int converted: @expr ref,
+ unique int conversion: @expr ref
+);
+
+/**
+ * Marks an `@element`.
+ * NOTE(review): per the table name, the element is compiler-generated.
+ */
+compgenerated(unique int id: @element ref);
+
+/**
+ * `destructor_call` destructs the `i`'th entity that should be
+ * destructed following `element`. Note that entities should be
+ * destructed in reverse construction order, so for a given `element`
+ * these should be called from highest to lowest `i`.
+ *
+ * Both (`element`, `destructor_call`) and (`element`, `i`) are keys.
+ */
+#keyset[element, destructor_call]
+#keyset[element, i]
+synthetic_destructor_call(
+ int element: @element ref,
+ int i: int ref,
+ int destructor_call: @routineexpr ref
+);
+
+/** Introduces each `@namespace` with its name. */
+namespaces(
+ unique int id: @namespace,
+ string name: string ref
+);
+
+/** Marks a `@namespace`; per the table name, one that is `inline`. */
+namespace_inline(
+ unique int id: @namespace ref
+);
+
+/**
+ * Relates a `@namespace` to one of its members. The `memberid` column is
+ * `unique`, so each member belongs to at most one namespace.
+ */
+namespacembrs(
+ int parentid: @namespace ref,
+ unique int memberid: @namespacembr ref
+);
+
+/** A namespace member: a declaration or a nested namespace. */
+@namespacembr = @declaration | @namespace;
+
+/**
+ * Places the expression `expr_id` at position `child_index` under the
+ * `@exprparent` `parent_id`.
+ */
+exprparents(
+ int expr_id: @expr ref,
+ int child_index: int ref,
+ int parent_id: @exprparent ref
+);
+
+/**
+ * Marks an `@expr`.
+ * NOTE(review): per the table name, the expression performs a load; the
+ * precise definition is not stated here.
+ */
+expr_isload(unique int expr_id: @expr ref);
+
+/** Any of the five cast expression kinds listed below. */
+@cast = @c_style_cast
+ | @const_cast
+ | @dynamic_cast
+ | @reinterpret_cast
+ | @static_cast
+ ;
+
+/*
+case @conversion.kind of
+ 0 = @simple_conversion // a numeric conversion, qualification conversion, or a reinterpret_cast
+| 1 = @bool_conversion // conversion to 'bool'
+| 2 = @base_class_conversion // a derived-to-base conversion
+| 3 = @derived_class_conversion // a base-to-derived conversion
+| 4 = @pm_base_class_conversion // a derived-to-base conversion of a pointer to member
+| 5 = @pm_derived_class_conversion // a base-to-derived conversion of a pointer to member
+| 6 = @glvalue_adjust // an adjustment of the type of a glvalue
+| 7 = @prvalue_adjust // an adjustment of the type of a prvalue
+;
+*/
+/**
+ * Describes the semantics represented by a cast expression. This is largely
+ * independent of the source syntax of the cast, so it is separate from the
+ * regular expression kind.
+ *
+ * The `kind` values are those listed in the comment above; there is at most
+ * one row per `@cast`.
+ */
+conversionkinds(
+ unique int expr_id: @cast ref,
+ int kind: int ref
+);
+
+/** Any conversion expression: a `@cast` or one of the other kinds below. */
+@conversion = @cast
+ | @array_to_pointer
+ | @parexpr
+ | @reference_to
+ | @ref_indirect
+ | @temp_init
+ ;
+
+/*
+case @funbindexpr.kind of
+ 0 = @normal_call // a normal call
+| 1 = @virtual_call // a virtual call
+| 2 = @adl_call // a call whose target is only found by ADL
+;
+*/
+/**
+ * Gives the call `kind` (see the list above) of a `@funbindexpr`; at most one
+ * row per `caller`.
+ */
+iscall(unique int caller: @funbindexpr ref, int kind: int ref);
+
+/**
+ * Associates a count `num` with an `@expr`; at most one row per expression.
+ * NOTE(review): per the table name, `num` is a number of template arguments.
+ */
+numtemplatearguments(
+ unique int expr_id: @expr ref,
+ int num: int ref
+);
+
+/**
+ * Introduces each `@specialnamequalifyingelement` with its name. Both columns
+ * are `unique`, so names and entities correspond one-to-one.
+ */
+specialnamequalifyingelements(
+ unique int id: @specialnamequalifyingelement,
+ unique string name: string ref
+);
+
+/** Something that can be qualified: an expression or another qualifier. */
+@namequalifiableelement = @expr | @namequalifier;
+/** Something that can act as a qualifier target. */
+@namequalifyingelement = @namespace
+ | @specialnamequalifyingelement
+ | @usertype;
+
+/**
+ * Introduces each `@namequalifier`: the element it qualifies, the element it
+ * names as qualifier, and its location. `qualifiableelement` is `unique`, so
+ * each element is directly qualified by at most one qualifier.
+ */
+namequalifiers(
+ unique int id: @namequalifier,
+ unique int qualifiableelement: @namequalifiableelement ref,
+ int qualifyingelement: @namequalifyingelement ref,
+ int location: @location_default ref
+);
+
+/** Binds a `@varbindexpr` to the `@accessible` it refers to. */
+varbind(
+ int expr: @varbindexpr ref,
+ int var: @accessible ref
+);
+
+/** Binds a `@funbindexpr` to the `@function` it refers to. */
+funbind(
+ int expr: @funbindexpr ref,
+ int fun: @function ref
+);
+
+/** A `new` or `new[]` expression. */
+@any_new_expr = @new_expr
+ | @new_array_expr;
+
+/** A `new`, `new[]`, `delete` or `delete[]` expression. */
+@new_or_delete_expr = @any_new_expr
+ | @delete_expr
+ | @delete_array_expr;
+
+/** A prefix increment or decrement. */
+@prefix_crement_expr = @preincrexpr | @predecrexpr;
+
+/** A postfix increment or decrement. */
+@postfix_crement_expr = @postincrexpr | @postdecrexpr;
+
+/** A prefix or postfix increment. */
+@increment_expr = @preincrexpr | @postincrexpr;
+
+/** A prefix or postfix decrement. */
+@decrement_expr = @predecrexpr | @postdecrexpr;
+
+/** Any increment or decrement. */
+@crement_expr = @increment_expr | @decrement_expr;
+
+/** A unary arithmetic operation. */
+@un_arith_op_expr = @arithnegexpr
+ | @unaryplusexpr
+ | @conjugation
+ | @realpartexpr
+ | @imagpartexpr
+ | @crement_expr
+ ;
+
+/** A unary bitwise operation. */
+@un_bitwise_op_expr = @complementexpr;
+
+/** A unary logical operation. */
+@un_log_op_expr = @notexpr;
+
+/** Any unary operation. */
+@un_op_expr = @address_of
+ | @indirect
+ | @un_arith_op_expr
+ | @un_bitwise_op_expr
+ | @builtinaddressof
+ | @vec_fill
+ | @un_log_op_expr
+ | @co_await
+ | @co_yield
+ ;
+
+/** A binary logical operation. */
+@bin_log_op_expr = @andlogicalexpr | @orlogicalexpr;
+
+/** A comparison: an equality or a relational operation. */
+@cmp_op_expr = @eq_op_expr | @rel_op_expr;
+
+/** An equality operation. */
+@eq_op_expr = @eqexpr | @neexpr;
+
+/** A relational operation. */
+@rel_op_expr = @gtexpr
+ | @ltexpr
+ | @geexpr
+ | @leexpr
+ | @spaceshipexpr
+ ;
+
+/** A binary bitwise operation, including shifts. */
+@bin_bitwise_op_expr = @lshiftexpr
+ | @rshiftexpr
+ | @andexpr
+ | @orexpr
+ | @xorexpr
+ ;
+
+/** A pointer arithmetic operation. */
+@p_arith_op_expr = @paddexpr
+ | @psubexpr
+ | @pdiffexpr
+ ;
+
+/** A binary arithmetic operation, including pointer arithmetic. */
+@bin_arith_op_expr = @addexpr
+ | @subexpr
+ | @mulexpr
+ | @divexpr
+ | @remexpr
+ | @jmulexpr
+ | @jdivexpr
+ | @fjaddexpr
+ | @jfaddexpr
+ | @fjsubexpr
+ | @jfsubexpr
+ | @minexpr
+ | @maxexpr
+ | @p_arith_op_expr
+ ;
+
+/** Any binary operation. */
+@bin_op_expr = @bin_arith_op_expr
+ | @bin_bitwise_op_expr
+ | @cmp_op_expr
+ | @bin_log_op_expr
+ ;
+
+/** Any unary, binary, assignment, or conditional operation. */
+@op_expr = @un_op_expr
+ | @bin_op_expr
+ | @assign_expr
+ | @conditionalexpr
+ ;
+
+/** A compound assignment with an arithmetic operator. */
+@assign_arith_expr = @assignaddexpr
+ | @assignsubexpr
+ | @assignmulexpr
+ | @assigndivexpr
+ | @assignremexpr
+ ;
+
+/**
+ * A compound assignment with a bitwise or shift operator.
+ * Note that the pointer add/sub assignments are also listed in this union.
+ */
+@assign_bitwise_expr = @assignandexpr
+ | @assignorexpr
+ | @assignxorexpr
+ | @assignlshiftexpr
+ | @assignrshiftexpr
+ | @assignpaddexpr
+ | @assignpsubexpr
+ ;
+
+/** Any compound assignment. */
+@assign_op_expr = @assign_arith_expr | @assign_bitwise_expr;
+
+/** Any assignment: a plain assignment or a compound assignment. */
+@assign_expr = @assignexpr | @assign_op_expr;
+
+/*
+ case @allocator.form of
+ 0 = plain
+ | 1 = alignment
+ ;
+*/
+
+/**
+ * The allocator function associated with a `new` or `new[]` expression.
+ * The `form` column specifies whether the allocation call contains an alignment
+ * argument.
+ */
+expr_allocator(
+ unique int expr: @any_new_expr ref,
+ int func: @function ref,
+ int form: int ref
+);
+
+/*
+ case @deallocator.form of
+ 0 = plain
+ | 1 = size
+ | 2 = alignment
+ | 3 = size_and_alignment
+ ;
+*/
+
+/**
+ * The deallocator function associated with a `delete`, `delete[]`, `new`, or
+ * `new[]` expression. For a `new` or `new[]` expression, the deallocator is the
+ * one used to free memory if the initialization throws an exception.
+ * The `form` column specifies whether the deallocation call contains a size
+ * argument, an alignment argument, or both.
+ */
+expr_deallocator(
+ unique int expr: @new_or_delete_expr ref,
+ int func: @function ref,
+ int form: int ref
+);
+
+/**
+ * Holds if the `@conditionalexpr` is of the two operand form
+ * `guard ? : false`.
+ */
+expr_cond_two_operand(
+ unique int cond: @conditionalexpr ref
+);
+
+/**
+ * The guard of `@conditionalexpr` `guard ? true : false`.
+ * There is at most one row per `cond`.
+ */
+expr_cond_guard(
+ unique int cond: @conditionalexpr ref,
+ int guard: @expr ref
+);
+
+/**
+ * The expression used when the guard of `@conditionalexpr`
+ * `guard ? true : false` holds. For the two operand form
+ * `guard ?: false` consider using `expr_cond_guard` instead.
+ * There is at most one row per `cond`.
+ */
+expr_cond_true(
+ unique int cond: @conditionalexpr ref,
+ int true: @expr ref
+);
+
+/**
+ * The expression used when the guard of `@conditionalexpr`
+ * `guard ? true : false` does not hold.
+ * There is at most one row per `cond`.
+ */
+expr_cond_false(
+ unique int cond: @conditionalexpr ref,
+ int false: @expr ref
+);
+
+/** A string representation of the value. Introduces each `@value`. */
+values(
+ unique int id: @value,
+ string str: string ref
+);
+
+/**
+ * The actual text in the source code for the value, if any.
+ * There is at most one row per `@value`.
+ */
+valuetext(
+ unique int id: @value ref,
+ string text: string ref
+);
+
+/**
+ * Binds a `@value` to an `@expr`. The `expr` column is `unique`, so each
+ * expression is bound to at most one value.
+ */
+valuebind(
+ int val: @value ref,
+ unique int expr: @expr ref
+);
+
+/**
+ * The offset of a `@variable`, given as a byte offset plus a bit offset; at
+ * most one row per variable.
+ * NOTE(review): per the table name this applies to fields; presumably
+ * `bitoffset` is relative to `byteoffset`.
+ */
+fieldoffsets(
+ unique int id: @variable ref,
+ int byteoffset: int ref,
+ int bitoffset: int ref
+);
+
+/**
+ * Bit-field information for a `@variable`: `bits` and `declared_bits`; at
+ * most one row per variable.
+ * NOTE(review): how `bits` differs from `declared_bits` is not stated here.
+ */
+bitfield(
+ unique int id: @variable ref,
+ int bits: int ref,
+ int declared_bits: int ref
+);
+
+/* TODO
+memberprefix(
+ int member: @expr ref,
+ int prefix: @expr ref
+);
+*/
+
+/*
+ kind(1) = mbrcallexpr
+ kind(2) = mbrptrcallexpr
+ kind(3) = mbrptrmbrcallexpr
+ kind(4) = ptrmbrptrmbrcallexpr
+ kind(5) = mbrreadexpr // x.y
+ kind(6) = mbrptrreadexpr // p->y
+ kind(7) = mbrptrmbrreadexpr // x.*pm
+ kind(8) = mbrptrmbrptrreadexpr // x->*pm
+ kind(9) = staticmbrreadexpr // static x.y
+ kind(10) = staticmbrptrreadexpr // static p->y
+*/
+/* TODO
+memberaccess(
+ int member: @expr ref,
+ int kind: int ref
+);
+*/
+
+/**
+ * Introduces each `@initialiser`: the `@accessible` being initialised, the
+ * initialising expression, and a location. The `expr` column is `unique`, so
+ * an expression belongs to at most one initialiser.
+ */
+initialisers(
+ unique int init: @initialiser,
+ int var: @accessible ref,
+ unique int expr: @expr ref,
+ int location: @location_expr ref
+);
+
+/**
+ * An ancestor for the expression, for cases in which we cannot
+ * otherwise find the expression's parent.
+ * `exp` is the expression and `ancestor` is the `@element` recorded for it.
+ */
+expr_ancestor(
+ int exp: @expr ref,
+ int ancestor: @element ref
+);
+
+/**
+ * Introduces each `@expr` with its `kind` (see `case @expr.kind of` in this
+ * file) and its location.
+ */
+exprs(
+ unique int id: @expr,
+ int kind: int ref,
+ int location: @location_expr ref
+);
+
+/*
+ case @value.category of
+ 1 = prval
+ | 2 = xval
+ | 3 = lval
+ ;
+*/
+/**
+ * Gives the `@type` and value category of an `@expr`; the `value_category`
+ * codes are those listed in the comment above.
+ */
+expr_types(
+ int id: @expr ref,
+ int typeid: @type ref,
+ int value_category: int ref
+);
+
+/**
+ * Maps each value of the `kind` column of `exprs` to a subtype of `@expr`.
+ * Numbers that are skipped or marked "deprecated" have no subtype.
+ */
+case @expr.kind of
+ 1 = @errorexpr
+| 2 = @address_of // & AddressOfExpr
+| 3 = @reference_to // ReferenceToExpr (implicit?)
+| 4 = @indirect // * PointerDereferenceExpr
+| 5 = @ref_indirect // ReferenceDereferenceExpr (implicit?)
+// ...
+| 8 = @array_to_pointer // (???)
+| 9 = @vacuous_destructor_call // VacuousDestructorCall
+// ...
+| 11 = @assume // Microsoft
+| 12 = @parexpr
+| 13 = @arithnegexpr
+| 14 = @unaryplusexpr
+| 15 = @complementexpr
+| 16 = @notexpr
+| 17 = @conjugation // GNU ~ operator
+| 18 = @realpartexpr // GNU __real
+| 19 = @imagpartexpr // GNU __imag
+| 20 = @postincrexpr
+| 21 = @postdecrexpr
+| 22 = @preincrexpr
+| 23 = @predecrexpr
+| 24 = @conditionalexpr
+| 25 = @addexpr
+| 26 = @subexpr
+| 27 = @mulexpr
+| 28 = @divexpr
+| 29 = @remexpr
+| 30 = @jmulexpr // C99 mul imaginary
+| 31 = @jdivexpr // C99 div imaginary
+| 32 = @fjaddexpr // C99 add real + imaginary
+| 33 = @jfaddexpr // C99 add imaginary + real
+| 34 = @fjsubexpr // C99 sub real - imaginary
+| 35 = @jfsubexpr // C99 sub imaginary - real
+| 36 = @paddexpr // pointer add (pointer + int or int + pointer)
+| 37 = @psubexpr // pointer sub (pointer - integer)
+| 38 = @pdiffexpr // difference between two pointers
+| 39 = @lshiftexpr
+| 40 = @rshiftexpr
+| 41 = @andexpr
+| 42 = @orexpr
+| 43 = @xorexpr
+| 44 = @eqexpr
+| 45 = @neexpr
+| 46 = @gtexpr
+| 47 = @ltexpr
+| 48 = @geexpr
+| 49 = @leexpr
+| 50 = @minexpr // GNU minimum
+| 51 = @maxexpr // GNU maximum
+| 52 = @assignexpr
+| 53 = @assignaddexpr
+| 54 = @assignsubexpr
+| 55 = @assignmulexpr
+| 56 = @assigndivexpr
+| 57 = @assignremexpr
+| 58 = @assignlshiftexpr
+| 59 = @assignrshiftexpr
+| 60 = @assignandexpr
+| 61 = @assignorexpr
+| 62 = @assignxorexpr
+| 63 = @assignpaddexpr // assign pointer add
+| 64 = @assignpsubexpr // assign pointer sub
+| 65 = @andlogicalexpr
+| 66 = @orlogicalexpr
+| 67 = @commaexpr
+| 68 = @subscriptexpr // access to member of an array, e.g., a[5]
+// ... 69 @objc_subscriptexpr deprecated
+// ... 70 @cmdaccess deprecated
+// ...
+| 73 = @virtfunptrexpr
+| 74 = @callexpr
+// ... 75 @msgexpr_normal deprecated
+// ... 76 @msgexpr_super deprecated
+// ... 77 @atselectorexpr deprecated
+// ... 78 @atprotocolexpr deprecated
+| 79 = @vastartexpr
+| 80 = @vaargexpr
+| 81 = @vaendexpr
+| 82 = @vacopyexpr
+// ... 83 @atencodeexpr deprecated
+| 84 = @varaccess
+| 85 = @thisaccess
+// ... 86 @objc_box_expr deprecated
+| 87 = @new_expr
+| 88 = @delete_expr
+| 89 = @throw_expr
+| 90 = @condition_decl // a variable declared in a condition, e.g., if(int x = y > 2)
+| 91 = @braced_init_list
+| 92 = @type_id
+| 93 = @runtime_sizeof
+| 94 = @runtime_alignof
+| 95 = @sizeof_pack
+| 96 = @expr_stmt // GNU extension
+| 97 = @routineexpr
+| 98 = @type_operand // used to access a type in certain contexts (haven't found any examples yet....)
+| 99 = @offsetofexpr // offsetof ::= type and field
+| 100 = @hasassignexpr // __has_assign ::= type
+| 101 = @hascopyexpr // __has_copy ::= type
+| 102 = @hasnothrowassign // __has_nothrow_assign ::= type
+| 103 = @hasnothrowconstr // __has_nothrow_constructor ::= type
+| 104 = @hasnothrowcopy // __has_nothrow_copy ::= type
+| 105 = @hastrivialassign // __has_trivial_assign ::= type
+| 106 = @hastrivialconstr // __has_trivial_constructor ::= type
+| 107 = @hastrivialcopy // __has_trivial_copy ::= type
+| 108 = @hasuserdestr // __has_user_destructor ::= type
+| 109 = @hasvirtualdestr // __has_virtual_destructor ::= type
+| 110 = @isabstractexpr // __is_abstract ::= type
+| 111 = @isbaseofexpr // __is_base_of ::= type type
+| 112 = @isclassexpr // __is_class ::= type
+| 113 = @isconvtoexpr // __is_convertible_to ::= type type
+| 114 = @isemptyexpr // __is_empty ::= type
+| 115 = @isenumexpr // __is_enum ::= type
+| 116 = @ispodexpr // __is_pod ::= type
+| 117 = @ispolyexpr // __is_polymorphic ::= type
+| 118 = @isunionexpr // __is_union ::= type
+| 119 = @typescompexpr // GNU __builtin_types_compatible ::= type type
+| 120 = @intaddrexpr // EDG internal builtin, used to implement offsetof
+// ...
+| 122 = @hastrivialdestructor // __has_trivial_destructor ::= type
+| 123 = @literal
+| 124 = @uuidof
+| 127 = @aggregateliteral
+| 128 = @delete_array_expr
+| 129 = @new_array_expr
+// ... 130 @objc_array_literal deprecated
+// ... 131 @objc_dictionary_literal deprecated
+| 132 = @foldexpr
+// ...
+| 200 = @ctordirectinit
+| 201 = @ctorvirtualinit
+| 202 = @ctorfieldinit
+| 203 = @ctordelegatinginit
+| 204 = @dtordirectdestruct
+| 205 = @dtorvirtualdestruct
+| 206 = @dtorfielddestruct
+// ...
+| 210 = @static_cast
+| 211 = @reinterpret_cast
+| 212 = @const_cast
+| 213 = @dynamic_cast
+| 214 = @c_style_cast
+| 215 = @lambdaexpr
+| 216 = @param_ref
+| 217 = @noopexpr
+// ...
+| 294 = @istriviallyconstructibleexpr
+| 295 = @isdestructibleexpr
+| 296 = @isnothrowdestructibleexpr
+| 297 = @istriviallydestructibleexpr
+| 298 = @istriviallyassignableexpr
+| 299 = @isnothrowassignableexpr
+| 300 = @istrivialexpr
+| 301 = @isstandardlayoutexpr
+| 302 = @istriviallycopyableexpr
+| 303 = @isliteraltypeexpr
+| 304 = @hastrivialmoveconstructorexpr
+| 305 = @hastrivialmoveassignexpr
+| 306 = @hasnothrowmoveassignexpr
+| 307 = @isconstructibleexpr
+| 308 = @isnothrowconstructibleexpr
+| 309 = @hasfinalizerexpr
+| 310 = @isdelegateexpr
+| 311 = @isinterfaceclassexpr
+| 312 = @isrefarrayexpr
+| 313 = @isrefclassexpr
+| 314 = @issealedexpr
+| 315 = @issimplevalueclassexpr
+| 316 = @isvalueclassexpr
+| 317 = @isfinalexpr
+| 319 = @noexceptexpr
+| 320 = @builtinshufflevector
+| 321 = @builtinchooseexpr
+| 322 = @builtinaddressof
+| 323 = @vec_fill
+| 324 = @builtinconvertvector
+| 325 = @builtincomplex
+| 326 = @spaceshipexpr
+| 327 = @co_await
+| 328 = @co_yield
+| 329 = @temp_init
+;
+
+/** Any of the four variable-argument expression kinds listed below. */
+@var_args_expr = @vastartexpr
+ | @vaendexpr
+ | @vaargexpr
+ | @vacopyexpr
+ ;
+
+/**
+ * Union of the built-in operation expression kinds listed below, including
+ * every `@var_args_expr`.
+ * NOTE(review): `@istrivialexpr` and `@noexceptexpr` appear in
+ * `case @expr.kind of` but are not listed here; confirm this is intentional.
+ */
+@builtin_op = @var_args_expr
+ | @noopexpr
+ | @offsetofexpr
+ | @intaddrexpr
+ | @hasassignexpr
+ | @hascopyexpr
+ | @hasnothrowassign
+ | @hasnothrowconstr
+ | @hasnothrowcopy
+ | @hastrivialassign
+ | @hastrivialconstr
+ | @hastrivialcopy
+ | @hastrivialdestructor
+ | @hasuserdestr
+ | @hasvirtualdestr
+ | @isabstractexpr
+ | @isbaseofexpr
+ | @isclassexpr
+ | @isconvtoexpr
+ | @isemptyexpr
+ | @isenumexpr
+ | @ispodexpr
+ | @ispolyexpr
+ | @isunionexpr
+ | @typescompexpr
+ | @builtinshufflevector
+ | @builtinconvertvector
+ | @builtinaddressof
+ | @istriviallyconstructibleexpr
+ | @isdestructibleexpr
+ | @isnothrowdestructibleexpr
+ | @istriviallydestructibleexpr
+ | @istriviallyassignableexpr
+ | @isnothrowassignableexpr
+ | @isstandardlayoutexpr
+ | @istriviallycopyableexpr
+ | @isliteraltypeexpr
+ | @hastrivialmoveconstructorexpr
+ | @hastrivialmoveassignexpr
+ | @hasnothrowmoveassignexpr
+ | @isconstructibleexpr
+ | @isnothrowconstructibleexpr
+ | @hasfinalizerexpr
+ | @isdelegateexpr
+ | @isinterfaceclassexpr
+ | @isrefarrayexpr
+ | @isrefclassexpr
+ | @issealedexpr
+ | @issimplevalueclassexpr
+ | @isvalueclassexpr
+ | @isfinalexpr
+ | @builtinchooseexpr
+ | @builtincomplex
+ ;
+
+new_allocated_type(
+ unique int expr: @new_expr ref,
+ int type_id: @type ref
+);
+
+new_array_allocated_type(
+ unique int expr: @new_array_expr ref,
+ int type_id: @type ref
+);
+
+/**
+ * The field being initialized by an initializer expression within an aggregate
+ * initializer for a class/struct/union.
+ */
+#keyset[aggregate, field]
+aggregate_field_init(
+ int aggregate: @aggregateliteral ref,
+ int initializer: @expr ref,
+ int field: @membervariable ref
+);
+
+/**
+ * The index of the element being initialized by an initializer expression
+ * within an aggregate initializer for an array.
+ */
+#keyset[aggregate, element_index]
+aggregate_array_init(
+ int aggregate: @aggregateliteral ref,
+ int initializer: @expr ref,
+ int element_index: int ref
+);
+
+@ctorinit = @ctordirectinit
+ | @ctorvirtualinit
+ | @ctorfieldinit
+ | @ctordelegatinginit;
+@dtordestruct = @dtordirectdestruct
+ | @dtorvirtualdestruct
+ | @dtorfielddestruct;
+
+
+condition_decl_bind(
+ unique int expr: @condition_decl ref,
+ unique int decl: @declaration ref
+);
+
+typeid_bind(
+ unique int expr: @type_id ref,
+ int type_id: @type ref
+);
+
+uuidof_bind(
+ unique int expr: @uuidof ref,
+ int type_id: @type ref
+);
+
+@runtime_sizeof_or_alignof = @runtime_sizeof | @runtime_alignof;
+
+sizeof_bind(
+ unique int expr: @runtime_sizeof_or_alignof ref,
+ int type_id: @type ref
+);
+
+code_block(
+ unique int block: @literal ref,
+ unique int routine: @function ref
+);
+
+lambdas(
+ unique int expr: @lambdaexpr ref,
+ string default_capture: string ref,
+ boolean has_explicit_return_type: boolean ref
+);
+
+lambda_capture(
+ unique int id: @lambdacapture,
+ int lambda: @lambdaexpr ref,
+ int index: int ref,
+ int field: @membervariable ref,
+ boolean captured_by_reference: boolean ref,
+ boolean is_implicit: boolean ref,
+ int location: @location_default ref
+);
+
+@funbindexpr = @routineexpr
+ | @new_expr
+ | @delete_expr
+ | @delete_array_expr
+ | @ctordirectinit
+ | @ctorvirtualinit
+ | @ctordelegatinginit
+ | @dtordirectdestruct
+ | @dtorvirtualdestruct;
+
+@varbindexpr = @varaccess | @ctorfieldinit | @dtorfielddestruct;
+@addressable = @function | @variable ;
+@accessible = @addressable | @enumconstant ;
+
+@access = @varaccess | @routineexpr ;
+
+fold(
+ int expr: @foldexpr ref,
+ string operator: string ref,
+ boolean is_left_fold: boolean ref
+);
+
+stmts(
+ unique int id: @stmt,
+ int kind: int ref,
+ int location: @location_stmt ref
+);
+
+case @stmt.kind of
+ 1 = @stmt_expr
+| 2 = @stmt_if
+| 3 = @stmt_while
+| 4 = @stmt_goto
+| 5 = @stmt_label
+| 6 = @stmt_return
+| 7 = @stmt_block
+| 8 = @stmt_end_test_while // do { ... } while ( ... )
+| 9 = @stmt_for
+| 10 = @stmt_switch_case
+| 11 = @stmt_switch
+| 13 = @stmt_asm // "asm" statement or the body of an asm function
+| 15 = @stmt_try_block
+| 16 = @stmt_microsoft_try // Microsoft
+| 17 = @stmt_decl
+| 18 = @stmt_set_vla_size // C99
+| 19 = @stmt_vla_decl // C99
+| 25 = @stmt_assigned_goto // GNU
+| 26 = @stmt_empty
+| 27 = @stmt_continue
+| 28 = @stmt_break
+| 29 = @stmt_range_based_for // C++11
+// ... 30 @stmt_at_autoreleasepool_block deprecated
+// ... 31 @stmt_objc_for_in deprecated
+// ... 32 @stmt_at_synchronized deprecated
+| 33 = @stmt_handler
+// ... 34 @stmt_finally_end deprecated
+| 35 = @stmt_constexpr_if
+| 37 = @stmt_co_return
+;
+
+type_vla(
+ int type_id: @type ref,
+ int decl: @stmt_vla_decl ref
+);
+
+variable_vla(
+ int var: @variable ref,
+ int decl: @stmt_vla_decl ref
+);
+
+if_then(
+ unique int if_stmt: @stmt_if ref,
+ int then_id: @stmt ref
+);
+
+if_else(
+ unique int if_stmt: @stmt_if ref,
+ int else_id: @stmt ref
+);
+
+constexpr_if_then(
+ unique int constexpr_if_stmt: @stmt_constexpr_if ref,
+ int then_id: @stmt ref
+);
+
+constexpr_if_else(
+ unique int constexpr_if_stmt: @stmt_constexpr_if ref,
+ int else_id: @stmt ref
+);
+
+while_body(
+ unique int while_stmt: @stmt_while ref,
+ int body_id: @stmt ref
+);
+
+do_body(
+ unique int do_stmt: @stmt_end_test_while ref,
+ int body_id: @stmt ref
+);
+
+#keyset[switch_stmt, index]
+switch_case(
+ int switch_stmt: @stmt_switch ref,
+ int index: int ref,
+ int case_id: @stmt_switch_case ref
+);
+
+switch_body(
+ unique int switch_stmt: @stmt_switch ref,
+ int body_id: @stmt ref
+);
+
+for_initialization(
+ unique int for_stmt: @stmt_for ref,
+ int init_id: @stmt ref
+);
+
+for_condition(
+ unique int for_stmt: @stmt_for ref,
+ int condition_id: @expr ref
+);
+
+for_update(
+ unique int for_stmt: @stmt_for ref,
+ int update_id: @expr ref
+);
+
+for_body(
+ unique int for_stmt: @stmt_for ref,
+ int body_id: @stmt ref
+);
+
+@stmtparent = @stmt | @expr_stmt ;
+stmtparents(
+ unique int id: @stmt ref,
+ int index: int ref,
+ int parent: @stmtparent ref
+);
+
+ishandler(unique int block: @stmt_block ref);
+
+@cfgnode = @stmt | @expr | @function | @initialiser ;
+
+stmt_decl_bind(
+ int stmt: @stmt_decl ref,
+ int num: int ref,
+ int decl: @declaration ref
+);
+
+stmt_decl_entry_bind(
+ int stmt: @stmt_decl ref,
+ int num: int ref,
+ int decl_entry: @element ref
+);
+
+@functionorblock = @function | @stmt_block;
+
+blockscope(
+ unique int block: @stmt_block ref,
+ int enclosing: @functionorblock ref
+);
+
+@jump = @stmt_goto | @stmt_break | @stmt_continue;
+
+@jumporlabel = @jump | @stmt_label | @literal;
+
+jumpinfo(
+ unique int id: @jumporlabel ref,
+ string str: string ref,
+ int target: @stmt ref
+);
+
+preprocdirects(
+ unique int id: @preprocdirect,
+ int kind: int ref,
+ int location: @location_default ref
+);
+case @preprocdirect.kind of
+ 0 = @ppd_if
+| 1 = @ppd_ifdef
+| 2 = @ppd_ifndef
+| 3 = @ppd_elif
+| 4 = @ppd_else
+| 5 = @ppd_endif
+| 6 = @ppd_plain_include
+| 7 = @ppd_define
+| 8 = @ppd_undef
+| 9 = @ppd_line
+| 10 = @ppd_error
+| 11 = @ppd_pragma
+| 12 = @ppd_objc_import
+| 13 = @ppd_include_next
+| 18 = @ppd_warning
+;
+
+@ppd_include = @ppd_plain_include | @ppd_objc_import | @ppd_include_next;
+
+@ppd_branch = @ppd_if | @ppd_ifdef | @ppd_ifndef | @ppd_elif;
+
+preprocpair(
+ int begin : @ppd_branch ref,
+ int elseelifend : @preprocdirect ref
+);
+
+preproctrue(int branch : @ppd_branch ref);
+preprocfalse(int branch : @ppd_branch ref);
+
+preproctext(
+ unique int id: @preprocdirect ref,
+ string head: string ref,
+ string body: string ref
+);
+
+includes(
+ unique int id: @ppd_include ref,
+ int included: @file ref
+);
+
+link_targets(
+ unique int id: @link_target,
+ int binary: @file ref
+);
+
+link_parent(
+ int element : @element ref,
+ int link_target : @link_target ref
+);
+
+/* XML Files */
+
+xmlEncoding(unique int id: @file ref, string encoding: string ref);
+
+xmlDTDs(
+ unique int id: @xmldtd,
+ string root: string ref,
+ string publicId: string ref,
+ string systemId: string ref,
+ int fileid: @file ref
+);
+
+xmlElements(
+ unique int id: @xmlelement,
+ string name: string ref,
+ int parentid: @xmlparent ref,
+ int idx: int ref,
+ int fileid: @file ref
+);
+
+xmlAttrs(
+ unique int id: @xmlattribute,
+ int elementid: @xmlelement ref,
+ string name: string ref,
+ string value: string ref,
+ int idx: int ref,
+ int fileid: @file ref
+);
+
+xmlNs(
+ int id: @xmlnamespace,
+ string prefixName: string ref,
+ string URI: string ref,
+ int fileid: @file ref
+);
+
+xmlHasNs(
+ int elementId: @xmlnamespaceable ref,
+ int nsId: @xmlnamespace ref,
+ int fileid: @file ref
+);
+
+xmlComments(
+ unique int id: @xmlcomment,
+ string text: string ref,
+ int parentid: @xmlparent ref,
+ int fileid: @file ref
+);
+
+xmlChars(
+ unique int id: @xmlcharacters,
+ string text: string ref,
+ int parentid: @xmlparent ref,
+ int idx: int ref,
+ int isCDATA: int ref,
+ int fileid: @file ref
+);
+
+@xmlparent = @file | @xmlelement;
+@xmlnamespaceable = @xmlelement | @xmlattribute;
+
+xmllocations(
+ int xmlElement: @xmllocatable ref,
+ int location: @location_default ref
+);
+
+@xmllocatable = @xmlcharacters
+ | @xmlelement
+ | @xmlcomment
+ | @xmlattribute
+ | @xmldtd
+ | @file
+ | @xmlnamespace;
diff --git a/repo-tests/codeql/cpp/ql/lib/tutorial.qll b/repo-tests/codeql/cpp/ql/lib/tutorial.qll
new file mode 100644
index 00000000000..8cb1797a532
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/lib/tutorial.qll
@@ -0,0 +1,1207 @@
+/**
+ * This library is used in the QL detective tutorials.
+ *
+ * Note: Data is usually stored in a separate database and the QL libraries only contain predicates,
+ * but for this tutorial both the data and the predicates are stored in the library.
+ */
+class Person extends string {
+ Person() {
+ this = "Ronil" or
+ this = "Dina" or
+ this = "Ravi" or
+ this = "Bruce" or
+ this = "Jo" or
+ this = "Aida" or
+ this = "Esme" or
+ this = "Charlie" or
+ this = "Fred" or
+ this = "Meera" or
+ this = "Maya" or
+ this = "Chad" or
+ this = "Tiana" or
+ this = "Laura" or
+ this = "George" or
+ this = "Will" or
+ this = "Mary" or
+ this = "Almira" or
+ this = "Susannah" or
+ this = "Rhoda" or
+ this = "Cynthia" or
+ this = "Eunice" or
+ this = "Olive" or
+ this = "Virginia" or
+ this = "Angeline" or
+ this = "Helen" or
+ this = "Cornelia" or
+ this = "Harriet" or
+ this = "Mahala" or
+ this = "Abby" or
+ this = "Margaret" or
+ this = "Deb" or
+ this = "Minerva" or
+ this = "Severus" or
+ this = "Lavina" or
+ this = "Adeline" or
+ this = "Cath" or
+ this = "Elisa" or
+ this = "Lucretia" or
+ this = "Anne" or
+ this = "Eleanor" or
+ this = "Joanna" or
+ this = "Adam" or
+ this = "Agnes" or
+ this = "Rosanna" or
+ this = "Clara" or
+ this = "Melissa" or
+ this = "Amy" or
+ this = "Isabel" or
+ this = "Jemima" or
+ this = "Cordelia" or
+ this = "Melinda" or
+ this = "Delila" or
+ this = "Jeremiah" or
+ this = "Elijah" or
+ this = "Hester" or
+ this = "Walter" or
+ this = "Oliver" or
+ this = "Hugh" or
+ this = "Aaron" or
+ this = "Reuben" or
+ this = "Eli" or
+ this = "Amos" or
+ this = "Augustus" or
+ this = "Theodore" or
+ this = "Ira" or
+ this = "Timothy" or
+ this = "Cyrus" or
+ this = "Horace" or
+ this = "Simon" or
+ this = "Asa" or
+ this = "Frank" or
+ this = "Nelson" or
+ this = "Leonard" or
+ this = "Harrison" or
+ this = "Anthony" or
+ this = "Louis" or
+ this = "Milton" or
+ this = "Noah" or
+ this = "Cornelius" or
+ this = "Abdul" or
+ this = "Warren" or
+ this = "Harvey" or
+ this = "Dennis" or
+ this = "Wesley" or
+ this = "Sylvester" or
+ this = "Gilbert" or
+ this = "Sullivan" or
+ this = "Edmund" or
+ this = "Wilson" or
+ this = "Perry" or
+ this = "Matthew" or
+ this = "Simba" or
+ this = "Nala" or
+ this = "Rafiki" or
+ this = "Shenzi" or
+ this = "Ernest" or
+ this = "Gertrude" or
+ this = "Oscar" or
+ this = "Lilian" or
+ this = "Raymond" or
+ this = "Elgar" or
+ this = "Elmer" or
+ this = "Herbert" or
+ this = "Maude" or
+ this = "Mae" or
+ this = "Otto" or
+ this = "Edwin" or
+ this = "Ophelia" or
+ this = "Parsley" or
+ this = "Sage" or
+ this = "Rosemary" or
+ this = "Thyme" or
+ this = "Garfunkel" or
+ this = "King Basil" or
+ this = "Stephen"
+ }
+
+ /** Gets the hair color of the person. If the person is bald, there is no result. */
+ string getHairColor() {
+ this = "Ronil" and result = "black"
+ or
+ this = "Dina" and result = "black"
+ or
+ this = "Ravi" and result = "black"
+ or
+ this = "Bruce" and result = "brown"
+ or
+ this = "Jo" and result = "red"
+ or
+ this = "Aida" and result = "blond"
+ or
+ this = "Esme" and result = "blond"
+ or
+ this = "Fred" and result = "gray"
+ or
+ this = "Meera" and result = "brown"
+ or
+ this = "Maya" and result = "brown"
+ or
+ this = "Chad" and result = "brown"
+ or
+ this = "Tiana" and result = "black"
+ or
+ this = "Laura" and result = "blond"
+ or
+ this = "George" and result = "blond"
+ or
+ this = "Will" and result = "blond"
+ or
+ this = "Mary" and result = "blond"
+ or
+ this = "Almira" and result = "black"
+ or
+ this = "Susannah" and result = "blond"
+ or
+ this = "Rhoda" and result = "blond"
+ or
+ this = "Cynthia" and result = "gray"
+ or
+ this = "Eunice" and result = "white"
+ or
+ this = "Olive" and result = "brown"
+ or
+ this = "Virginia" and result = "brown"
+ or
+ this = "Angeline" and result = "red"
+ or
+ this = "Helen" and result = "white"
+ or
+ this = "Cornelia" and result = "gray"
+ or
+ this = "Harriet" and result = "white"
+ or
+ this = "Mahala" and result = "black"
+ or
+ this = "Abby" and result = "red"
+ or
+ this = "Margaret" and result = "brown"
+ or
+ this = "Deb" and result = "brown"
+ or
+ this = "Minerva" and result = "brown"
+ or
+ this = "Severus" and result = "black"
+ or
+ this = "Lavina" and result = "brown"
+ or
+ this = "Adeline" and result = "brown"
+ or
+ this = "Cath" and result = "brown"
+ or
+ this = "Elisa" and result = "brown"
+ or
+ this = "Lucretia" and result = "gray"
+ or
+ this = "Anne" and result = "black"
+ or
+ this = "Eleanor" and result = "brown"
+ or
+ this = "Joanna" and result = "brown"
+ or
+ this = "Adam" and result = "black"
+ or
+ this = "Agnes" and result = "black"
+ or
+ this = "Rosanna" and result = "gray"
+ or
+ this = "Clara" and result = "blond"
+ or
+ this = "Melissa" and result = "brown"
+ or
+ this = "Amy" and result = "brown"
+ or
+ this = "Isabel" and result = "black"
+ or
+ this = "Jemima" and result = "red"
+ or
+ this = "Cordelia" and result = "red"
+ or
+ this = "Melinda" and result = "gray"
+ or
+ this = "Delila" and result = "white"
+ or
+ this = "Jeremiah" and result = "gray"
+ or
+ this = "Hester" and result = "black"
+ or
+ this = "Walter" and result = "black"
+ or
+ this = "Aaron" and result = "gray"
+ or
+ this = "Reuben" and result = "gray"
+ or
+ this = "Eli" and result = "gray"
+ or
+ this = "Amos" and result = "white"
+ or
+ this = "Augustus" and result = "white"
+ or
+ this = "Theodore" and result = "white"
+ or
+ this = "Timothy" and result = "brown"
+ or
+ this = "Cyrus" and result = "brown"
+ or
+ this = "Horace" and result = "brown"
+ or
+ this = "Simon" and result = "brown"
+ or
+ this = "Asa" and result = "brown"
+ or
+ this = "Frank" and result = "brown"
+ or
+ this = "Nelson" and result = "black"
+ or
+ this = "Leonard" and result = "black"
+ or
+ this = "Harrison" and result = "black"
+ or
+ this = "Anthony" and result = "black"
+ or
+ this = "Louis" and result = "black"
+ or
+ this = "Milton" and result = "blond"
+ or
+ this = "Noah" and result = "blond"
+ or
+ this = "Cornelius" and result = "red"
+ or
+ this = "Abdul" and result = "brown"
+ or
+ this = "Warren" and result = "red"
+ or
+ this = "Harvey" and result = "blond"
+ or
+ this = "Dennis" and result = "blond"
+ or
+ this = "Wesley" and result = "brown"
+ or
+ this = "Sylvester" and result = "brown"
+ or
+ this = "Gilbert" and result = "brown"
+ or
+ this = "Sullivan" and result = "brown"
+ or
+ this = "Edmund" and result = "brown"
+ or
+ this = "Wilson" and result = "blond"
+ or
+ this = "Perry" and result = "black"
+ or
+ this = "Simba" and result = "brown"
+ or
+ this = "Nala" and result = "brown"
+ or
+ this = "Rafiki" and result = "red"
+ or
+ this = "Shenzi" and result = "gray"
+ or
+ this = "Ernest" and result = "blond"
+ or
+ this = "Gertrude" and result = "brown"
+ or
+ this = "Oscar" and result = "blond"
+ or
+ this = "Lilian" and result = "brown"
+ or
+ this = "Raymond" and result = "brown"
+ or
+ this = "Elgar" and result = "brown"
+ or
+ this = "Elmer" and result = "brown"
+ or
+ this = "Herbert" and result = "brown"
+ or
+ this = "Maude" and result = "brown"
+ or
+ this = "Mae" and result = "brown"
+ or
+ this = "Otto" and result = "black"
+ or
+ this = "Edwin" and result = "black"
+ or
+ this = "Ophelia" and result = "brown"
+ or
+ this = "Parsley" and result = "brown"
+ or
+ this = "Sage" and result = "brown"
+ or
+ this = "Rosemary" and result = "brown"
+ or
+ this = "Thyme" and result = "brown"
+ or
+ this = "Garfunkel" and result = "brown"
+ or
+ this = "King Basil" and result = "brown"
+ or
+ this = "Stephen" and result = "black"
+ or
+ this = "Stephen" and result = "gray"
+ }
+
+ /** Gets the age of the person (in years). If the person is deceased, there is no result. */
+ int getAge() {
+ this = "Ronil" and result = 21
+ or
+ this = "Dina" and result = 53
+ or
+ this = "Ravi" and result = 16
+ or
+ this = "Bruce" and result = 35
+ or
+ this = "Jo" and result = 47
+ or
+ this = "Aida" and result = 26
+ or
+ this = "Esme" and result = 25
+ or
+ this = "Charlie" and result = 31
+ or
+ this = "Fred" and result = 68
+ or
+ this = "Meera" and result = 62
+ or
+ this = "Maya" and result = 29
+ or
+ this = "Chad" and result = 49
+ or
+ this = "Tiana" and result = 18
+ or
+ this = "Laura" and result = 2
+ or
+ this = "George" and result = 3
+ or
+ this = "Will" and result = 41
+ or
+ this = "Mary" and result = 51
+ or
+ this = "Almira" and result = 1
+ or
+ this = "Susannah" and result = 97
+ or
+ this = "Rhoda" and result = 39
+ or
+ this = "Cynthia" and result = 89
+ or
+ this = "Eunice" and result = 83
+ or
+ this = "Olive" and result = 25
+ or
+ this = "Virginia" and result = 52
+ or
+ this = "Angeline" and result = 22
+ or
+ this = "Helen" and result = 79
+ or
+ this = "Cornelia" and result = 59
+ or
+ this = "Harriet" and result = 57
+ or
+ this = "Mahala" and result = 61
+ or
+ this = "Abby" and result = 24
+ or
+ this = "Margaret" and result = 59
+ or
+ this = "Deb" and result = 31
+ or
+ this = "Minerva" and result = 72
+ or
+ this = "Severus" and result = 61
+ or
+ this = "Lavina" and result = 33
+ or
+ this = "Adeline" and result = 17
+ or
+ this = "Cath" and result = 22
+ or
+ this = "Elisa" and result = 9
+ or
+ this = "Lucretia" and result = 56
+ or
+ this = "Anne" and result = 11
+ or
+ this = "Eleanor" and result = 80
+ or
+ this = "Joanna" and result = 43
+ or
+ this = "Adam" and result = 37
+ or
+ this = "Agnes" and result = 47
+ or
+ this = "Rosanna" and result = 61
+ or
+ this = "Clara" and result = 31
+ or
+ this = "Melissa" and result = 37
+ or
+ this = "Amy" and result = 12
+ or
+ this = "Isabel" and result = 6
+ or
+ this = "Jemima" and result = 16
+ or
+ this = "Cordelia" and result = 21
+ or
+ this = "Melinda" and result = 55
+ or
+ this = "Delila" and result = 66
+ or
+ this = "Jeremiah" and result = 54
+ or
+ this = "Elijah" and result = 42
+ or
+ this = "Hester" and result = 68
+ or
+ this = "Walter" and result = 66
+ or
+ this = "Oliver" and result = 33
+ or
+ this = "Hugh" and result = 51
+ or
+ this = "Aaron" and result = 49
+ or
+ this = "Reuben" and result = 58
+ or
+ this = "Eli" and result = 70
+ or
+ this = "Amos" and result = 65
+ or
+ this = "Augustus" and result = 56
+ or
+ this = "Theodore" and result = 69
+ or
+ this = "Ira" and result = 1
+ or
+ this = "Timothy" and result = 54
+ or
+ this = "Cyrus" and result = 78
+ or
+ this = "Horace" and result = 34
+ or
+ this = "Simon" and result = 23
+ or
+ this = "Asa" and result = 28
+ or
+ this = "Frank" and result = 59
+ or
+ this = "Nelson" and result = 38
+ or
+ this = "Leonard" and result = 58
+ or
+ this = "Harrison" and result = 7
+ or
+ this = "Anthony" and result = 2
+ or
+ this = "Louis" and result = 34
+ or
+ this = "Milton" and result = 36
+ or
+ this = "Noah" and result = 48
+ or
+ this = "Cornelius" and result = 41
+ or
+ this = "Abdul" and result = 67
+ or
+ this = "Warren" and result = 47
+ or
+ this = "Harvey" and result = 31
+ or
+ this = "Dennis" and result = 39
+ or
+ this = "Wesley" and result = 13
+ or
+ this = "Sylvester" and result = 19
+ or
+ this = "Gilbert" and result = 16
+ or
+ this = "Sullivan" and result = 17
+ or
+ this = "Edmund" and result = 29
+ or
+ this = "Wilson" and result = 27
+ or
+ this = "Perry" and result = 31
+ or
+ this = "Matthew" and result = 55
+ or
+ this = "Simba" and result = 8
+ or
+ this = "Nala" and result = 7
+ or
+ this = "Rafiki" and result = 76
+ or
+ this = "Shenzi" and result = 67
+ }
+
+ /** Gets the height of the person (in cm). If the person is deceased, there is no result. */
+ float getHeight() {
+ this = "Ronil" and result = 183.0
+ or
+ this = "Dina" and result = 155.1
+ or
+ this = "Ravi" and result = 175.2
+ or
+ this = "Bruce" and result = 191.3
+ or
+ this = "Jo" and result = 163.4
+ or
+ this = "Aida" and result = 182.6
+ or
+ this = "Esme" and result = 176.9
+ or
+ this = "Charlie" and result = 189.7
+ or
+ this = "Fred" and result = 179.4
+ or
+ this = "Meera" and result = 160.1
+ or
+ this = "Maya" and result = 153.0
+ or
+ this = "Chad" and result = 168.5
+ or
+ this = "Tiana" and result = 149.7
+ or
+ this = "Laura" and result = 87.5
+ or
+ this = "George" and result = 96.4
+ or
+ this = "Will" and result = 167.1
+ or
+ this = "Mary" and result = 159.8
+ or
+ this = "Almira" and result = 62.1
+ or
+ this = "Susannah" and result = 145.8
+ or
+ this = "Rhoda" and result = 180.1
+ or
+ this = "Cynthia" and result = 161.8
+ or
+ this = "Eunice" and result = 153.2
+ or
+ this = "Olive" and result = 179.9
+ or
+ this = "Virginia" and result = 165.1
+ or
+ this = "Angeline" and result = 172.3
+ or
+ this = "Helen" and result = 163.1
+ or
+ this = "Cornelia" and result = 160.8
+ or
+ this = "Harriet" and result = 163.2
+ or
+ this = "Mahala" and result = 157.7
+ or
+ this = "Abby" and result = 174.5
+ or
+ this = "Margaret" and result = 165.6
+ or
+ this = "Deb" and result = 171.6
+ or
+ this = "Minerva" and result = 168.7
+ or
+ this = "Severus" and result = 188.8
+ or
+ this = "Lavina" and result = 155.1
+ or
+ this = "Adeline" and result = 165.5
+ or
+ this = "Cath" and result = 147.8
+ or
+ this = "Elisa" and result = 129.4
+ or
+ this = "Lucretia" and result = 153.6
+ or
+ this = "Anne" and result = 140.4
+ or
+ this = "Eleanor" and result = 151.1
+ or
+ this = "Joanna" and result = 167.2
+ or
+ this = "Adam" and result = 155.5
+ or
+ this = "Agnes" and result = 156.8
+ or
+ this = "Rosanna" and result = 162.4
+ or
+ this = "Clara" and result = 158.6
+ or
+ this = "Melissa" and result = 182.3
+ or
+ this = "Amy" and result = 147.1
+ or
+ this = "Isabel" and result = 121.4
+ or
+ this = "Jemima" and result = 149.8
+ or
+ this = "Cordelia" and result = 151.7
+ or
+ this = "Melinda" and result = 154.4
+ or
+ this = "Delila" and result = 163.4
+ or
+ this = "Jeremiah" and result = 167.5
+ or
+ this = "Elijah" and result = 184.5
+ or
+ this = "Hester" and result = 152.7
+ or
+ this = "Walter" and result = 159.6
+ or
+ this = "Oliver" and result = 192.4
+ or
+ this = "Hugh" and result = 173.1
+ or
+ this = "Aaron" and result = 176.6
+ or
+ this = "Reuben" and result = 169.9
+ or
+ this = "Eli" and result = 180.4
+ or
+ this = "Amos" and result = 167.4
+ or
+ this = "Augustus" and result = 156.5
+ or
+ this = "Theodore" and result = 176.6
+ or
+ this = "Ira" and result = 54.1
+ or
+ this = "Timothy" and result = 172.2
+ or
+ this = "Cyrus" and result = 157.9
+ or
+ this = "Horace" and result = 169.3
+ or
+ this = "Simon" and result = 157.1
+ or
+ this = "Asa" and result = 149.4
+ or
+ this = "Frank" and result = 167.2
+ or
+ this = "Nelson" and result = 173.0
+ or
+ this = "Leonard" and result = 172.0
+ or
+ this = "Harrison" and result = 126.0
+ or
+ this = "Anthony" and result = 98.4
+ or
+ this = "Louis" and result = 186.8
+ or
+ this = "Milton" and result = 157.8
+ or
+ this = "Noah" and result = 190.5
+ or
+ this = "Cornelius" and result = 183.1
+ or
+ this = "Abdul" and result = 182.0
+ or
+ this = "Warren" and result = 175.0
+ or
+ this = "Harvey" and result = 169.3
+ or
+ this = "Dennis" and result = 160.4
+ or
+ this = "Wesley" and result = 139.8
+ or
+ this = "Sylvester" and result = 188.2
+ or
+ this = "Gilbert" and result = 177.6
+ or
+ this = "Sullivan" and result = 168.3
+ or
+ this = "Edmund" and result = 159.2
+ or
+ this = "Wilson" and result = 167.6
+ or
+ this = "Perry" and result = 189.1
+ or
+ this = "Matthew" and result = 167.2
+ or
+ this = "Simba" and result = 140.1
+ or
+ this = "Nala" and result = 138.0
+ or
+ this = "Rafiki" and result = 139.3
+ or
+ this = "Shenzi" and result = 171.1
+ }
+
+ /** Gets the location of the person's home ("north", "south", "east", or "west"). If the person is deceased, there is no result. */
+ string getLocation() {
+ this = "Ronil" and result = "north"
+ or
+ this = "Dina" and result = "north"
+ or
+ this = "Ravi" and result = "north"
+ or
+ this = "Bruce" and result = "south"
+ or
+ this = "Jo" and result = "west"
+ or
+ this = "Aida" and result = "east"
+ or
+ this = "Esme" and result = "east"
+ or
+ this = "Charlie" and result = "south"
+ or
+ this = "Fred" and result = "west"
+ or
+ this = "Meera" and result = "south"
+ or
+ this = "Maya" and result = "south"
+ or
+ this = "Chad" and result = "south"
+ or
+ this = "Tiana" and result = "west"
+ or
+ this = "Laura" and result = "south"
+ or
+ this = "George" and result = "south"
+ or
+ this = "Will" and result = "south"
+ or
+ this = "Mary" and result = "south"
+ or
+ this = "Almira" and result = "south"
+ or
+ this = "Susannah" and result = "north"
+ or
+ this = "Rhoda" and result = "north"
+ or
+ this = "Cynthia" and result = "north"
+ or
+ this = "Eunice" and result = "north"
+ or
+ this = "Olive" and result = "west"
+ or
+ this = "Virginia" and result = "west"
+ or
+ this = "Angeline" and result = "west"
+ or
+ this = "Helen" and result = "west"
+ or
+ this = "Cornelia" and result = "east"
+ or
+ this = "Harriet" and result = "east"
+ or
+ this = "Mahala" and result = "east"
+ or
+ this = "Abby" and result = "east"
+ or
+ this = "Margaret" and result = "east"
+ or
+ this = "Deb" and result = "east"
+ or
+ this = "Minerva" and result = "south"
+ or
+ this = "Severus" and result = "north"
+ or
+ this = "Lavina" and result = "east"
+ or
+ this = "Adeline" and result = "west"
+ or
+ this = "Cath" and result = "east"
+ or
+ this = "Elisa" and result = "east"
+ or
+ this = "Lucretia" and result = "north"
+ or
+ this = "Anne" and result = "north"
+ or
+ this = "Eleanor" and result = "south"
+ or
+ this = "Joanna" and result = "south"
+ or
+ this = "Adam" and result = "east"
+ or
+ this = "Agnes" and result = "east"
+ or
+ this = "Rosanna" and result = "east"
+ or
+ this = "Clara" and result = "east"
+ or
+ this = "Melissa" and result = "west"
+ or
+ this = "Amy" and result = "west"
+ or
+ this = "Isabel" and result = "west"
+ or
+ this = "Jemima" and result = "west"
+ or
+ this = "Cordelia" and result = "west"
+ or
+ this = "Melinda" and result = "west"
+ or
+ this = "Delila" and result = "south"
+ or
+ this = "Jeremiah" and result = "north"
+ or
+ this = "Elijah" and result = "north"
+ or
+ this = "Hester" and result = "east"
+ or
+ this = "Walter" and result = "east"
+ or
+ this = "Oliver" and result = "east"
+ or
+ this = "Hugh" and result = "south"
+ or
+ this = "Aaron" and result = "south"
+ or
+ this = "Reuben" and result = "west"
+ or
+ this = "Eli" and result = "west"
+ or
+ this = "Amos" and result = "east"
+ or
+ this = "Augustus" and result = "south"
+ or
+ this = "Theodore" and result = "west"
+ or
+ this = "Ira" and result = "south"
+ or
+ this = "Timothy" and result = "north"
+ or
+ this = "Cyrus" and result = "north"
+ or
+ this = "Horace" and result = "east"
+ or
+ this = "Simon" and result = "east"
+ or
+ this = "Asa" and result = "east"
+ or
+ this = "Frank" and result = "west"
+ or
+ this = "Nelson" and result = "west"
+ or
+ this = "Leonard" and result = "west"
+ or
+ this = "Harrison" and result = "north"
+ or
+ this = "Anthony" and result = "north"
+ or
+ this = "Louis" and result = "north"
+ or
+ this = "Milton" and result = "south"
+ or
+ this = "Noah" and result = "south"
+ or
+ this = "Cornelius" and result = "east"
+ or
+ this = "Abdul" and result = "east"
+ or
+ this = "Warren" and result = "west"
+ or
+ this = "Harvey" and result = "west"
+ or
+ this = "Dennis" and result = "west"
+ or
+ this = "Wesley" and result = "west"
+ or
+ this = "Sylvester" and result = "south"
+ or
+ this = "Gilbert" and result = "east"
+ or
+ this = "Sullivan" and result = "east"
+ or
+ this = "Edmund" and result = "north"
+ or
+ this = "Wilson" and result = "north"
+ or
+ this = "Perry" and result = "west"
+ or
+ this = "Matthew" and result = "east"
+ or
+ this = "Simba" and result = "south"
+ or
+ this = "Nala" and result = "south"
+ or
+ this = "Rafiki" and result = "north"
+ or
+ this = "Shenzi" and result = "west"
+ }
+
+ /** Holds if the person is deceased. */
+ predicate isDeceased() {
+ this = "Ernest" or
+ this = "Gertrude" or
+ this = "Oscar" or
+ this = "Lilian" or
+ this = "Edwin" or
+ this = "Raymond" or
+ this = "Elgar" or
+ this = "Elmer" or
+ this = "Herbert" or
+ this = "Maude" or
+ this = "Mae" or
+ this = "Otto" or
+ this = "Ophelia" or
+ this = "Parsley" or
+ this = "Sage" or
+ this = "Rosemary" or
+ this = "Thyme" or
+ this = "Garfunkel" or
+ this = "King Basil"
+ }
+
+ /** Gets a parent of the person (alive or deceased). */
+ Person getAParent() {
+ this = "Stephen" and result = "Edmund"
+ or
+ this = "Edmund" and result = "Augustus"
+ or
+ this = "Augustus" and result = "Stephen"
+ or
+ this = "Abby" and result = "Cornelia"
+ or
+ this = "Abby" and result = "Amos"
+ or
+ this = "Abdul" and result = "Susannah"
+ or
+ this = "Adam" and result = "Amos"
+ or
+ this = "Adeline" and result = "Melinda"
+ or
+ this = "Adeline" and result = "Frank"
+ or
+ this = "Agnes" and result = "Abdul"
+ or
+ this = "Aida" and result = "Agnes"
+ or
+ this = "Almira" and result = "Sylvester"
+ or
+ this = "Amos" and result = "Eunice"
+ or
+ this = "Amy" and result = "Noah"
+ or
+ this = "Amy" and result = "Chad"
+ or
+ this = "Angeline" and result = "Reuben"
+ or
+ this = "Angeline" and result = "Lucretia"
+ or
+ this = "Anne" and result = "Rhoda"
+ or
+ this = "Anne" and result = "Louis"
+ or
+ this = "Anthony" and result = "Lavina"
+ or
+ this = "Anthony" and result = "Asa"
+ or
+ this = "Asa" and result = "Cornelia"
+ or
+ this = "Cath" and result = "Harriet"
+ or
+ this = "Charlie" and result = "Matthew"
+ or
+ this = "Clara" and result = "Ernest"
+ or
+ this = "Cornelia" and result = "Cynthia"
+ or
+ this = "Cornelius" and result = "Eli"
+ or
+ this = "Deb" and result = "Margaret"
+ or
+ this = "Dennis" and result = "Fred"
+ or
+ this = "Eli" and result = "Susannah"
+ or
+ this = "Elijah" and result = "Delila"
+ or
+ this = "Elisa" and result = "Deb"
+ or
+ this = "Elisa" and result = "Horace"
+ or
+ this = "Esme" and result = "Margaret"
+ or
+ this = "Frank" and result = "Eleanor"
+ or
+ this = "Frank" and result = "Cyrus"
+ or
+ this = "George" and result = "Maya"
+ or
+ this = "George" and result = "Wilson"
+ or
+ this = "Gilbert" and result = "Cornelius"
+ or
+ this = "Harriet" and result = "Cynthia"
+ or
+ this = "Harrison" and result = "Louis"
+ or
+ this = "Harvey" and result = "Fred"
+ or
+ this = "Helen" and result = "Susannah"
+ or
+ this = "Hester" and result = "Edwin"
+ or
+ this = "Hugh" and result = "Cyrus"
+ or
+ this = "Hugh" and result = "Helen"
+ or
+ this = "Ira" and result = "Maya"
+ or
+ this = "Ira" and result = "Wilson"
+ or
+ this = "Isabel" and result = "Perry"
+ or
+ this = "Isabel" and result = "Harvey"
+ or
+ this = "Jemima" and result = "Melinda"
+ or
+ this = "Jemima" and result = "Frank"
+ or
+ this = "Ernest" and result = "Lilian"
+ or
+ this = "Ernest" and result = "Oscar"
+ or
+ this = "Gertrude" and result = "Ophelia"
+ or
+ this = "Gertrude" and result = "Raymond"
+ or
+ this = "Lilian" and result = "Elgar"
+ or
+ this = "Lilian" and result = "Mae"
+ or
+ this = "Raymond" and result = "Elgar"
+ or
+ this = "Raymond" and result = "Mae"
+ or
+ this = "Elmer" and result = "Ophelia"
+ or
+ this = "Elmer" and result = "Raymond"
+ or
+ this = "Herbert" and result = "Ophelia"
+ or
+ this = "Herbert" and result = "Raymond"
+ or
+ this = "Maude" and result = "Ophelia"
+ or
+ this = "Maude" and result = "Raymond"
+ or
+ this = "Otto" and result = "Elgar"
+ or
+ this = "Otto" and result = "Mae"
+ or
+ this = "Edwin" and result = "Otto"
+ or
+ this = "Parsley" and result = "Simon"
+ or
+ this = "Parsley" and result = "Garfunkel"
+ or
+ this = "Sage" and result = "Simon"
+ or
+ this = "Sage" and result = "Garfunkel"
+ or
+ this = "Rosemary" and result = "Simon"
+ or
+ this = "Rosemary" and result = "Garfunkel"
+ or
+ this = "Thyme" and result = "Simon"
+ or
+ this = "Thyme" and result = "Garfunkel"
+ or
+ this = "King Basil" and result = "Ophelia"
+ or
+ this = "King Basil" and result = "Raymond"
+ or
+ this = "Jo" and result = "Theodore"
+ or
+ this = "Joanna" and result = "Shenzi"
+ or
+ this = "Laura" and result = "Maya"
+ or
+ this = "Laura" and result = "Wilson"
+ or
+ this = "Lavina" and result = "Mahala"
+ or
+ this = "Lavina" and result = "Walter"
+ or
+ this = "Leonard" and result = "Cyrus"
+ or
+ this = "Leonard" and result = "Helen"
+ or
+ this = "Lucretia" and result = "Eleanor"
+ or
+ this = "Lucretia" and result = "Cyrus"
+ or
+ this = "Mahala" and result = "Eunice"
+ or
+ this = "Margaret" and result = "Cynthia"
+ or
+ this = "Matthew" and result = "Cyrus"
+ or
+ this = "Matthew" and result = "Helen"
+ or
+ this = "Maya" and result = "Meera"
+ or
+ this = "Melinda" and result = "Rafiki"
+ or
+ this = "Melissa" and result = "Mahala"
+ or
+ this = "Melissa" and result = "Walter"
+ or
+ this = "Nala" and result = "Bruce"
+ or
+ this = "Nelson" and result = "Mahala"
+ or
+ this = "Nelson" and result = "Walter"
+ or
+ this = "Noah" and result = "Eli"
+ or
+ this = "Olive" and result = "Reuben"
+ or
+ this = "Olive" and result = "Lucretia"
+ or
+ this = "Oliver" and result = "Matthew"
+ or
+ this = "Perry" and result = "Leonard"
+ or
+ this = "Ravi" and result = "Dina"
+ or
+ this = "Simba" and result = "Will"
+ or
+ this = "Simon" and result = "Margaret"
+ or
+ this = "Sullivan" and result = "Cornelius"
+ or
+ this = "Sylvester" and result = "Timothy"
+ or
+ this = "Theodore" and result = "Susannah"
+ or
+ this = "Tiana" and result = "Jo"
+ or
+ this = "Virginia" and result = "Helen"
+ or
+ this = "Warren" and result = "Shenzi"
+ or
+ this = "Wesley" and result = "Warren"
+ or
+ this = "Wesley" and result = "Jo"
+ or
+ this = "Will" and result = "Eli"
+ }
+
+ /** Holds if the person is allowed in the region. Initially, all villagers are allowed in every region. */
+ predicate isAllowedIn(string region) {
+ region = "north" or
+ region = "south" or
+ region = "east" or
+ region = "west"
+ }
+}
+
+/** Gets a parent of the person `p`. */
+Person parentOf(Person p) { result = p.getAParent() }
diff --git a/repo-tests/codeql/cpp/ql/src/AlertSuppression.ql b/repo-tests/codeql/cpp/ql/src/AlertSuppression.ql
new file mode 100644
index 00000000000..9a3983ed515
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/AlertSuppression.ql
@@ -0,0 +1,82 @@
+/**
+ * @name Alert suppression
+ * @description Generates information about alert suppressions.
+ * @kind alert-suppression
+ * @id cpp/alert-suppression
+ */
+
+import cpp
+
+/**
+ * An alert suppression comment.
+ */
+class SuppressionComment extends Comment {
+ string annotation;
+ string text;
+
+ SuppressionComment() {
+ (
+ this instanceof CppStyleComment and
+ // strip the two leading slashes
+ text = getContents().suffix(2)
+ or
+ this instanceof CStyleComment and
+ // strip both the leading /* and the trailing */ of the comment
+ exists(string text0 |
+ text0 = getContents().suffix(2) and
+ text = text0.prefix(text0.length() - 2)
+ ) and
+ // The /* */ comment must be a single-line comment
+ not text.matches("%\n%")
+ ) and
+ (
+ // match `lgtm[...]` anywhere in the comment
+ annotation = text.regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _)
+ or
+ // match `lgtm` at the start of the comment or after a semicolon
+ annotation = text.regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim()
+ )
+ }
+
+ /** Gets the text in this comment, excluding the comment delimiters (`//`, or `/*` and `*/`). */
+ string getText() { result = text }
+
+ /** Gets the suppression annotation in this comment. */
+ string getAnnotation() { result = annotation }
+
+ /**
+ * Holds if this comment applies to the range from column `startcolumn` of line `startline`
+ * to column `endcolumn` of line `endline` in file `filepath`.
+ */
+ predicate covers(string filepath, int startline, int startcolumn, int endline, int endcolumn) {
+ this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn) and
+ startcolumn = 1
+ }
+
+ /** Gets the scope of this suppression. */
+ SuppressionScope getScope() { result = this }
+}
+
+/**
+ * The scope of an alert suppression comment.
+ */
+class SuppressionScope extends ElementBase instanceof SuppressionComment {
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ super.covers(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+from SuppressionComment c
+select c, // suppression comment
+ c.getText(), // text of suppression comment (excluding delimiters)
+ c.getAnnotation(), // text of suppression annotation
+ c.getScope() // scope of suppression
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/FeatureEnvy.ql b/repo-tests/codeql/cpp/ql/src/Architecture/FeatureEnvy.ql
new file mode 100644
index 00000000000..90a4db6b747
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/FeatureEnvy.ql
@@ -0,0 +1,71 @@
+/**
+ * @name Feature envy
+ * @description A function that uses more functions and variables from another file than functions and variables from its own file. This function might be better placed in the other file, to avoid exposing internals of the file it depends on.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision medium
+ * @id cpp/feature-envy
+ * @tags maintainability
+ * modularity
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+predicate functionUsesVariable(Function source, Variable v, File target) {
+ v.getAnAccess().getEnclosingFunction() = source and
+ not v.(LocalScopeVariable).getFunction() = source and
+ v.getFile() = target
+}
+
+predicate functionUsesFunction(Function source, Function f, File target) {
+ exists(FunctionCall fc | fc.getEnclosingFunction() = source and fc.getTarget() = f) and
+ f.getFile() = target
+}
+
+predicate dependencyCount(Function source, File target, int res) {
+ res =
+ strictcount(Declaration d |
+ functionUsesVariable(source, d, target) or
+ functionUsesFunction(source, d, target)
+ )
+}
+
+predicate selfDependencyCountOrZero(Function source, int res) {
+ exists(File target | target = source.getFile() and onlyInFile(source, target) |
+ res = max(int i | dependencyCount(source, target, i) or i = 0)
+ )
+}
+
+predicate dependsHighlyOn(Function source, File target, int res) {
+ dependencyCount(source, target, res) and
+ target.fromSource() and
+ exists(int selfCount |
+ selfDependencyCountOrZero(source, selfCount) and
+ res > 2 * selfCount and
+ res > 4
+ )
+}
+
+predicate onlyInFile(Function f, File file) {
+ file = f.getFile() and
+ not exists(File file2 | file2 = f.getFile() and file2 != file)
+}
+
+from Function f, File other, int selfCount, int depCount, string selfDeps
+where
+ dependsHighlyOn(f, other, depCount) and
+ selfDependencyCountOrZero(f, selfCount) and
+ not exists(File yetAnother | dependsHighlyOn(f, yetAnother, _) and yetAnother != other) and
+ not other instanceof HeaderFile and
+ not f instanceof MemberFunction and
+ if selfCount = 0
+ then selfDeps = "0 dependencies"
+ else
+ if selfCount = 1
+ then selfDeps = "only 1 dependency"
+ else selfDeps = "only " + selfCount.toString() + " dependencies"
+select f,
+ "Function " + f.getName() + " could be moved to file $@" + " since it has " + depCount.toString() +
+ " dependencies to that file, but " + selfDeps + " to its own file.", other, other.getBaseName()
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/ClassHierarchies.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/ClassHierarchies.ql
new file mode 100644
index 00000000000..4d17618f1bc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/ClassHierarchies.ql
@@ -0,0 +1,14 @@
+/**
+ * @name Class hierarchies
+ * @description Shows an inheritance hierarchy for classes and their base classes.
+ * @kind graph
+ * @id cpp/architecture/class-hierarchies
+ * @graph.layout organic
+ * @tags maintainability
+ */
+
+import cpp
+
+from Class s
+where s.fromSource()
+select s, s.getABaseClass()
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/HubClasses.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/HubClasses.ql
new file mode 100644
index 00000000000..7361e875581
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/HubClasses.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Hub classes
+ * @description Shows coupling between classes. Large, red, boxes are hub types that depend on many other classes
+ * and are depended on by many other classes.
+ * @kind table
+ * @id cpp/architecture/hub-classes
+ * @treemap.warnOn highValues
+ * @tags maintainability
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c as Class, c.getMetrics().getAfferentCoupling() as AfferentCoupling,
+ c.getMetrics().getEfferentSourceCoupling() as EfferentCoupling order by AfferentCoupling desc
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/InheritanceDepthDistribution.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/InheritanceDepthDistribution.ql
new file mode 100644
index 00000000000..0fc6f0ba2a7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Class-Level Information/InheritanceDepthDistribution.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Inheritance depth distribution
+ * @description Shows the distribution of inheritance depth across all classes.
+ * @kind chart
+ * @id cpp/architecture/inheritance-depth-distribution
+ * @chart.type line
+ * @tags maintainability
+ */
+
+import cpp
+
+/** Holds if class `c` is from source and has inheritance depth `d`. */
+predicate hasInheritanceDepth(Class c, int d) {
+ c.fromSource() and d = c.getMetrics().getInheritanceDepth()
+}
+
+from int depth
+where hasInheritanceDepth(_, depth)
+select depth as InheritanceDepth, count(Class c | hasInheritanceDepth(c, depth)) as NumberOfClasses
+ order by InheritanceDepth
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/CyclicNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/CyclicNamespaces.ql
new file mode 100644
index 00000000000..65b1932cf77
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/CyclicNamespaces.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Cyclic namespaces
+ * @description Shows namespaces that cyclically depend on one another.
+ * @kind graph
+ * @id cpp/architecture/cyclic-namespaces
+ * @graph.layout hierarchical
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from MetricNamespace a, MetricNamespace b
+where a.getANamespaceDependency() = b and b.getANamespaceDependency*() = a
+select a, b
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/GlobalNamespaceClasses.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/GlobalNamespaceClasses.ql
new file mode 100644
index 00000000000..eec387e2957
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/GlobalNamespaceClasses.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Global namespace classes
+ * @description Finds classes that belong to no namespace.
+ * @kind problem
+ * @problem.severity recommendation
+ * @id cpp/architecture/global-namespace-classes
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from Class c
+where
+ c.fromSource() and
+ c.isTopLevel() and
+ c.getParentScope() instanceof GlobalNamespace
+select c, "This class is not declared in any namespace"
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/NamespaceDependencies.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/NamespaceDependencies.ql
new file mode 100644
index 00000000000..d9131a77121
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Namespace-Level Information/NamespaceDependencies.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Namespace dependencies
+ * @description Shows dependencies between namespaces as a hierarchical graph.
+ * @kind graph
+ * @id cpp/architecture/namespace-dependencies
+ * @graph.layout hierarchical
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from MetricNamespace a, MetricNamespace b
+where a.getANamespaceDependency() = b
+select a, b
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/General Top-Level Information/GeneralStatistics.ql b/repo-tests/codeql/cpp/ql/src/Architecture/General Top-Level Information/GeneralStatistics.ql
new file mode 100644
index 00000000000..6f842baec42
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/General Top-Level Information/GeneralStatistics.ql
@@ -0,0 +1,54 @@
+/**
+ * @name General statistics
+ * @description Shows general statistics about the application.
+ * @kind table
+ * @id cpp/architecture/general-statistics
+ * @tags maintainability
+ */
+
+import cpp
+
+from string l, string n
+where
+ l = "Number of Namespaces" and
+ n = count(Namespace p | p.fromSource()).toString()
+ or
+ l = "Number of Files" and
+ n = count(File f | f.fromSource()).toString()
+ or
+ l = "Number of Header Files" and
+ n = count(HeaderFile f | f.fromSource()).toString()
+ or
+ l = "Number of C Files" and
+ n = count(CFile f | f.fromSource()).toString()
+ or
+ l = "Number of C++ Files" and
+ n = count(CppFile f | f.fromSource()).toString()
+ or
+ l = "Number of Classes" and
+ n = count(Class c | c.fromSource() and not c instanceof Struct).toString()
+ or
+ l = "Number of Structs" and
+ n = count(Struct s | s.fromSource() and not s instanceof Union).toString()
+ or
+ l = "Number of Unions" and
+ n = count(Union u | u.fromSource()).toString()
+ or
+ l = "Number of Functions" and
+ n = count(Function f | f.fromSource()).toString()
+ or
+ l = "Number of Lines Of Code" and
+ n =
+ sum(File f, int toSum |
+ f.fromSource() and toSum = f.getMetrics().getNumberOfLinesOfCode()
+ |
+ toSum
+ ).toString()
+ or
+ l = "Self-Containedness" and
+ n =
+ (
+ 100 * sum(Class c | c.fromSource() | c.getMetrics().getEfferentSourceCoupling()) /
+ sum(Class c | c.fromSource() | c.getMetrics().getEfferentCoupling())
+ ).toString() + "%"
+select l as Title, n as Value
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/InappropriateIntimacy.ql b/repo-tests/codeql/cpp/ql/src/Architecture/InappropriateIntimacy.ql
new file mode 100644
index 00000000000..a6879a13323
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/InappropriateIntimacy.ql
@@ -0,0 +1,67 @@
+/**
+ * @name Inappropriate Intimacy
+ * @description Two files share too much information about each other (accessing many operations or variables in both directions). It would be better to invert some of the dependencies to reduce the coupling between the two files.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision medium
+ * @id cpp/file-intimacy
+ * @tags maintainability
+ * modularity
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+predicate remoteVarAccess(File source, File target, VariableAccess va) {
+ va.getFile() = source and
+ va.getTarget().getFile() = target and
+ // Ignore variables with locations in multiple files
+ strictcount(File f | f = va.getTarget().getFile()) = 1 and
+ source != target
+}
+
+predicate remoteFunAccess(File source, File target, FunctionCall fc) {
+ fc.getFile() = source and
+ fc.getTarget().getFile() = target and
+ // Ignore functions with locations in multiple files
+ strictcount(File f | f = fc.getTarget().getFile()) = 1 and
+ source != target
+}
+
+predicate candidateFilePair(File source, File target) {
+ remoteVarAccess(source, target, _) or
+ remoteFunAccess(source, target, _)
+}
+
+predicate variableDependencyCount(File source, File target, int res) {
+ candidateFilePair(source, target) and
+ res = count(VariableAccess va | remoteVarAccess(source, target, va))
+}
+
+predicate functionDependencyCount(File source, File target, int res) {
+ candidateFilePair(source, target) and
+ res = count(FunctionCall fc | remoteFunAccess(source, target, fc))
+}
+
+predicate highDependencyCount(File source, File target, int res) {
+ exists(int varCount, int funCount |
+ variableDependencyCount(source, target, varCount) and
+ functionDependencyCount(source, target, funCount) and
+ res = varCount + funCount and
+ res > 20
+ )
+}
+
+from File a, File b, int ca, int cb
+where
+ highDependencyCount(a, b, ca) and
+ highDependencyCount(b, a, cb) and
+ ca >= cb and
+ a != b and
+ not a instanceof HeaderFile and
+ not b instanceof HeaderFile and
+ b.getShortName().trim().length() > 0
+select a,
+ "File is too closely tied to $@ (" + ca.toString() + " dependencies one way and " + cb.toString() +
+ " the other).", b, b.getBaseName()
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ClassesWithManyDependencies.ql b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ClassesWithManyDependencies.ql
new file mode 100644
index 00000000000..384af9ebef8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ClassesWithManyDependencies.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Classes with too many source dependencies
+ * @description Finds classes that depend on many other types; they could probably be refactored into smaller classes with fewer dependencies.
+ * @kind problem
+ * @id cpp/architecture/classes-with-many-dependencies
+ * @problem.severity recommendation
+ * @tags maintainability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+from Class t, int n
+where
+ t.fromSource() and
+ n = t.getMetrics().getEfferentSourceCoupling() and
+ n > 10
+select t as Class, "This class has too many dependencies (" + n.toString() + ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ClassesWithManyFields.ql b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ClassesWithManyFields.ql
new file mode 100644
index 00000000000..5f11a9e0830
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ClassesWithManyFields.ql
@@ -0,0 +1,185 @@
+/**
+ * @name Classes with too many fields
+ * @description Finds classes with many fields; they could probably be refactored by breaking them down into smaller classes, and using composition.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision medium
+ * @id cpp/class-many-fields
+ * @tags maintainability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+/**
+ * Gets a string describing the kind of a `Class`.
+ */
+string kindstr(Class c) {
+ exists(int kind | usertypes(unresolveElement(c), _, kind) |
+ kind = 1 and result = "Struct"
+ or
+ kind = 2 and result = "Class"
+ or
+ kind = 6 and result = "Template class"
+ )
+}
+
+/**
+ * Holds if the arguments correspond to information about a `VariableDeclarationEntry`.
+ */
+predicate vdeInfo(VariableDeclarationEntry vde, Class c, File f, int line) {
+ c = vde.getVariable().getDeclaringType() and
+ f = vde.getLocation().getFile() and
+ line = vde.getLocation().getStartLine()
+}
+
+newtype TVariableDeclarationInfo =
+ TVariableDeclarationLine(Class c, File f, int line) { vdeInfo(_, c, f, line) }
+
+/**
+ * A line that contains one or more `VariableDeclarationEntry`s (in the same class).
+ */
+class VariableDeclarationLine extends TVariableDeclarationInfo {
+ Class c;
+ File f;
+ int line;
+
+ VariableDeclarationLine() {
+ vdeInfo(_, c, f, line) and
+ this = TVariableDeclarationLine(c, f, line)
+ }
+
+ /**
+ * Gets the class associated with this `VariableDeclarationLine`.
+ */
+ Class getClass() { result = c }
+
+ /**
+ * Gets the line of this `VariableDeclarationLine`.
+ */
+ int getLine() { result = line }
+
+ /**
+ * Gets a `VariableDeclarationEntry` on this line.
+ */
+ VariableDeclarationEntry getAVDE() { vdeInfo(result, c, f, line) }
+
+ /**
+ * Gets the start column of the first `VariableDeclarationEntry` on this line.
+ */
+ int getStartColumn() { result = min(getAVDE().getLocation().getStartColumn()) }
+
+ /**
+ * Gets the end column of the last `VariableDeclarationEntry` on this line.
+ */
+ int getEndColumn() { result = max(getAVDE().getLocation().getEndColumn()) }
+
+ /**
+ * Gets the rank of this `VariableDeclarationLine` in its file and class
+ * (that is, the first is 1, the second is 2 and so on; QL `rank` is 1-based).
+ */
+ private int getRank() {
+ line =
+ rank[result](VariableDeclarationLine vdl, int l | vdl = TVariableDeclarationLine(c, f, l) | l)
+ }
+
+ /**
+ * Gets the `VariableDeclarationLine` following this one, if any.
+ */
+ VariableDeclarationLine getNext() {
+ result = TVariableDeclarationLine(c, f, _) and
+ result.getRank() = getRank() + 1
+ }
+
+ /**
+ * Gets the `VariableDeclarationLine` following this one, if it is nearby.
+ */
+ VariableDeclarationLine getProximateNext() {
+ result = getNext() and
+ result.getLine() <= this.getLine() + 3
+ }
+
+ string toString() { result = "VariableDeclarationLine" }
+}
+
+/**
+ * A group of `VariableDeclarationEntry`s in the same class that are approximately
+ * contiguous.
+ */
+class VariableDeclarationGroup extends VariableDeclarationLine {
+ VariableDeclarationLine end;
+
+ VariableDeclarationGroup() {
+ // there is no `VariableDeclarationLine` within three lines previously
+ not any(VariableDeclarationLine prev).getProximateNext() = this and
+ // `end` is the last transitively proximate line
+ end = getProximateNext*() and
+ not exists(end.getProximateNext())
+ }
+
+ predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
+ path = f.getAbsolutePath() and
+ startline = getLine() and
+ startcol = getStartColumn() and
+ endline = end.getLine() and
+ endcol = end.getEndColumn()
+ }
+
+ /**
+ * Gets the number of uniquely named `VariableDeclarationEntry`s in this group.
+ */
+ int getCount() {
+ result =
+ count(VariableDeclarationLine l |
+ l = getProximateNext*()
+ |
+ l.getAVDE().getVariable().getName()
+ )
+ }
+
+ override string toString() {
+ getCount() = 1 and
+ result = "declaration of " + getAVDE().getVariable().getName()
+ or
+ getCount() > 1 and
+ result = "group of " + getCount() + " fields here"
+ }
+}
+
+class ExtClass extends Class {
+ predicate hasOneVariableGroup() {
+ strictcount(VariableDeclarationGroup vdg | vdg.getClass() = this) = 1
+ }
+
+ predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
+ if hasOneVariableGroup()
+ then
+ exists(VariableDeclarationGroup vdg | vdg.getClass() = this |
+ vdg.hasLocationInfo(path, startline, startcol, endline, endcol)
+ )
+ else getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
+ }
+}
+
+from ExtClass c, int n, VariableDeclarationGroup vdg, string suffix
+where
+ n =
+ strictcount(string fieldName |
+ exists(Field f |
+ f.getDeclaringType() = c and
+ fieldName = f.getName() and
+ // IBOutlet's are a way of building GUIs
+ // automatically out of ObjC properties.
+ // We don't want to count those for the
+ // purposes of this query.
+ not f.getType().getAnAttribute().hasName("iboutlet")
+ )
+ ) and
+ n > 15 and
+ not c.isConstructedFrom(_) and
+ c = vdg.getClass() and
+ if c.hasOneVariableGroup() then suffix = "" else suffix = " - see $@"
+select c,
+ kindstr(c) + " " + c.getName() + " has " + n +
+ " fields; we suggest refactoring to 15 fields or fewer" + suffix + ".", vdg, vdg.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ComplexFunctions.ql b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ComplexFunctions.ql
new file mode 100644
index 00000000000..5fe0585131c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/ComplexFunctions.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Complex functions
+ * @description Finds functions which call too many other functions. Splitting these functions would increase maintainability and readability.
+ * @kind problem
+ * @id cpp/architecture/complex-functions
+ * @problem.severity recommendation
+ * @tags maintainability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+from Function f, int n
+where
+ f.fromSource() and
+ n = f.getMetrics().getNumberOfCalls() and
+ n > 99 and
+ not f.isMultiplyDefined()
+select f as Function, "This function makes too many calls (" + n.toString() + ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/CyclomaticComplexity.ql b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/CyclomaticComplexity.ql
new file mode 100644
index 00000000000..141ec33118b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/CyclomaticComplexity.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Cyclomatic Complexity
+ * @description Functions with high cyclomatic complexity. With increasing cyclomatic complexity there need to be more test cases that are necessary to achieve a complete branch coverage when testing this function.
+ * @kind problem
+ * @id cpp/architecture/cyclomatic-complexity
+ * @problem.severity warning
+ * @tags testability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+from Function f, int complexity
+where
+ complexity = f.getMetrics().getCyclomaticComplexity() and
+ complexity > 250
+select f, "Function has high cyclomatic complexity: " + complexity.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/FunctionsWithManyParameters.ql b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/FunctionsWithManyParameters.ql
new file mode 100644
index 00000000000..0621bde812c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Architecture/Refactoring Opportunities/FunctionsWithManyParameters.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Functions with too many parameters
+ * @description Finds functions with many parameters;
+ * they could probably be refactored by wrapping parameters into a struct.
+ * @kind problem
+ * @id cpp/architecture/functions-with-many-parameters
+ * @problem.severity recommendation
+ * @tags testability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+from Function f
+where
+ f.fromSource() and
+ f.getMetrics().getNumberOfParameters() > 15
+select f,
+ "This function has too many parameters (" + f.getMetrics().getNumberOfParameters().toString() +
+ ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/BlockWithTooManyStatements.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/BlockWithTooManyStatements.ql
new file mode 100644
index 00000000000..97481bc8b03
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/BlockWithTooManyStatements.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Block with too many statements
+ * @description Blocks with too many consecutive statements are candidates for refactoring. Only complex statements are counted here (eg. for, while, switch ...). The top-level logic will be clearer if each complex statement is extracted to a function.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/complex-block
+ * @tags testability
+ * readability
+ * maintainability
+ */
+
+import cpp
+
+class ComplexStmt extends Stmt {
+ ComplexStmt() {
+ exists(BlockStmt body |
+ body = this.(Loop).getStmt() or
+ body = this.(SwitchStmt).getStmt()
+ |
+ strictcount(body.getAStmt+()) > 6
+ ) and
+ not exists(this.getGeneratingMacro())
+ }
+}
+
+from BlockStmt b, int n, ComplexStmt complexStmt
+where
+ n = strictcount(ComplexStmt s | s = b.getAStmt()) and
+ n > 3 and
+ complexStmt = b.getAStmt()
+select b,
+ "Block with too many statements (" + n.toString() +
+ " complex statements in the block). Complex statements at: $@", complexStmt,
+ complexStmt.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/ComplexCondition.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/ComplexCondition.ql
new file mode 100644
index 00000000000..20ecd1b1ade
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/ComplexCondition.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Complex condition
+ * @description Boolean expressions that are too deeply nested are hard to read and understand. Consider naming intermediate results as local variables.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/complex-condition
+ * @tags testability
+ * readability
+ * maintainability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+predicate logicalOp(string op) { op = "&&" or op = "||" }
+
+predicate nontrivialLogicalOperator(Operation e) {
+ exists(string op |
+ op = e.getOperator() and
+ logicalOp(op) and
+ not op = e.getParent().(Operation).getOperator()
+ ) and
+ not e.isInMacroExpansion()
+}
+
+from Expr e, int operators
+where
+ not e.getParent() instanceof Expr and
+ operators = count(Operation op | op.getParent*() = e and nontrivialLogicalOperator(op)) and
+ operators > 5
+select e, "Complex condition: too many logical operations in this expression."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/AccidentalRethrow.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/AccidentalRethrow.ql
new file mode 100644
index 00000000000..1e902579a49
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/AccidentalRethrow.ql
@@ -0,0 +1,26 @@
+/**
+ * @name Accidental rethrow
+ * @description When there is nothing to rethrow, attempting to rethrow an exception will terminate the program.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/rethrow-no-exception
+ * @tags reliability
+ * correctness
+ * exceptions
+ */
+
+import cpp
+
+predicate isInCatch(Expr e) {
+ e.getEnclosingStmt().getParent*() instanceof CatchBlock // Lexically enclosing catch blocks will cause there to be a current exception,
+ or
+ exists(Function f | f = e.getEnclosingFunction() |
+ isInCatch(f.getACallToThisFunction()) or // as will dynamically enclosing catch blocks.
+ f.getName().toLowerCase().matches("%exception%") // We assume that rethrows are intended when the function is called *exception*.
+ )
+}
+
+from ReThrowExpr e
+where not isInCatch(e)
+select e, "As there is no current exception, this rethrow expression will terminate the program."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/CatchingByValue.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/CatchingByValue.ql
new file mode 100644
index 00000000000..28e7541fc74
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/CatchingByValue.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Catching by value
+ * @description Catching an exception by value will create a copy of the thrown exception, thereby potentially slicing the original exception object.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/catch-by-value
+ * @tags efficiency
+ * correctness
+ * exceptions
+ */
+
+import cpp
+
+from CatchBlock cb, Class caughtType
+where caughtType = cb.getParameter().getUnspecifiedType()
+select cb,
+ "This should catch a " + caughtType.getName() + " by (const) reference rather than by value."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/LeakyCatch.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/LeakyCatch.ql
new file mode 100644
index 00000000000..d3848962abd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/LeakyCatch.ql
@@ -0,0 +1,50 @@
+/**
+ * @name Leaky catch
+ * @description If an exception is allocated on the heap, then it should be deleted when caught.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/catch-missing-free
+ * @tags efficiency
+ * correctness
+ * exceptions
+ * external/cwe/cwe-401
+ */
+
+import cpp
+
+predicate doesRethrow(Function f) {
+ exists(ReThrowExpr e | e.getEnclosingFunction() = f |
+ not e.getEnclosingStmt().getParent*() instanceof CatchBlock
+ )
+ or
+ exists(FunctionCall fc | fc.getEnclosingFunction() = f | doesRethrow(fc.getTarget()))
+}
+
+predicate deletesException(Expr expr, Parameter exception) {
+ expr.getEnclosingBlock().getParent*().(CatchBlock).getParameter() = exception and
+ (
+ exists(FunctionCall fc | fc = expr |
+ // Calling a delete function on the exception will free it (MFC's CException has a Delete function).
+ fc.getQualifier() = exception.getAnAccess() and
+ fc.getTarget().getName().toLowerCase().matches("%delete%")
+ or
+ // Passing the exception to a function might free it.
+ fc.getAnArgument() = exception.getAnAccess()
+ or
+ // Calling a function which rethrows the current exception might cause the exception to be freed.
+ doesRethrow(fc.getTarget())
+ )
+ or
+ // Calling operator delete on the exception will free it.
+ exists(DeleteExpr d | d = expr | d.getExpr() = exception.getAnAccess())
+ )
+}
+
+from CatchBlock cb
+where
+ cb.getParameter().getType().getUnderlyingType() instanceof PointerType and
+ not exists(Expr e | e.getEnclosingBlock().getParent*() = cb |
+ deletesException(e, cb.getParameter())
+ )
+select cb, "This catch block does not free the caught exception, thereby leaking memory."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/ThrowingPointers.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/ThrowingPointers.ql
new file mode 100644
index 00000000000..85c21a28f56
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Exceptions/ThrowingPointers.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Throwing pointers
+ * @description Exceptions should be objects rather than pointers to objects.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/throwing-pointer
+ * @tags efficiency
+ * correctness
+ * exceptions
+ */
+
+import cpp
+
+from ThrowExpr throw, NewExpr new, Type t
+where
+ new.getParent() = throw and
+ // Microsoft MFC's CException hierarchy should be thrown (and caught) as pointers
+ t = new.getAllocatedType() and
+ not t.getUnderlyingType().(Class).getABaseClass*().hasName("CException")
+select throw, "This should throw a " + t.toString() + " rather than a pointer to one."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/DeclarationHidesParameter.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/DeclarationHidesParameter.ql
new file mode 100644
index 00000000000..64e5760ed2b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/DeclarationHidesParameter.ql
@@ -0,0 +1,60 @@
+/**
+ * @name Declaration hides parameter
+ * @description A local variable hides a parameter. This may be confusing. Consider renaming one of them.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision very-high
+ * @id cpp/declaration-hides-parameter
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+/**
+ * Gets the template that a function `f` is constructed from, or just `f` if it
+ * is not from a template instantiation.
+ */
+Function getConstructedFrom(Function f) {
+ f.isConstructedFrom(result) // `f` is constructed from `result`
+ or
+ not f.isConstructedFrom(_) and // `f` is not constructed from anything, so it stands for itself
+ result = f
+}
+
+/**
+ * Gets the parameter of `f` with name `name`, which has to come from the
+ * _definition_ of `f` and not a prototype declaration.
+ * We also exclude names from functions that have multiple definitions.
+ * This should not happen in a single application, but since we
+ * have a system-wide view it is likely to happen, for instance for
+ * the `main` function.
+ *
+ * Note: we use `getConstructedFrom` to ensure that we look at template
+ * functions rather than their instantiations. We get better results this way
+ * as the instantiation is artificial and may have inherited parameter names
+ * from the declaration rather than the definition.
+ */
+ParameterDeclarationEntry functionParameterNames(Function f, string name) {
+ exists(FunctionDeclarationEntry fe |
+ result.getFunctionDeclarationEntry() = fe and // `result` belongs to declaration entry `fe`
+ getConstructedFrom(f).getDefinition() = fe and // `fe` is the definition of `f` (or of the function it is constructed from)
+ fe.getLocation() = f.getDefinitionLocation() and
+ strictcount(f.getDefinitionLocation()) = 1 and // exactly one definition location: drops multiply-defined functions
+ result.getName() = name
+ )
+}
+
+/** Gets a local variable in `f` with name `name`. */
+pragma[nomagic] // QL annotation: do not apply the magic-sets optimization to this predicate
+LocalVariable localVariableNames(Function f, string name) {
+ name = result.getName() and
+ f = result.getFunction()
+}
+
+from Function f, LocalVariable lv, ParameterDeclarationEntry pde, string name
+where
+ lv = localVariableNames(f, name) and // a local variable of `f` called `name` ...
+ pde = functionParameterNames(f, name) and // ... and a parameter of `f`'s definition with the same name
+ not lv.isInMacroExpansion() // ignore locals that come from a macro expansion
+select lv, "Local variable '" + lv.getName() + "' hides a $@.", pde, "parameter of the same name"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/DeclarationHidesVariable.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/DeclarationHidesVariable.ql
new file mode 100644
index 00000000000..d10d346c513
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/DeclarationHidesVariable.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Declaration hides variable
+ * @description A local variable hides another local variable from a surrounding scope. This may be confusing. Consider renaming one of the variables.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/declaration-hides-variable
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+import Best_Practices.Hiding.Shadowing
+
+from LocalVariable lv1, LocalVariable lv2
+where
+ shadowing(lv1, lv2) and // `lv1` (inner scope) has the same name as `lv2` (enclosing scope, declared earlier)
+ not lv1.isCompilerGenerated() and
+ not lv2.isCompilerGenerated() and
+ not lv1.getParentScope().(BlockStmt).isInMacroExpansion() and // skip variables whose enclosing block is in a macro expansion
+ not lv2.getParentScope().(BlockStmt).isInMacroExpansion()
+select lv1, "Variable " + lv1.getName() + " hides another variable of the same name (on $@).", lv2,
+ "line " + lv2.getLocation().getStartLine().toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/LocalVariableHidesGlobalVariable.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/LocalVariableHidesGlobalVariable.ql
new file mode 100644
index 00000000000..53c96c4beb7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/LocalVariableHidesGlobalVariable.ql
@@ -0,0 +1,38 @@
+/**
+ * @name Local variable hides global variable
+ * @description A local variable or parameter that hides a global variable of the same name. This may be confusing. Consider renaming one of the variables.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision very-high
+ * @id cpp/local-variable-hides-global-variable
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+class LocalVariableOrParameter extends VariableDeclarationEntry { // a declaration entry of a local-scope variable (local variable or parameter)
+ LocalVariableOrParameter() {
+ this.getVariable() instanceof LocalScopeVariable and
+ (
+ // We only need to report parameters hiding globals when the clash is with the parameter
+ // name as used in the function definition. The parameter name used in any other function
+ // declaration is harmless.
+ this instanceof ParameterDeclarationEntry
+ implies
+ exists(this.(ParameterDeclarationEntry).getFunctionDeclarationEntry().getBlock()) // the owning function declaration entry has a body block
+ )
+ }
+
+ string type() { // gets the message prefix for this entry; both results end with a trailing space
+ if this.getVariable() instanceof Parameter
+ then result = "Parameter "
+ else result = "Local variable "
+ }
+}
+
+from LocalVariableOrParameter lv, GlobalVariable gv
+where
+ lv.getName() = gv.getName() and // same name ...
+ lv.getFile() = gv.getFile() // ... and both are located in the same file
+select lv, lv.type() + gv.getName() + " hides $@ with the same name.", gv, "a global variable"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/Shadowing.qll b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/Shadowing.qll
new file mode 100644
index 00000000000..a662e66558f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Hiding/Shadowing.qll
@@ -0,0 +1,32 @@
+import cpp
+
+predicate ancestorScope(Element b1, Element b2) { b1.getParentScope+() = b2 } // holds if `b2` is reached from `b1` by one or more `getParentScope` steps
+
+pragma[noopt] // QL annotation: evaluate the conjuncts as written, without optimizer reordering
+predicate localVariablesSameNameInNestedScopes(LocalVariable lv1, LocalVariable lv2) { // `lv1` is in a scope nested inside `lv2`'s scope and shares its name
+ exists(Element b1, Element b2 |
+ b1 = lv1.getParentScope() and // `b1`: scope that directly contains `lv1`
+ not b1 instanceof Namespace and
+ lv1 instanceof LocalVariable and // explicit type check; presumably required because of `pragma[noopt]`
+ ancestorScope(b1, b2) and // `b2` strictly encloses `b1`
+ not b2 instanceof Namespace and
+ b2 = lv2.getParentScope() and // `b2`: scope that directly contains `lv2`
+ lv2 instanceof LocalVariable and
+ lv1.getName() = lv2.getName()
+ )
+}
+
+predicate shadowing(LocalVariable lv1, LocalVariable lv2) { // holds if `lv1` hides `lv2`: same name, nested scope, and `lv2` is declared first
+ localVariablesSameNameInNestedScopes(lv1, lv2) and
+ exists(Location l1, Location l2 |
+ l1 = lv1.getLocation() and
+ l2 = lv2.getLocation() and
+ (
+ // variables declared later in parent scope are not shadowed
+ l2.getEndLine() < l1.getStartLine() // `lv2` ends on an earlier line than `lv1` starts
+ or
+ l2.getEndLine() = l1.getStartLine() and // same line: compare columns instead
+ l2.getEndColumn() <= l1.getStartColumn()
+ )
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/EmptyBlock.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/EmptyBlock.ql
new file mode 100644
index 00000000000..9fa8c4e5e3f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/EmptyBlock.ql
@@ -0,0 +1,114 @@
+/**
+ * @name Empty branch of conditional
+ * @description An empty block after a conditional can be a sign of an omission
+ * and can decrease maintainability of the code. Such blocks
+ * should contain an explanatory comment to aid future
+ * maintainers.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision very-high
+ * @id cpp/empty-block
+ * @tags reliability
+ * readability
+ */
+
+import cpp
+
+predicate emptyBlock(ControlStructure s, BlockStmt b) { // holds if `b` is a childless block directly under non-loop control structure `s`
+ b = s.getAChild() and
+ not exists(b.getAChild()) and // the block has no AST children
+ not b.isInMacroExpansion() and
+ not s instanceof Loop // empty loop bodies are not reported
+}
+
+class AffectedFile extends File { // a file that contains at least one block matched by `emptyBlock`
+ AffectedFile() {
+ exists(BlockStmt b |
+ emptyBlock(_, b) and
+ this = b.getFile()
+ )
+ }
+}
+
+/**
+ * A block, or an element we might find textually within a block that is
+ * not a child of it in the AST. Only elements in an `AffectedFile` are included.
+ */
+class BlockOrNonChild extends Element {
+ BlockOrNonChild() {
+ (
+ this instanceof BlockStmt
+ or
+ this instanceof Comment
+ or
+ this instanceof PreprocessorDirective
+ or
+ this instanceof MacroInvocation
+ ) and
+ this.getFile() instanceof AffectedFile
+ }
+
+ private int getNonContiguousStartRankIn(AffectedFile file) { // rank of this element in `file`, ordered by start line then start column
+ // When using `rank` with `order by`, the ranks may not be contiguous.
+ this =
+ rank[result](BlockOrNonChild boc, int startLine, int startCol |
+ boc.getLocation().hasLocationInfo(file.getAbsolutePath(), startLine, startCol, _, _)
+ |
+ boc order by startLine, startCol
+ )
+ }
+
+ int getStartRankIn(AffectedFile file) { // contiguous start rank: ranks the distinct non-contiguous start ranks found in `file`
+ this.getNonContiguousStartRankIn(file) =
+ rank[result](int rnk |
+ exists(BlockOrNonChild boc | boc.getNonContiguousStartRankIn(file) = rnk)
+ )
+ }
+
+ int getNonContiguousEndRankIn(AffectedFile file) { // rank of this element in `file`, ordered by end line then end column
+ this =
+ rank[result](BlockOrNonChild boc, int endLine, int endCol |
+ boc.getLocation().hasLocationInfo(file.getAbsolutePath(), _, _, endLine, endCol)
+ |
+ boc order by endLine, endCol
+ )
+ }
+}
+
+/**
+ * Holds if the empty block `b` textually contains a non-child element.
+ */
+predicate emptyBlockContainsNonchild(BlockStmt b) {
+ emptyBlock(_, b) and
+ exists(BlockOrNonChild c, AffectedFile file |
+ c.(BlockOrNonChild).getStartRankIn(file) = 1 + b.(BlockOrNonChild).getStartRankIn(file) and // `c` has the next start rank after `b`
+ c.(BlockOrNonChild).getNonContiguousEndRankIn(file) < // ... and `c` ranks before `b` by end position
+ b.(BlockOrNonChild).getNonContiguousEndRankIn(file)
+ )
+}
+
+/**
+ * Holds if `b` is an empty block that is entirely on one line, and a comment starts
+ * on that same line of the same file. Chances are the comment is intended to refer to the block.
+ */
+predicate lineComment(BlockStmt b) {
+ emptyBlock(_, b) and
+ exists(Location bLocation, File f, int line |
+ bLocation = b.getLocation() and
+ f = bLocation.getFile() and
+ line = bLocation.getStartLine() and // the block starts ...
+ line = bLocation.getEndLine() and // ... and ends on the same line
+ exists(Comment c, Location cLocation |
+ cLocation = c.getLocation() and
+ cLocation.getFile() = f and
+ cLocation.getStartLine() = line // the comment only has to start on that line; columns are not compared
+ )
+ )
+}
+
+from ControlStructure s, BlockStmt eb
+where
+ emptyBlock(s, eb) and
+ not emptyBlockContainsNonchild(eb) and // nothing (comment, directive, macro invocation, block) sits textually inside
+ not lineComment(eb) // and no comment shares the line of a one-line block
+select eb, "Empty block without comment"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/OffsetUseBeforeRangeCheck.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/OffsetUseBeforeRangeCheck.ql
new file mode 100644
index 00000000000..c8bf3842773
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/OffsetUseBeforeRangeCheck.ql
@@ -0,0 +1,44 @@
+/**
+ * @name Array offset used before range check
+ * @description Accessing an array offset before checking the range means that
+ * the program may attempt to read beyond the end of a buffer
+ * @kind problem
+ * @id cpp/offset-use-before-range-check
+ * @problem.severity warning
+ * @security-severity 8.2
+ * @precision medium
+ * @tags reliability
+ * security
+ * external/cwe/cwe-120
+ * external/cwe/cwe-125
+ */
+
+import cpp
+
+predicate beforeArrayAccess(Variable v, ArrayExpr access, Expr before) { // `before` is the left operand of an `&&` whose right operand contains `access`, which is indexed by `v`
+ exists(LogicalAndExpr andexpr |
+ access.getArrayOffset() = v.getAnAccess() and // the array offset is a direct access to `v`
+ andexpr.getRightOperand().getAChild*() = access and // `access` is the right operand or nested inside it
+ andexpr.getLeftOperand() = before
+ )
+}
+
+predicate afterArrayAccess(Variable v, ArrayExpr access, Expr after) { // `after` is the right operand of an `&&` whose left operand contains `access`, which is indexed by `v`
+ exists(LogicalAndExpr andexpr |
+ access.getArrayOffset() = v.getAnAccess() and // the array offset is a direct access to `v`
+ andexpr.getLeftOperand().getAChild*() = access and // `access` is the left operand or nested inside it
+ andexpr.getRightOperand() = after
+ )
+}
+
+from Variable v, ArrayExpr access, LTExpr rangecheck
+where
+ afterArrayAccess(v, access, rangecheck) and // the `<` check is on the right of the `&&`, after the array access
+ rangecheck.getLeftOperand() = v.getAnAccess() and // the check has the form `v < ...`
+ not access.isInMacroExpansion() and
+ not exists(LTExpr altcheck | // no other `v < ...` check sits on the left of an `&&` guarding this access
+ beforeArrayAccess(v, access, altcheck) and
+ altcheck.getLeftOperand() = v.getAnAccess()
+ )
+select access, "This use of offset '" + v.getName() + "' should follow the $@.", rangecheck,
+ "range check"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/Slicing.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/Slicing.ql
new file mode 100644
index 00000000000..8da32dfe66d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Likely Errors/Slicing.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Slicing
+ * @description Assigning a non-reference instance of a derived type to a variable of the base type slices off all members added by the derived class, and can cause an unexpected state.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/slicing
+ * @tags reliability
+ * correctness
+ * types
+ */
+
+import cpp
+
+from AssignExpr e, Class lhsType, Class rhsType
+where
+ e.getLValue().getType() = lhsType and // both sides have class type exactly (the variables are typed `Class`)
+ e.getRValue().getType() = rhsType and
+ rhsType.getABaseClass+() = lhsType and // the right-hand type derives, directly or transitively, from the left-hand type
+ exists(Declaration m | // the derived class has at least one member that is not a pure virtual function
+ rhsType.getAMember() = m and
+ not m.(VirtualFunction).isPure()
+ ) // TODO: add additional checks for concrete members in in-between supertypes
+select e, "This assignment expression slices from type $@ to $@", rhsType, rhsType.getName(),
+ lhsType, lhsType.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/JapaneseEraDate.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/JapaneseEraDate.ql
new file mode 100644
index 00000000000..a81aa4fc12d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/JapaneseEraDate.ql
@@ -0,0 +1,73 @@
+/**
+ * @name Hard-coded Japanese era start date
+ * @description Japanese era changes can lead to code behaving differently. Avoid hard-coding Japanese era start dates.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/japanese-era/exact-era-date
+ * @precision low
+ * @tags maintainability
+ * reliability
+ * japanese-era
+ */
+
+import cpp
+import semmle.code.cpp.commons.DateTime
+
+predicate assignedYear(Struct s, YearFieldAccess year, int value) { // a field access `year` on `s` is an operand of an operation that also has a constant operand `value`
+ exists(Operation yearAssignment |
+ s.getAField().getAnAccess() = year and // `year` accesses a field of `s`
+ yearAssignment.getAnOperand() = year and
+ yearAssignment.getAnOperand().getValue().toInt() = value // any operand of the same operation with an integer constant value
+ )
+}
+
+predicate assignedMonth(Struct s, MonthFieldAccess month, int value) { // a field access `month` on `s` is an operand of an operation that also has a constant operand `value`
+ exists(Operation monthAssignment |
+ s.getAField().getAnAccess() = month and // `month` accesses a field of `s`
+ monthAssignment.getAnOperand() = month and
+ monthAssignment.getAnOperand().getValue().toInt() = value // any operand of the same operation with an integer constant value
+ )
+}
+
+predicate assignedDay(Struct s, DayFieldAccess day, int value) { // a field access `day` on `s` is an operand of an operation that also has a constant operand `value`
+ exists(Operation dayAssignment |
+ s.getAField().getAnAccess() = day and // `day` accesses a field of `s`
+ dayAssignment.getAnOperand() = day and
+ dayAssignment.getAnOperand().getValue().toInt() = value // any operand of the same operation with an integer constant value
+ )
+}
+
+predicate eraDate(int year, int month, int day) { // holds for the hard-coded (year, month, day) triples this query treats as Japanese era start dates
+ year = 1989 and month = 1 and day = 8 // 8 January 1989
+ or
+ year = 2019 and month = 5 and day = 1 // 1 May 2019
+}
+
+predicate badStructInitialization(Element target, string message) { // year, month and day fields of one struct type `s` are given values forming an era start date
+ exists(
+ StructLikeClass s, YearFieldAccess year, MonthFieldAccess month, DayFieldAccess day,
+ int yearValue, int monthValue, int dayValue
+ |
+ eraDate(yearValue, monthValue, dayValue) and
+ assignedYear(s, year, yearValue) and // the three accesses only have to share the struct type `s`
+ assignedMonth(s, month, monthValue) and
+ assignedDay(s, day, dayValue) and
+ target = year and // the alert is reported on the year field access
+ message = "A time struct that is initialized with exact Japanese calendar era start date."
+ )
+}
+
+predicate badCall(Element target, string message) { // a call has three consecutive constant arguments (year, month, day) forming an era start date
+ exists(Call cc, int i |
+ eraDate(cc.getArgument(i).getValue().toInt(), cc.getArgument(i + 1).getValue().toInt(), // arguments i, i+1, i+2 are year, month, day
+ cc.getArgument(i + 2).getValue().toInt()) and
+ target = cc and
+ message = "Call that appears to have hard-coded Japanese era start date as parameter."
+ )
+}
+
+from Element target, string message
+where
+ badStructInitialization(target, message) or // struct fields set to an era start date
+ badCall(target, message) // or a call passing an era start date as consecutive arguments
+select target, message
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstants.qll b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstants.qll
new file mode 100644
index 00000000000..587b64b60b3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstants.qll
@@ -0,0 +1,316 @@
+import cpp
+import semmle.code.cpp.AutogeneratedFile
+
+/*
+ * Counting nontrivial literal occurrences
+ */
+
+predicate trivialPositiveIntValue(string s) { // holds if `s` is the text of a non-negative integer value considered too common to be a "magic" constant
+ // Small numbers
+ s = [0 .. 20].toString() or
+ s =
+ [
+ // Popular powers of two (decimal)
+ "16", "24", "32", "64", "128", "256", "512", "1024", "2048", "4096", "16384", "32768",
+ "65536", "1048576", "2147483648", "4294967296",
+ // Popular powers of two, minus one (decimal); 2^20 - 1 is 1048575 (not 1048577, which is 2^20 + 1)
+ "15", "31", "63", "127", "255", "511", "1023", "2047", "4095", "16383", "32767", "65535",
+ "1048575", "2147483647", "4294967295",
+ // Popular powers of two (32-bit hex)
+ "0x00000001", "0x00000002", "0x00000004", "0x00000008", "0x00000010", "0x00000020",
+ "0x00000040", "0x00000080", "0x00000100", "0x00000200", "0x00000400", "0x00000800",
+ "0x00001000", "0x00002000", "0x00004000", "0x00008000", "0x00010000", "0x00020000",
+ "0x00040000", "0x00080000", "0x00100000", "0x00200000", "0x00400000", "0x00800000",
+ "0x01000000", "0x02000000", "0x04000000", "0x08000000", "0x10000000", "0x20000000",
+ "0x40000000", "0x80000000",
+ // Popular powers of two, minus one (32-bit hex)
+ "0x00000001", "0x00000003", "0x00000007", "0x0000000f", "0x0000001f", "0x0000003f",
+ "0x0000007f", "0x000000ff", "0x000001ff", "0x000003ff", "0x000007ff", "0x00000fff",
+ "0x00001fff", "0x00003fff", "0x00007fff", "0x0000ffff", "0x0001ffff", "0x0003ffff",
+ "0x0007ffff", "0x000fffff", "0x001fffff", "0x003fffff", "0x007fffff", "0x00ffffff",
+ "0x01ffffff", "0x03ffffff", "0x07ffffff", "0x0fffffff", "0x1fffffff", "0x3fffffff",
+ "0x7fffffff", "0xffffffff",
+ // Popular powers of two (16-bit hex)
+ "0x0001", "0x0002", "0x0004", "0x0008", "0x0010", "0x0020", "0x0040", "0x0080", "0x0100",
+ "0x0200", "0x0400", "0x0800", "0x1000", "0x2000", "0x4000", "0x8000",
+ // Popular powers of two, minus one (16-bit hex)
+ "0x0001", "0x0003", "0x0007", "0x000f", "0x001f", "0x003f", "0x007f", "0x00ff", "0x01ff",
+ "0x03ff", "0x07ff", "0x0fff", "0x1fff", "0x3fff", "0x7fff", "0xffff",
+ // Popular powers of two (8-bit hex)
+ "0x01", "0x02", "0x04", "0x08", "0x10", "0x20", "0x40", "0x80",
+ // Popular powers of two, minus one (8-bit hex)
+ "0x01", "0x03", "0x07", "0x0f", "0x1f", "0x3f", "0x7f", "0xff", "0x00",
+ // Powers of ten
+ "10", "100", "1000", "10000", "100000", "1000000", "10000000", "100000000", "1000000000"
+ ]
+}
+
+predicate trivialIntValue(string s) { // holds if `s` is a trivial non-negative value or the same text prefixed with "-"
+ trivialPositiveIntValue(s)
+ or
+ exists(string pos | trivialPositiveIntValue(pos) and s = "-" + pos) // negated form
+}
+
+predicate trivialLongValue(string s) { exists(string v | trivialIntValue(v) and s = v + "L") } // a trivial int value followed by the suffix "L"
+
+predicate intTrivial(Literal lit) { exists(string v | trivialIntValue(v) and v = lit.getValue()) } // the value of `lit` is a trivial int value
+
+predicate longTrivial(Literal lit) { exists(string v | trivialLongValue(v) and v = lit.getValue()) } // the value of `lit` is a trivial long value
+
+predicate powerOfTen(float f) { // holds if `f` is one of 10^1 through 10^9
+ f = 10 or
+ f = 100 or
+ f = 1000 or
+ f = 10000 or
+ f = 100000 or
+ f = 1000000 or
+ f = 10000000 or
+ f = 100000000 or
+ f = 1000000000
+}
+
+predicate floatTrivial(Literal lit) { // a floating-point literal whose value is at most 20 in magnitude, or a power of ten
+ lit.getType() instanceof FloatingPointType and
+ exists(string value, float f |
+ lit.getValue() = value and
+ f = value.toFloat() and // parse the literal's value text as a float
+ (f.abs() <= 20.0 or powerOfTen(f))
+ )
+}
+
+predicate charLiteral(Literal lit) { lit instanceof CharLiteral } // holds if `lit` is a character literal
+
+Type literalType(Literal literal) { result = literal.getType() } // gets the type of `literal`
+
+predicate stringType(DerivedType t) { // holds if `t` is derived from a char type, or from a const-specified char type
+ t.getBaseType() instanceof CharType
+ or
+ exists(SpecifiedType constCharType |
+ t.getBaseType() = constCharType and
+ constCharType.isConst() and
+ constCharType.getBaseType() instanceof CharType // the base type is `const` applied to a char type
+ )
+}
+
+predicate numberType(Type t) { t instanceof FloatingPointType or t instanceof IntegralType } // holds if `t` is a floating-point or integral type
+
+predicate stringLiteral(Literal literal) { literal instanceof StringLiteral } // holds if `literal` is a string literal
+
+predicate stringTrivial(Literal lit) { // a string literal whose value is shorter than 8 characters
+ stringLiteral(lit) and
+ lit.getValue().length() < 8
+}
+
+predicate joiningStringTrivial(Literal lit) { // a string literal under 16 characters used within an argument of `operator+` or `operator<<`
+ // We want to be more lenient with string literals that are being
+ // joined together, because replacing sentence fragments with named
+ // constants could actually result in code that is harder to
+ // understand (which is against the spirit of these queries).
+ stringLiteral(lit) and
+ exists(FunctionCall fc |
+ fc.getTarget().getName() = ["operator+", "operator<<"] and
+ fc.getAnArgument().getAChild*() = lit // `lit` is an argument or nested inside one
+ ) and
+ lit.getValue().length() < 16
+}
+
+predicate small(Literal lit) { lit.getValue().length() <= 1 } // the value text of `lit` is at most one character long
+
+predicate trivial(Literal lit) { // holds if any of the triviality criteria applies to `lit`
+ charLiteral(lit) or
+ intTrivial(lit) or
+ floatTrivial(lit) or
+ stringTrivial(lit) or
+ joiningStringTrivial(lit) or
+ longTrivial(lit) or
+ small(lit)
+}
+
+private predicate isReferenceTo(Variable ref, Variable to) { // `ref` is initialized with a reference to variable `to`
+ exists(VariableAccess a |
+ ref.getInitializer().getExpr().getConversion().(ReferenceToExpr).getExpr() = a and // initializer is converted by a `ReferenceToExpr` wrapping access `a`
+ a.getTarget() = to
+ )
+}
+
+private predicate variableNotModifiedAfterInitializer(Variable v) { // `v` is never modified, never has its address taken, and the same holds for references to it
+ not exists(VariableAccess a | a.getTarget() = v and a.isModified()) and // no modifying access
+ not exists(AddressOfExpr e | e.getAddressable() = v) and // address never taken
+ forall(Variable v2 | isReferenceTo(v2, v) | variableNotModifiedAfterInitializer(v2)) // recursive check on every reference variable bound to `v`
+}
+
+predicate literalIsConstantInitializer(Literal literal, Variable f) { // `literal` initializes non-parameter variable `f`, which is never modified afterwards
+ f.getInitializer().getExpr() = literal and
+ variableNotModifiedAfterInitializer(f) and
+ not f instanceof Parameter
+}
+
+predicate literalIsEnumInitializer(Literal literal) { // `literal` is the initializer expression of some enum constant
+ exists(EnumConstant ec | ec.getInitializer().getExpr() = literal)
+}
+
+predicate literalInArrayInitializer(Literal literal) { // `literal` is, or is nested inside, some `AggregateLiteral`
+ exists(AggregateLiteral arrayInit | arrayInitializerChild(arrayInit, literal))
+}
+
+predicate arrayInitializerChild(AggregateLiteral parent, Expr e) { // `e` is `parent` itself or a transitive child expression of it
+ e = parent // base case
+ or
+ exists(Expr mid | arrayInitializerChild(parent, mid) and e.getParent() = mid) // recursive case: `e`'s parent is already in the relation
+}
+
+// Holds if the value text of `lit` contains a double quote or looks like a numeric token (optional sign, digits or 0x/0o/0b prefix, optional exponent and suffix), i.e. not a constant folded expression
+predicate literallyLiteral(Literal lit) {
+ lit.getValueText()
+ .regexpMatch(".*\".*|\\s*+[-+]?+\\s*+(0[xob][0-9a-fA-F]|[0-9])[0-9a-fA-F,._]*+([eE][-+]?+[0-9,._]*+)?+\\s*+[a-zA-Z]*+\\s*+")
+}
+
+predicate nonTrivialValue(string value, Literal literal) { // `literal` has value `value` and passes every exclusion filter below
+ value = literal.getValue() and
+ not trivial(literal) and
+ not literalIsConstantInitializer(literal, _) and // not already the initializer of a constant variable
+ not literalIsEnumInitializer(literal) and
+ not literalInArrayInitializer(literal) and
+ not literal.isAffectedByMacro() and
+ literallyLiteral(literal) // written as a literal in the source text
+}
+
+predicate valueOccurrenceCount(string value, int n) { // `value` occurs as a non-trivial literal at `n` distinct locations, where `n` is more than 20
+ n =
+ strictcount(Location loc |
+ exists(Literal lit | lit.getLocation() = loc | nonTrivialValue(value, lit)) and
+ // Exclude generated files (they do not have the same maintainability
+ // concerns as ordinary source files)
+ not loc.getFile() instanceof AutogeneratedFile
+ ) and
+ n > 20 // reporting threshold
+}
+
+predicate occurenceCount(Literal lit, string value, int n) { // `lit` is a non-trivial literal whose value `value` has occurrence count `n`
+ valueOccurrenceCount(value, n) and
+ value = lit.getValue() and
+ nonTrivialValue(_, lit)
+}
+
+/*
+ * Literals repeated frequently
+ */
+
+predicate check(Literal lit, string value, int n, File f) { // `lit` in non-generated file `f` has non-trivial value `value`, repeated `n` times overall
+ // Check that the literal is nontrivial
+ not trivial(lit) and
+ // Check that it is repeated a number of times
+ occurenceCount(lit, value, n) and
+ n > 20 and // same threshold that `valueOccurrenceCount` already applies
+ f = lit.getFile() and
+ // Exclude generated files
+ not f instanceof AutogeneratedFile
+}
+
+predicate checkWithFileCount(string value, int overallCount, int fileCount, File f) { // `fileCount` is the number of distinct locations in `f` of literals passing `check` for `value`
+ fileCount =
+ strictcount(Location loc |
+ exists(Literal lit | lit.getLocation() = loc | check(lit, value, overallCount, f))
+ )
+}
+
+predicate start(Literal lit, int startLine) { // `startLine` is the start line of `lit`'s location
+ exists(Location l | l = lit.getLocation() and startLine = l.getStartLine())
+}
+
+predicate firstOccurrence(Literal lit, string value, int n) { // `lit` is on the earliest start line among the checked literals with `value` in its file
+ exists(File f, int fileCount |
+ checkWithFileCount(value, n, fileCount, f) and
+ fileCount < 100 and // skip files with 100 or more occurrences of the value
+ check(lit, value, n, f) and
+ not exists(Literal lit2, int start1, int start2 | // no other checked literal in `f` starts on an earlier line
+ check(lit2, value, n, f) and
+ start(lit, start1) and
+ start(lit2, start2) and
+ start2 < start1 // only lines are compared, so literals sharing the first line all qualify
+ )
+ )
+}
+
+predicate magicConstant(Literal e, string msg) { // `e` is the first occurrence in its file of a frequently repeated literal; `msg` is the alert text
+ exists(string value, int n |
+ firstOccurrence(e, value, n) and
+ msg =
+ "Magic constant: literal '" + value + "' is repeated " + n.toString() +
+ " times and should be encapsulated in a constant."
+ )
+}
+
+/*
+ * Literals where there is a defined constant with the same value
+ */
+
+predicate relevantVariable(Variable f, string value) { // `f` is a constant variable initialized with a non-trivial literal of value `value`
+ exists(Literal lit |
+ not trivial(lit) and value = lit.getValue() and literalIsConstantInitializer(lit, f)
+ )
+}
+
+predicate relevantCallable(Function f, string value) { // function `f` encloses a non-trivial literal of value `value`
+ exists(Literal lit |
+ not trivial(lit) and value = lit.getValue() and lit.getEnclosingFunction() = f
+ )
+}
+
+predicate isVisible(Variable field, Function fromCallable) { // `field` holds a value also used literally in `fromCallable`, and is accessible from it
+ exists(string value |
+ // public member variables
+ relevantVariable(field, value) and
+ field.(MemberVariable).isPublic() and
+ relevantCallable(fromCallable, value)
+ or
+ // declared in the same class as the function
+ relevantVariable(field, value) and
+ exists(Type t |
+ t = field.getDeclaringType() and
+ t = fromCallable.getDeclaringType()
+ ) and
+ relevantCallable(fromCallable, value)
+ or
+ // function is in a subclass of the field's class, and the field is not private
+ relevantVariable(field, value) and
+ not field.(MemberVariable).isPrivate() and
+ exists(Class sup, Class sub |
+ sup = field.getDeclaringType() and
+ sub.getABaseClass+() = sup and // `sub` derives, directly or transitively, from `sup`
+ sub = fromCallable.getDeclaringType()
+ ) and
+ relevantCallable(fromCallable, value)
+ )
+}
+
+predicate canUseFieldInsteadOfLiteral(Variable constField, Literal magicLiteral) { // `magicLiteral` could be replaced by `constField`, a constant of the same value and type
+ exists(Literal initLiteral |
+ literalIsConstantInitializer(initLiteral, constField) and
+ not trivial(initLiteral) and
+ not constField.getType().hasName("boolean") and // NOTE(review): the C++ type is named `bool`; confirm whether "boolean" is intended
+ exists(string value |
+ value = initLiteral.getValue() and
+ magicLiteral.getValue() = value // both literals have the same value
+ ) and
+ constField.getType() = magicLiteral.getType() and // and exactly the same type
+ not literalIsConstantInitializer(magicLiteral, _) and // the literal is not itself a constant's initializer
+ exists(Function c |
+ c = magicLiteral.getEnclosingFunction() and
+ (
+ constField.isTopLevel() and // top-level constant: usable if non-static, or static in the same file as `c`
+ (not constField.isStatic() or constField.getFile() = c.getFile())
+ or
+ isVisible(constField, c) // member constant accessible from `c`
+ )
+ )
+ )
+}
+
+predicate literalInsteadOfConstant( // builds the alert `message` and `linkText` for a literal that could use `constField` instead
+ Literal magicLiteral, string message, Variable constField, string linkText
+) {
+ canUseFieldInsteadOfLiteral(constField, magicLiteral) and
+ message = "Literal value '" + magicLiteral.getValue() + "' used instead of constant $@." and
+ linkText = constField.getName() // the link text is the constant's name
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstantsNumbers.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstantsNumbers.ql
new file mode 100644
index 00000000000..830bb18bdef
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstantsNumbers.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Magic numbers
+ * @description 'Magic constants' should be avoided: if a nontrivial constant is used repeatedly, it should be encapsulated into a const variable or macro definition.
+ * @kind problem
+ * @id cpp/magic-number
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags maintainability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+import MagicConstants
+
+pragma[noopt] // QL annotation: evaluate the conjuncts as written, without optimizer reordering
+predicate selection(Element e, string msg) { // `e` is a magic constant with a numeric (floating-point or integral) type
+ magicConstant(e, msg) and
+ exists(Literal l, Type t | l = e and t = l.getType() and numberType(t) and l instanceof Literal) // explicit `instanceof`; presumably required because of `pragma[noopt]`
+}
+
+from Literal e, string msg
+where selection(e, msg) // numeric magic constants only
+select e, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstantsString.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstantsString.ql
new file mode 100644
index 00000000000..6f26e6f43f2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicConstantsString.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Magic strings
+ * @description 'Magic constants' should be avoided: if a nontrivial constant is used repeatedly, it should be encapsulated into a const variable or macro definition.
+ * @kind problem
+ * @id cpp/magic-string
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags maintainability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+import MagicConstants
+
+pragma[noopt] // QL annotation: evaluate the conjuncts as written, without optimizer reordering
+predicate selection(Element e, string msg) { magicConstant(e, msg) and stringLiteral(e) } // `e` is a magic constant that is a string literal
+
+from Literal e, string msg
+where selection(e, msg) // string magic constants only
+select e, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicNumbersUseConstant.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicNumbersUseConstant.ql
new file mode 100644
index 00000000000..2f569fb9fd0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicNumbersUseConstant.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Magic numbers: use defined constant
+ * @description A numeric literal that matches the initializer of a constant variable was found. Consider using the constant variable instead of the numeric literal.
+ * @kind problem
+ * @id cpp/use-number-constant
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ */
+
+import cpp
+import MagicConstants
+
+from Literal magicLiteral, string message, Variable constant, string linkText
+where
+ numberType(magicLiteral.getType()) and // restrict to literals of floating-point or integral type
+ literalInsteadOfConstant(magicLiteral, message, constant, linkText)
+select magicLiteral, message, constant, linkText
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicStringsUseConstant.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicStringsUseConstant.ql
new file mode 100644
index 00000000000..f2c205e6c2a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Magic Constants/MagicStringsUseConstant.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Magic strings: use defined constant
+ * @description A string literal that matches the initializer of a constant variable was found. Consider using the constant variable instead of the string literal.
+ * @kind problem
+ * @id cpp/use-string-constant
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ */
+
+import cpp
+import MagicConstants
+
+from Literal magicLiteral, string message, Variable constant, string linkText
+where
+ stringLiteral(magicLiteral) and // restrict to string literals
+ literalInsteadOfConstant(magicLiteral, message, constant, linkText)
+select magicLiteral, message, constant, linkText
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/NVI.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/NVI.ql
new file mode 100644
index 00000000000..a25cb172fb1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/NVI.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Public virtual method
+ * @description When public methods can be overridden, base classes are unable
+ * to enforce invariants that should hold for the whole hierarchy.
+ * @kind problem
+ * @id cpp/nvi
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ */
+
+import cpp
+
+// See http://www.gotw.ca/publications/mill18.htm for the reference behind this query.
+from MemberFunction f
+where
+ f.hasSpecifier("public") and
+ f.hasSpecifier("virtual") and
+ f.getFile().fromSource() and // only functions whose file is from source
+ not f instanceof Destructor // destructors are exempt
+select f, "Avoid having public virtual methods (NVI idiom)"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/NVIHub.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/NVIHub.ql
new file mode 100644
index 00000000000..330ccb8882f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/NVIHub.ql
@@ -0,0 +1,26 @@
+/**
+ * @name Public virtual method in Hub Class
+ * @description When public methods can be overridden, base classes are unable
+ * to enforce invariants that should hold for the whole hierarchy.
+ * This is especially problematic in classes with many
+ * dependencies or dependents.
+ * @kind problem
+ * @id cpp/nvi-hub
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ */
+
+import cpp
+
+// See http://www.gotw.ca/publications/mill18.htm for the reference behind this query.
+from MemberFunction f, int hubIndex, Class fclass
+where
+ f.hasSpecifier("public") and
+ f.hasSpecifier("virtual") and
+ f.getFile().fromSource() and // only functions whose file is from source
+ not f instanceof Destructor and // destructors are exempt
+ fclass = f.getDeclaringType() and
+ hubIndex = fclass.getMetrics().getAfferentCoupling() * fclass.getMetrics().getEfferentCoupling() and // product of the class's afferent and efferent coupling
+ hubIndex > 100 // only report classes above this hub threshold
+select f, "Avoid having public virtual methods (NVI idiom)"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/RuleOfThree.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/RuleOfThree.ql
new file mode 100644
index 00000000000..b03085c13aa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/RuleOfThree.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Rule of three
+ * @description Classes that have an explicit destructor, copy constructor, or
+ * copy assignment operator may behave inconsistently if they do
+ * not have all three.
+ * @kind problem
+ * @id cpp/rule-of-three
+ * @problem.severity warning
+ * @tags reliability
+ */
+
+import cpp
+
+class BigThree extends MemberFunction { // a destructor, copy constructor, or copy assignment operator
+  BigThree() {
+    this instanceof CopyAssignmentOperator or
+    this instanceof CopyConstructor or
+    this instanceof Destructor
+  }
+}
+
+from Class c, BigThree b
+where
+  c = b.getDeclaringType() and
+  ( // at least one of the three members is missing from c
+    not c.getAMemberFunction() instanceof CopyAssignmentOperator or
+    not c.getAMemberFunction() instanceof CopyConstructor or
+    not c.hasDestructor()
+  )
+select c,
+  "Class defines a destructor, copy constructor, or copy assignment operator, but not all three."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/RuleOfTwo.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/RuleOfTwo.ql
new file mode 100644
index 00000000000..efc30c0829e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/RuleOfTwo.ql
@@ -0,0 +1,66 @@
+/**
+ * @name Inconsistent definition of copy constructor and assignment ('Rule of Two')
+ * @description Classes that have an explicit copy constructor or copy
+ * assignment operator may behave inconsistently if they do
+ * not have both.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/rule-of-two
+ * @tags reliability
+ * readability
+ * language-features
+ */
+
+import cpp
+
+// This query enforces the Rule of Two, which is a conservative variation of
+// the more well-known Rule of Three.
+//
+// The Rule of Two is usually phrased informally, ignoring the distinction
+// between whether a member is missing because it's auto-generated (missing
+// from the source) or missing because it can't be called (missing from the
+// generated code).
+//
+// This query checks if one member is explicitly defined while the other is
+// auto-generated. This can lead to memory safety issues. It's a separate issue
+// whether one is callable while the other is not callable; that is an API
+// design question and carries no safety risk.
+predicate generatedCopyAssignment(CopyConstructor cc, string msg) {
+  exists(Class owner | owner = cc.getDeclaringType() and owner.hasImplicitCopyAssignmentOperator() |
+    msg = "No matching copy assignment operator in class " + owner.getName() +
+        ". It is good practice to match a copy constructor with a " + "copy assignment operator."
+  )
+}
+
+predicate generatedCopyConstructor(CopyAssignmentOperator ca, string msg) {
+  exists(Class owner | owner = ca.getDeclaringType() and owner.hasImplicitCopyConstructor() |
+    msg = "No matching copy constructor in class " + owner.getName() +
+        ". It is good practice to match a copy assignment operator with a " + "copy constructor."
+  )
+}
+
+from MemberFunction f, string msg
+where
+ (
+ generatedCopyAssignment(f, msg) or // f is a copy constructor whose class has an implicit copy assignment operator
+ generatedCopyConstructor(f, msg) // f is a copy assignment operator whose class has an implicit copy constructor
+ ) and
+ // Ignore template instantiations to prevent an explosion of alerts
+ not f.getDeclaringType().isConstructedFrom(_) and
+ // Ignore private members since a private constructor or assignment operator
+ // is a common idiom that simulates suppressing the default-generated
+ // members. It would be better to use C++11's "delete" facility or use
+ // appropriate Boost helper classes, but it is too common to report as a
+ // violation.
+ not f.isPrivate() and
+ // If it is truly user-defined then it must have a body. This leaves out
+ // C++11 members that use `= delete` or `= default`.
+ exists(f.getBlock()) and
+ // In rare cases, the extractor pretends that an auto-generated copy
+ // constructor has a block that is one character long and is located on top
+ // of the first character of the class name. Checking for
+ // `isCompilerGenerated` will remove those results.
+ not f.isCompilerGenerated() and
+ not f.isDeleted() // never report deleted members
+select f, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/SloppyGlobal.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/SloppyGlobal.ql
new file mode 100644
index 00000000000..050590c1816
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/SloppyGlobal.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Short global name
+ * @description Global variables should have descriptive names, to help document their use, avoid namespace pollution and reduce the risk of shadowing with local variables.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision very-high
+ * @id cpp/short-global-name
+ * @tags maintainability
+ */
+
+import cpp
+
+from GlobalVariable gv
+where
+  not gv.isStatic() and // static globals are not reported
+  gv.getName().length() <= 3
+select gv,
+  "Poor global variable name '" + gv.getName() +
+    "'. Prefer longer, descriptive names for globals (eg. kMyGlobalConstant, not foo)."
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/SwitchLongCase.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/SwitchLongCase.ql
new file mode 100644
index 00000000000..98fa39f66fd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/SwitchLongCase.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Long switch case
+ * @description A switch statement with too much code in its cases can make the control flow hard to follow. Consider wrapping the code for each case in a function and just using the switch statement to invoke the appropriate function in each case.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/long-switch
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+predicate switchCaseStartLine(SwitchCase sc, int start) { start = sc.getLocation().getStartLine() }
+
+predicate switchStmtEndLine(SwitchStmt s, int endLine) { endLine = s.getLocation().getEndLine() } // `endLine` is the last line of `s`
+
+predicate switchCaseLength(SwitchCase sc, int length) {
+  // `sc` is followed by another case: count the lines strictly between the two case labels
+  exists(int caseStart, int nextStart |
+    switchCaseStartLine(sc, caseStart) and
+    switchCaseStartLine(sc.getNextSwitchCase(), nextStart) and
+    length = nextStart - caseStart - 1
+  )
+  or
+  not exists(sc.getNextSwitchCase()) and // last case: measure up to the end of the switch statement
+  exists(int caseStart, int switchEnd |
+    switchCaseStartLine(sc, caseStart) and
+    switchStmtEndLine(sc.getSwitchStmt(), switchEnd) and
+    length = switchEnd - caseStart - 1
+  )
+}
+
+predicate tooLong(SwitchCase sc) { exists(int len | len > 30 and switchCaseLength(sc, len)) }
+
+from SwitchStmt switch, SwitchCase sc, int lines
+where
+  switchCaseLength(sc, lines) and
+  tooLong(sc) and
+  switch.getASwitchCase() = sc
+select switch, "Switch has at least one case that is too long: $@", sc,
+  sc.getExpr().toString() + " (" + lines.toString() + " lines)"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedIncludes.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedIncludes.ql
new file mode 100644
index 00000000000..288e4e5328b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedIncludes.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Unused include
+ * @description Finds #include directives that are not needed because none of
+ * the included elements are used.
+ * @kind problem
+ * @id cpp/unused-includes
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ * useless-code
+ */
+
+import cpp
+
+File sourceFile() { // a C or C++ source file
+  result instanceof CppFile or
+  result instanceof CFile
+}
+
+from Include include, File source, File unneeded
+where
+  source = sourceFile() and
+  source = include.getFile() and
+  unneeded = include.getIncludedFile() and
+  unneeded.fromSource() and
+  not unneeded.getBaseName().matches("%Debug.h") and
+  not unneeded.getAnIncludedFile*() = source.getMetrics().getAFileDependency()
+select include, "Redundant include, this file does not require $@.", unneeded,
+  unneeded.getAbsolutePath()
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedLocals.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedLocals.ql
new file mode 100644
index 00000000000..0d58bc1ef4c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedLocals.ql
@@ -0,0 +1,68 @@
+/**
+ * @name Unused local variable
+ * @description A local variable that is never called or accessed may be an
+ * indication that the code is incomplete or has a typo.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/unused-local-variable
+ * @tags maintainability
+ * useless-code
+ * external/cwe/cwe-563
+ */
+
+import cpp
+
+/**
+ * A type that contains a template parameter type
+ * (doesn't count pointers or references).
+ *
+ * These types may have a constructor / destructor when they are
+ * instantiated, that is not visible in their template form.
+ *
+ * Such types include template parameters, classes with a member variable
+ * of template parameter type, and classes that derive from other such
+ * classes.
+ */
+class TemplateDependentType extends Type {
+ TemplateDependentType() {
+ this instanceof TemplateParameter // base case: the type is itself a template parameter
+ or
+ exists(TemplateDependentType t | // recursive case: refers directly to another template-dependent type
+ this.refersToDirectly(t) and
+ not this instanceof PointerType and // pointers to such types do not count
+ not this instanceof ReferenceType // references to such types do not count
+ )
+ }
+}
+
+/**
+ * A variable whose declaration has, or may have, side effects.
+ */
+predicate declarationHasSideEffects(Variable v) {
+  v.getType() instanceof TemplateDependentType // may have a constructor/destructor
+  or
+  exists(Class c | v.getUnspecifiedType() = c |
+    c.hasDestructor() or
+    c.hasConstructor()
+  )
+}
+
+from LocalVariable v, Function f
+where
+ f = v.getFunction() and
+ not exists(v.getAnAccess()) and // v is never accessed
+ not v.isConst() and // workaround for folded constants
+ not exists(DeclStmt ds | ds.getADeclaration() = v and ds.isInMacroExpansion()) and // variable declared in a macro expansion
+ not declarationHasSideEffects(v) and
+ not exists(AsmStmt s | f = s.getEnclosingFunction()) and // skip functions that contain an asm statement
+ not v.getAnAttribute().getName() = "unused" and // skip variables carrying an attribute named "unused"
+ not any(ErrorExpr e).getEnclosingFunction() = f and // unextracted expr may use `v`
+ not exists(
+ Literal l // this case can be removed when the `myFunction2( [obj](){} );` test case doesn't depend on this exclusion
+ |
+ l.getEnclosingFunction() = f and
+ not exists(l.getValue())
+ ) and
+ not any(ConditionDeclExpr cde).getEnclosingFunction() = f // this case can be removed when the `if (a = b; a)` test case doesn't depend on this exclusion
+select v, "Variable " + v.getName() + " is not used"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedStaticFunctions.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedStaticFunctions.ql
new file mode 100644
index 00000000000..2d7649d534e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedStaticFunctions.ql
@@ -0,0 +1,102 @@
+/**
+ * @name Unused static function
+ * @description A static function that is never called or accessed may be an
+ * indication that the code is incomplete or has a typo.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/unused-static-function
+ * @tags efficiency
+ * useless-code
+ * external/cwe/cwe-561
+ */
+
+import cpp
+
+predicate immediatelyReachableFunction(Function f) {
+  not f.isStatic() or
+  f instanceof MemberFunction or
+  f instanceof TemplateFunction or
+  f.getFile() instanceof HeaderFile or
+  exists(BlockExpr be | be.getFunction() = f) or
+  f.getAnAttribute().hasName("used") or
+  f.getAnAttribute().hasName("unused") or
+  f.getAnAttribute().hasName("constructor") or
+  f.getAnAttribute().hasName("destructor")
+}
+
+predicate immediatelyReachableVariable(Variable v) {
+  not v.isStatic() and v.isTopLevel()
+  or
+  v.getFile() instanceof HeaderFile
+  or
+  exists(v.getDeclaringType())
+  or
+  v.getAnAttribute().hasName("unused")
+  or
+  v.getAnAttribute().hasName("used")
+}
+
+class ImmediatelyReachableThing extends Thing {
+  ImmediatelyReachableThing() {
+    immediatelyReachableVariable(this) or
+    immediatelyReachableFunction(this)
+  }
+}
+
+predicate reachableThing(Thing t) {
+  exists(Thing user | t = user.callsOrAccesses() and reachableThing(user))
+  or
+  t instanceof ImmediatelyReachableThing
+}
+
+class Thing extends Locatable { // a function or a variable
+ Thing() {
+ this instanceof Function or
+ this instanceof Variable
+ }
+
+ string getName() { // the name of the underlying function or variable
+ result = this.(Function).getName() or
+ result = this.(Variable).getName()
+ }
+
+ Thing callsOrAccesses() { // a function or variable that this thing calls or accesses
+ this.(Function).calls(result.(Function))
+ or
+ this.(Function).accesses(result.(Function))
+ or
+ this.(Function).accesses(result.(Variable))
+ or
+ exists(Access a | this.(Variable).getInitializer().getExpr().getAChild*() = a | // an access anywhere inside this variable's initializer
+ result = a.getTarget()
+ )
+ }
+}
+
+class FunctionToRemove extends Function { // a defined function for which `reachableThing` does not hold
+ FunctionToRemove() {
+ this.hasDefinition() and
+ not reachableThing(this)
+ }
+
+ Thing getOther() { // a different thing that transitively calls or accesses this function
+ result.callsOrAccesses+() = this and
+ this != result and
+ // We will already be reporting the enclosing function of a
+ // local variable, so don't also report the variable
+ not result instanceof LocalVariable
+ }
+}
+
+from FunctionToRemove f, string clarification, Thing other
+where
+ if exists(f.getOther())
+ then (
+ clarification = " ($@ must be removed at the same time)" and
+ other = f.getOther() // one result row per thing returned by getOther()
+ ) else (
+ clarification = "" and other = f // message has no $@ placeholder; `other` is bound to f itself
+ )
+select f, "Static function " + f.getName() + " is unreachable" + clarification, other,
+ other.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedStaticVariables.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedStaticVariables.ql
new file mode 100644
index 00000000000..3ad43998d18
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/Unused Entities/UnusedStaticVariables.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Unused static variable
+ * @description A static variable that is never accessed may be an indication
+ * that the code is incomplete or has a typo.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/unused-static-variable
+ * @tags efficiency
+ * useless-code
+ * external/cwe/cwe-563
+ */
+
+import cpp
+
+predicate declarationHasSideEffects(Variable v) {
+  exists(Class c | v.getUnspecifiedType() = c | c.hasDestructor() or c.hasConstructor())
+}
+
+from Variable v
+where
+  v.isStatic() and
+  v.hasDefinition() and
+  not v instanceof MemberVariable and
+  not v.isConstexpr() and
+  not v.getAnAttribute().hasName("unused") and
+  not v.getAnAttribute().hasName("used") and
+  not declarationHasSideEffects(v) and
+  not v = any(VariableAccess a).getTarget() // no access to v exists
+select v, "Static variable " + v.getName() + " is never read"
diff --git a/repo-tests/codeql/cpp/ql/src/Best Practices/UseOfGoto.ql b/repo-tests/codeql/cpp/ql/src/Best Practices/UseOfGoto.ql
new file mode 100644
index 00000000000..a1dcddc0123
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Best Practices/UseOfGoto.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Use of goto
+ * @description The goto statement can make the control flow of a function hard
+ * to understand, when used for purposes other than error handling.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/use-of-goto
+ * @tags maintainability
+ * readability
+ * language-features
+ */
+
+import cpp
+
+class JumpTarget extends Stmt {
+  JumpTarget() { this = any(GotoStmt jump).getTarget() }
+
+  FunctionDeclarationEntry getFDE() { this.getParentStmt+() = result.getBlock() }
+
+  predicate isForward() {
+    exists(GotoStmt jump | this = jump.getTarget() |
+      this.getLocation().getStartLine() > jump.getLocation().getEndLine()
+    )
+  }
+
+  predicate isBackward() {
+    exists(GotoStmt jump | this = jump.getTarget() |
+      jump.getLocation().getStartLine() > this.getLocation().getEndLine()
+    )
+  }
+}
+
+from FunctionDeclarationEntry fde, int nforward, int nbackward
+where
+ nforward = strictcount(JumpTarget t | t.getFDE() = fde and t.isForward()) and // strictcount yields no value when nothing matches, so nforward >= 1
+ nbackward = strictcount(JumpTarget t | t.getFDE() = fde and t.isBackward()) and // likewise nbackward >= 1
+ nforward != 1 and // hence at least two forward jump targets
+ nbackward != 1 // and at least two backward jump targets
+select fde,
+ "Multiple forward and backward goto statements may make function " + fde.getName() +
+ " hard to understand."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeCondition.ql b/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeCondition.ql
new file mode 100644
index 00000000000..a769d3d4025
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeCondition.ql
@@ -0,0 +1,68 @@
+/**
+ * @name Branching condition always evaluates to same value
+ * @description The condition of the branching statement always evaluates to the same value. This means that only one branch will ever be executed.
+ * @kind problem
+ * @id cpp/dead-code-condition
+ * @problem.severity warning
+ * @tags reliability
+ * external/cwe/cwe-561
+ */
+
+import cpp
+
+predicate testAndBranch(Expr e, Stmt branch) {
+  exists(WhileStmt loop |
+    e = loop.getCondition() and
+    branch = loop.getStmt()
+  )
+  or
+  exists(IfStmt conditional |
+    e = conditional.getCondition() and
+    (branch = conditional.getElse() or branch = conditional.getThen())
+  )
+}
+
+predicate choice(StackVariable v, Stmt branch, string value) {
+  exists(AnalysedExpr test |
+    testAndBranch(test, branch) and
+    (
+      value = "non-null" and branch = test.getNonNullSuccessor(v)
+      or
+      value = "null" and branch = test.getNullSuccessor(v)
+    )
+  )
+}
+
+predicate guarded(StackVariable v, Stmt loopstart, AnalysedExpr child) {
+ choice(v, loopstart, _) and // loopstart is a branch selected by a null/non-null test of v
+ loopstart.getChildStmt*() = child.getEnclosingStmt() and // child's enclosing statement is loopstart or nested in it
+ (definition(v, child) or exists(child.getNullSuccessor(v))) // child is a `definition` of v, or has a null successor for v
+}
+
+predicate addressLeak(Variable v, Stmt leak) {
+  exists(VariableAccess use |
+    use.isAddressOfAccess() and
+    leak = use.getEnclosingStmt() and
+    use = v.getAnAccess()
+  )
+}
+
+from StackVariable v, Stmt branch, AnalysedExpr cond, string context, string test, string testresult
+where
+ choice(v, branch, context) and // `branch` is taken when v is `context` ("null" or "non-null")
+ forall(ControlFlowNode def | definition(v, def) and definitionReaches(def, cond) | // no definition of v reaching cond is itself guarded by `branch`
+ not guarded(v, branch, def)
+ ) and
+ not cond.isDef(v) and
+ guarded(v, branch, cond) and // cond's enclosing statement lies within `branch`
+ exists(cond.getNullSuccessor(v)) and
+ not addressLeak(v, branch.getChildStmt*()) and // the address of v is not taken in `branch` or its child statements
+ (
+ cond.isNullCheck(v) and test = "null"
+ or
+ cond.isValidCheck(v) and test = "non-null"
+ ) and
+ (if context = test then testresult = "succeed" else testresult = "fail") // the check succeeds exactly when it tests for the known state
+select cond,
+ "Variable '" + v.getName() + "' is always " + context + " here, this check will always " +
+ testresult + "."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeFunction.ql b/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeFunction.ql
new file mode 100644
index 00000000000..413bcc15eaf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeFunction.ql
@@ -0,0 +1,36 @@
+/**
+ * @name Function is never called
+ * @description Unused functions may increase object size, decrease readability, and create the possibility of misuse.
+ * @kind problem
+ * @id cpp/dead-code-function
+ * @problem.severity warning
+ * @tags maintainability
+ * external/cwe/cwe-561
+ */
+
+import cpp
+
+predicate limitedScope(Function f) {
+  f.(MemberFunction).isPrivate()
+  or
+  not f instanceof MemberFunction and f.isStatic()
+}
+
+predicate uncalled(Function f) {
+ limitedScope(f) and
+ not exists(Function g | g = f or g = f.(VirtualFunction).getAnOverriddenFunction+() | // f itself, or any function f transitively overrides
+ exists(g.getACallToThisFunction()) or // g has a call
+ exists(FunctionAccess fa | fa.getTarget() = g) // g is the target of a function access
+ )
+}
+
+from Function f
+where
+ uncalled(f) and
+ forall(Function instance | f.(TemplateFunction).getAnInstantiation() = instance | // every instantiation of a template function must be uncalled too
+ uncalled(instance)
+ ) and
+ // tweaks for good results:
+ exists(f.getBlock()) and // f has a body
+ not (f instanceof Constructor or f instanceof Destructor or f.hasName("operator=")) // these are never reported
+select f, "Dead Code: this function is never called."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeGoto.ql b/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeGoto.ql
new file mode 100644
index 00000000000..a2f8db771b0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/DeadCodeGoto.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Dead code due to goto or break statement
+ * @description A goto or break statement is followed by unreachable code.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/dead-code-goto
+ * @tags maintainability
+ * external/cwe/cwe-561
+ */
+
+import cpp
+import semmle.code.cpp.commons.Exclusions
+
+Stmt getNextRealStmt(BlockStmt b, int i) { // the first non-empty statement after index i in block b
+ result = b.getStmt(i + 1) and
+ not result instanceof EmptyStmt
+ or
+ b.getStmt(i + 1) instanceof EmptyStmt and // skip an empty statement and look further
+ result = getNextRealStmt(b, i + 1)
+}
+
+from JumpStmt js, BlockStmt b, int i, Stmt s
+where
+ b.getStmt(i) = js and // js is the i-th statement of block b
+ s = getNextRealStmt(b, i) and // s is the next non-empty statement in the same block
+ // the next statement isn't jumped to
+ not s instanceof LabelStmt and
+ not s instanceof SwitchCase and
+ // the next statement isn't breaking out of a switch
+ not s.(BreakStmt).getBreakable() instanceof SwitchStmt and
+ // the next statement isn't a loop that can be jumped into
+ not exists(LabelStmt ls | s.(Loop).getStmt().getAChild*() = ls) and
+ not exists(SwitchCase sc | s.(Loop).getStmt().getAChild*() = sc) and
+ // no preprocessor logic applies
+ not functionContainsPreprocCode(js.getEnclosingFunction())
+select js, "This statement makes $@ unreachable.", s, s.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/DescriptorMayNotBeClosed.ql b/repo-tests/codeql/cpp/ql/src/Critical/DescriptorMayNotBeClosed.ql
new file mode 100644
index 00000000000..3ef487fbec2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/DescriptorMayNotBeClosed.ql
@@ -0,0 +1,61 @@
+/**
+ * @name Open descriptor may not be closed
+ * @description Failing to close resources in the function that opened them makes it difficult to avoid and detect resource leaks.
+ * @kind problem
+ * @id cpp/descriptor-may-not-be-closed
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @tags efficiency
+ * security
+ * external/cwe/cwe-775
+ */
+
+import semmle.code.cpp.pointsto.PointsTo
+import Negativity
+
+predicate closeCall(FunctionCall fc, Variable v) {
+ fc.getTarget().hasGlobalOrStdName("close") and v.getAnAccess() = fc.getArgument(0) // direct `close(v)`
+ or
+ exists(FunctionCall midcall, Function mid, int arg | // v is passed to a function that closes the corresponding parameter
+ fc.getArgument(arg) = v.getAnAccess() and
+ fc.getTarget() = mid and
+ midcall.getEnclosingFunction() = mid and
+ closeCall(midcall, mid.getParameter(arg))
+ )
+}
+
+predicate openDefinition(StackVariable v, ControlFlowNode def) {
+  exists(Expr value | allocateDescriptorCall(value) and exprDefinition(v, def, value))
+}
+
+predicate openReaches(ControlFlowNode def, ControlFlowNode node) {
+ exists(StackVariable v | openDefinition(v, def) and node = def.getASuccessor()) // base case: a direct successor of the open definition
+ or
+ exists(StackVariable v, ControlFlowNode mid | // step: continue past an already-reached node `mid` unless it stops propagation
+ openDefinition(v, def) and
+ openReaches(def, mid) and
+ not errorSuccessor(v, mid) and
+ not closeCall(mid, v) and // stop at a call that closes v
+ not assignedToFieldOrGlobal(v, mid) and // stop when v is assigned to a field or global
+ node = mid.getASuccessor()
+ )
+}
+
+predicate assignedToFieldOrGlobal(StackVariable v, Assignment assign) {
+  exists(Variable target |
+    (target instanceof GlobalVariable or target instanceof Field) and
+    target = assign.getLValue().(VariableAccess).getTarget() and
+    v.getAnAccess() = assign.getRValue()
+  )
+}
+
+from StackVariable v, ControlFlowNode def, ReturnStmt ret
+where
+ openDefinition(v, def) and
+ openReaches(def, ret) and
+ checkedSuccess(v, ret) and
+ not ret.getExpr().getAChild*() = v.getAnAccess() and // the returned expression does not mention v
+ exists(ReturnStmt other | other.getExpr() = v.getAnAccess()) // some return statement returns v directly
+select ret,
+ "Descriptor assigned to '" + v.getName().toString() + "' (line " +
+ def.getLocation().getStartLine().toString() + ") may not be closed."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/DescriptorNeverClosed.ql b/repo-tests/codeql/cpp/ql/src/Critical/DescriptorNeverClosed.ql
new file mode 100644
index 00000000000..85e41ad1928
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/DescriptorNeverClosed.ql
@@ -0,0 +1,32 @@
+/**
+ * @name Open descriptor never closed
+ * @description Functions that always return before closing the socket they opened leak resources.
+ * @kind problem
+ * @id cpp/descriptor-never-closed
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @tags efficiency
+ * security
+ * external/cwe/cwe-775
+ */
+
+import semmle.code.cpp.pointsto.PointsTo
+
+predicate closed(Expr e) {
+  exists(FunctionCall call |
+    e = call.getArgument(0) and
+    call.getTarget().hasGlobalOrStdName("close")
+  )
+}
+
+class ClosedExpr extends PointsToExpr { // an expression passed as the first argument of a call to `close`
+ ClosedExpr() { closed(this) }
+
+ override predicate interesting() { closed(this) } // NOTE(review): presumably selects which expressions the points-to analysis tracks; confirm against PointsToExpr
+}
+
+from Expr alloc
+where
+  not alloc = any(ClosedExpr closedExpr).pointsTo() and // no closed expression points to this allocation
+  allocateDescriptorCall(alloc)
+select alloc, "This file descriptor is never closed"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/FileClosed.qll b/repo-tests/codeql/cpp/ql/src/Critical/FileClosed.qll
new file mode 100644
index 00000000000..e2ba3c25b81
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/FileClosed.qll
@@ -0,0 +1,27 @@
+import semmle.code.cpp.pointsto.PointsTo
+
+/** Holds if there exists a call to a function that might close the file specified by `e`. */
+predicate closed(Expr e) {
+  exists(ExprCall indirect |
+    // cautiously assume that any ExprCall could be a call to fclose.
+    e = indirect.getAnArgument()
+  ) or
+  fcloseCall(_, e)
+}
+
+/** An expression for which there exists a function call that might close it. */
+class ClosedExpr extends PointsToExpr {
+ ClosedExpr() { closed(this) } // exactly the expressions for which `closed` holds
+
+ override predicate interesting() { closed(this) } // NOTE(review): presumably selects which expressions the points-to analysis tracks; confirm against PointsToExpr
+}
+
+/**
+ * Holds if `fc` is a call to a function that opens a file that might be closed. For example:
+ * ```
+ * FILE* f = fopen("file.txt", "r");
+ * ...
+ * fclose(f);
+ * ```
+ */
+predicate fopenCallMayBeClosed(FunctionCall fc) { anythingPointsTo(fc) and fopenCall(fc) }
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/FileMayNotBeClosed.ql b/repo-tests/codeql/cpp/ql/src/Critical/FileMayNotBeClosed.ql
new file mode 100644
index 00000000000..af38b437778
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/FileMayNotBeClosed.ql
@@ -0,0 +1,167 @@
+/**
+ * @name Open file may not be closed
+ * @description A function may return before closing a file that was opened in the function. Closing resources in the same function that opened them ties the lifetime of the resource to that of the function call, making it easier to avoid and detect resource leaks.
+ * @kind problem
+ * @id cpp/file-may-not-be-closed
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @tags efficiency
+ * security
+ * external/cwe/cwe-775
+ */
+
+import FileClosed
+import semmle.code.cpp.controlflow.StackVariableReachability
+
+/**
+ * Extend the NullValue class used by Nullness.qll to include simple -1 as a 'null' value
+ * (for example 'open' returns -1 if there was an error)
+ */
+class MinusOne extends NullValue {
+ MinusOne() { this.(UnaryMinusExpr).getOperand().(Literal).getValue() = "1" } // a unary minus applied to a literal whose value is "1"
+}
+
+/**
+ * 'call' is either a direct call to f, or a possible call to f
+ * via a function pointer.
+ */
+predicate mayCallFunction(Expr call, Function f) {
+ call.(FunctionCall).getTarget() = f or // direct call to f
+ call.(VariableCall).getVariable().getAnAssignedValue().getAChild*().(FunctionAccess).getTarget() = // call through a variable that has been assigned a value containing an access to f
+ f
+}
+
+predicate fopenCallOrIndirect(Expr e) {
+ // direct fopen call
+ fopenCall(e) and
+ // We are only interested in fopen calls that are
+ // actually closed somehow, as FileNeverClosed
+ // will catch those that aren't.
+ fopenCallMayBeClosed(e)
+ or
+ exists(ReturnStmt rtn |
+ // indirect fopen call
+ mayCallFunction(e, rtn.getEnclosingFunction()) and // e may call the function containing `rtn`
+ (
+ // return fopen
+ fopenCallOrIndirect(rtn.getExpr())
+ or
+ // return variable assigned with fopen
+ exists(Variable v |
+ v = rtn.getExpr().(VariableAccess).getTarget() and
+ fopenCallOrIndirect(v.getAnAssignedValue()) and
+ not assignedToFieldOrGlobal(v, _) // and v is never assigned to a non-stack location
+ )
+ )
+ )
+}
+
+predicate fcloseCallOrIndirect(FunctionCall fc, Variable v) {
+ // direct fclose call
+ fcloseCall(fc, v.getAnAccess())
+ or
+ // indirect fclose call
+ exists(FunctionCall midcall, Function mid, int arg |
+ fc.getArgument(arg) = v.getAnAccess() and // v is passed as argument number `arg`
+ mayCallFunction(fc, mid) and // fc may call `mid`, directly or through a function pointer
+ midcall.getEnclosingFunction() = mid and
+ fcloseCallOrIndirect(midcall, mid.getParameter(arg)) // and `mid` closes the matching parameter
+ )
+}
+
+predicate fopenDefinition(StackVariable v, ControlFlowNode def) {
+  exists(Expr value | fopenCallOrIndirect(value) and exprDefinition(v, def, value))
+}
+
+class FOpenVariableReachability extends StackVariableReachabilityWithReassignment {
+ FOpenVariableReachability() { this = "FOpenVariableReachability" }
+
+ override predicate isSourceActual(ControlFlowNode node, StackVariable v) {
+ fopenDefinition(v, node) // sources: definitions of v from a (possibly indirect) fopen call
+ }
+
+ override predicate isSinkActual(ControlFlowNode node, StackVariable v) {
+ // node may be used in fopenReaches
+ exists(node.(AnalysedExpr).getNullSuccessor(v)) or
+ fcloseCallOrIndirect(node, v) or
+ assignedToFieldOrGlobal(v, node) or
+ // node may be used directly in query
+ v.getFunction() = node.(ReturnStmt).getEnclosingFunction()
+ }
+
+ override predicate isBarrier(ControlFlowNode node, StackVariable v) { definitionBarrier(v, node) } // tracking of v stops where `definitionBarrier` holds
+}
+
+/**
+ * The value from fopen at `def` is still held in Variable `v` upon entering `node`.
+ */
+predicate fopenVariableReaches(StackVariable v, ControlFlowNode def, ControlFlowNode node) {
+ exists(FOpenVariableReachability r |
+ // reachability
+ r.reachesTo(def, _, node, v) // the second argument is deliberately unconstrained
+ or
+ // accept def node itself
+ r.isSource(def, v) and
+ node = def
+ )
+}
+
+class FOpenReachability extends StackVariableReachabilityExt {
+ FOpenReachability() { this = "FOpenReachability" }
+
+ override predicate isSource(ControlFlowNode node, StackVariable v) { fopenDefinition(v, node) } // sources: fopen definitions
+
+ override predicate isSink(ControlFlowNode node, StackVariable v) {
+ v.getFunction() = node.(ReturnStmt).getEnclosingFunction() // sinks: return statements in the function that declares v
+ }
+
+ override predicate isBarrier(
+ ControlFlowNode source, ControlFlowNode node, ControlFlowNode next, StackVariable v
+ ) {
+ isSource(source, v) and
+ next = node.getASuccessor() and // the barrier is on the edge node -> next
+ // the file (stored in any variable `v0`) opened at `source` is closed or
+ // assigned to a global at node, or NULL checked on the edge node -> next.
+ exists(StackVariable v0 | fopenVariableReaches(v0, source, node) |
+ node.(AnalysedExpr).getNullSuccessor(v0) = next or
+ fcloseCallOrIndirect(node, v0) or
+ assignedToFieldOrGlobal(v0, node)
+ )
+ }
+}
+
+/**
+ * The value returned by fopen `def` has not been closed, confirmed to be null,
+ * or potentially leaked globally upon reaching `node` (regardless of what variable
+ * it's still held in, if any).
+ */
+predicate fopenReaches(ControlFlowNode def, ControlFlowNode node) {
+  any(FOpenReachability r).reaches(def, _, node)
+}
+
+predicate assignedToFieldOrGlobal(StackVariable v, Expr e) {
+ // assigned to anything except a StackVariable
+ // (typically a field or global, but for example also *ptr = v)
+ e.(Assignment).getRValue() = v.getAnAccess() and
+ not e.(Assignment).getLValue().(VariableAccess).getTarget() instanceof StackVariable
+ or
+ exists(Expr midExpr, Function mid, int arg |
+ // indirect assignment
+ e.(FunctionCall).getArgument(arg) = v.getAnAccess() and // v is passed as argument number `arg`
+ mayCallFunction(e, mid) and // e may call `mid`, directly or through a function pointer
+ midExpr.getEnclosingFunction() = mid and
+ assignedToFieldOrGlobal(mid.getParameter(arg), midExpr) // and `mid` stores the matching parameter
+ )
+ or
+ // assigned to a field via constructor field initializer
+ e.(ConstructorFieldInit).getExpr() = v.getAnAccess()
+}
+
+from ControlFlowNode def, ReturnStmt ret
+where
+ fopenReaches(def, ret) and // the file opened at `def` reaches `ret` without passing a barrier
+ not exists(StackVariable v | // ...and `ret` does not mention a variable that still holds that file
+ fopenVariableReaches(v, def, ret) and
+ ret.getAChild*() = v.getAnAccess()
+ )
+select def, "The file opened here may not be closed at $@.", ret, "this exit point"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/FileNeverClosed.ql b/repo-tests/codeql/cpp/ql/src/Critical/FileNeverClosed.ql
new file mode 100644
index 00000000000..b9e71978359
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/FileNeverClosed.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Open file is not closed
+ * @description A function always returns before closing a file that was opened in the function. Closing resources in the same function that opened them ties the lifetime of the resource to that of the function call, making it easier to avoid and detect resource leaks.
+ * @kind problem
+ * @id cpp/file-never-closed
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @tags efficiency
+ * security
+ * external/cwe/cwe-775
+ */
+
+import FileClosed
+
+from Expr alloc
+where not fopenCallMayBeClosed(alloc) and fopenCall(alloc)
+select alloc, "The file is never closed"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/GlobalUseBeforeInit.ql b/repo-tests/codeql/cpp/ql/src/Critical/GlobalUseBeforeInit.ql
new file mode 100644
index 00000000000..6c3435eeba9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/GlobalUseBeforeInit.ql
@@ -0,0 +1,107 @@
+/**
+ * @name Global variable may be used before initialization
+ * @description Using an uninitialized variable may lead to undefined results.
+ * @kind problem
+ * @id cpp/global-use-before-init
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @tags reliability
+ * security
+ * external/cwe/cwe-457
+ */
+
+import cpp
+import semmle.code.cpp.pointsto.CallGraph
+
+predicate initFunc(GlobalVariable v, Function f) {
+ exists(VariableAccess write |
+ write.getEnclosingFunction() = f and
+ write.isUsedAsLValue() and
+ write = v.getAnAccess()
+ )
+}
+
+predicate useFunc(GlobalVariable v, Function f) {
+ not initFunc(v, f) and
+ exists(VariableAccess read |
+ read.getEnclosingFunction() = f and
+ read.isRValue() and
+ read = v.getAnAccess()
+ )
+}
+
+predicate uninitialisedBefore(GlobalVariable v, Function f) {
+ f.hasGlobalName("main") // base case: holds for every `v` at the entry of `main`
+ or
+ exists(Call call, Function g |
+ uninitialisedBefore(v, g) and
+ call.getEnclosingFunction() = g and
+ (not functionInitialises(f, v) or locallyUninitialisedAt(v, call)) and // NOTE(review): locallyUninitialisedAt requires the caller `g` to initialise `v`; confirm that testing `f` (not `g`) here is intended
+ resolvedCall(call, f)
+ )
+}
+
+predicate functionInitialises(Function f, GlobalVariable v) {
+ exists(Call initCall |
+ initialisedBy(v, initCall) and
+ f = initCall.getEnclosingFunction()
+ )
+}
+
+// this predicate only considers global variables that are initialised
+// (through some call) by the function enclosing "call"
+predicate locallyUninitialisedAt(GlobalVariable v, Call call) {
+ (
+ exists(Call prev |
+ callPair(prev, call) and not initialisedBy(v, prev) and locallyUninitialisedAt(v, prev)
+ )
+ or
+ firstCall(call)
+ ) and
+ functionInitialises(call.getEnclosingFunction(), v)
+}
+
+predicate initialisedBy(GlobalVariable v, Call call) {
+ exists(Function callee |
+ initialises(v, callee) and
+ resolvedCall(call, callee)
+ )
+}
+
+predicate initialises(GlobalVariable v, Function f) {
+ exists(Function callee | allCalls(f, callee) and initialises(v, callee))
+ or
+ initFunc(v, f)
+}
+
+predicate firstCall(Call call) { beforeCall(call) } // `call` is reachable from a function's block without passing through another call
+
+predicate beforeCall(ControlFlowNode node) {
+ node = any(Function f).getBlock()
+ or
+ exists(ControlFlowNode prev |
+ node = prev.getASuccessor() and
+ not prev instanceof Call and
+ beforeCall(prev)
+ )
+}
+
+predicate callPair(Call call, Call successor) { callReaches(call, successor) } // `successor` is reachable from `call` with no other call in between
+
+predicate callReaches(Call call, ControlFlowNode successor) {
+ successor = call.getASuccessor()
+ or
+ exists(ControlFlowNode via |
+ successor = via.getASuccessor() and
+ not via instanceof Call and
+ callReaches(call, via)
+ )
+}
+
+from GlobalVariable v, Function f
+where
+ uninitialisedBefore(v, f) and // `v` may still be uninitialised when `f` is entered
+ useFunc(v, f) // `f` reads `v` and never writes it
+select f,
+ "The variable '" + v.getName() +
+ "' is used in this function but may not be initialized when it is called."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/InconsistentNullnessTesting.ql b/repo-tests/codeql/cpp/ql/src/Critical/InconsistentNullnessTesting.ql
new file mode 100644
index 00000000000..da64be1fdb9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/InconsistentNullnessTesting.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Inconsistent null check of pointer
+ * @description A dereferenced pointer is not checked for nullness in this location, but it is checked in other locations. Dereferencing a null pointer leads to undefined results.
+ * @kind problem
+ * @id cpp/inconsistent-nullness-testing
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags reliability
+ * security
+ * external/cwe/cwe-476
+ */
+
+import cpp
+
+from StackVariable v, ControlFlowNode def, VariableAccess checked, VariableAccess unchecked
+where
+ definitionUsePair(v, def, checked) and
+ definitionUsePair(v, def, unchecked) and
+ checked = v.getAnAccess() and
+ unchecked = v.getAnAccess() and
+ dereferenced(checked) and
+ dereferenced(unchecked) and
+ checkedValid(v, checked) and
+ not checkedValid(v, unchecked) and
+ forall(ControlFlowNode other | definitionUsePair(v, other, checked) |
+ definitionUsePair(v, other, unchecked)
+ ) and
+ not unchecked.getParent+() instanceof SizeofOperator
+select unchecked,
+ "This dereference is not guarded by a non-null check, whereas other dereferences are guarded"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/InitialisationNotRun.ql b/repo-tests/codeql/cpp/ql/src/Critical/InitialisationNotRun.ql
new file mode 100644
index 00000000000..ba575c55921
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/InitialisationNotRun.ql
@@ -0,0 +1,43 @@
+/**
+ * @name Initialization code not run
+ * @description Not running initialization code may lead to unexpected behavior.
+ * @kind problem
+ * @id cpp/initialization-not-run
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags reliability
+ * security
+ * external/cwe/cwe-456
+ */
+
+import cpp
+import semmle.code.cpp.pointsto.CallGraph
+
+predicate global(GlobalVariable v) {
+ v.getAnAccess().isUsedAsLValue() and
+ not v.getType() instanceof Class and
+ not v.getType() instanceof ArrayType and
+ not exists(v.getInitializer())
+}
+
+predicate mainCalled(Function f) {
+ exists(Function caller | allCalls(caller, f) and mainCalled(caller))
+ or
+ f.hasGlobalName("main")
+}
+
+predicate called(Function f) {
+ f = any(FunctionAccess fa).getTarget()
+ or
+ mainCalled(f)
+}
+
+from GlobalVariable v
+where
+ not exists(VariableAccess write |
+ called(write.getEnclosingFunction()) and
+ write.isUsedAsLValue() and
+ write = v.getAnAccess()
+ ) and
+ global(v)
+select v, "Initialization code for '" + v.getName() + "' is never run."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/LargeParameter.ql b/repo-tests/codeql/cpp/ql/src/Critical/LargeParameter.ql
new file mode 100644
index 00000000000..2df622a4c81
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/LargeParameter.ql
@@ -0,0 +1,41 @@
+/**
+ * @name Large object passed by value
+ * @description An object larger than 64 bytes is passed by value to a function. Passing large objects by value unnecessarily use up scarce stack space, increase the cost of calling a function and can be a security risk. Use a const pointer to the object instead.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision very-high
+ * @id cpp/large-parameter
+ * @tags efficiency
+ * readability
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.EscapesTree
+
+from Function f, Parameter p, Type t, int size
+where
+ f.getAParameter() = p and
+ p.getType() = t and
+ t.getSize() = size and
+ size > 64 and
+ not t.getUnderlyingType() instanceof ArrayType and
+ not f instanceof CopyAssignmentOperator and
+ // exception: p may be modified (its address is taken as non-const, or it is assigned to, incremented/decremented, or the qualifier of a call to a non-const function), which may mean the copy is intended
+ not p.getAnAccess().isAddressOfAccessNonConst() and
+ not exists(Expr e |
+ variableAccessedAsValue(p.getAnAccess(), e.getFullyConverted()) and
+ (
+ exists(Assignment an | an.getLValue() = e)
+ or
+ exists(CrementOperation co | co.getOperand() = e)
+ or
+ exists(FunctionCall fc | fc.getQualifier() = e and not fc.getTarget().hasSpecifier("const"))
+ )
+ ) and
+ // if there's no block, we can't tell how the parameter is used
+ exists(f.getBlock())
+select p,
+ "This parameter of type $@ is " + size.toString() +
+ " bytes - consider passing a const pointer/reference instead.", t, t.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/LateNegativeTest.ql b/repo-tests/codeql/cpp/ql/src/Critical/LateNegativeTest.ql
new file mode 100644
index 00000000000..5de36fcc5a9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/LateNegativeTest.ql
@@ -0,0 +1,52 @@
+/**
+ * @name Pointer offset used before it is checked
+ * @description Accessing a pointer or array using an offset before
+ * checking if the value is positive
+ * may result in unexpected behavior.
+ * @kind problem
+ * @id cpp/late-negative-test
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @tags reliability
+ * security
+ * external/cwe/cwe-823
+ */
+
+import cpp
+
+predicate negativeCheck(StackVariable v, ComparisonOperation op) {
+ exists(int varindex, string constant, Literal lit |
+ op.getChild(varindex) = v.getAnAccess() and // `varindex` is the child position of the access to `v`
+ op.getChild(1 - varindex) = lit and // the other child must be a literal
+ lit.getValue() = constant and
+ (
+ op.getOperator() = "<" and varindex = 0 and constant = "0" // v < 0 (presumably child 0 is the left operand - confirm)
+ or
+ op.getOperator() = "<" and varindex = 1 and constant = "-1" // -1 < v
+ or
+ op.getOperator() = ">" and varindex = 0 and constant = "-1" // v > -1
+ or
+ op.getOperator() = ">" and varindex = 1 and constant = "0" // 0 > v
+ or
+ op.getOperator() = "<=" and varindex = 0 and constant = "-1" // v <= -1
+ or
+ op.getOperator() = "<=" and varindex = 1 and constant = "0" // 0 <= v
+ or
+ op.getOperator() = ">=" and varindex = 0 and constant = "0" // v >= 0
+ or
+ op.getOperator() = ">=" and varindex = 1 and constant = "-1" // -1 >= v
+ )
+ )
+}
+
+from StackVariable v, ArrayExpr dangerous, Expr check
+where
+ negativeCheck(v, check) and
+ useUsePair(v, dangerous.getArrayOffset(), check.getAChild()) and
+ not exists(Expr earlier |
+ useUsePair(v, earlier.getAChild(), dangerous.getArrayOffset()) and negativeCheck(v, earlier)
+ )
+select dangerous,
+ "Variable '" + v.getName() +
+ "' is used as an array-offset before it is tested for being negative (test on line " +
+ check.getLocation().getStartLine().toString() + "). "
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/LoopBounds.qll b/repo-tests/codeql/cpp/ql/src/Critical/LoopBounds.qll
new file mode 100644
index 00000000000..b2cde8d6fe8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/LoopBounds.qll
@@ -0,0 +1,66 @@
+/** Provides helpers for OverflowStatic.ql */
+
+import cpp
+
+/**
+ * An assignment expression that stores `0` into a variable, such as:
+ * ```
+ * int x;
+ * x = 0;
+ * ```
+ * An initializer is not an assignment expression, so this does not match:
+ * ```
+ * int x = 0;
+ * ```
+ */
+class ZeroAssignment extends AssignExpr {
+ ZeroAssignment() {
+ this.getAnOperand() instanceof Zero and
+ this.getAnOperand() instanceof VariableAccess
+ }
+
+ /** Gets a variable that is assigned the value `0`. */
+ Variable assignedVariable() { this.getAnOperand() = result.getAnAccess() }
+}
+
+private predicate staticLimit(RelationalOperation op, Variable v, int limit) {
+ // `limit` is the constant bound adjusted so that `v <= limit` matches `op`
+ op.getLeftOperand() = v.getAnAccess() and
+ (
+ op instanceof LTExpr and limit = op.getRightOperand().getValue().toInt() - 1
+ or
+ op instanceof LEExpr and limit = op.getRightOperand().getValue().toInt()
+ )
+}
+
+private predicate simpleInc(IncrementOperation inc, Variable v) {
+ v.getAnAccess() = inc.getAChild()
+}
+
+/**
+ * A `for` loop shaped like `for (x = 0; x < limit; x++)` whose body never
+ * writes to `x`. The `<=` comparison and the `++x` form are accepted too.
+ */
+class ClassicForLoop extends ForStmt {
+ ClassicForLoop() {
+ exists(LocalVariable loopVar |
+ simpleInc(this.getUpdate(), loopVar) and
+ staticLimit(this.getCondition(), loopVar, _) and
+ this.getInitialization().getAChild() instanceof ZeroAssignment and
+ not exists(VariableAccess write |
+ write = loopVar.getAnAccess() and
+ write.isUsedAsLValue() and
+ write.getEnclosingStmt() = this.getStmt().getAChild*()
+ )
+ )
+ }
+
+ /** Gets the variable incremented by the loop's update expression. */
+ LocalVariable counter() { simpleInc(this.getUpdate(), result) }
+
+ /**
+ * Gets the largest value the loop variable can take inside the loop
+ * body, as derived from the loop condition. The smallest is 0.
+ */
+ int limit() { staticLimit(this.getCondition(), _, result) }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/MemoryFreed.qll b/repo-tests/codeql/cpp/ql/src/Critical/MemoryFreed.qll
new file mode 100644
index 00000000000..44557503e43
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/MemoryFreed.qll
@@ -0,0 +1,27 @@
+import semmle.code.cpp.pointsto.PointsTo
+
+private predicate freed(Expr e) {
+ // cautiously assume that any `ExprCall` could be a deallocation expression,
+ // so each of its arguments counts as possibly freed.
+ e = any(ExprCall c).getAnArgument()
+ or
+ // the expression freed by a known deallocation expression
+ e = any(DeallocationExpr de).getFreedExpr()
+}
+
+/** An expression that might be deallocated. */
+class FreedExpr extends PointsToExpr {
+ FreedExpr() { freed(this) }
+
+ override predicate interesting() { freed(this) }
+}
+
+/**
+ * An allocation expression that might be deallocated. For example:
+ * ```
+ * int* p = new int;
+ * ...
+ * delete p;
+ * ```
+ */
+predicate allocMayBeFreed(AllocationExpr alloc) { anythingPointsTo(alloc) }
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/MemoryMayNotBeFreed.ql b/repo-tests/codeql/cpp/ql/src/Critical/MemoryMayNotBeFreed.ql
new file mode 100644
index 00000000000..51467b52be8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/MemoryMayNotBeFreed.ql
@@ -0,0 +1,193 @@
+/**
+ * @name Memory may not be freed
+ * @description A function may return before freeing memory that was allocated in the function. Freeing all memory allocated in the function before returning ties the lifetime of the memory blocks to that of the function call, making it easier to avoid and detect memory leaks.
+ * @kind problem
+ * @id cpp/memory-may-not-be-freed
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags efficiency
+ * security
+ * external/cwe/cwe-401
+ */
+
+import MemoryFreed
+import semmle.code.cpp.controlflow.StackVariableReachability
+
+/**
+ * Holds if `call` directly targets `f`, or is a call through a variable that
+ * may have been assigned an expression containing an access to `f`.
+ */
+predicate mayCallFunction(Expr call, Function f) {
+ f = call.(FunctionCall).getTarget()
+ or
+ f = call.(VariableCall).getVariable().getAnAssignedValue().getAChild*().(FunctionAccess).getTarget()
+}
+
+predicate allocCallOrIndirect(Expr e) {
+ // direct alloc call.
+ // Only allocations that are freed somewhere are of
+ // interest here, because MemoryNeverFreed already
+ // reports the ones that never are.
+ allocMayBeFreed(e) and
+ e.(AllocationExpr).requiresDealloc()
+ or
+ // indirect alloc call: `e` may call a function that returns an allocation
+ exists(ReturnStmt ret |
+ mayCallFunction(e, ret.getEnclosingFunction()) and
+ (
+ // the allocation is returned through a variable it was assigned to
+ exists(Variable held |
+ ret.getExpr().(VariableAccess).getTarget() = held and
+ not assignedToFieldOrGlobal(held, _) and
+ allocCallOrIndirect(held.getAnAssignedValue())
+ )
+ or
+ // the allocation is returned directly
+ allocCallOrIndirect(ret.getExpr())
+ )
+ )
+}
+
+/**
+ * The point at which a call to 'realloc' on 'v' has been verified to
+ * succeed. A failed realloc does *not* free the input pointer, which
+ * can cause memory leaks.
+ */
+predicate verifiedRealloc(FunctionCall reallocCall, Variable v, ControlFlowNode verified) {
+ reallocCall.(AllocationExpr).getReallocPtr() = v.getAnAccess() and
+ (
+ exists(Variable newV, ControlFlowNode node |
+ // a realloc followed by a null check at 'node' (return the non-null
+ // successor, i.e. where the realloc is confirmed to have succeeded)
+ newV.getAnAssignedValue() = reallocCall and
+ node.(AnalysedExpr).getNonNullSuccessor(newV) = verified and
+ // note: this case uses naive flow logic (getAnAssignedValue).
+ // special case: if the result of the 'realloc' is assigned to the
+ // same variable, we don't discriminate properly between the old
+ // and the new allocation; better to not consider this a free at
+ // all in that case.
+ newV != v
+ )
+ or
+ // a realloc(ptr, 0), i.e. a requested size of zero, which always
+ // succeeds and frees (return the realloc itself)
+ reallocCall.(AllocationExpr).getSizeExpr().getValue() = "0" and
+ verified = reallocCall
+ )
+}
+
+predicate freeCallOrIndirect(ControlFlowNode n, Variable v) {
+ // direct free call (anything with a realloc pointer is excluded here)
+ not exists(n.(AllocationExpr).getReallocPtr()) and
+ v.getAnAccess() = n.(DeallocationExpr).getFreedExpr()
+ or
+ // verified realloc call
+ verifiedRealloc(_, v, n)
+ or
+ // indirect free call: `v` is passed to a callee that frees the parameter
+ exists(Function callee, int idx, FunctionCall inner |
+ v.getAnAccess() = n.(Call).getArgument(idx) and
+ mayCallFunction(n, callee) and
+ callee = inner.getEnclosingFunction() and
+ freeCallOrIndirect(inner, callee.getParameter(idx))
+ )
+}
+
+predicate allocationDefinition(StackVariable v, ControlFlowNode def) {
+ exists(Expr rhs | allocCallOrIndirect(rhs) and exprDefinition(v, def, rhs))
+}
+
+class AllocVariableReachability extends StackVariableReachabilityWithReassignment {
+ AllocVariableReachability() { this = "AllocVariableReachability" }
+
+ override predicate isSourceActual(ControlFlowNode node, StackVariable v) {
+ allocationDefinition(v, node)
+ }
+
+ override predicate isSinkActual(ControlFlowNode node, StackVariable v) {
+ // return statements may be used directly in the query
+ node.(ReturnStmt).getEnclosingFunction() = v.getFunction() or
+ // the remaining nodes may be used in allocationReaches
+ assignedToFieldOrGlobal(v, node) or
+ freeCallOrIndirect(node, v) or
+ exists(node.(AnalysedExpr).getNullSuccessor(v))
+ }
+
+ override predicate isBarrier(ControlFlowNode node, StackVariable v) { definitionBarrier(v, node) }
+}
+
+/**
+ * Holds if `v` still holds the value from allocation `def` when `node` is entered.
+ */
+predicate allocatedVariableReaches(StackVariable v, ControlFlowNode def, ControlFlowNode node) {
+ exists(AllocVariableReachability r |
+ // the definition node itself is accepted
+ node = def and
+ r.isSource(def, v)
+ or
+ // otherwise rely on reachability
+ r.reachesTo(def, _, node, v)
+ )
+}
+
+class AllocReachability extends StackVariableReachabilityExt {
+ AllocReachability() { this = "AllocReachability" }
+
+ override predicate isSource(ControlFlowNode node, StackVariable v) {
+ allocationDefinition(v, node)
+ }
+
+ override predicate isSink(ControlFlowNode node, StackVariable v) {
+ node.(ReturnStmt).getEnclosingFunction() = v.getFunction()
+ }
+
+ override predicate isBarrier(
+ ControlFlowNode source, ControlFlowNode node, ControlFlowNode next, StackVariable v
+ ) {
+ // the memory allocated at `source` (held in any variable `held`) is freed or
+ // assigned to a global at node, or NULL checked on the edge node -> next.
+ next = node.getASuccessor() and
+ isSource(source, v) and
+ exists(StackVariable held | allocatedVariableReaches(held, source, node) |
+ assignedToFieldOrGlobal(held, node) or
+ freeCallOrIndirect(node, held) or
+ next = node.(AnalysedExpr).getNullSuccessor(held)
+ )
+ }
+}
+
+/**
+ * Holds if the memory allocated at `def` may arrive at `node` without having
+ * been freed, null-checked, or leaked to a field or global, whichever
+ * variable (if any) currently holds it.
+ */
+predicate allocationReaches(ControlFlowNode def, ControlFlowNode node) {
+ any(AllocReachability r).reaches(def, _, node)
+}
+
+predicate assignedToFieldOrGlobal(StackVariable v, Expr e) {
+ // direct case: `v` is the right-hand side of an assignment whose target is
+ // not a StackVariable (usually a field or global, but also e.g. *ptr = v)
+ not e.(Assignment).getLValue().(VariableAccess).getTarget() instanceof StackVariable and
+ v.getAnAccess() = e.(Assignment).getRValue()
+ or
+ // indirect case: `v` is passed to a callee that stores the parameter away
+ exists(Function callee, int idx, Expr inner |
+ v.getAnAccess() = e.(FunctionCall).getArgument(idx) and
+ mayCallFunction(e, callee) and
+ callee = inner.getEnclosingFunction() and
+ assignedToFieldOrGlobal(callee.getParameter(idx), inner)
+ )
+ or
+ // constructor case: `v` initializes a field in a constructor field initializer
+ v.getAnAccess() = e.(ConstructorFieldInit).getExpr()
+}
+
+from ControlFlowNode def, ReturnStmt ret
+where
+ not exists(StackVariable held |
+ held.getAnAccess() = ret.getAChild*() and
+ allocatedVariableReaches(held, def, ret)
+ ) and
+ allocationReaches(def, ret)
+select def, "The memory allocated here may not be released at $@.", ret, "this exit point"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/MemoryNeverFreed.ql b/repo-tests/codeql/cpp/ql/src/Critical/MemoryNeverFreed.ql
new file mode 100644
index 00000000000..e9593e9d749
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/MemoryNeverFreed.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Memory is never freed
+ * @description A function always returns before freeing memory that was allocated in the function. Freeing all memory allocated in the function before returning ties the lifetime of the memory blocks to that of the function call, making it easier to avoid and detect memory leaks.
+ * @kind problem
+ * @id cpp/memory-never-freed
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags efficiency
+ * security
+ * external/cwe/cwe-401
+ */
+
+import MemoryFreed
+
+from AllocationExpr alloc
+where
+ not allocMayBeFreed(alloc) and
+ alloc.requiresDealloc()
+select alloc, "This memory is never freed"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/MissingNegativityTest.ql b/repo-tests/codeql/cpp/ql/src/Critical/MissingNegativityTest.ql
new file mode 100644
index 00000000000..a4409f2dabf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/MissingNegativityTest.ql
@@ -0,0 +1,72 @@
+/**
+ * @name Unchecked return value used as offset
+ * @description Using a return value as a pointer offset without checking that the value is positive
+ * may lead to buffer overruns.
+ * @kind problem
+ * @id cpp/missing-negativity-test
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @tags reliability
+ * security
+ * external/cwe/cwe-823
+ */
+
+import cpp
+import Negativity
+
+class IntegralReturnValue extends FunctionCall {
+ IntegralReturnValue() { this.getType().getUnderlyingType() instanceof IntegralType }
+
+ predicate isChecked() {
+ exists(Variable v, ControlFlowNode def, ControlFlowNode test |
+ errorSuccessor(v, test.getASuccessor()) and
+ definitionReaches(def, test) and
+ exprDefinition(v, def, this)
+ )
+ }
+}
+
+class FunctionWithNegativeReturn extends Function {
+ FunctionWithNegativeReturn() {
+ this.getType().getUnderlyingType() instanceof IntegralType and
+ (
+ exists(ReturnStmt ret |
+ ret.getExpr().getValue().toInt() < 0 and
+ ret.getEnclosingFunction() = this
+ )
+ or
+ count(IntegralReturnValue val | val.getTarget() = this and val.isChecked()) * 100 /
+ count(IntegralReturnValue val | val.getTarget() = this) >= 80
+ )
+ }
+}
+
+predicate dangerousUse(IntegralReturnValue val, Expr use) { // `use` is an array offset or pointer-arithmetic operand fed by `val`
+ exists(ArrayExpr ae | ae.getArrayOffset() = val and use = val) // the call itself is the array offset
+ or // the call result is stored in `v`, and an unchecked use of `v` is an array offset
+ exists(StackVariable v, ControlFlowNode def, ArrayExpr ae |
+ exprDefinition(v, def, val) and
+ use = ae.getArrayOffset() and
+ not boundsChecked(v, use) and
+ definitionUsePair(v, def, use)
+ )
+ or // the call itself is an operand of a pointer-typed addition
+ use.getParent().(AddExpr).getAnOperand() = val and
+ val = use and
+ use.getParent().(AddExpr).getType().getUnderlyingType() instanceof PointerType // test the addition's type, as the case below does: `val` itself is integral
+ or // the call result is stored in `v`, and an unchecked use of `v` is an operand of a pointer-typed addition
+ exists(StackVariable v, ControlFlowNode def, AddExpr add |
+ exprDefinition(v, def, val) and
+ definitionUsePair(v, def, use) and
+ add.getAnOperand() = use and
+ not boundsChecked(v, use) and
+ add.getType().getUnderlyingType() instanceof PointerType
+ )
+}
+
+from FunctionWithNegativeReturn f, IntegralReturnValue val, Expr dangerous
+where
+ dangerousUse(val, dangerous) and
+ f = val.getTarget()
+select dangerous,
+ "Dangerous use of possibly negative value (return value of '" + f.getName() + "')."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/MissingNullTest.ql b/repo-tests/codeql/cpp/ql/src/Critical/MissingNullTest.ql
new file mode 100644
index 00000000000..b50d06a8dd1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/MissingNullTest.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Returned pointer not checked
+ * @description Dereferencing an untested value from a function that can return null may lead to undefined behavior.
+ * @kind problem
+ * @id cpp/missing-null-test
+ * @problem.severity recommendation
+ * @security-severity 7.5
+ * @tags reliability
+ * security
+ * external/cwe/cwe-476
+ */
+
+import cpp
+
+from VariableAccess access
+where
+ dereferenced(access) and
+ maybeNull(access)
+select access, "Value may be null; it should be checked before dereferencing."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/Negativity.qll b/repo-tests/codeql/cpp/ql/src/Critical/Negativity.qll
new file mode 100644
index 00000000000..e9c0a3d2410
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/Negativity.qll
@@ -0,0 +1,211 @@
+import cpp
+
+/**
+ * Holds if `val` is itself an access to `v`, or is an assignment
+ * expression whose left-hand side is an access to `v`.
+ */
+predicate valueOfVar(Variable v, Expr val) {
+ v.getAnAccess() = val.(AssignExpr).getLValue() or
+ v.getAnAccess() = val
+}
+
+/**
+ * Holds if either:
+ * - `cond` is an (in)equality expression that compares the variable `v` to the value `-1`, or
+ * - `cond` is a relational expression that compares the variable `v` to a constant.
+ */
+predicate boundsCheckExpr(Variable v, Expr cond) {
+ exists(EQExpr eq |
+ cond = eq and
+ eq.getAnOperand().getValue() = "-1" and
+ valueOfVar(v, eq.getAnOperand())
+ )
+ or
+ exists(NEExpr ne |
+ cond = ne and
+ ne.getAnOperand().getValue() = "-1" and
+ valueOfVar(v, ne.getAnOperand())
+ )
+ or
+ exists(LTExpr lt |
+ cond = lt and
+ valueOfVar(v, lt.getAnOperand()) and
+ exists(lt.getAnOperand().getValue())
+ )
+ or
+ exists(LEExpr le |
+ cond = le and
+ valueOfVar(v, le.getAnOperand()) and
+ exists(le.getAnOperand().getValue())
+ )
+ or
+ exists(GTExpr gt |
+ cond = gt and
+ valueOfVar(v, gt.getAnOperand()) and
+ exists(gt.getAnOperand().getValue())
+ )
+ or
+ exists(GEExpr ge |
+ cond = ge and
+ valueOfVar(v, ge.getAnOperand()) and
+ exists(ge.getAnOperand().getValue())
+ )
+}
+
+/**
+ * Holds if `node` is an expression in a conditional statement and `succ` is an
+ * immediate successor of `node` that may be reached after evaluating `node`.
+ * For example, given
+ * ```
+ * if (a < 10 && b) func1();
+ * else func2();
+ * ```
+ * this predicate holds when either:
+ * - `node` is `a < 10` and `succ` is `func2()` or `b`, or
+ * - `node` is `b` and `succ` is `func1()` or `func2()`
+ */
+predicate conditionalSuccessor(ControlFlowNode node, ControlFlowNode succ) {
+ if node.isCondition()
+ then succ = node.getATrueSuccessor() or succ = node.getAFalseSuccessor()
+ else
+ exists(BinaryLogicalOperation binop |
+ binop.getAnOperand() = node and conditionalSuccessor(binop, succ)
+ )
+}
+
+/**
+ * Holds if the current value of the variable `v` at control-flow
+ * node `node` has been used either in:
+ * - an (in)equality comparison with the value `-1`, or
+ * - a relational comparison that compares `v` to a constant.
+ */
+predicate boundsChecked(Variable v, ControlFlowNode node) {
+ exists(ControlFlowNode pred |
+ pred = node.getAPredecessor() and not definitionBarrier(v, pred) and boundsChecked(v, pred)
+ )
+ or
+ exists(Expr test |
+ conditionalSuccessor(test, node) and
+ boundsCheckExpr(v, test)
+ )
+}
+
+/**
+ * Holds if `cond` compares `v` to some common error values. Specifically, this
+ * predicate holds when:
+ * - `cond` checks that `v` is equal to `-1`, or
+ * - `cond` checks that `v` is less than `0`, or
+ * - `cond` checks that `v` is less than or equal to `-1`, or
+ * - `cond` checks that `v` is not some common success value (see `successCondition`).
+ */
+predicate errorCondition(Variable v, Expr cond) {
+ exists(EQExpr eq |
+ cond = eq and
+ eq.getAnOperand().getValue() = "-1" and
+ eq.getAnOperand() = v.getAnAccess()
+ )
+ or
+ exists(LTExpr lt |
+ cond = lt and
+ lt.getLeftOperand() = v.getAnAccess() and
+ lt.getRightOperand().getValue() = "0"
+ )
+ or
+ exists(LEExpr le |
+ cond = le and
+ le.getLeftOperand() = v.getAnAccess() and // `v <= -1`: the variable is the left operand
+ le.getRightOperand().getValue() = "-1"
+ )
+ or
+ exists(NotExpr ne |
+ cond = ne and
+ successCondition(v, ne.getOperand())
+ )
+}
+
+/**
+ * Holds if `cond` compares `v` to some common success values. Specifically, this
+ * predicate holds when:
+ * - `cond` checks that `v` is not equal to `-1`, or
+ * - `cond` checks that `v` is greater than or equal to `0`, or
+ * - `cond` checks that `v` is greater than `-1`, or
+ * - `cond` checks that `v` is not some common error value (see `errorCondition`).
+ */
+predicate successCondition(Variable v, Expr cond) {
+ exists(NEExpr ne |
+ cond = ne and
+ ne.getAnOperand().getValue() = "-1" and
+ ne.getAnOperand() = v.getAnAccess()
+ )
+ or
+ exists(GEExpr ge |
+ cond = ge and
+ ge.getLeftOperand() = v.getAnAccess() and
+ ge.getRightOperand().getValue() = "0"
+ )
+ or
+ exists(GTExpr gt |
+ cond = gt and
+ gt.getLeftOperand() = v.getAnAccess() and // `v > -1`: the variable is the left operand
+ gt.getRightOperand().getValue() = "-1"
+ )
+ or
+ exists(NotExpr ne |
+ cond = ne and
+ errorCondition(v, ne.getOperand())
+ )
+}
+
+/**
+ * Holds if `n` may be reached immediately after a comparison that tests
+ * whether `v` holds one of the common *error* values: either the true
+ * successor of an error test or the false successor of a success test.
+ */
+predicate errorSuccessor(Variable v, ControlFlowNode n) {
+ exists(Expr cond |
+ n = cond.getAFalseSuccessor() and successCondition(v, cond)
+ or
+ n = cond.getATrueSuccessor() and errorCondition(v, cond)
+ )
+}
+
+/**
+ * Holds if `n` may be reached immediately after a comparison that tests
+ * whether `v` holds one of the common *success* values: either the true
+ * successor of a success test or the false successor of an error test.
+ */
+predicate successSuccessor(Variable v, ControlFlowNode n) {
+ exists(Expr cond |
+ n = cond.getAFalseSuccessor() and errorCondition(v, cond)
+ or
+ n = cond.getATrueSuccessor() and successCondition(v, cond)
+ )
+}
+
+/**
+ * Holds if the value of `v` current at control-flow node `n` may already
+ * have been compared against the common set of *error* values.
+ */
+predicate checkedError(Variable v, ControlFlowNode n) {
+ exists(ControlFlowNode prev |
+ n = prev.getASuccessor() and
+ not definitionBarrier(v, prev) and
+ checkedError(v, prev)
+ )
+ or
+ errorSuccessor(v, n)
+}
+
+/**
+ * Holds if the value of `v` current at control-flow node `n` may already
+ * have been compared against the common set of *success* values.
+ */
+predicate checkedSuccess(Variable v, ControlFlowNode n) {
+ exists(ControlFlowNode prev |
+ n = prev.getASuccessor() and
+ not definitionBarrier(v, prev) and
+ checkedSuccess(v, prev)
+ )
+ or
+ successSuccessor(v, n)
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/NewArrayDeleteMismatch.ql b/repo-tests/codeql/cpp/ql/src/Critical/NewArrayDeleteMismatch.ql
new file mode 100644
index 00000000000..d49212a2920
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/NewArrayDeleteMismatch.ql
@@ -0,0 +1,17 @@
+/**
+ * @name 'new[]' array freed with 'delete'
+ * @description An array allocated with 'new[]' is being freed using 'delete'. Behavior in such cases is undefined and should be avoided. Use 'delete[]' when freeing arrays allocated with 'new[]'.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/new-array-delete-mismatch
+ * @tags reliability
+ */
+
+import NewDelete
+
+from Expr alloc, Expr free, Expr freed
+where
+ freeExprOrIndirect(free, freed, "delete") and
+ allocReaches(freed, alloc, "new[]")
+select free, "This memory may have been allocated with '$@', not 'new'.", alloc, "new[]"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/NewDelete.qll b/repo-tests/codeql/cpp/ql/src/Critical/NewDelete.qll
new file mode 100644
index 00000000000..77da1f36d80
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/NewDelete.qll
@@ -0,0 +1,167 @@
+/**
+ * Provides predicates for associating new/malloc calls with delete/free.
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.SSA
+import semmle.code.cpp.dataflow.DataFlow
+
+/**
+ * Holds if `alloc` is a direct allocation and `kind` names its flavour: "new", "new[]", or
+ * "malloc" (any `AllocationExpr` call whose target is not an `OperatorNewAllocationFunction`).
+ */
+predicate allocExpr(Expr alloc, string kind) {
+  not alloc.isFromUninstantiatedTemplate(_) and
+  (
+    alloc instanceof NewExpr and
+    kind = "new" and
+    // placement new and custom overloads are left out because they
+    // may not conform to assumptions
+    not alloc.(NewExpr).getAllocatorCall().getTarget().getNumberOfParameters() > 1
+    or
+    alloc instanceof NewArrayExpr and
+    kind = "new[]" and
+    // placement new and custom overloads are left out because they
+    // may not conform to assumptions
+    not alloc.(NewArrayExpr).getAllocatorCall().getTarget().getNumberOfParameters() > 1
+    or
+    exists(Function callee |
+      callee = alloc.(AllocationExpr).(FunctionCall).getTarget() and
+      (
+        not callee instanceof OperatorNewAllocationFunction and
+        kind = "malloc"
+        or
+        callee.getName() = "operator new" and
+        kind = "new" and
+        // placement new and custom overloads are left out because they
+        // may not conform to assumptions
+        not callee.getNumberOfParameters() > 1
+        or
+        callee.getName() = "operator new[]" and
+        kind = "new[]" and
+        // placement new and custom overloads are left out because they
+        // may not conform to assumptions
+        not callee.getNumberOfParameters() > 1
+      )
+    )
+  )
+}
+
+/**
+ * Holds if `alloc` is an allocation as recognised by `allocExpr`, or a call to
+ * a function with a `return` statement whose expression is (or receives local
+ * flow from) such an allocation. `kind` is the kind of the underlying allocation.
+ */
+predicate allocExprOrIndirect(Expr alloc, string kind) {
+  // indirect: `alloc` calls a function that returns an allocation
+  exists(ReturnStmt ret |
+    ret.getEnclosingFunction() = alloc.(FunctionCall).getTarget() and
+    (
+      exists(Expr src |
+        DataFlow::localExprFlow(src, ret.getExpr()) and
+        allocExprOrIndirect(src, kind)
+      )
+      or
+      allocExprOrIndirect(ret.getExpr(), kind)
+    )
+  )
+  or
+  // direct: `alloc` is itself an allocation
+  allocExpr(alloc, kind)
+}
+
+/**
+ * Holds if `v` is not a stack variable and one of its assigned values may
+ * evaluate to the allocation `alloc` of type `kind` (see `allocReaches0`).
+ */
+pragma[nomagic]
+private predicate allocReachesVariable(Variable v, Expr alloc, string kind) {
+  not v instanceof StackVariable and
+  exists(Expr assigned |
+    assigned = v.getAnAssignedValue() and
+    allocReaches0(assigned, alloc, kind)
+  )
+}
+
+/**
+ * Holds if `e` is an expression which may evaluate to the
+ * result of a previous memory allocation `alloc`. `kind` is a
+ * string describing the type of that allocation. Several kinds may hold for one `e`.
+ */
+private predicate allocReaches0(Expr e, Expr alloc, string kind) {
+ // base case: `e` is itself a (direct or wrapped) allocation
+ allocExprOrIndirect(alloc, kind) and
+ e = alloc
+ or
+ exists(SsaDefinition def, StackVariable v |
+ // stack variable: `e` is a use of an SSA definition of `v` whose ultimate defining value reaches `alloc`
+ allocReaches0(def.getAnUltimateDefiningValue(v), alloc, kind) and
+ e = def.getAUse(v)
+ )
+ or
+ exists(Variable v |
+ // non-stack variable: `e` is any access to a `v` that is assigned an allocation somewhere
+ allocReachesVariable(v, alloc, kind) and
+ strictcount(VariableAccess va | va.getTarget() = v) <= 50 and // avoid very expensive cases
+ e.(VariableAccess).getTarget() = v
+ )
+}
+
+/**
+ * Holds if `e` may evaluate to the result of the allocation `alloc` of
+ * type `kind`, and no allocation of any other kind reaches `e`
+ * (so `kind` is the only kind associated with `e`).
+ */
+predicate allocReaches(Expr e, Expr alloc, string kind) {
+  not exists(string otherKind |
+    otherKind != kind and
+    allocReaches0(e, _, otherKind)
+  ) and
+  allocReaches0(e, alloc, kind)
+}
+
+/**
+ * Holds if `free` directly releases `freed`. `kind` is "delete" or "delete[]"
+ * for delete expressions and `operator delete` / `operator delete[]` calls, and "free"
+ * for any other `DeallocationExpr` call whose target is not an `OperatorDeleteDeallocationFunction`.
+ */
+predicate freeExpr(Expr free, Expr freed, string kind) {
+  freed = free.(DeleteArrayExpr).getExpr() and
+  kind = "delete[]"
+  or
+  freed = free.(DeleteExpr).getExpr() and
+  kind = "delete"
+  or
+  exists(Function callee |
+    callee = free.(FunctionCall).getTarget() and
+    freed = free.(DeallocationExpr).getFreedExpr() and
+    (
+      not callee instanceof OperatorDeleteDeallocationFunction and
+      kind = "free"
+      or
+      callee.getName() = "operator delete[]" and
+      kind = "delete[]"
+      or
+      callee.getName() = "operator delete" and
+      kind = "delete"
+    )
+  )
+}
+
+/**
+ * Holds if `free` releases `freed` with a deallocation of type `kind`, either
+ * directly (see `freeExpr`) or by calling a function that (transitively)
+ * frees an access to one of its own parameters; in that case `freed` is
+ * the argument passed for that parameter.
+ */
+predicate freeExprOrIndirect(Expr free, Expr freed, string kind) {
+  // indirect: `free` calls a wrapper that frees its `idx`-th parameter
+  exists(VariableAccess innerFreed, int idx |
+    freed = free.(FunctionCall).getArgument(idx) and
+    innerFreed.getTarget() = free.(FunctionCall).getTarget().getParameter(idx) and
+    freeExprOrIndirect(_, innerFreed, kind)
+  )
+  or
+  // direct: `free` is itself the deallocation
+  freeExpr(free, freed, kind)
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/NewDeleteArrayMismatch.ql b/repo-tests/codeql/cpp/ql/src/Critical/NewDeleteArrayMismatch.ql
new file mode 100644
index 00000000000..1f37d4f393b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/NewDeleteArrayMismatch.ql
@@ -0,0 +1,17 @@
+/**
+ * @name 'new' object freed with 'delete[]'
+ * @description An object that was allocated with 'new' is being freed using 'delete[]'. Behavior in such cases is undefined and should be avoided. Use 'delete' instead.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/new-delete-array-mismatch
+ * @tags reliability
+ */
+
+import NewDelete
+
+from Expr free, Expr freed, Expr alloc
+where
+  freeExprOrIndirect(free, freed, "delete[]") and // `freed` is released with array `delete[]`...
+  allocReaches(freed, alloc, "new") // ...but only scalar `new` allocations reach it
+select free, "This memory may have been allocated with '$@', not 'new[]'.", alloc, "new"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/NewFreeMismatch.ql b/repo-tests/codeql/cpp/ql/src/Critical/NewFreeMismatch.ql
new file mode 100644
index 00000000000..19b9b197214
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/NewFreeMismatch.ql
@@ -0,0 +1,39 @@
+/**
+ * @name Mismatching new/free or malloc/delete
+ * @description An object that was allocated with 'malloc' or 'new' is being freed using a mismatching 'free' or 'delete'.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision high
+ * @id cpp/new-free-mismatch
+ * @tags reliability
+ * security
+ * external/cwe/cwe-401
+ */
+
+import NewDelete
+
+/**
+ * Holds if an allocation of kind `allocKind` is correctly paired with a
+ * deallocation of kind `freeKind`: "malloc" with "free", or "new" with "delete".
+ */
+predicate correspondingKinds(string allocKind, string freeKind) {
+  freeKind = "delete" and
+  allocKind = "new"
+  or
+  freeKind = "free" and
+  allocKind = "malloc"
+}
+
+from
+  Expr free, Expr freed, string freeKind, string freeKindSimple, Expr alloc, string allocKind,
+  string allocKindSimple
+where
+  freeExprOrIndirect(free, freed, freeKind) and
+  allocReaches(freed, alloc, allocKind) and
+  freeKindSimple = freeKind.replaceAll("[]", "") and // strip the array suffix before comparing kinds
+  allocKindSimple = allocKind.replaceAll("[]", "") and
+  not correspondingKinds(allocKindSimple, freeKindSimple)
+select free,
+  "There is a " + allocKindSimple + "/" + freeKindSimple + " mismatch between this " + freeKind +
+    " and the corresponding $@.", alloc, allocKind
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/NotInitialised.ql b/repo-tests/codeql/cpp/ql/src/Critical/NotInitialised.ql
new file mode 100644
index 00000000000..14cc757457a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/NotInitialised.ql
@@ -0,0 +1,79 @@
+/**
+ * @name Variable not initialized before use
+ * @description Using an uninitialized variable may lead to undefined results.
+ * @kind problem
+ * @id cpp/not-initialised
+ * @problem.severity error
+ * @tags reliability
+ * external/cwe/cwe-457
+ */
+
+/*
+ * See also InitialisationNotRun.ql and GlobalUseBeforeInit.ql
+ */
+
+import cpp
+
+/**
+ * Holds if `s` is an access to variable `lv` that is used as an lvalue (conservative notion of "defines").
+ */
+predicate defines(ControlFlowNode s, Variable lv) {
+  exists(VariableAccess access | s = access and access.isUsedAsLValue() and lv = access.getTarget())
+}
+
+/**
+ * Holds if `s` is an rvalue access to variable `lv` that is not nested inside a `sizeof` (conservative).
+ */
+predicate uses(ControlFlowNode s, Variable lv) {
+  exists(VariableAccess access |
+    s = access and
+    lv = access.getTarget() and
+    not access.getParent+() instanceof SizeofOperator and
+    access.isRValue()
+  )
+}
+
+/**
+ * Holds if `n` is the initializer-less declaration statement of `lv`, or is reachable from it
+ * along control-flow edges without stepping over a node that `defines` `lv`.
+ */
+predicate noDefPath(LocalVariable lv, ControlFlowNode n) {
+  exists(ControlFlowNode prev | not defines(prev, lv) and prev.getASuccessor() = n and noDefPath(lv, prev))
+  or
+  not exists(lv.getInitializer()) and lv = n.(DeclStmt).getADeclaration()
+}
+
+predicate isAggregateType(Type t) { t instanceof ArrayType or t instanceof Class } // arrays and classes
+
+/**
+ * Holds if `va` is an rvalue use of a non-aggregate, non-`va_list` local variable
+ * that is reachable from its initializer-less declaration without a definition.
+ */
+predicate undefinedLocalUse(VariableAccess va) {
+  exists(LocalVariable local |
+    local.getAnAccess() = va and
+    uses(va, local) and
+    noDefPath(local, va) and
+    not local.getType().hasName("va_list") and
+    // structs and arrays are skipped: it is hard to tell when one of those
+    // has been initialized
+    not isAggregateType(local.getUnderlyingType())
+  )
+}
+
+/**
+ * Holds if `gv` is a non-aggregate, non-`extern` global without an initializer that is read somewhere.
+ */
+predicate uninitialisedGlobal(GlobalVariable gv) {
+  not gv.hasSpecifier("extern") and
+  not gv.hasInitializer() and
+  not isAggregateType(gv.getUnderlyingType()) and
+  exists(VariableAccess read |
+    read.isRValue() and
+    gv.getAnAccess() = read
+  )
+}
+
+from Element elt
+where uninitialisedGlobal(elt) or undefinedLocalUse(elt)
+select elt, "Variable '" + elt.toString() + "' is not initialized."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/OverflowCalculated.ql b/repo-tests/codeql/cpp/ql/src/Critical/OverflowCalculated.ql
new file mode 100644
index 00000000000..d8a08cc6a69
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/OverflowCalculated.ql
@@ -0,0 +1,45 @@
+/**
+ * @name Buffer not sufficient for string
+ * @description A buffer allocated using 'malloc' may not have enough space for a string that is being copied into it. The operation can cause a buffer overrun. Make sure that the buffer contains enough room for the string (including the zero terminator).
+ * @kind problem
+ * @id cpp/overflow-calculated
+ * @problem.severity warning
+ * @security-severity 9.8
+ * @tags reliability
+ * security
+ * external/cwe/cwe-131
+ * external/cwe/cwe-120
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.models.interfaces.Allocation
+
+/**
+ * Holds if `append` is a `strcat`/`strncat` call that follows (in control flow) a
+ * `strcpy`/`strncpy` call whose second argument is an access to `buffer`, which itself follows an
+ * allocation whose size flows from an addition with a `strlen` of `buffer` operand and an operand with a known value.
+ */
+predicate spaceProblem(FunctionCall append, string msg) {
+  exists(
+    Variable buffer, StrlenCall strlen, AddExpr add, AllocationExpr malloc, FunctionCall insert
+  |
+    // the allocation size derives from `strlen(<access to buffer>)` plus an operand with a known value
+    strlen.getStringExpr() = buffer.getAnAccess() and
+    strlen = add.getAChild() and
+    exists(add.getAChild().getValue()) and
+    DataFlow::localExprFlow(add, malloc.getSizeExpr()) and
+    insert.getTarget().hasGlobalOrStdName(["strcpy", "strncpy"]) and
+    append.getTarget().hasGlobalOrStdName(["strcat", "strncat"]) and
+    insert.getArgument(1) = buffer.getAnAccess() and
+    malloc.getASuccessor+() = insert and
+    insert.getASuccessor+() = append and
+    msg =
+      "This buffer only contains enough room for '" + buffer.getName() + "' (copied on line " +
+        insert.getLocation().getStartLine().toString() + ")"
+  )
+}
+
+from Expr problem, string msg
+where spaceProblem(problem, msg) // `problem` is the flagged `strcat`/`strncat` call
+select problem, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/OverflowDestination.ql b/repo-tests/codeql/cpp/ql/src/Critical/OverflowDestination.ql
new file mode 100644
index 00000000000..94d46001660
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/OverflowDestination.ql
@@ -0,0 +1,53 @@
+/**
+ * @name Copy function using source size
+ * @description Calling a copy operation with a size derived from the source
+ * buffer instead of the destination buffer may result in a buffer overflow.
+ * @kind problem
+ * @id cpp/overflow-destination
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision low
+ * @tags reliability
+ * security
+ * external/cwe/cwe-119
+ * external/cwe/cwe-131
+ */
+
+import cpp
+import semmle.code.cpp.security.TaintTracking
+
+/**
+ * Holds if `fc` is a call to `strncpy`, `strncat`, `memcpy` or `memmove` whose
+ * size argument (index 2) contains an access to the variable accessed by the
+ * source argument `src` (index 1). For example:
+ * ```
+ * memcpy(dest, src, sizeof(src));
+ * ```
+ */
+predicate sourceSized(FunctionCall fc, Expr src) {
+  exists(Expr dst, Expr len, Variable srcVar |
+    fc.getTarget().hasGlobalOrStdName(["strncpy", "strncat", "memcpy", "memmove"]) and
+    dst = fc.getArgument(0) and
+    src = fc.getArgument(1) and
+    len = fc.getArgument(2) and
+    srcVar.getAnAccess() = src and
+    srcVar.getAnAccess() = len.getAChild+() and
+    // exception: source and destination are both arrays of the same type and size
+    not exists(ArrayType srcArr, ArrayType dstArr |
+      srcArr = src.getType().getUnderlyingType() and
+      dstArr = dst.getType().getUnderlyingType() and
+      srcArr.getArraySize() = dstArr.getArraySize() and
+      srcArr.getBaseType().getUnderlyingType() = dstArr.getBaseType().getUnderlyingType()
+    ) and
+    // exception: the destination variable is also referenced in the size argument
+    not exists(Variable dstVar |
+      dstVar.getAnAccess() = dst and dstVar.getAnAccess() = len.getAChild+()
+    )
+  )
+}
+
+from FunctionCall fc, Expr vuln, Expr taintSource
+where
+  tainted(taintSource, vuln) and // the source argument is tainted...
+  sourceSized(fc, vuln) // ...and the copy is bounded by that same source
+select fc,
+  "To avoid overflow, this operation should be bounded by destination-buffer size, not source-buffer size."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/OverflowStatic.ql b/repo-tests/codeql/cpp/ql/src/Critical/OverflowStatic.ql
new file mode 100644
index 00000000000..8b09931cd4a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/OverflowStatic.ql
@@ -0,0 +1,150 @@
+/**
+ * @name Static array access may cause overflow
+ * @description Exceeding the size of a static array during write or access operations
+ * may result in a buffer overflow.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision high
+ * @id cpp/static-buffer-overflow
+ * @tags reliability
+ * security
+ * external/cwe/cwe-119
+ * external/cwe/cwe-131
+ */
+
+import cpp
+import semmle.code.cpp.commons.Buffer
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import LoopBounds
+
+private predicate staticBufferBase(VariableAccess access, Variable v) {
+  v.getAnAccess() = access and
+  not memberMayBeVarSize(_, v) and
+  v.getType().(ArrayType).getBaseType() instanceof CharType // only arrays of character type
+}
+
+predicate staticBuffer(VariableAccess access, Variable v, int size) {
+  getBufferSize(access, _) = size and
+  staticBufferBase(access, v)
+}
+
+class BufferAccess extends ArrayExpr {
+  BufferAccess() {
+    // accesses inside the macro implementation of `strcmp` are skipped:
+    // they are carefully controlled but can look dangerous.
+    not exists(Macro strcmpMacro |
+      this = strcmpMacro.getAnInvocation().getAnExpandedElement() and
+      strcmpMacro.getName() = "strcmp"
+    ) and
+    exists(int len |
+      len != 0 and
+      staticBuffer(this.getArrayBase(), _, len)
+    )
+  }
+
+  int bufferSize() { staticBuffer(this.getArrayBase(), _, result) }
+
+  Variable buffer() { this.getArrayBase() = result.getAnAccess() }
+}
+
+predicate overflowOffsetInLoop(BufferAccess bufaccess, string msg) {
+  exists(ClassicForLoop loop |
+    loop.counter().getAnAccess() = bufaccess.getArrayOffset() and
+    loop.getStmt().getAChild*() = bufaccess.getEnclosingStmt() and
+    loop.limit() >= bufaccess.bufferSize() and
+    // The upper bounds analysis must not have been widened
+    not upperBoundMayBeWidened(bufaccess.getArrayOffset().getFullyConverted()) and
+    // Range analysis must not give the array index an upper bound that's less than the buffer size.
+    not upperBound(bufaccess.getArrayOffset().getFullyConverted()) < bufaccess.bufferSize() and
+    msg =
+      "Potential buffer-overflow: counter '" + loop.counter().toString() + "' <= " +
+        loop.limit().toString() + " but '" + bufaccess.buffer().getName() + "' has " +
+        bufaccess.bufferSize().toString() + " elements."
+  )
+}
+
+/**
+ * Holds if `f` is one of the recognised functions (`read`, `fgets`, `snprintf`, `vsnprintf`,
+ * `strncpy`, `strncat`, `memcpy`, `memmove`) and `buf` / `size` are the zero-based
+ * indices of its buffer argument and its size argument respectively.
+ */
+predicate bufferAndSizeFunction(Function f, int buf, int size) {
+  // `read` is matched by global name only, the others by global or `std` name
+  f.hasGlobalName("read") and buf = 1 and size = 2
+  or
+  f.hasGlobalOrStdName(["fgets", "snprintf", "vsnprintf"]) and
+  buf = 0 and
+  size = 1
+  or
+  f.hasGlobalOrStdName(["strncpy", "strncat", "memcpy", "memmove"]) and
+  buf = 0 and
+  size = 2
+}
+
+class CallWithBufferSize extends FunctionCall {
+  CallWithBufferSize() { bufferAndSizeFunction(this.getTarget(), _, _) }
+
+  Expr buffer() {
+    exists(int bufIndex |
+      result = this.getArgument(bufIndex) and
+      bufferAndSizeFunction(this.getTarget(), bufIndex, _)
+    )
+  }
+
+  Expr statedSizeExpr() {
+    exists(int sizeIndex |
+      result = this.getArgument(sizeIndex) and
+      bufferAndSizeFunction(this.getTarget(), _, sizeIndex)
+    )
+  }
+
+  int statedSizeValue() {
+    // When `e` isn't analyzable, `upperBound(e)` falls back to `exprMaxVal(e)`. To still get a
+    // meaningful result we take the smaller of the range-analysis bound and the dataflow constants.
+    result =
+      upperBound(statedSizeExpr())
+          .minimum(min(Expr statedSizeSrc |
+              DataFlow::localExprFlow(statedSizeSrc, statedSizeExpr())
+            |
+              statedSizeSrc.getValue().toInt()
+            ))
+  }
+}
+
+predicate wrongBufferSize(Expr error, string msg) {
+  exists(CallWithBufferSize call, Variable buf, int bufsize, int statedSize |
+    error = call.statedSizeExpr() and
+    statedSize = call.statedSizeValue() and
+    staticBuffer(call.buffer(), buf, bufsize) and
+    bufsize < statedSize and // the size passed to the call exceeds the static buffer's size
+    msg =
+      "Potential buffer-overflow: '" + buf.getName() + "' has size " + bufsize.toString() + " not " +
+        statedSize.toString() + "."
+  )
+}
+
+predicate outOfBounds(BufferAccess bufaccess, string msg) {
+  exists(string name, int capacity, int index |
+    index = bufaccess.getArrayOffset().getValue().toInt() and
+    capacity = bufaccess.bufferSize() and
+    name = bufaccess.buffer().getName() and
+    (
+      index = capacity and // one-past-the-end is tolerated under `&` and inside `offsetof`
+      not exists(BuiltInOperationBuiltInOffsetOf offsetof | offsetof.getAChild() = bufaccess) and
+      not exists(AddressOfExpr addof | bufaccess = addof.getOperand())
+      or
+      index > capacity
+    ) and
+    msg =
+      "Potential buffer-overflow: '" + name + "' has size " + capacity.toString() + " but '" + name + "[" +
+        index.toString() + "]' may be accessed here."
+  )
+}
+
+from Element error, string msg
+where
+  outOfBounds(error, msg) or
+  wrongBufferSize(error, msg) or
+  overflowOffsetInLoop(error, msg)
+select error, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/ReturnStackAllocatedObject.ql b/repo-tests/codeql/cpp/ql/src/Critical/ReturnStackAllocatedObject.ql
new file mode 100644
index 00000000000..40082ad5d9c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/ReturnStackAllocatedObject.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Pointer to stack object used as return value
+ * @description Using a pointer to stack memory after the function has returned gives undefined results.
+ * @kind problem
+ * @id cpp/return-stack-allocated-object
+ * @problem.severity warning
+ * @security-severity 2.1
+ * @tags reliability
+ * security
+ * external/cwe/cwe-562
+ * @deprecated This query is not suitable for production use and has been deprecated. Use
+ * cpp/return-stack-allocated-memory instead.
+ */
+
+import semmle.code.cpp.pointsto.PointsTo
+
+class ReturnPointsToExpr extends PointsToExpr {
+  override predicate interesting() {
+    pointerValue(this) and
+    exists(ReturnStmt ret | this = ret.getExpr().getFullyConverted())
+  }
+
+  ReturnStmt getReturnStmt() { this = result.getExpr().getFullyConverted() }
+}
+
+from ReturnPointsToExpr ret, StackVariable local, float confidence
+where
+  confidence = ret.confidence() and
+  confidence > 0.01 and
+  local = ret.pointsTo() and
+  local.getFunction() = ret.getReturnStmt().getEnclosingFunction()
+select ret,
+  "This may return a pointer to '" + local.getName() + "' (declared on line " +
+    local.getADeclarationLocation().getStartLine().toString() + "), which is stack allocated."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/ReturnValueIgnored.ql b/repo-tests/codeql/cpp/ql/src/Critical/ReturnValueIgnored.ql
new file mode 100644
index 00000000000..b4a4a044068
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/ReturnValueIgnored.ql
@@ -0,0 +1,44 @@
+/**
+ * @name Return value of a function is ignored
+ * @description A call to a function ignores its return value, but at least 90% of the total number of calls to the function check the return value. Check the return value of functions consistently, especially for functions like 'fread' or the 'scanf' functions that return the status of the operation.
+ * @kind problem
+ * @id cpp/return-value-ignored
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags reliability
+ * correctness
+ * external/cwe/cwe-252
+ */
+
+import cpp
+
+predicate unused(Expr e) { e instanceof ExprInVoidContext } // NOTE(review): presumably "the value of `e` is discarded" - confirm against `ExprInVoidContext`
+
+predicate important(Function f, string message) {
+  getOptions().alwaysCheckReturnValue(f) and // `f` is configured as "always check" in the options
+  message = "the result of this function must always be checked."
+}
+
+// statistically dubious ignored return values: at least 90%, but not all, of the relevant calls use the result
+predicate dubious(Function f, string message) {
+  exists(Options opts, int total, int used, int percentage |
+    total = count(FunctionCall fc | not opts.okToIgnoreReturnValue(fc) and fc.getTarget() = f) and
+    used =
+      count(FunctionCall fc |
+        not unused(fc) and not opts.okToIgnoreReturnValue(fc) and fc.getTarget() = f
+      ) and
+    total != used and
+    percentage = used * 100 / total and
+    percentage >= 90 and
+    message = percentage.toString() + "% of calls to this function have their result used."
+  ) and
+  not important(f, _)
+}
+
+from FunctionCall unused, string message
+where
+  not unused.getTarget().getName().matches("operator%") and // exclude user defined operators
+  not exists(Options opts | opts.okToIgnoreReturnValue(unused)) and
+  unused(unused) and
+  (dubious(unused.getTarget(), message) or important(unused.getTarget(), message))
+select unused, "Result of call to " + unused.getTarget().getName() + " is ignored; " + message
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/SizeCheck.ql b/repo-tests/codeql/cpp/ql/src/Critical/SizeCheck.ql
new file mode 100644
index 00000000000..e7a00ea3621
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/SizeCheck.ql
@@ -0,0 +1,46 @@
+/**
+ * @name Not enough memory allocated for pointer type
+ * @description Calling 'malloc', 'calloc' or 'realloc' without allocating enough memory to contain
+ * an instance of the type of the pointer may result in a buffer overflow
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision medium
+ * @id cpp/allocation-too-small
+ * @tags reliability
+ * security
+ * external/cwe/cwe-131
+ * external/cwe/cwe-122
+ */
+
+import cpp
+import semmle.code.cpp.models.Models
+
+predicate baseType(AllocationExpr alloc, Type base) {
+  exists(PointerType ptrType |
+    base = ptrType.getBaseType() and
+    (
+      exists(Variable v | v.getType() = ptrType and v.getInitializer().getExpr() = alloc) // initializer
+      or
+      exists(AssignExpr assign |
+        assign.getLValue().getType() = ptrType and assign.getRValue() = alloc // plain assignment
+      )
+    )
+  )
+}
+
+predicate decideOnSize(Type t, int size) {
+  // A codebase with several types of the same name can report several sizes; take the smallest.
+  min(t.getSize()) = size
+}
+
+from AllocationExpr alloc, Type base, int basesize, int allocated
+where
+  alloc.(FunctionCall).getTarget() instanceof AllocationFunction and // exclude `new` and similar
+  baseType(alloc, base) and
+  decideOnSize(base, basesize) and
+  allocated = alloc.getSizeBytes() and
+  allocated < basesize
+select alloc,
+  "Type '" + base.getName() + "' is " + basesize.toString() + " bytes, but only " +
+    allocated.toString() + " bytes are allocated."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/SizeCheck2.ql b/repo-tests/codeql/cpp/ql/src/Critical/SizeCheck2.ql
new file mode 100644
index 00000000000..eb3aec9a5fe
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/SizeCheck2.ql
@@ -0,0 +1,51 @@
+/**
+ * @name Not enough memory allocated for array of pointer type
+ * @description Calling 'malloc', 'calloc' or 'realloc' without allocating enough memory to contain
+ * multiple instances of the type of the pointer may result in a buffer overflow
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision medium
+ * @id cpp/suspicious-allocation-size
+ * @tags reliability
+ * security
+ * external/cwe/cwe-131
+ * external/cwe/cwe-122
+ */
+
+import cpp
+import semmle.code.cpp.models.Models
+
+predicate baseType(AllocationExpr alloc, Type base) {
+  exists(PointerType ptrType |
+    base = ptrType.getBaseType() and
+    (
+      exists(Variable v | v.getType() = ptrType and v.getInitializer().getExpr() = alloc) // initializer
+      or
+      exists(AssignExpr assign |
+        assign.getLValue().getType() = ptrType and assign.getRValue() = alloc // plain assignment
+      )
+    )
+  )
+}
+
+predicate decideOnSize(Type t, int size) {
+  // A codebase with several types of the same name can report several sizes; take the smallest.
+  min(t.getSize()) = size
+}
+
+from AllocationExpr alloc, Type base, int basesize, int allocated
+where
+  alloc.(FunctionCall).getTarget() instanceof AllocationFunction and // exclude `new` and similar
+  baseType(alloc, base) and
+  decideOnSize(base, basesize) and
+  allocated = alloc.getSizeBytes() and
+  not basesize > allocated and // covered by SizeCheck.ql
+  // With several same-named types, stay silent if any candidate size is zero or divides evenly
+  not exists(int size | size = base.getSize() |
+    (allocated / size) * size = allocated or
+    size = 0
+  )
+select alloc,
+  "Allocated memory (" + allocated.toString() + " bytes) is not a multiple of the size of '" +
+    base.getName() + "' (" + basesize.toString() + " bytes)."
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/Unused.ql b/repo-tests/codeql/cpp/ql/src/Critical/Unused.ql
new file mode 100644
index 00000000000..89ebde29171
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/Unused.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Variable is assigned a value that is never read
+ * @description Assigning a value to a variable that is not used may indicate an error in the code.
+ * @kind problem
+ * @id cpp/unused-variable
+ * @problem.severity warning
+ * @tags maintainability
+ * external/cwe/cwe-563
+ */
+
+import cpp
+
+// Sometimes it is useful to have a class which is instantiated (on the stack)
+// but not otherwise used. This is usually to perform some task and have that
+// task automatically reversed when the current scope is left. For example,
+// sometimes locking is done this way.
+// NOTE(review): there is no characteristic predicate, so this matches every `Class`.
+// Such instantiations should not be treated as unused values; `getAUse` gets a call to any constructor.
+class ScopeUtilityClass extends Class {
+ Call getAUse() { result = this.getAConstructor().getACallToThisFunction() }
+}
+
+from StackVariable v, ControlFlowNode def
+where
+  definition(v, def) and
+  not def.isInMacroExpansion() and
+  not exists(ScopeUtilityClass util | util.getAUse() = def) and
+  not v.getType().getUnderlyingType() instanceof ReferenceType and
+  // parameter initializers are currently absent from the call-graph
+  not v.(Parameter).getInitializer().getExpr() = def and
+  not v.getAnAccess().isAddressOfAccess() and
+  not definitionUsePair(v, def, _)
+select def, "Variable '" + v.getName() + "' is assigned a value that is never used"
diff --git a/repo-tests/codeql/cpp/ql/src/Critical/UseAfterFree.ql b/repo-tests/codeql/cpp/ql/src/Critical/UseAfterFree.ql
new file mode 100644
index 00000000000..d770a42b3c2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Critical/UseAfterFree.ql
@@ -0,0 +1,66 @@
+/**
+ * @name Potential use after free
+ * @description An allocated memory block is used after it has been freed. Behavior in such cases is undefined and can cause memory corruption.
+ * @kind problem
+ * @id cpp/use-after-free
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @tags reliability
+ * security
+ * external/cwe/cwe-416
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.StackVariableReachability
+
+/** Holds if `e` frees the memory pointed to by `v`: a `free` call on, or a `delete` / `delete[]` of, an access to `v`. */
+predicate isFreeExpr(Expr e, StackVariable v) {
+  exists(VariableAccess access | v = access.getTarget() |
+    access = e.(DeleteArrayExpr).getExpr()
+    or
+    access = e.(DeleteExpr).getExpr()
+    or
+    exists(FunctionCall call | e = call |
+      call.getArgument(0) = access and
+      call.getTarget().hasGlobalOrStdName("free")
+    )
+  )
+}
+
+/** Holds if `e` (may) dereference `v`: a dereferenced access to `v`, or an access passed to a dereferencing call. */
+predicate isDerefExpr(Expr e, StackVariable v) {
+  isDerefByCallExpr(_, _, e, v)
+  or
+  dereferenced(e) and e = v.getAnAccess()
+}
+
+/**
+ * Holds if `va`, an access to `v`, is (part of) the `i`th argument of call `c`
+ * and is not passed by reference. If the target of `c` has an entry point
+ * (source is available), its `i`th parameter must itself be dereferenced;
+ * otherwise the call is assumed to dereference the argument.
+ */
+predicate isDerefByCallExpr(Call c, int i, VariableAccess va, StackVariable v) {
+  va = v.getAnAccess() and
+  c.getAnArgumentSubExpr(i) = va and
+  (c.getTarget().hasEntryPoint() implies isDerefExpr(_, c.getTarget().getParameter(i))) and
+  not c.passesByReference(i, va)
+}
+
+class UseAfterFreeReachability extends StackVariableReachability {
+ UseAfterFreeReachability() { this = "UseAfterFree" }
+
+ override predicate isSource(ControlFlowNode node, StackVariable v) { isFreeExpr(node, v) } // a free of `v`
+
+ override predicate isSink(ControlFlowNode node, StackVariable v) { isDerefExpr(node, v) } // a (possible) dereference of `v`
+
+ override predicate isBarrier(ControlFlowNode node, StackVariable v) {
+ definitionBarrier(v, node) or // `node` is a definition barrier for `v`
+ isFreeExpr(node, v) // another free of `v`, which is a source in its own right
+ }
+}
+
+from UseAfterFreeReachability r, Expr free, StackVariable v, Expr e
+where r.reaches(free, v, e)
+select e, "Memory pointed to by '" + v.getName() + "' may have been previously freed $@",
+  free, "here"
diff --git a/repo-tests/codeql/cpp/ql/src/Diagnostics/ExtractionProblems.qll b/repo-tests/codeql/cpp/ql/src/Diagnostics/ExtractionProblems.qll
new file mode 100644
index 00000000000..c96e2e926e8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Diagnostics/ExtractionProblems.qll
@@ -0,0 +1,151 @@
+/**
+ * Provides a common hierarchy of all types of problems that can occur during extraction.
+ */
+
+import cpp
+
+/*
+ * A note about how the C/C++ extractor emits diagnostics:
+ * When the extractor frontend encounters a problem, it emits a diagnostic message,
+ * that includes a message, location and severity.
+ * However, that process is best-effort and may fail (e.g. due to lack of memory).
+ * Thus, if the extractor emitted at least one diagnostic of severity discretionary
+ * error (or higher), it *also* emits a simple "There was an error during this compilation"
+ * error diagnostic, without location information.
+ * In the common case, this means that a compilation during which one or more errors happened also gets
+ * the catch-all diagnostic.
+ * This diagnostic has the empty string as file path.
+ * We filter out these useless diagnostics if there is at least one reportable diagnostic (of
+ * severity discretionary error or higher, with a file path) for the affected compilation in the database.
+ * Otherwise, we show it to indicate that something went wrong and that we
+ * don't know what exactly happened.
+ */
+
+/**
+ * A problem with a file that, if present, leads to a file being marked as non-successfully extracted.
+ */
+class ReportableWarning extends Diagnostic {
+ ReportableWarning() {
+ (
+ this instanceof CompilerDiscretionaryError or
+ this instanceof CompilerError or
+ this instanceof CompilerCatastrophe
+ ) and
+ // Filter out the catch-all diagnostic, which has the empty string as file path (see note above).
+ not this.getFile().getAbsolutePath() = ""
+ }
+}
+
+private newtype TExtractionProblem =
+ TReportableWarning(ReportableWarning err) or
+ TCompilationFailed(Compilation c, File f) {
+ f = c.getAFileCompiled() and not c.normalTermination()
+ } or
+ // Show the catch-all diagnostic (see note above) only if we haven't seen any other error-level diagnostic
+ // for that compilation
+ TUnknownProblem(CompilerError err) {
+ not exists(ReportableWarning e | e.getCompilation() = err.getCompilation())
+ }
+
+/**
+ * Superclass for the extraction problem hierarchy.
+ */
+class ExtractionProblem extends TExtractionProblem {
+ /** Gets the string representation of the problem. */
+ string toString() { none() }
+
+ /** Gets the problem message for this problem. */
+ string getProblemMessage() { none() }
+
+ /** Gets the file this problem occurred in. */
+ File getFile() { none() }
+
+ /** Gets the location this problem occurred in. */
+ Location getLocation() { none() }
+
+ /** Gets the SARIF severity of this problem. */
+ int getSeverity() { none() }
+}
+
+/**
+ * An unrecoverable extraction error, where extraction was unable to finish.
+ * This can be caused by a multitude of reasons, for example:
+ * - hitting a frontend assertion
+ * - crashing due to dereferencing an invalid pointer
+ * - stack overflow
+ * - out of memory
+ */
+class ExtractionUnrecoverableError extends ExtractionProblem, TCompilationFailed {
+ Compilation c;
+ File f;
+
+ ExtractionUnrecoverableError() { this = TCompilationFailed(c, f) }
+
+ override string toString() {
+ result = "Unrecoverable extraction error while compiling " + f.toString()
+ }
+
+ override string getProblemMessage() { result = "unrecoverable compilation failure." }
+
+ override File getFile() { result = f }
+
+ override Location getLocation() { result = f.getLocation() }
+
+ override int getSeverity() {
+ // These extractor errors break the analysis, so we mark them in SARIF as
+ // [errors](https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338).
+ result = 2
+ }
+}
+
+/**
+ * A recoverable extraction warning.
+ * These are compiler errors from the frontend.
+ * Upon encountering one of these, we still continue extraction, but the
+ * database will be incomplete for that file.
+ */
+class ExtractionRecoverableWarning extends ExtractionProblem, TReportableWarning {
+ ReportableWarning err;
+
+ ExtractionRecoverableWarning() { this = TReportableWarning(err) }
+
+ override string toString() { result = "Recoverable extraction error: " + err }
+
+ override string getProblemMessage() { result = err.getFullMessage() }
+
+ override File getFile() { result = err.getFile() }
+
+ override Location getLocation() { result = err.getLocation() }
+
+ override int getSeverity() {
+ // Recoverable extraction problems don't tend to break the analysis, so we mark them in SARIF as
+ // [warnings](https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338).
+ result = 1
+ }
+}
+
+/**
+ * An unknown problem happened during extraction.
+ * These are only displayed if we know that we encountered a problem during extraction,
+ * but, for some reason, failed to emit a proper diagnostic with location information
+ * and problem message.
+ */
+class ExtractionUnknownProblem extends ExtractionProblem, TUnknownProblem {
+ CompilerError err;
+
+ ExtractionUnknownProblem() { this = TUnknownProblem(err) }
+
+ override string toString() { result = "Unknown extraction problem: " + err }
+
+ override string getProblemMessage() { result = err.getFullMessage() }
+
+ override File getFile() { result = err.getFile() }
+
+ override Location getLocation() { result = err.getLocation() }
+
+ override int getSeverity() {
+ // Unknown extraction problems don't tend to break the analysis, so we mark them in SARIF as
+ // [warnings](https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338).
+ result = 1
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Diagnostics/ExtractionWarnings.ql b/repo-tests/codeql/cpp/ql/src/Diagnostics/ExtractionWarnings.ql
new file mode 100644
index 00000000000..dcfb599bbeb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Diagnostics/ExtractionWarnings.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Extraction warnings
+ * @description List all extraction warnings for files in the source code directory.
+ * @kind diagnostic
+ * @id cpp/diagnostics/extraction-warnings
+ */
+
+import cpp
+import ExtractionProblems
+
+from ExtractionProblem warning
+where
+ warning instanceof ExtractionRecoverableWarning and exists(warning.getFile().getRelativePath())
+ or
+ warning instanceof ExtractionUnknownProblem
+select warning,
+ "Extraction failed in " + warning.getFile() + " with warning " + warning.getProblemMessage(),
+ warning.getSeverity()
diff --git a/repo-tests/codeql/cpp/ql/src/Diagnostics/FailedExtractorInvocations.ql b/repo-tests/codeql/cpp/ql/src/Diagnostics/FailedExtractorInvocations.ql
new file mode 100644
index 00000000000..fdc02f12135
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Diagnostics/FailedExtractorInvocations.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Failed extractor invocations
+ * @description Gives the command line of compilations for which extraction did not run to completion.
+ * @kind diagnostic
+ * @id cpp/diagnostics/failed-extractor-invocations
+ */
+
+import cpp
+
+string describe(Compilation c) {
+ if c.getArgument(1) = "--mimic"
+ then result = "compiler invocation " + concat(int i | i > 1 | c.getArgument(i), " " order by i)
+ else result = "extractor invocation " + concat(int i | | c.getArgument(i), " " order by i)
+}
+
+/** Gets the SARIF severity level that indicates an error. */
+private int getErrorSeverity() { result = 2 }
+
+from Compilation c
+where not c.normalTermination()
+select "Extraction aborted for " + describe(c), getErrorSeverity()
diff --git a/repo-tests/codeql/cpp/ql/src/Diagnostics/SuccessfullyExtractedFiles.ql b/repo-tests/codeql/cpp/ql/src/Diagnostics/SuccessfullyExtractedFiles.ql
new file mode 100644
index 00000000000..3e9fb12d935
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Diagnostics/SuccessfullyExtractedFiles.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Successfully extracted files
+ * @description Lists all files in the source code directory that were extracted without encountering a problem in the file.
+ * @kind diagnostic
+ * @id cpp/diagnostics/successfully-extracted-files
+ */
+
+import cpp
+import ExtractionProblems
+
+from File f
+where
+ not exists(ExtractionProblem e | e.getFile() = f) and
+ exists(f.getRelativePath())
+select f, "File successfully extracted"
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/CaptionedComments.qll b/repo-tests/codeql/cpp/ql/src/Documentation/CaptionedComments.qll
new file mode 100644
index 00000000000..0ccd678e79f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/CaptionedComments.qll
@@ -0,0 +1,39 @@
+/**
+ * Provides heuristics to find "todo" and "fixme" comments (in all caps).
+ */
+
+import cpp
+
+/**
+ * Gets a string representation of the comment `c` containing the caption 'TODO' or 'FIXME'.
+ * If `c` spans multiple lines, all lines after the first are abbreviated as [...].
+ */
+string getCommentTextCaptioned(Comment c, string caption) {
+ (caption = "TODO" or caption = "FIXME") and
+ exists(
+ string commentContents, string commentBody, int offset, string interestingSuffix, int endOfLine,
+ string dontCare, string captionedLine, string followingLine
+ |
+ commentContents = c.getContents() and
+ commentContents.matches("%" + caption + "%") and
+ // Add some '\n's so that any interesting line, and its
+ // following line, will definitely begin and end with '\n'.
+ commentBody = commentContents.regexpReplaceAll("(?s)^/\\*(.*)\\*/$|^//(.*)$", "\n$1$2\n\n") and
+ dontCare = commentBody.regexpFind("\\n[/* \\t\\x0B\\f\\r]*" + caption, _, offset) and
+ interestingSuffix = commentBody.suffix(offset) and
+ endOfLine = interestingSuffix.indexOf("\n", 1, 0) and
+ captionedLine =
+ interestingSuffix
+ .prefix(endOfLine)
+ .regexpReplaceAll("^[/*\\s]*" + caption + "\\s*:?", "")
+ .trim() and
+ followingLine =
+ interestingSuffix.prefix(interestingSuffix.indexOf("\n", 2, 0)).suffix(endOfLine).trim() and
+ if captionedLine = ""
+ then result = caption + " comment"
+ else
+ if followingLine = ""
+ then result = caption + " comment: " + captionedLine
+ else result = caption + " comment: " + captionedLine + " [...]"
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/CommentedOutCode.ql b/repo-tests/codeql/cpp/ql/src/Documentation/CommentedOutCode.ql
new file mode 100644
index 00000000000..89411738178
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/CommentedOutCode.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Commented-out code
+ * @description Commented-out code makes the remaining code more difficult to read.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/commented-out-code
+ * @tags maintainability
+ * documentation
+ */
+
+import CommentedOutCode
+
+from CommentedOutCode comment
+select comment, "This comment appears to contain commented-out code"
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/CommentedOutCode.qll b/repo-tests/codeql/cpp/ql/src/Documentation/CommentedOutCode.qll
new file mode 100644
index 00000000000..a4e5b948630
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/CommentedOutCode.qll
@@ -0,0 +1,216 @@
+/**
+ * Provides classes and predicates for identifying C/C++ comments that look like code.
+ */
+
+import cpp
+
+/**
+ * Holds if `line` looks like a line of code.
+ */
+bindingset[line]
+private predicate looksLikeCode(string line) {
+  exists(string trimmed |
+    // trim leading and trailing whitespace, and HTML codes:
+    //  * HTML entities in common notation (e.g. `&gt;` and `&eacute;`)
+    //  * HTML entities in decimal notation (e.g. `&#224;`)
+    //  * HTML entities in hexadecimal notation (e.g. `&#x705F;`)
+    trimmed = line.regexpReplaceAll("(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)", "")
+  |
+    (
+      // Match comment lines ending with '{', '}' or ';'
+      trimmed.regexpMatch(".*[{};]") and
+      (
+        // If this line looks like code because it ends with a closing
+        // brace that's preceded by something other than whitespace ...
+        trimmed.regexpMatch(".*.\\}")
+        implies
+        // ... then there has to be ") {" (or some variation)
+        // on the line, suggesting it's a statement like `if`
+        // or a function definition. Otherwise it's likely to be a
+        // benign use of braces such as a JSON example or explanatory
+        // pseudocode.
+        trimmed.regexpMatch(".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*")
+      )
+      or
+      // Match comment lines that look like preprocessor code
+      trimmed
+          .regexpMatch("#\\s*(include|define|undef|if|ifdef|ifndef|elif|else|endif|error|pragma)\\b.*")
+    ) and
+    // Exclude lines that start with '>' or contain '@{' or '@}'.
+    // To account for the code generated by protobuf, we also insist that the comment
+    // does not begin with `optional` or `repeated` and end with a `;`, which would
+    // normally be a quoted bit of literal `.proto` specification above the associated
+    // declaration.
+    // To account for emacs folding markers, we ignore any line containing
+    // `{{{` or `}}}`.
+    // Finally, some code tends to embed GUIDs in comments, so we also exclude those.
+    not trimmed
+        .regexpMatch("(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})")
+  )
+}
+
+/**
+ * Holds if there is an `#else`, `#elif` or `#endif` directive on line `line`
+ * of file `f`. Comments sharing a line with such a directive are permitted
+ * to contain code. For example this is considered acceptable:
+ * ```
+ * #ifdef MYMACRO
+ * ...
+ * #endif // #ifdef MYMACRO
+ * ```
+ */
+private predicate preprocLine(File f, int line) {
+ exists(PreprocessorDirective pd, Location l |
+ (
+ pd instanceof PreprocessorElse or
+ pd instanceof PreprocessorElif or
+ pd instanceof PreprocessorEndif
+ ) and
+ pd.getLocation() = l and
+ l.getFile() = f and
+ l.getStartLine() = line
+ )
+}
+
+/**
+ * Gets the start line of the C++-style comment `c` within its file `f`.
+ */
+private int lineInFile(CppStyleComment c, File f) {
+ f = c.getFile() and
+ result = c.getLocation().getStartLine() and
+ // Ignore comments on the same line as a preprocessor directive.
+ not preprocLine(f, result)
+}
+
+/**
+ * The "comment block ID" for a comment line in a file.
+ * The block ID is obtained by subtracting the line rank of the line from
+ * the line itself, where the line rank is the (1-based) rank within `f`
+ * of lines containing a C++-style comment. As a result, line comments on
+ * consecutive lines are assigned the same block ID (as both line number
+ * and line rank increase by 1 for each line), while intervening lines
+ * without line comments would increase the line number without increasing
+ * the rank and thus force a change of block ID.
+ */
+pragma[nomagic]
+private int commentLineBlockID(File f, int line) {
+ exists(int lineRank |
+ line = rank[lineRank](lineInFile(_, f)) and
+ result = line - lineRank
+ )
+}
+
+/**
+ * The comment ID of the given comment (on line `line` of file `f`).
+ * The resulting number is meaningless, except that it will be the same
+ * for all comments in a run of consecutive comment lines, and different
+ * for separate runs.
+ */
+private int commentId(CppStyleComment c, File f, int line) {
+ result = commentLineBlockID(f, line) and
+ line = lineInFile(c, f)
+}
+
+/**
+ * A contiguous block of comments.
+ */
+class CommentBlock extends Comment {
+ CommentBlock() {
+ (
+ this instanceof CppStyleComment
+ implies
+ not exists(CppStyleComment pred, File f | lineInFile(pred, f) + 1 = lineInFile(this, f))
+ ) and
+ // Ignore comments on the same line as a preprocessor directive.
+ not exists(Location l |
+ l = this.getLocation() and
+ preprocLine(l.getFile(), l.getStartLine())
+ )
+ }
+
+ /**
+ * Gets the `i`th comment associated with this comment block.
+ */
+ Comment getComment(int i) {
+ i = 0 and result = this
+ or
+ exists(File f, int thisLine, int resultLine |
+ commentId(this, f, thisLine) = commentId(result, f, resultLine)
+ |
+ i = resultLine - thisLine
+ )
+ }
+
+ /**
+ * Gets the last comment associated with this comment block.
+ */
+ Comment lastComment() { result = this.getComment(max(int i | exists(this.getComment(i)))) }
+
+ /**
+ * Gets the `i`th line of text in this block (a line of a C-style comment, or one `//` comment).
+ */
+ string getLine(int i) {
+ this instanceof CStyleComment and
+ result = this.getContents().regexpCapture("(?s)/\\*+(.*)\\*+/", 1).splitAt("\n", i)
+ or
+ this instanceof CppStyleComment and result = this.getComment(i).getContents().suffix(2)
+ }
+
+ /**
+ * Gets the number of non-blank lines in the comments associated with this comment block.
+ */
+ int numLines() {
+ result = strictcount(int i, string line | line = this.getLine(i) and line.trim() != "")
+ }
+
+ /**
+ * Gets the number of lines that look like code in the comments associated with this comment block.
+ */
+ int numCodeLines() {
+ result = strictcount(int i, string line | line = this.getLine(i) and looksLikeCode(line))
+ }
+
+ /**
+ * Holds if the comment block is a C-style comment, and each
+ * comment line starts with a *.
+ */
+ predicate isDocumentation() {
+ // If a C-style comment starts each line with a *, then it's
+ // probably documentation rather than code.
+ this instanceof CStyleComment and
+ forex(int i | i in [1 .. this.numLines() - 1] | this.getLine(i).trim().matches("*%"))
+ }
+
+ /**
+ * Holds if this comment block looks like code that has been commented out. Specifically:
+ * 1. It does not look like documentation (see `isDocumentation`).
+ * 2. It is not in a header file without any declaration entries or top level declarations.
+ * 3. More than half of the lines in the comment block look like code.
+ */
+ predicate isCommentedOutCode() {
+ not this.isDocumentation() and
+ not this.getFile().(HeaderFile).noTopLevelCode() and
+ this.numCodeLines().(float) / this.numLines().(float) > 0.5
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getLocation().hasLocationInfo(filepath, startline, startcolumn, _, _) and
+ this.lastComment().getLocation().hasLocationInfo(_, _, _, endline, endcolumn)
+ }
+}
+
+/**
+ * A piece of commented-out code, identified using heuristics
+ */
+class CommentedOutCode extends CommentBlock {
+ CommentedOutCode() { this.isCommentedOutCode() }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/DocumentApi.ql b/repo-tests/codeql/cpp/ql/src/Documentation/DocumentApi.ql
new file mode 100644
index 00000000000..9c78c1f80fc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/DocumentApi.ql
@@ -0,0 +1,36 @@
+/**
+ * @name Undocumented API function
+ * @description Functions used from outside the file they are declared in
+ * should be documented, as they are part of a public API. Without
+ * comments, modifying such functions is dangerous because callers
+ * easily come to rely on their exact implementation.
+ * @kind problem
+ * @id cpp/document-api
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags maintainability
+ * documentation
+ */
+
+import cpp
+
+predicate isCommented(FunctionDeclarationEntry f) {
+ exists(Comment c | c.getCommentedElement() = f)
+}
+
+/** Gets a call to `f` that is located in file `other`. */
+Call uses(File other, Function f) { result.getTarget() = f and result.getFile() = other }
+
+from File callerFile, Function f, Call use, int numCalls
+where
+ numCalls = strictcount(File other | exists(uses(other, f)) and other != f.getFile()) and
+ not isCommented(f.getADeclarationEntry()) and
+ not f instanceof Constructor and
+ not f instanceof Destructor and
+ not f.hasName("operator=") and
+ f.getMetrics().getNumberOfLinesOfCode() >= 5 and
+ numCalls > 1 and
+ use = uses(callerFile, f) and
+ callerFile != f.getFile()
+select f, "Functions called from other files should be documented (called from $@).", use,
+ use.getFile().getRelativePath()
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/FixmeComments.ql b/repo-tests/codeql/cpp/ql/src/Documentation/FixmeComments.ql
new file mode 100644
index 00000000000..5ccbf129275
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/FixmeComments.ql
@@ -0,0 +1,18 @@
+/**
+ * @name FIXME comment
+ * @description Comments containing 'FIXME' indicate that the code has known bugs.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision very-high
+ * @id cpp/fixme-comment
+ * @tags maintainability
+ * documentation
+ * external/cwe/cwe-546
+ */
+
+import cpp
+import Documentation.CaptionedComments
+
+from Comment c, string message
+where message = getCommentTextCaptioned(c, "FIXME")
+select c, message
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/TodoComments.ql b/repo-tests/codeql/cpp/ql/src/Documentation/TodoComments.ql
new file mode 100644
index 00000000000..4eedcba183e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/TodoComments.ql
@@ -0,0 +1,18 @@
+/**
+ * @name TODO comment
+ * @description Comments containing 'TODO' indicate that the code may be in an incomplete state.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision medium
+ * @id cpp/todo-comment
+ * @tags maintainability
+ * documentation
+ * external/cwe/cwe-546
+ */
+
+import cpp
+import Documentation.CaptionedComments
+
+from Comment c, string message
+where message = getCommentTextCaptioned(c, "TODO")
+select c, message
diff --git a/repo-tests/codeql/cpp/ql/src/Documentation/UncommentedFunction.ql b/repo-tests/codeql/cpp/ql/src/Documentation/UncommentedFunction.ql
new file mode 100644
index 00000000000..e11fc54f953
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Documentation/UncommentedFunction.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Poorly documented large function
+ * @description Large functions that have no or almost no comments are likely to be too complex to understand and maintain. The larger a function is, the more problematic the lack of comments.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/poorly-documented-function
+ * @tags maintainability
+ * documentation
+ * statistical
+ * non-attributable
+ */
+
+import cpp
+
+from MetricFunction f, int n
+where
+ n = f.getNumberOfLines() and
+ n > 100 and
+ f.getCommentRatio() <= 0.02 and
+ not f.isMultiplyDefined()
+select f,
+ "Poorly documented function: fewer than 2% comments for a function of " + n.toString() + " lines."
diff --git a/repo-tests/codeql/cpp/ql/src/Header Cleanup/Cleanup-DuplicateIncludeGuard.ql b/repo-tests/codeql/cpp/ql/src/Header Cleanup/Cleanup-DuplicateIncludeGuard.ql
new file mode 100644
index 00000000000..c06cfeabe0d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Header Cleanup/Cleanup-DuplicateIncludeGuard.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Duplicate include guard
+ * @description Using the same include guard macro in more than one header file may cause unexpected behavior from the compiler.
+ * @kind problem
+ * @problem.severity error
+ * @precision high
+ * @id cpp/duplicate-include-guard
+ * @tags reliability
+ * maintainability
+ * modularity
+ */
+
+import cpp
+import semmle.code.cpp.headers.MultipleInclusion
+
+/*
+ * A duplicate include guard is an include guard that uses the same macro name as at least
+ * one other include guard. We use hasIncludeGuard, which checks the #ifndef and #endif but
+ * not the #define, to identify them (as we expect the #define to be missing from the database
+ * in the case of a file that's only ever encountered after other(s) with the same guard macro).
+ * However one case must be a correctIncludeGuard to prove that this macro really is intended
+ * to be an include guard.
+ */
+
+from HeaderFile hf, PreprocessorDirective ifndef, string macroName, int num
+where
+ hasIncludeGuard(hf, ifndef, _, macroName) and
+ exists(HeaderFile other |
+ hasIncludeGuard(other, _, _, macroName) and hf.getShortName() != other.getShortName()
+ ) and
+ num = strictcount(HeaderFile other | hasIncludeGuard(other, _, _, macroName)) and
+ correctIncludeGuard(_, _, _, _, macroName)
+select ifndef,
+ "The macro name '" + macroName + "' of this include guard is used in " + num +
+ " different header files."
diff --git a/repo-tests/codeql/cpp/ql/src/IDEContextual.qll b/repo-tests/codeql/cpp/ql/src/IDEContextual.qll
new file mode 100644
index 00000000000..f4e6267fdcf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/IDEContextual.qll
@@ -0,0 +1,22 @@
+/**
+ * Provides shared predicates related to contextual queries in the code viewer.
+ */
+
+import semmle.files.FileSystem
+
+/**
+ * Gets the `File` matching the given source file name as encoded by the VS
+ * Code extension.
+ */
+cached
+File getFileBySourceArchiveName(string name) {
+ // The name provided for a file in the source archive by the VS Code extension
+ // has some differences from the absolute path in the database:
+ // 1. colons are replaced by underscores
+ // 2. there's a leading slash, even for Windows paths: "C:/foo/bar" ->
+ // "/C_/foo/bar"
+ // 3. double slashes in UNC prefixes are replaced with a single slash
+ // We can handle 2 and 3 together by unconditionally adding a leading slash
+ // before replacing double slashes.
+ name = ("/" + result.getAbsolutePath().replaceAll(":", "_")).replaceAll("//", "/")
+}
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 03/ExitNonterminatingLoop.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 03/ExitNonterminatingLoop.ql
new file mode 100644
index 00000000000..9473998e6ee
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 03/ExitNonterminatingLoop.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Exit from permanent loop
+ * @description Permanent loops (like "while(1) {..}") are typically meant to be non-terminating and should not be terminated by other means.
+ * @kind problem
+ * @id cpp/jpl-c/exit-nonterminating-loop
+ * @problem.severity warning
+ * @tags correctness
+ * external/jpl
+ */
+
+import cpp
+
+predicate markedAsNonterminating(Loop l) {
+  any(Comment c | c.getCommentedElement() = l).getContents().matches("%@non-terminating@%")
+}
+
+Stmt exitFrom(Loop l) {
+  result = l.getAChild+() and
+  (
+    result instanceof ReturnStmt
+    or
+    result instanceof BreakStmt and not l.getAChild*() = result.(BreakStmt).getTarget()
+  )
+}
+
+from Loop l, Stmt exit
+where
+ markedAsNonterminating(l) and
+ exit = exitFrom(l)
+select exit, "$@ should not be exited.", l, "This permanent loop"
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 03/LoopBounds.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 03/LoopBounds.ql
new file mode 100644
index 00000000000..f3e05bdc151
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 03/LoopBounds.ql
@@ -0,0 +1,146 @@
+/**
+ * @name Unbounded loop
+ * @description All loops should have a fixed upper bound; the counter should also be incremented along all paths within the loop.
+ * This check excludes loops that are meant to be nonterminating (like schedulers).
+ * @kind problem
+ * @id cpp/jpl-c/loop-bounds
+ * @problem.severity warning
+ * @tags correctness
+ * external/jpl
+ */
+
+import cpp
+
+predicate validVarForBound(Loop loop, Variable var) {
+ // The variable is read in the loop controlling expression
+ var.getAnAccess().getParent*() = loop.getControllingExpr() and
+ // The variable is not assigned in the loop body
+ not inScope(loop, var.getAnAssignment().getEnclosingStmt()) and
+ // The variable is not incremented/decremented in the loop body
+ not inScope(loop, var.getAnAccess().getParent().(CrementOperation).getEnclosingStmt())
+}
+
+predicate upperBoundCheck(Loop loop, VariableAccess checked) {
+ exists(RelationalOperation rop | loop.getControllingExpr().getAChild*() = rop |
+ checked = rop.getLesserOperand() and
+ // The greater operand is something "valid", i.e. a constant or
+ // a variable that isn't assigned in the loop body
+ (
+ exists(rop.getGreaterOperand().getValue()) or
+ rop.getGreaterOperand().(VariableAccess).getTarget().isConst() or
+ validVarForBound(loop, rop.getGreaterOperand().(VariableAccess).getTarget())
+ ) and
+ not rop.getGreaterOperand() instanceof CharLiteral
+ )
+}
+
+predicate lowerBoundCheck(Loop loop, VariableAccess checked) {
+ exists(RelationalOperation rop | loop.getControllingExpr().getAChild*() = rop |
+ checked = rop.getGreaterOperand() and
+ // The lesser operand is something "valid", i.e. a constant or
+ // a variable that isn't assigned in the loop body
+ (
+ exists(rop.getLesserOperand().getValue()) or
+ rop.getLesserOperand().(VariableAccess).getTarget().isConst() or
+ validVarForBound(loop, rop.getLesserOperand().(VariableAccess).getTarget())
+ ) and
+ not rop.getLesserOperand() instanceof CharLiteral
+ )
+}
+
+VariableAccess getAnIncrement(Variable var) {
+ result.getTarget() = var and
+ (
+ result.getParent() instanceof IncrementOperation
+ or
+ exists(AssignAddExpr a | a.getLValue() = result and a.getRValue().getValue().toInt() > 0)
+ or
+ exists(AssignExpr a | a.getLValue() = result |
+ a.getRValue() =
+ any(AddExpr ae |
+ ae.getAnOperand() = var.getAnAccess() and
+ ae.getAnOperand().getValue().toInt() > 0
+ )
+ )
+ )
+}
+
+VariableAccess getADecrement(Variable var) {
+ result.getTarget() = var and
+ (
+ result.getParent() instanceof DecrementOperation
+ or
+ exists(AssignSubExpr a | a.getLValue() = result and a.getRValue().getValue().toInt() > 0)
+ or
+ exists(AssignExpr a | a.getLValue() = result |
+ a.getRValue() =
+ any(SubExpr ae |
+ ae.getLeftOperand() = var.getAnAccess() and
+ ae.getRightOperand().getValue().toInt() > 0
+ )
+ )
+ )
+}
+
+predicate inScope(Loop l, Stmt s) { l.getAChild*() = s }
+
+predicate reachesNoInc(VariableAccess source, ControlFlowNode target) {
+ upperBoundCheck(_, source) and source.getASuccessor() = target
+ or
+ exists(ControlFlowNode mid |
+ reachesNoInc(source, mid) and not mid = getAnIncrement(source.getTarget())
+ |
+ target = mid.getASuccessor() and
+ inScope(source.getEnclosingStmt(), target.getEnclosingStmt())
+ )
+}
+
+predicate reachesNoDec(VariableAccess source, ControlFlowNode target) {
+ lowerBoundCheck(_, source) and source.getASuccessor() = target
+ or
+ exists(ControlFlowNode mid |
+ reachesNoDec(source, mid) and not mid = getADecrement(source.getTarget())
+ |
+ target = mid.getASuccessor() and
+ inScope(source.getEnclosingStmt(), target.getEnclosingStmt())
+ )
+}
+
+predicate hasSafeBound(Loop l) {
+ exists(VariableAccess bound | upperBoundCheck(l, bound) | not reachesNoInc(bound, bound))
+ or
+ exists(VariableAccess bound | lowerBoundCheck(l, bound) | not reachesNoDec(bound, bound))
+ or
+ exists(l.getControllingExpr().getValue())
+}
+
+predicate markedAsNonterminating(Loop l) {
+  any(Comment c | c.getCommentedElement() = l).getContents().matches("%@non-terminating@%")
+}
+
+from Loop loop, string msg
+where
+ not hasSafeBound(loop) and
+ not markedAsNonterminating(loop) and
+ (
+ not upperBoundCheck(loop, _) and
+ not lowerBoundCheck(loop, _) and
+ msg = "This loop does not have a fixed bound."
+ or
+ exists(VariableAccess bound |
+ upperBoundCheck(loop, bound) and
+ reachesNoInc(bound, bound) and
+ msg =
+ "The loop counter " + bound.getTarget().getName() +
+ " is not always incremented in the loop body."
+ )
+ or
+ exists(VariableAccess bound |
+ lowerBoundCheck(loop, bound) and
+ reachesNoDec(bound, bound) and
+ msg =
+ "The loop counter " + bound.getTarget().getName() +
+ " is not always decremented in the loop body."
+ )
+ )
+select loop, msg
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 04/Recursion.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 04/Recursion.ql
new file mode 100644
index 00000000000..b5e791ce7a0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 04/Recursion.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Uses of recursion
+ * @description Avoiding recursion allows tools and people to better analyze the program.
+ * @kind problem
+ * @id cpp/jpl-c/recursion
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * testability
+ * external/jpl
+ */
+
+import cpp
+
+class RecursiveCall extends FunctionCall {
+ RecursiveCall() { this.getTarget().calls*(this.getEnclosingFunction()) }
+}
+
+from RecursiveCall call, string msg
+where
+ if call.getTarget() = call.getEnclosingFunction()
+ then msg = "This call directly invokes its containing function $@."
+ else
+ msg =
+ "The function " + call.getEnclosingFunction() +
+ " is indirectly recursive via this call to $@."
+select call, msg, call.getTarget(), call.getTarget().getName()
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 05/HeapMemory.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 05/HeapMemory.ql
new file mode 100644
index 00000000000..30324d8840d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 05/HeapMemory.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Dynamic allocation after initialization
+ * @description Dynamic memory allocation (using malloc() or calloc()) should be confined to the initialization routines of a program.
+ * @kind problem
+ * @id cpp/jpl-c/heap-memory
+ * @problem.severity recommendation
+ * @tags resources
+ * external/jpl
+ */
+
+import cpp
+
+class Initialization extends Function {
+  Initialization() {
+    // Name heuristic on the lower-cased name: an "init" prefix or an "_init" suffix.
+    // TODO: refine which functions count as "initialization" and may hence allocate dynamically.
+    exists(string lowered | lowered = this.getName().toLowerCase() |
+      lowered.matches("%\\_init") or lowered.matches("init%")
+    )
+  }
+}
+/** A call to a function named `malloc`, `calloc`, `alloca`, `sbrk` or `valloc` (matched by name only). */
+class Allocation extends FunctionCall {
+  Allocation() { ["malloc", "calloc", "alloca", "sbrk", "valloc"] = this.getTarget().getName() }
+}
+
+from Allocation alloc, Function encl
+where
+  encl = alloc.getEnclosingFunction() and
+  not encl instanceof Initialization
+select alloc, "Dynamic memory allocation is only allowed during initialization."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 07/ThreadSafety.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 07/ThreadSafety.ql
new file mode 100644
index 00000000000..4eacedc3339
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 07/ThreadSafety.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Use of delay function
+ * @description Task synchronization shall not be performed through the use of task delays.
+ * @kind problem
+ * @id cpp/jpl-c/thread-safety
+ * @problem.severity warning
+ * @tags correctness
+ * concurrency
+ * external/jpl
+ */
+
+import cpp
+
+/** A call to a function named like one of the listed delay or sleep routines. */
+class ForbiddenCall extends FunctionCall {
+  ForbiddenCall() {
+    ["task_delay", "taskDelay", "sleep", "nanosleep", "clock_nanosleep"] = this.getTarget().getName()
+  }
+}
+
+from ForbiddenCall delay
+select delay, "Task synchronization shall not be performed through task delays."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/AvoidNestedSemaphores.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/AvoidNestedSemaphores.ql
new file mode 100644
index 00000000000..125f64fee51
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/AvoidNestedSemaphores.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Avoid nested semaphores
+ * @description Nested use of semaphores or locks should be avoided.
+ * @kind problem
+ * @id cpp/jpl-c/avoid-nested-semaphores
+ * @problem.severity warning
+ * @tags correctness
+ * concurrency
+ * external/jpl
+ */
+
+import Semaphores
+
+LockOperation maybeLocked(Function f) {
+  // Either the lock operation sits directly in `f`, or in a function `f` calls (recursively).
+  f = result.getEnclosingFunction()
+  or exists(Function callee | f.calls(callee) and result = maybeLocked(callee))
+}
+
+predicate intraproc(LockOperation inner, string msg, LockOperation outer) {
+  // `inner` is reached from `outer` and the two lock different declarations.
+  msg = "This lock operation is nested in a $@." and
+  outer.getAReachedNode() = inner and inner.getLocked() != outer.getLocked()
+}
+
+predicate interproc(FunctionCall inner, string msg, LockOperation outer) {
+  // `inner` is a call reached from `outer` whose target may take a lock on a different declaration.
+  outer.getAReachedNode() = inner and
+  exists(LockOperation nested | nested = maybeLocked(inner.getTarget()) |
+    outer.getLocked() != nested.getLocked() and
+    msg = "This call may perform a " + nested.say() + " while under the effect of a $@."
+  )
+}
+
+from LockOperation outer, FunctionCall inner, string message
+where interproc(inner, message, outer) or intraproc(inner, message, outer)
+select inner, message, outer, outer.say()
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/AvoidSemaphores.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/AvoidSemaphores.ql
new file mode 100644
index 00000000000..01d787f2fae
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/AvoidSemaphores.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Avoid semaphores
+ * @description The use of semaphores or locks to access shared data should be avoided.
+ * @kind problem
+ * @id cpp/jpl-c/avoid-semaphores
+ * @problem.severity recommendation
+ * @tags concurrency
+ * external/jpl
+ */
+
+import Semaphores
+
+from FunctionCall c, string what
+where
+  what = "locking primitives" and c instanceof LockingPrimitive
+  or
+  what = "semaphores" and c instanceof SemaphoreCreation
+select c, "Use of " + what + " should be avoided."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/OutOfOrderLocks.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/OutOfOrderLocks.ql
new file mode 100644
index 00000000000..ab2fc74d014
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/OutOfOrderLocks.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Out-of-order locks
+ * @description Where nested locks are inevitable, they should always be taken in the same order.
+ * @kind problem
+ * @id cpp/jpl-c/out-of-order-locks
+ * @problem.severity warning
+ * @tags correctness
+ * concurrency
+ * external/jpl
+ */
+
+import Semaphores
+
+predicate lockOrder(LockOperation outer, LockOperation inner) {
+  // `inner` is reached from `outer`, and the two lock different declarations.
+  inner = outer.getAReachedNode() and outer.getLocked() != inner.getLocked()
+}
+
+int orderCount(Declaration outerLock, Declaration innerLock) {
+  // Number of lock-operation pairs that take `outerLock` first and `innerLock` second.
+  // `strictcount` has no result (rather than 0) when there is no such pair.
+  result =
+    strictcount(LockOperation first, LockOperation second |
+      lockOrder(first, second) and first.getLocked() = outerLock and second.getLocked() = innerLock
+    )
+}
+
+from LockOperation outer, LockOperation inner
+where
+  lockOrder(outer, inner) and
+  // Report this order when the reverse order occurs at least as often (ties are reported both ways).
+  orderCount(inner.getLocked(), outer.getLocked()) >= orderCount(outer.getLocked(), inner.getLocked())
+select inner, "Out-of-order locks: A " + inner.say() + " usually precedes a $@.", outer, outer.say()
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/ReleaseLocksWhenAcquired.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/ReleaseLocksWhenAcquired.ql
new file mode 100644
index 00000000000..16eb13a5f39
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/ReleaseLocksWhenAcquired.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Unreleased lock
+ * @description Unlock operations shall always appear within the body of the same function that performs the matching lock operation.
+ * @kind problem
+ * @id cpp/jpl-c/release-locks-when-acquired
+ * @problem.severity warning
+ * @tags correctness
+ * concurrency
+ * external/jpl
+ */
+
+import Semaphores
+
+from LockOperation acquire
+where acquire.getEnclosingFunction() = acquire.getAReachedNode()
+select acquire, "This lock operation may escape the function without a matching unlock."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/Semaphores.qll b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/Semaphores.qll
new file mode 100644
index 00000000000..0b60a3b9877
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 09/Semaphores.qll
@@ -0,0 +1,88 @@
+/**
+ * Provides classes corresponding to VxWorks semaphores and locks.
+ */
+
+import cpp
+
+/** A call to a function named `semBCreate`, `semMCreate`, `semCCreate` or `semRWCreate`. */
+class SemaphoreCreation extends FunctionCall {
+  SemaphoreCreation() { ["semBCreate", "semMCreate", "semCCreate", "semRWCreate"] = this.getTarget().getName() }
+
+  /** Gets a variable accessed as the left-hand side of the assignment that is this call's parent. */
+  Variable getSemaphore() { this.getParent().(Assignment).getLValue() = result.getAnAccess() }
+}
+/** A call that takes a lock; subclasses define what is locked and which unlocks match. */
+abstract class LockOperation extends FunctionCall {
+  /** Gets an unlock operation that matches this lock operation. */
+  abstract UnlockOperation getMatchingUnlock();
+  /** Gets the declaration that identifies what this operation locks. */
+  abstract Declaration getLocked();
+  /** Gets a short description of this operation, used in alert messages. */
+  abstract string say();
+  /** Gets a node reachable from this lock along paths that do not continue past a matching unlock. */
+  ControlFlowNode getAReachedNode() {
+    result = this
+    or
+    exists(ControlFlowNode mid | mid = this.getAReachedNode() |
+      not mid = this.getMatchingUnlock() and result = mid.getASuccessor()
+    )
+  }
+}
+/** A call that releases a lock; `getMatchingLock` relates it to the lock operations it matches. */
+abstract class UnlockOperation extends FunctionCall {
+ abstract LockOperation getMatchingLock();
+}
+
+/** A lock operation that is a call to `semTake` or to a function whose name matches `sem_Take%`. */
+class SemaphoreTake extends LockOperation {
+  SemaphoreTake() {
+    // In `matches`, '_' is a wildcard, so the pattern covers calls like
+    // semBTakeScalable or semMTake_inline; plain semTake is listed separately.
+    exists(string callee | callee = this.getTarget().getName() |
+      callee.matches("sem_Take%") or callee = "semTake"
+    )
+  }
+
+  /** Gets the variable accessed by the first argument of the call. */
+  override Variable getLocked() { this.getArgument(0) = result.getAnAccess() }
+
+  /** Gets a `SemaphoreGive` whose locked variable is the one this call locks. */
+  override UnlockOperation getMatchingUnlock() {
+    this.getLocked() = result.(SemaphoreGive).getLocked()
+  }
+  override string say() { result = "semaphore take of " + this.getLocked().getName() }
+}
+
+/** An unlock operation that is a call to a function whose name matches `sem%Give%` (e.g. `semGive`). */
+class SemaphoreGive extends UnlockOperation {
+  SemaphoreGive() {
+    exists(string callee | callee = this.getTarget().getName() |
+      callee.matches("sem%Give%") or callee = "semGive"
+    )
+  }
+  /** Gets the variable accessed by the first argument of the call. */
+  Variable getLocked() { this.getArgument(0) = result.getAnAccess() }
+  /** Gets a lock operation that names this call as a matching unlock. */
+  override LockOperation getMatchingLock() { result.getMatchingUnlock() = this }
+}
+
+/** A call to a function named `taskLock`, `intLock` or `taskRtpLock`. */
+class LockingPrimitive extends FunctionCall, LockOperation {
+  LockingPrimitive() { ["taskLock", "intLock", "taskRtpLock"] = this.getTarget().getName() }
+  /** Gets the called function itself, which stands in for "what is locked". */
+  override Function getLocked() { result = this.getTarget() }
+  /** Gets an unlocking primitive named like this call's target with "Lock" replaced by "Unlock". */
+  override UnlockOperation getMatchingUnlock() {
+    this.getTarget().getName().replaceAll("Lock", "Unlock") =
+      result.(UnlockingPrimitive).getTarget().getName()
+  }
+  override string say() { result = "call to " + this.getLocked().getName() }
+}
+
+/** A call to a function named `taskUnlock`, `intUnlock` or `taskRtpUnlock`. */
+class UnlockingPrimitive extends FunctionCall, UnlockOperation {
+  UnlockingPrimitive() { ["taskUnlock", "intUnlock", "taskRtpUnlock"] = this.getTarget().getName() }
+  Function getLocked() { result = this.getMatchingLock().getLocked() }
+  /** Gets a lock operation that names this call as a matching unlock. */
+  override LockOperation getMatchingLock() { result.getMatchingUnlock() = this }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 11/SimpleControlFlowGoto.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 11/SimpleControlFlowGoto.ql
new file mode 100644
index 00000000000..799ea6b7511
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 11/SimpleControlFlowGoto.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Use of goto
+ * @description Using the goto statement complicates function control flow and hinders program understanding.
+ * @kind problem
+ * @id cpp/jpl-c/simple-control-flow-goto
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from GotoStmt jump
+select jump, "The goto statement should not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 11/SimpleControlFlowJmp.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 11/SimpleControlFlowJmp.ql
new file mode 100644
index 00000000000..0623036a592
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 11/SimpleControlFlowJmp.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Use of setjmp or longjmp
+ * @description Using the setjmp and longjmp functions complicates control flow and hinders program understanding.
+ * @kind problem
+ * @id cpp/jpl-c/simple-control-flow-jmp
+ * @problem.severity warning
+ * @tags correctness
+ * portability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+/** A function named `setjmp`, `longjmp`, `sigsetjmp` or `siglongjmp`. */
+class ForbiddenFunction extends Function {
+  ForbiddenFunction() { ["setjmp", "longjmp", "sigsetjmp", "siglongjmp"] = this.getName() }
+}
+
+from FunctionCall jmpCall
+where exists(ForbiddenFunction banned | banned = jmpCall.getTarget())
+select jmpCall, "The " + jmpCall.getTarget().getName() + " function should not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 12/EnumInitialization.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 12/EnumInitialization.ql
new file mode 100644
index 00000000000..4aa712fe1d6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-2/Rule 12/EnumInitialization.ql
@@ -0,0 +1,61 @@
+/**
+ * @name Irregular enum initialization
+ * @description In an enumerator list, the = construct should not be used to explicitly initialize members other than the first, unless all items are explicitly initialized. An exception is the pattern to use the last element of an enumerator list to get the number of possible values.
+ * @kind problem
+ * @id cpp/jpl-c/enum-initialization
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+/** Holds if `c` has an initializer for which `fromSource()` holds. */
+predicate hasInitializer(EnumConstant c) { exists(Initializer i | i = c.getInitializer() | i.fromSource()) }
+
+/** Holds if `c` has a source initializer that is not just an access to a constant of `c`'s own enum. */
+predicate hasNonReferenceInitializer(EnumConstant c) {
+  exists(Initializer srcInit |
+    srcInit.fromSource() and
+    srcInit = c.getInitializer() and
+    not c.getDeclaringEnum() = srcInit.getExpr().(EnumConstantAccess).getTarget().getDeclaringEnum()
+  )
+}
+
+/** Holds if `c` has a source initializer that is an access to a constant of `c`'s own enum. */
+predicate hasReferenceInitializer(EnumConstant c) {
+  // Exact complement of `hasNonReferenceInitializer` among constants with a source initializer.
+  exists(Initializer srcInit | srcInit = c.getInitializer() and srcInit.fromSource() |
+    c.getDeclaringEnum() = srcInit.getExpr().(EnumConstantAccess).getTarget().getDeclaringEnum()
+  )
+}
+
+// Holds if some constant of `e` has no source initializer and a later constant is
+// not a same-enum reference. An implicit value in last position is thus accepted
+// (it is okay to use it to get the highest enum value), even when aliases follow it.
+predicate enumThatHasConstantWithImplicitValue(Enum e) {
+  exists(EnumConstant implicit, int implicitPos, EnumConstant later, int laterPos |
+    // the implicitly valued constant ...
+    implicit = e.getEnumConstant(implicitPos) and
+    not hasInitializer(implicit) and
+    // ... followed by a constant that is more than an alias
+    later = e.getEnumConstant(laterPos) and
+    laterPos > implicitPos and
+    not hasReferenceInitializer(later)
+  )
+}
+
+from Enum e, int i
+where
+  // Position `i` is neither the first constant of the enum nor the last one ...
+  i != 0 and
+  exists(e.getEnumConstant(i + 1)) and
+  // ... its constant has an explicit value in the source that is more than a
+  // reference to another constant of the same enum ...
+  hasNonReferenceInitializer(e.getEnumConstant(i)) and
+  // ... and some constant other than the last relies on an implicit value
+  // (a last implicit constant, possibly followed by aliases, is accepted as
+  // the way to get the highest enum value automatically).
+  enumThatHasConstantWithImplicitValue(e)
+select e,
+  "In an enumerator list, the = construct should not be used to explicitly initialize members other than the first, unless all items are explicitly initialized."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/ExternDeclsInHeader.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/ExternDeclsInHeader.ql
new file mode 100644
index 00000000000..4eb8b032926
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/ExternDeclsInHeader.ql
@@ -0,0 +1,18 @@
+/**
+ * @name "extern" declaration in source file
+ * @description All "extern" declarations should be placed in a header file that is included in every file referring to the corresponding data object.
+ * @kind problem
+ * @id cpp/jpl-c/extern-decls-in-header
+ * @problem.severity warning
+ * @tags maintainability
+ * external/jpl
+ */
+
+import cpp
+
+from VariableDeclarationEntry decl
+where
+  decl.hasSpecifier("extern") and
+  not decl.getFile() instanceof HeaderFile and
+  decl.getVariable() instanceof GlobalVariable
+select decl, decl.getName() + " should be declared only in a header file that is included as needed."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeFile.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeFile.ql
new file mode 100644
index 00000000000..05d84902085
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeFile.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Global could be static
+ * @description Global variables that are not accessed outside their own file should be made static to promote information hiding.
+ * @kind problem
+ * @id cpp/jpl-c/limited-scope-file
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ * modularity
+ * external/jpl
+ */
+
+import cpp
+
+from GlobalVariable gv
+where
+  not gv.hasSpecifier("static") and
+  not gv.getADeclarationEntry().getFile() instanceof HeaderFile and // intended to be accessed elsewhere
+  strictcount(gv.getAnAccess().getEnclosingFunction()) > 1 and // If = 1, variable should be function-scope.
+  forex(VariableAccess use | use.getTarget() = gv | use.getFile() = gv.getDefinitionLocation().getFile())
+select gv,
+  "The global variable " + gv.getName() + " is not accessed outside of " + gv.getFile().getBaseName() +
+    " and could be made static."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeFunction.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeFunction.ql
new file mode 100644
index 00000000000..77259da860e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeFunction.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Variable scope too large
+ * @description Global and file-scope variables that are accessed by only one function should be scoped within that function.
+ * @kind problem
+ * @id cpp/jpl-c/limited-scope-function
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ * external/jpl
+ */
+
+import cpp
+
+from GlobalVariable gv, Function user
+where
+  user = gv.getAnAccess().getEnclosingFunction() and
+  strictcount(gv.getAnAccess().getEnclosingFunction()) = 1 and
+  not exists(VariableAccess use | use = gv.getAnAccess() | not exists(use.getEnclosingFunction())) and
+  not gv.getADeclarationEntry().getFile() instanceof HeaderFile // intended to be accessed elsewhere
+select gv,
+  "The variable " + gv.getName() + " is only accessed in $@ and should be scoped accordingly.", user,
+  user.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeLocalHidesGlobal.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeLocalHidesGlobal.ql
new file mode 100644
index 00000000000..5904bce851e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 13/LimitedScopeLocalHidesGlobal.ql
@@ -0,0 +1,36 @@
+/**
+ * @name Local variable hides global variable
+ * @description A local variable or parameter that hides a global variable of the same name.
+ * @kind problem
+ * @id cpp/jpl-c/limited-scope-local-hides-global
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+/** A local variable, or a parameter of a function that has a body and is defined in the parameter's file. */
+class LocalVariableOrParameter extends Variable {
+  LocalVariableOrParameter() {
+    this instanceof LocalVariable
+    or
+    // A parameter of a mere declaration (i.e. "int foo(int bar);") doesn't usefully
+    // shadow globals, so only parameters of the version with a body are included.
+    exists(Parameter param | param = this |
+      exists(param.getFunction().getBlock()) and
+      param.getFunction().getDefinitionLocation().getFile() = this.getFile()
+    )
+  }
+  /** Gets the message prefix naming the kind of variable: "Parameter " or "Local variable ". */
+  string type() {
+    this instanceof Parameter and result = "Parameter " or not this instanceof Parameter and result = "Local variable "
+  }
+}
+
+from LocalVariableOrParameter shadowing, GlobalVariable shadowed
+where
+  shadowed.getFile() = shadowing.getFile() and
+  shadowed.getName() = shadowing.getName()
+select shadowing, shadowing.type() + shadowing.getName() + " hides the global variable $@.", shadowed, shadowed.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 14/CheckingReturnValues.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 14/CheckingReturnValues.ql
new file mode 100644
index 00000000000..87215b69e34
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 14/CheckingReturnValues.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Unchecked return value
+ * @description The return value of each non-void function call should be checked for error conditions, or cast to (void) if irrelevant.
+ * @kind problem
+ * @id cpp/jpl-c/checking-return-values
+ * @problem.severity warning
+ * @tags correctness
+ * reliability
+ * external/jpl
+ */
+
+import cpp
+
+/**
+ * Holds if calls to `f` are exempt from this query. As written the body is
+ * `none()`, so no function is exempt.
+ *
+ * In its full generality, the rule applies to all functions that return
+ * non-void, including things like 'printf' and 'close', which are routinely
+ * not checked. The recommendation is to add an explicit cast to void for
+ * such functions; for code bases not developed with this rule in mind, it
+ * may be better to add commonly ignored functions as exceptions here.
+ */
+predicate whitelist(Function f) {
+ // Example:
+ // f.hasName("printf") or f.hasName("close") or // ...
+ none()
+}
+
+from FunctionCall c, string msg
+where
+  not whitelist(c.getTarget()) and
+  not c.getTarget().getType() instanceof VoidType and
+  (
+    msg = "The return value of non-void function $@ is not checked." and
+    c instanceof ExprInVoidContext
+    or
+    // The call's parent is a definition for which no definition-use pair exists.
+    msg = "$@'s return value is stored but not checked." and
+    definition(_, c.getParent()) and not definitionUsePair(_, c.getParent(), _)
+  )
+select c, msg, c.getTarget() as f, f.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 15/CheckingParameterValues.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 15/CheckingParameterValues.ql
new file mode 100644
index 00000000000..d9a58f6775b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 15/CheckingParameterValues.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Unchecked function argument
+ * @description Functions should check their arguments before their first use.
+ * @kind problem
+ * @id cpp/jpl-c/checking-parameter-values
+ * @problem.severity warning
+ * @tags correctness
+ * reliability
+ * external/jpl
+ */
+
+import JPL_C.Tasks
+
+predicate flow(Parameter p, ControlFlowNode n) {
+  // Start at the function's block (only for parameters that are accessed at all),
+  // then follow successors without stepping past an access to `p`.
+  n = p.getFunction().getBlock() and exists(p.getAnAccess())
+  or
+  exists(ControlFlowNode prev | flow(p, prev) and n = prev.getASuccessor() | not prev = p.getAnAccess())
+}
+/** Gets an access to `p` for which `flow(p, result)` holds. */
+VariableAccess firstAccess(Parameter p) { result = p.getAnAccess() and flow(p, result) }
+
+from Parameter p, VariableAccess first
+where
+  p.getFunction() instanceof PublicFunction and
+  first = firstAccess(p) and
+  not exists(Expr cond | cond.isCondition() and first = cond.getAChild*())
+select first, "This use of parameter " + p.getName() + " has not been checked."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsConstant.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsConstant.ql
new file mode 100644
index 00000000000..8634173ccda
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsConstant.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Constant assertion
+ * @description Assertions should check dynamic properties of pre-/post-conditions and invariants. Assertions that either always succeed or always fail are an error.
+ * @kind problem
+ * @id cpp/jpl-c/use-of-assertions-constant
+ * @problem.severity warning
+ * @tags maintainability
+ * reliability
+ * external/jpl
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Assertion a, string constVal, string msg
+where
+  // The asserted expression has a value (`getValue()`); a value whose `toInt()` is 0 counts as false.
+  constVal = a.getAsserted().getValue() and constVal.toInt() = 0 and msg = "This assertion is always false."
+  or
+  constVal = a.getAsserted().getValue() and not constVal.toInt() = 0 and msg = "This assertion is always true."
+select a.getAsserted(), msg
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsDensity.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsDensity.ql
new file mode 100644
index 00000000000..b05848d36b4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsDensity.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Long function without assertion
+ * @description All functions of more than 10 lines should have at least one assertion.
+ * @kind problem
+ * @id cpp/jpl-c/use-of-assertions-density
+ * @problem.severity recommendation
+ * @tags maintainability
+ * reliability
+ * external/jpl
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Function fn
+where
+  not exists(Assertion a | fn = a.getAsserted().getEnclosingFunction()) and
+  fn.getMetrics().getNumberOfLinesOfCode() > 10
+select fn, "All functions of more than 10 lines should have at least one assertion."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsNonBoolean.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsNonBoolean.ql
new file mode 100644
index 00000000000..2042aebeeda
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsNonBoolean.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Non-boolean assertion
+ * @description Assertions should be defined as Boolean tests, meaning "assert(p != NULL)" rather than "assert(p)".
+ * @kind problem
+ * @id cpp/jpl-c/use-of-assertions-non-boolean
+ * @problem.severity warning
+ * @tags correctness
+ * external/jpl
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Assertion chk
+where exists(PointerType ptr | ptr = chk.getAsserted().getType())
+select chk.getAsserted(), "Assertions should be defined as Boolean tests."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsSideEffect.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsSideEffect.ql
new file mode 100644
index 00000000000..29aebf64989
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 16/UseOfAssertionsSideEffect.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Assertion with side effects
+ * @description Assertions should not have side-effects -- they may be disabled completely, changing program behavior.
+ * @kind problem
+ * @id cpp/jpl-c/use-of-assertions-side-effect
+ * @problem.severity warning
+ * @tags correctness
+ * external/jpl
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Assertion chk
+where not chk.getAsserted().isPure()
+select chk.getAsserted(), "Assertions should not have side effects."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 17/BasicIntTypes.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 17/BasicIntTypes.ql
new file mode 100644
index 00000000000..82b7f146554
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 17/BasicIntTypes.ql
@@ -0,0 +1,53 @@
+/**
+ * @name Use of basic integral type
+ * @description Typedefs that indicate size and signedness should be used in place of the basic types.
+ * @kind problem
+ * @id cpp/jpl-c/basic-int-types
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+/** Holds if `t` is a typedef carrying one of the listed size-and-signedness names. */
+predicate allowedTypedefs(TypedefType t) {
+  ["I64", "U64", "I32", "U32", "I16", "U16", "I8", "U8", "F64", "F32"] = t.getName()
+}
+
+/**
+ * Gets the type of `d` (a function's or variable's `getType()`), provided `d` has a definition.
+ */
+Type getAnImmediateUsedType(Declaration d) {
+  (
+    result = d.(Variable).getType() or
+    result = d.(Function).getType()
+  ) and
+  d.hasDefinition()
+}
+
+/**
+ * Gets the base type of `t` when `t` is a typedef or derived type that is not an allowed typedef.
+ */
+Type getAUsedType(Type t) {
+  (
+    result = t.(DerivedType).getBaseType() or
+    result = t.(TypedefType).getBaseType()
+  ) and
+  not allowedTypedefs(t)
+}
+
+predicate problematic(IntegralType t) {
+ // Holds for every integral type (`any()`); list any exceptions that should be allowed here.
+ any()
+}
+
+from Declaration d, Type basic
+where
+  // Ignore violations for which we do not have a valid location.
+  not d.getLocation() instanceof UnknownLocation and
+  problematic(basic) and
+  basic = getAUsedType*(getAnImmediateUsedType(d))
+select d,
+  d.getName() + " uses the basic integral type " + basic.getName() +
+    " rather than a typedef with size and signedness."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 18/CompoundExpressions.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 18/CompoundExpressions.ql
new file mode 100644
index 00000000000..bf2367ef5c5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 18/CompoundExpressions.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Implicit precedence in compound expression
+ * @description In compound expressions with multiple sub-expressions the intended order of evaluation shall be made explicit with parentheses.
+ * @kind problem
+ * @id cpp/jpl-c/compound-expressions
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from BinaryOperation parent, BinaryOperation child
+where
+ parent.getAnOperand() = child and
+ not child.isParenthesised() and
+ (parent instanceof BinaryBitwiseOperation or child instanceof BinaryBitwiseOperation) and
+ // Some benign cases...
+ not (parent instanceof BitwiseAndExpr and child instanceof BitwiseAndExpr) and
+ not (parent instanceof BitwiseOrExpr and child instanceof BitwiseOrExpr) and
+ not (parent instanceof BitwiseXorExpr and child instanceof BitwiseXorExpr)
+select parent, "This expression involving bitwise operations should be bracketed."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 19/NoBooleanSideEffects.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 19/NoBooleanSideEffects.ql
new file mode 100644
index 00000000000..a3b0ed3b2b9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-3/Rule 19/NoBooleanSideEffects.ql
@@ -0,0 +1,97 @@
+/**
+ * @name Side effect in a Boolean expression
+ * @description The evaluation of a Boolean expression shall have no side effects.
+ * @kind problem
+ * @id cpp/jpl-c/no-boolean-side-effects
+ * @problem.severity warning
+ * @tags correctness
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+/**
+ * Holds if `f` should be considered side-effect free by name. As written
+ * the body reduces to `none()`, so this holds for no function.
+ */
+predicate safeFunctionWhitelist(Function f) {
+ exists(string name | name = f.getName() |
+ // List functions by name which are not correctly identified
+ // as side-effect free. For example, for strlen, one might do:
+ // name = "strlen" or
+ none()
+ )
+}
+
+/**
+ * Gets a pointer type contained in `t`: `t` itself when it is a pointer
+ * type, or a pointer type found by recursing into the underlying type,
+ * the base type of a derived type, or the type of a member variable of a
+ * class.
+ */
+PointerType getAPointerType(Type t) {
+  result = getAPointerType(t.(Class).getAMemberVariable().getType()) or
+  result = getAPointerType(t.(DerivedType).getBaseType()) or
+  result = getAPointerType(t.getUnderlyingType()) or
+  result = t
+}
+
+/**
+ * Holds if `f` is "inherently unsafe" for side effects: it contains an
+ * assigned value of a global or static variable, or it contains a call to
+ * another inherently unsafe function.
+ */
+predicate inherentlyUnsafe(Function f) {
+  exists(FunctionCall c | c.getEnclosingFunction() = f | inherentlyUnsafe(c.getTarget()))
+  or
+  exists(Variable v | v.getAnAssignedValue().getEnclosingFunction() = f |
+    v.isStatic() or
+    v instanceof GlobalVariable
+  )
+}
+
+/**
+ * Holds if `f` is "safe to call" without causing a side effect: `f` is
+ * not inherently unsafe, and every pointer type contained in one of its
+ * parameter types has a `const` base type.
+ */
+predicate safeToCall(Function f) {
+  not inherentlyUnsafe(f) and
+  // No pointer type reachable from a parameter type has a non-const base type.
+  not exists(PointerType ptr |
+    ptr = getAPointerType(f.getAParameter().getType()) and
+    not ptr.getBaseType().isConst()
+  )
+}
+
+
+/**
+ * An expression that this rule forbids from having side effects: the
+ * controlling expression of a loop, or the condition of an `if` or `?:`.
+ */
+class BooleanExpression extends Expr {
+  BooleanExpression() {
+    this = any(ConditionalExpr e).getCondition() or
+    this = any(IfStmt i).getCondition() or
+    this = any(Loop l).getControllingExpr()
+  }
+}
+
+predicate hasSideEffect(Expr e) {
+  // Forms that always count as a side effect: increments/decrements,
+  // assignments, and calls through an expression (`ExprCall`).
+  e instanceof CrementOperation or
+  e instanceof Assignment or
+  e instanceof ExprCall
+  or
+  exists(Function f | f = e.(FunctionCall).getTarget() and not safeFunctionWhitelist(f) |
+    not safeToCall(f) or inherentlyUnsafe(f)
+  )
+  or
+  hasSideEffect(e.getAChild())
+}
+
+from BooleanExpression cond
+where hasSideEffect(cond)
+select cond, "This Boolean expression is not side-effect free."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUse.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUse.ql
new file mode 100644
index 00000000000..826534761d7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUse.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Disallowed preprocessor use
+ * @description The use of the preprocessor must be limited to inclusion of header files and simple macro definitions.
+ * @kind problem
+ * @id cpp/jpl-c/preprocessor-use
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from PreprocessorDirective directive
+where
+  // Every directive is reported except includes, macro definitions and the
+  // conditional-compilation directives listed below.
+  not (
+    directive instanceof Include or directive instanceof Macro or
+    directive instanceof PreprocessorIf or directive instanceof PreprocessorElif or
+    directive instanceof PreprocessorElse or directive instanceof PreprocessorEndif or
+    directive instanceof PreprocessorIfdef or directive instanceof PreprocessorIfndef
+  )
+select directive, "This preprocessor directive is not allowed."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUseIfdef.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUseIfdef.ql
new file mode 100644
index 00000000000..6af58a7d09f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUseIfdef.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Conditional compilation
+ * @description The use of conditional compilation directives must be kept to a minimum -- e.g. for header guards only.
+ * @kind problem
+ * @id cpp/jpl-c/preprocessor-use-ifdef
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from PreprocessorDirective i
+where
+ (i instanceof PreprocessorIf or i instanceof PreprocessorIfdef or i instanceof PreprocessorIfndef) and
+ not i.getFile() instanceof HeaderFile
+select i, "Use of conditional compilation must be kept to a minimum."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUsePartial.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUsePartial.ql
new file mode 100644
index 00000000000..eed90414eb3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUsePartial.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Partial macro
+ * @description Macros must expand to complete syntactic units -- "#define MY_IF if(" is not legal.
+ * @kind problem
+ * @id cpp/jpl-c/preprocessor-use-partial
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+predicate incomplete(Macro m) { // holds if the body of `m` has unbalanced (), [] or {}
+ exists(string body | body = m.getBody() and not m.getBody().matches("%\\") | // presumably skips bodies ending in a backslash -- confirm
+ body.regexpMatch("[^(]*\\).*") or // a ")" with no "(" anywhere before it
+ body.regexpMatch("[^\\[]*].*") or // a "]" with no "[" anywhere before it
+ body.regexpMatch("[^{]*}.*") or // a "}" with no "{" anywhere before it
+ body.regexpMatch(".*\\([^)]*") or // a "(" with no ")" anywhere after it
+ body.regexpMatch(".*\\[[^\\]]*") or // a "[" with no "]" anywhere after it
+ body.regexpMatch(".*\\{[^}]*") or // a "{" with no "}" anywhere after it
+ count(body.indexOf("(")) != count(body.indexOf(")")) or // differing numbers of "(" and ")"
+ count(body.indexOf("[")) != count(body.indexOf("]")) or // differing numbers of "[" and "]"
+ count(body.indexOf("{")) != count(body.indexOf("}")) // differing numbers of "{" and "}"
+ )
+}
+
+from Macro m
+where incomplete(m)
+select m, "The macro " + m.getHead() + " will not expand into a syntactic unit."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUseUndisciplined.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUseUndisciplined.ql
new file mode 100644
index 00000000000..21983ebda64
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 20/PreprocessorUseUndisciplined.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Undisciplined macro
+ * @description Macros are not allowed to use complex preprocessor features like variable argument lists and token pasting.
+ * @kind problem
+ * @id cpp/jpl-c/preprocessor-use-undisciplined
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from Macro m, string msg
+where
+ m.getHead().matches("%...%") and
+ msg = "The macro " + m.getHead() + " is variadic, and hence not allowed."
+ or
+ m.getBody().matches("%##%") and
+ msg = "The macro " + m.getHead() + " uses token pasting and is not allowed."
+select m, msg
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 21/MacroInBlock.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 21/MacroInBlock.ql
new file mode 100644
index 00000000000..9ed5675e795
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 21/MacroInBlock.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Macro definition in block
+ * @description Macros shall not be #define'd within a function or a block.
+ * @kind problem
+ * @id cpp/jpl-c/macro-in-block
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+int lineInBlock(File f) {
+ exists(BlockStmt block, Location blockLocation |
+ block.getFile() = f and blockLocation = block.getLocation()
+ |
+ result in [blockLocation.getStartLine() .. blockLocation.getEndLine()]
+ )
+}
+
+from Macro m
+where m.getLocation().getStartLine() = lineInBlock(m.getFile())
+select m, "The macro " + m.getHead() + " is defined in a block."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 22/UseOfUndef.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 22/UseOfUndef.ql
new file mode 100644
index 00000000000..125db1c2369
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 22/UseOfUndef.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Use of #undef
+ * @description #undef shall not be used.
+ * @kind problem
+ * @id cpp/jpl-c/use-of-undef
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from PreprocessorUndef u
+select u, "The #undef directive shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 23/MismatchedIfdefs.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 23/MismatchedIfdefs.ql
new file mode 100644
index 00000000000..e9c619167de
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 23/MismatchedIfdefs.ql
@@ -0,0 +1,87 @@
+/**
+ * @name Mismatched conditional compilation directive
+ * @description All #else, #elif and #endif preprocessor directives shall reside in the same file as the #if or #ifdef directive to which they are related.
+ * @kind problem
+ * @id cpp/jpl-c/mismatched-ifdefs
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+class FileWithDirectives extends File {
+ FileWithDirectives() { exists(Directive d | d.getFile() = this) }
+
+ int getDirectiveLine(Directive d) {
+ d.getFile() = this and d.getLocation().getStartLine() = result
+ }
+
+ int getDirectiveIndex(Directive d) {
+ exists(int line | line = getDirectiveLine(d) | line = rank[result](getDirectiveLine(_)))
+ }
+
+ int depth(Directive d) {
+ exists(int index | index = getDirectiveIndex(d) |
+ index = 1 and result = d.depthChange()
+ or
+ exists(Directive prev | getDirectiveIndex(prev) = index - 1 |
+ result = d.depthChange() + depth(prev)
+ )
+ )
+ }
+
+ Directive lastDirective() { getDirectiveIndex(result) = max(getDirectiveIndex(_)) }
+}
+
+abstract class Directive extends PreprocessorDirective {
+ abstract int depthChange();
+
+ abstract predicate mismatched();
+
+ int depth() { exists(FileWithDirectives f | f.depth(this) = result) }
+}
+
+class IfDirective extends Directive {
+ IfDirective() {
+ this instanceof PreprocessorIf or
+ this instanceof PreprocessorIfdef or
+ this instanceof PreprocessorIfndef
+ }
+
+ override int depthChange() { result = 1 }
+
+ override predicate mismatched() { none() }
+}
+
+class ElseDirective extends Directive {
+ ElseDirective() {
+ this instanceof PreprocessorElif or
+ this instanceof PreprocessorElse
+ }
+
+ override int depthChange() { result = 0 }
+
+ override predicate mismatched() { depth() < 1 }
+}
+
+class EndifDirective extends Directive {
+ EndifDirective() { this instanceof PreprocessorEndif }
+
+ override int depthChange() { result = -1 }
+
+ override predicate mismatched() { depth() < 0 }
+}
+
+from FileWithDirectives f, Directive d, string msg
+where
+ d.getFile() = f and
+ if d.mismatched()
+ then msg = "'" + d + "' has no matching #if in file " + f.getBaseName() + "."
+ else (
+ d = f.lastDirective() and
+ d.depth() > 0 and
+ msg = "File " + f.getBaseName() + " ends with " + d.depth() + " unterminated #if directives."
+ )
+select d, msg
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 24/MultipleStmtsPerLine.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 24/MultipleStmtsPerLine.ql
new file mode 100644
index 00000000000..5eccd2c5cad
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 24/MultipleStmtsPerLine.ql
@@ -0,0 +1,38 @@
+/**
+ * @name More than one statement per line
+ * @description Putting more than one statement on a single line hinders program understanding.
+ * @kind problem
+ * @id cpp/jpl-c/multiple-stmts-per-line
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+class OneLineStmt extends Stmt {
+ OneLineStmt() {
+ this.getLocation().getStartLine() = this.getLocation().getEndLine() and
+ not exists(ForStmt for | this = for.getInitialization())
+ }
+
+ predicate onLine(File f, int line) {
+ f = this.getFile() and line = this.getLocation().getStartLine()
+ }
+}
+
+int numStmt(File f, int line) { result = strictcount(OneLineStmt o | o.onLine(f, line)) }
+
+from File f, int line, OneLineStmt o, int cnt
+where
+ numStmt(f, line) = cnt and
+ cnt > 1 and
+ o.onLine(f, line) and
+ o.getLocation().getStartColumn() =
+ min(OneLineStmt other, int toMin |
+ other.onLine(f, line) and toMin = other.getLocation().getStartColumn()
+ |
+ toMin
+ )
+select o, "This line contains " + cnt + " statements; only one is allowed."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 24/MultipleVarDeclsPerLine.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 24/MultipleVarDeclsPerLine.ql
new file mode 100644
index 00000000000..fcdb2471b76
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 24/MultipleVarDeclsPerLine.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Multiple variable declarations on one line
+ * @description There should be no more than one variable declaration per line.
+ * @kind problem
+ * @id cpp/jpl-c/multiple-var-decls-per-line
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from DeclStmt d
+where
+ exists(Variable v1, Variable v2 | v1 = d.getADeclaration() and v2 = d.getADeclaration() |
+ v1 != v2 and
+ v1.getLocation().getStartLine() = v2.getLocation().getStartLine()
+ )
+select d, "Multiple variable declarations on the same line."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 25/FunctionSizeLimits.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 25/FunctionSizeLimits.ql
new file mode 100644
index 00000000000..ef84add2be2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 25/FunctionSizeLimits.ql
@@ -0,0 +1,32 @@
+/**
+ * @name Function too long
+ * @description Function length should be limited to what can be printed on a single sheet of paper (60 lines). Number of parameters is limited to 6 or fewer.
+ * @kind problem
+ * @id cpp/jpl-c/function-size-limits
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+string lengthWarning(Function f) {
+ exists(int lines | lines = f.getMetrics().getNumberOfLines() |
+ lines > 60 and
+ result = f.getName() + " has too many lines (" + lines + ", while 60 are allowed)."
+ )
+}
+
+string paramWarning(Function f) {
+ exists(int params | params = f.getMetrics().getNumberOfParameters() |
+ params > 6 and
+ result = f.getName() + " has too many parameters (" + params + ", while 6 are allowed)."
+ )
+}
+
+from Function f, string msg
+where
+ msg = lengthWarning(f) or
+ msg = paramWarning(f)
+select f, msg
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 26/DeclarationPointerNesting.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 26/DeclarationPointerNesting.ql
new file mode 100644
index 00000000000..528fc097d79
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 26/DeclarationPointerNesting.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Pointer indirection in declaration too high
+ * @description The declaration of an object should contain no more than two levels of indirection.
+ * @kind problem
+ * @id cpp/jpl-c/declaration-pointer-nesting
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+string var(Variable v) {
+ exists(int level | level = v.getType().getPointerIndirectionLevel() |
+ level > 2 and
+ result =
+ "The type of " + v.getName() + " uses " + level +
+ " levels of pointer indirection -- maximum allowed is 2."
+ )
+}
+
+string fun(Function f) {
+ exists(int level | level = f.getType().getPointerIndirectionLevel() |
+ level > 2 and
+ result =
+ "The return type of " + f.getName() + " uses " + level +
+ " levels of pointer indirection -- maximum allowed is 2."
+ )
+}
+
+from Declaration d, string msg
+where msg = var(d) or msg = fun(d)
+select d, msg
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 27/PointerDereferenceInStmt.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 27/PointerDereferenceInStmt.ql
new file mode 100644
index 00000000000..f5eb7c6463c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 27/PointerDereferenceInStmt.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Too many pointer dereferences in statement
+ * @description Statements should contain no more than two levels of dereferencing per object.
+ * @kind problem
+ * @id cpp/jpl-c/pointer-dereference-in-stmt
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from PointerDereferenceExpr e, int n
+where
+ not e.getParent+() instanceof PointerDereferenceExpr and // `e` has no dereference ancestor, i.e. it is the outermost one
+ n = strictcount(PointerDereferenceExpr child | child.getParent+() = e) and // dereferences nested below `e` (excluding `e`)
+ n > 1 // `e` plus at least two nested dereferences exceeds the limit of 2
+select e, "This expression involves " + (n + 1) + " levels of pointer dereference; 2 are allowed." // report `e` itself as well
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 28/HiddenPointerDereferenceMacro.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 28/HiddenPointerDereferenceMacro.ql
new file mode 100644
index 00000000000..864be97c43f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 28/HiddenPointerDereferenceMacro.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Pointer dereference hidden in macro
+ * @description Pointer dereference operations should not be hidden in macro definitions.
+ * @kind problem
+ * @id cpp/jpl-c/hidden-pointer-dereference-macro
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from Macro m
+where
+ forex(MacroInvocation mi | mi.getMacro() = m |
+ exists(PointerDereferenceExpr e, Location miLoc, Location eLoc | e = mi.getAGeneratedElement() |
+ miLoc = mi.getLocation() and
+ eLoc = e.getLocation() and
+ eLoc.getStartColumn() = miLoc.getStartColumn() and
+ eLoc.getStartLine() = miLoc.getStartLine()
+ )
+ )
+select m, "The macro " + m.getHead() + " hides pointer dereference operations."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 28/HiddenPointerIndirectionTypedef.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 28/HiddenPointerIndirectionTypedef.ql
new file mode 100644
index 00000000000..b6429a43e29
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 28/HiddenPointerIndirectionTypedef.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Hidden pointer indirection
+ * @description Pointer indirection may not be hidden by typedefs -- "typedef int* IntPtr;" is not allowed.
+ * @kind problem
+ * @id cpp/jpl-c/hidden-pointer-indirection-typedef
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from TypedefType t
+where t.getBaseType().getPointerIndirectionLevel() > 0
+select t, "The typedef " + t.getName() + " hides pointer indirection."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 29/NonConstFunctionPointer.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 29/NonConstFunctionPointer.ql
new file mode 100644
index 00000000000..b2ac2fc21c0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 29/NonConstFunctionPointer.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Use of non-constant function pointer
+ * @description Non-constant pointers to functions should not be used.
+ * @kind problem
+ * @id cpp/jpl-c/non-const-function-pointer
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+from ExprCall c
+where not c.getExpr().getType().isConst()
+select c, "This call does not go through a const function pointer."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 30/FunctionPointerConversions.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 30/FunctionPointerConversions.ql
new file mode 100644
index 00000000000..5771c94ce02
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 30/FunctionPointerConversions.ql
@@ -0,0 +1,28 @@
+/**
+ * @name Invalid function pointer conversion
+ * @description Conversions shall not be performed between a pointer to a function and any type other than an integral type.
+ * @kind problem
+ * @id cpp/jpl-c/function-pointer-conversions
+ * @problem.severity warning
+ * @precision low
+ * @tags correctness
+ * external/jpl
+ */
+
+import cpp
+
+predicate permissibleConversion(Type t) {
+ t instanceof IntegralType or
+ t instanceof FunctionPointerType or
+ permissibleConversion(t.getUnspecifiedType()) or
+ permissibleConversion(t.(TypedefType).getBaseType()) or
+ permissibleConversion(t.(ReferenceType).getBaseType())
+}
+
+from Expr e, Type converted
+where
+ e.getType() instanceof FunctionPointerType and
+ e.getFullyConverted().getType() = converted and
+ not permissibleConversion(converted)
+select e,
+ "Function pointer converted to " + converted.getName() + ", which is not an integral type."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 31/IncludesFirst.ql b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 31/IncludesFirst.ql
new file mode 100644
index 00000000000..a137774aecf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/LOC-4/Rule 31/IncludesFirst.ql
@@ -0,0 +1,40 @@
+/**
+ * @name Misplaced include
+ * @description #include directives in a file shall only be preceded by other preprocessor directives or comments.
+ * @kind problem
+ * @id cpp/jpl-c/includes-first
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/jpl
+ */
+
+import cpp
+
+int firstCodeLine(File f) {
+ result =
+ min(Declaration d, Location l, int toMin |
+ (
+ l = d.getLocation() and
+ l.getFile() = f and
+ not d.isInMacroExpansion()
+ ) and
+ toMin = l.getStartLine()
+ |
+ toMin
+ )
+}
+
+int badIncludeLine(File f, Include i) {
+ result = i.getLocation().getStartLine() and
+ result > firstCodeLine(f) and
+ f = i.getFile()
+}
+
+from File f, Include i, int line
+where
+ line = badIncludeLine(f, i) and
+ line = min(badIncludeLine(f, _))
+select i,
+ "'" + i.toString() + "' is preceded by code -- it should be moved above line " + firstCodeLine(f) +
+ " in " + f.getBaseName() + "."
diff --git a/repo-tests/codeql/cpp/ql/src/JPL_C/Tasks.qll b/repo-tests/codeql/cpp/ql/src/JPL_C/Tasks.qll
new file mode 100644
index 00000000000..19f4b5a9840
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/JPL_C/Tasks.qll
@@ -0,0 +1,31 @@
+import cpp
+
+/**
+ * A function that is used as the entry point of a VxWorks task.
+ */
+class Task extends Function {
+ Task() {
+ exists(FunctionCall taskCreate |
+ taskCreate.getTarget().getName() = ["taskCreate", "taskSpawn"] and
+ this = taskCreate.getArgument(4).(AddressOfExpr).getAddressable()
+ )
+ }
+}
+
+/**
+ * From the JPL standard: "A public function is a function that is used
+ * by multiple tasks, such as a library function". We additionally say that
+ * a function is not public if it's defined in the same file as a task.
+ *
+ * An alternative definition could be to say that all functions defined in
+ * files that don't define tasks are public.
+ */
+class PublicFunction extends Function {
+ PublicFunction() {
+ not this.isStatic() and
+ (
+ strictcount(Task t | t.calls+(this)) > 1 or
+ not exists(Task t | t.getFile() = this.getFile())
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/AmbiguouslySignedBitField.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/AmbiguouslySignedBitField.ql
new file mode 100644
index 00000000000..7a7f328bd4e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/AmbiguouslySignedBitField.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Ambiguously signed bit-field member
+ * @description Bit fields with integral types should have explicit signedness
+ * only. For example, use `unsigned int` rather than `int`. It is
+ * implementation specific whether an `int`-typed bit field is
+ * signed, so there could be unexpected sign extension or
+ * overflow.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/ambiguously-signed-bit-field
+ * @tags reliability
+ * readability
+ * language-features
+ * external/cwe/cwe-190
+ */
+
+import cpp
+
+from BitField bf
+where
+ not bf.getUnspecifiedType().(IntegralType).isExplicitlySigned() and
+ not bf.getUnspecifiedType().(IntegralType).isExplicitlyUnsigned() and
+ not bf.getUnspecifiedType() instanceof Enum and
+ not bf.getUnspecifiedType() instanceof BoolType and
+ // At least for C programs on Windows, BOOL is a common typedef for a type
+ // representing BoolType.
+ not bf.getType().hasName("BOOL") and
+ // If the bit field spans the full width of its type, there cannot be unexpected sign extension or overflow.
+ not bf.getDeclaredNumBits() = bf.getType().getSize() * 8 and
+ not bf.isAnonymous() and
+ not bf.isFromUninstantiatedTemplate(_)
+select bf,
+ "Bit field " + bf.getName() + " of type " + bf.getUnderlyingType().getName() +
+ " should have explicitly unsigned integral, explicitly signed integral, or enumeration type."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.ql
new file mode 100644
index 00000000000..5a7389205f9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Bad check for overflow of integer addition
+ * @description Checking for overflow of integer addition by comparing
+ * against one of the arguments of the addition does not work
+ * when the result of the addition is automatically promoted
+ * to a larger type.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 8.1
+ * @precision very-high
+ * @id cpp/bad-addition-overflow-check
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-192
+ */
+
+import cpp
+import BadAdditionOverflowCheck
+
+from RelationalOperation cmp, AddExpr a
+where badAdditionOverflowCheck(cmp, a)
+select cmp, "Bad overflow check."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.qll
new file mode 100644
index 00000000000..56fa74d3aab
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.qll
@@ -0,0 +1,49 @@
+/**
+ * Provides the implementation of the BadAdditionOverflowCheck query. The
+ * query is implemented as a library, so that we can avoid producing
+ * duplicate results in other similar queries.
+ */
+
+import cpp
+private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+
+/**
+ * Holds if `a` and `b` are the operands of `plus`. This predicate
+ * simplifies the pattern matching logic in `badAdditionOverflowCheck` by
+ * swapping the operands both ways round.
+ */
+private predicate addExpr(AddExpr plus, Expr a, Expr b) {
+ a = plus.getLeftOperand() and b = plus.getRightOperand()
+ or
+ b = plus.getLeftOperand() and a = plus.getRightOperand()
+}
+
+/**
+ * Holds if `cmp` is an overflow check of the following form:
+ *
+ * a + b < a
+ *
+ * This check does not work if the operands `a` and `b` are
+ * automatically promoted to a larger type. If
+ * `convertedExprMightOverflow(a)` does not hold, then it is impossible for
+ * the addition to overflow, so the result of the comparison will always be
+ * false.
+ */
+predicate badAdditionOverflowCheck(RelationalOperation cmp, AddExpr plus) {
+ exists(Variable v, VariableAccess a1, VariableAccess a2, Expr b |
+ addExpr(plus, a1, b) and
+ a1 = v.getAnAccess() and
+ a2 = v.getAnAccess() and
+ not exists(a1.getQualifier()) and // Avoid structure fields
+ not exists(a2.getQualifier()) and // Avoid structure fields
+ // Simple type-based check that the addition cannot overflow.
+ exprMinVal(plus) <= exprMinVal(a1) + exprMinVal(b) and
+ exprMaxVal(plus) > exprMaxVal(a1) and
+ exprMaxVal(plus) > exprMaxVal(b) and
+ // Make sure that the plus isn't explicitly cast to a smaller type.
+ exprMinVal(plus.getExplicitlyConverted()) <= exprMinVal(plus) and
+ exprMaxVal(plus.getExplicitlyConverted()) >= exprMaxVal(plus) and
+ cmp.getAnOperand() = plus and
+ cmp.getAnOperand() = a2
+ )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadCheckOdd.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadCheckOdd.ql
new file mode 100644
index 00000000000..bff2492a70a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BadCheckOdd.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Bad check for oddness
+ * @description Using "x % 2 == 1" to check whether x is odd does not work for
+ * negative numbers.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/incomplete-parity-check
+ * @tags reliability
+ * correctness
+ * types
+ */
+
+import cpp
+
+from EqualityOperation t, RemExpr lhs, Literal rhs
+where
+ t.getLeftOperand() = lhs and
+ t.getRightOperand() = rhs and
+ lhs.getLeftOperand().getUnspecifiedType().(IntegralType).isSigned() and
+ lhs.getRightOperand().getValue() = "2" and
+ rhs.getValue() = "1"
+select t, "Possibly invalid test for oddness. This will fail for negative numbers."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BitwiseSignCheck.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BitwiseSignCheck.ql
new file mode 100644
index 00000000000..fe1b2640058
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/BitwiseSignCheck.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Sign check of bitwise operation
+ * @description Checking the sign of a bitwise operation often has surprising
+ * edge cases.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/bitwise-sign-check
+ * @tags reliability
+ * correctness
+ */
+
+import cpp
+
+from RelationalOperation e, BinaryBitwiseOperation lhs
+where
+ // `lhs > 0` (or `0 < lhs`)
+ // (note that `lhs < 0`, `lhs >= 0` or `lhs <= 0` all imply that the signedness of
+ // `lhs` is understood, so should not be flagged).
+ (e instanceof GTExpr or e instanceof LTExpr) and
+ e.getGreaterOperand() = lhs and
+ e.getLesserOperand().getValue() = "0" and
+ // lhs is signed
+ lhs.getActualType().(IntegralType).isSigned() and
+ // if `lhs` has the form `x & c`, with constant `c`, `c` is negative
+ forall(int op | op = lhs.(BitwiseAndExpr).getAnOperand().getValue().toInt() | op < 0) and
+ // exception for cases involving macros
+ not e.isAffectedByMacro()
+select e, "Potential unsafe sign check of a bitwise operation."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/ComparisonPrecedence.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/ComparisonPrecedence.ql
new file mode 100644
index 00000000000..d4b23265eed
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/ComparisonPrecedence.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Unclear comparison precedence
+ * @description Using comparisons as operands of other comparisons is unusual
+ * in itself, and most readers will require parentheses to be sure
+ * of the precedence.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/comparison-precedence
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+from ComparisonOperation co, ComparisonOperation chco
+where
+ co.getAChild() = chco and
+ not chco.isParenthesised() and
+ not co.isFromUninstantiatedTemplate(_)
+select co, "Comparison as an operand to another comparison."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/ComparisonWithCancelingSubExpr.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/ComparisonWithCancelingSubExpr.ql
new file mode 100644
index 00000000000..d0e0c269b8e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/ComparisonWithCancelingSubExpr.ql
@@ -0,0 +1,99 @@
+/**
+ * @name Comparison with canceling sub-expression
+ * @description If the same sub-expression is added to both sides of a
+ * comparison, and there is no possibility of overflow or
+ * rounding, then the sub-expression is redundant and could be
+ * removed.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision medium
+ * @id cpp/comparison-canceling-subexpr
+ * @tags readability
+ * maintainability
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import BadAdditionOverflowCheck
+import PointlessSelfComparison
+
+/**
+ * Holds if `parent` is a linear expression of `child`. For example:
+ *
+ * `parent = child + E`
+ * `parent = E - child`
+ * `parent = -child`
+ */
+private predicate linearChild(Expr parent, Expr child, float multiplier) {
+ child = parent.(AddExpr).getAChild() and multiplier = 1.0
+ or
+ child = parent.(SubExpr).getLeftOperand() and multiplier = 1.0
+ or
+ child = parent.(SubExpr).getRightOperand() and multiplier = -1.0
+ or
+ child = parent.(UnaryPlusExpr).getOperand() and multiplier = 1.0
+ or
+ child = parent.(UnaryMinusExpr).getOperand() and multiplier = -1.0
+}
+
+/**
+ * Holds if `child` is a linear sub-expression of `cmp`, and `multiplier`
+ * is its multiplication factor. For example:
+ *
+ * `x - y < z`
+ *
+ * In this example, `x` has multiplier 1, `y` has multiplier -1, and `z`
+ * has multiplier -1 (multipliers from the right hand child are negated).
+ */
+private predicate cmpLinearSubExpr(ComparisonOperation cmp, Expr child, float multiplier) {
+ not convertedExprMightOverflow(child) and
+ (
+ child = cmp.getLeftOperand() and multiplier = 1.0
+ or
+ child = cmp.getRightOperand() and multiplier = -1.0
+ or
+ exists(Expr parent, float m1, float m2 |
+ cmpLinearSubExpr(cmp, parent, m1) and
+ linearChild(parent, child, m2) and
+ multiplier = m1 * m2
+ )
+ )
+}
+
+/**
+ * Holds if `cmpLinearSubExpr(cmp, child, multiplier)` holds and
+ * `child` is an access of variable `v`.
+ */
+private predicate cmpLinearSubVariable(
+ ComparisonOperation cmp, Variable v, VariableAccess child, float multiplier
+) {
+ v = child.getTarget() and
+ not exists(child.getQualifier()) and
+ cmpLinearSubExpr(cmp, child, multiplier)
+}
+
+/**
+ * Holds if there are two linear sub-expressions of `cmp` that
+ * cancel each other. For example, `v` can be cancelled in each of
+ * these examples:
+ *
+ * `v < v`
+ * `v + x - v < y`
+ * `v + x + v < y + v + v`
+ */
+private predicate cancelingSubExprs(ComparisonOperation cmp, VariableAccess a1, VariableAccess a2) {
+ exists(Variable v |
+ exists(float m | m < 0 and cmpLinearSubVariable(cmp, v, a1, m)) and
+ exists(float m | m > 0 and cmpLinearSubVariable(cmp, v, a2, m))
+ )
+}
+
+from ComparisonOperation cmp, VariableAccess a1, VariableAccess a2
+where
+ cancelingSubExprs(cmp, a1, a2) and
+ // Most practical examples found by this query are instances of
+ // BadAdditionOverflowCheck or PointlessSelfComparison.
+ not badAdditionOverflowCheck(cmp, _) and
+ not pointlessSelfComparison(cmp)
+select cmp, "Comparison can be simplified by canceling $@ with $@.", a1, a1.getTarget().getName(),
+ a2, a2.getTarget().getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/FloatComparison.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/FloatComparison.ql
new file mode 100644
index 00000000000..5233f567fa5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/FloatComparison.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Equality test on floating-point values
+ * @description Comparing results of floating-point computations with '==' or
+ * '!=' is likely to yield surprising results since floating-point
+ * computation does not follow the standard rules of algebra.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/equality-on-floats
+ * @tags reliability
+ * correctness
+ */
+
+import cpp
+
+from EqualityOperation ro, Expr left, Expr right
+where
+ left = ro.getLeftOperand() and
+ right = ro.getRightOperand() and
+ ro.getAnOperand().getExplicitlyConverted().getType().getUnderlyingType() instanceof
+ FloatingPointType and
+ not ro.getAnOperand().isConstant() and // comparisons to constants generate too many false positives
+ not left.(VariableAccess).getTarget() = right.(VariableAccess).getTarget() // skip self comparison
+select ro, "Equality test on floating point values may not behave as expected."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/IntMultToLong.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/IntMultToLong.ql
new file mode 100644
index 00000000000..ba7a6b58aa0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/IntMultToLong.ql
@@ -0,0 +1,224 @@
+/**
+ * @name Multiplication result converted to larger type
+ * @description A multiplication result that is converted to a larger type can
+ * be a sign that the result can overflow the type converted from.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision high
+ * @id cpp/integer-multiplication-cast-to-long
+ * @tags reliability
+ * security
+ * correctness
+ * types
+ * external/cwe/cwe-190
+ * external/cwe/cwe-192
+ * external/cwe/cwe-197
+ * external/cwe/cwe-681
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.SSA
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+
+/**
+ * Holds if the value of `e` is likely to be small and controlled, which
+ * makes it less likely to cause an overflow. This is the case when `e` is:
+ * - a constant,
+ * - an expression whose type has a size of at most 1 (such as `char`),
+ * - an element access on an array whose element type is `const`, or
+ * - a use of an SSA definition whose defining value is itself
+ *   likely small.
+ */
+predicate likelySmall(Expr e) {
+  exists(SsaDefinition ssaDef, Variable v |
+    likelySmall(ssaDef.getDefiningValue(v)) and
+    ssaDef.getAUse(v) = e
+  )
+  or
+  e.(ArrayExpr).getArrayBase().getType().(ArrayType).getBaseType().isConst()
+  or
+  e.getType().getSize() <= 1
+  or
+  e.isConstant()
+}
+
+/**
+ * Gets an operand of the multiply expression `me`. The argument is restricted
+ * to multiply expressions so that the transitive closure comes out correctly.
+ */
+Expr getMulOperand(MulExpr me) { me.getAnOperand() = result }
+
+/**
+ * Gets the number of operands of the multiply expression `me` that are not
+ * `likelySmall`. Nested multiply expressions are not counted as operands;
+ * their own operands are explored instead. For example, the top-level
+ * multiplication below effectively has two such operands:
+ * ```
+ * (x * y) * 2
+ * ```
+ */
+int getEffectiveMulOperands(MulExpr me) {
+  result =
+    count(Expr leaf |
+      not likelySmall(leaf) and
+      not leaf instanceof MulExpr and
+      leaf = getMulOperand*(me)
+    )
+}
+
+/**
+ * An expression with an estimated value range. SimpleRangeAnalysis does not
+ * support reasoning about multiplication, so this class hierarchy forms a tiny
+ * abstract interpreter for it, which is invoked only after all of the trivial
+ * cases that we do not care about have been weeded out. By default, the
+ * maximum and minimum values are those computed by SimpleRangeAnalysis.
+ */
+class AnalyzableExpr extends Expr {
+  float maxValue() { upperBound(this.getFullyConverted()) = result }
+
+  float minValue() { lowerBound(this.getFullyConverted()) = result }
+}
+
+class ParenAnalyzableExpr extends AnalyzableExpr, ParenthesisExpr {
+  // Both bounds are taken unchanged from the expression returned by `getExpr()`.
+  override float maxValue() { this.getExpr().(AnalyzableExpr).maxValue() = result }
+  override float minValue() { this.getExpr().(AnalyzableExpr).minValue() = result }
+}
+
+class MulAnalyzableExpr extends AnalyzableExpr, MulExpr {
+  override float maxValue() {
+    exists(float leftLo, float leftHi, float rightLo, float rightHi |
+      leftLo = this.getLeftOperand().getFullyConverted().(AnalyzableExpr).minValue() and
+      leftHi = this.getLeftOperand().getFullyConverted().(AnalyzableExpr).maxValue() and
+      rightLo = this.getRightOperand().getFullyConverted().(AnalyzableExpr).minValue() and
+      rightHi = this.getRightOperand().getFullyConverted().(AnalyzableExpr).maxValue() and
+      result = (leftLo * rightLo).maximum(leftLo * rightHi).maximum(leftHi * rightLo).maximum(leftHi * rightHi)
+    )
+  }
+
+  override float minValue() {
+    exists(float leftLo, float leftHi, float rightLo, float rightHi |
+      leftLo = this.getLeftOperand().getFullyConverted().(AnalyzableExpr).minValue() and
+      leftHi = this.getLeftOperand().getFullyConverted().(AnalyzableExpr).maxValue() and
+      rightLo = this.getRightOperand().getFullyConverted().(AnalyzableExpr).minValue() and
+      rightHi = this.getRightOperand().getFullyConverted().(AnalyzableExpr).maxValue() and
+      result = (leftLo * rightLo).minimum(leftLo * rightHi).minimum(leftHi * rightLo).minimum(leftHi * rightHi)
+    )
+  }
+}
+
+class AddAnalyzableExpr extends AnalyzableExpr, AddExpr {
+  override float maxValue() {
+    // maximum of the sum: add the maxima of both operands
+    this.getLeftOperand().getFullyConverted().(AnalyzableExpr).maxValue() +
+      this.getRightOperand().getFullyConverted().(AnalyzableExpr).maxValue() = result
+  }
+
+  override float minValue() {
+    // minimum of the sum: add the minima of both operands
+    this.getLeftOperand().getFullyConverted().(AnalyzableExpr).minValue() +
+      this.getRightOperand().getFullyConverted().(AnalyzableExpr).minValue() = result
+  }
+}
+
+class SubAnalyzableExpr extends AnalyzableExpr, SubExpr {
+  override float maxValue() {
+    // maximum of the difference: left maximum minus right minimum
+    this.getLeftOperand().getFullyConverted().(AnalyzableExpr).maxValue() -
+      this.getRightOperand().getFullyConverted().(AnalyzableExpr).minValue() = result
+  }
+
+  override float minValue() {
+    // minimum of the difference: left minimum minus right maximum
+    this.getLeftOperand().getFullyConverted().(AnalyzableExpr).minValue() -
+      this.getRightOperand().getFullyConverted().(AnalyzableExpr).maxValue() = result
+  }
+}
+
+class VarAnalyzableExpr extends AnalyzableExpr, VariableAccess {
+  VarAnalyzableExpr() { this.getTarget() instanceof StackVariable }
+
+  override float maxValue() {
+    exists(SsaDefinition ssaDef, Variable v | ssaDef.getAUse(v) = this |
+      // If the definition has a defining expression, the maximum of
+      // that expression is used.
+      result = ssaDef.getDefiningValue(v).(AnalyzableExpr).maxValue()
+      or
+      // Otherwise, fall back to `upperBound` of the access itself.
+      not exists(ssaDef.getDefiningValue(v)) and
+      result = upperBound(this)
+    )
+  }
+
+  override float minValue() {
+    exists(SsaDefinition ssaDef, Variable v | ssaDef.getAUse(v) = this |
+      // same scheme as `maxValue`, using the minimum and `lowerBound`
+      result = ssaDef.getDefiningValue(v).(AnalyzableExpr).minValue()
+      or
+      not exists(ssaDef.getDefiningValue(v)) and result = lowerBound(this)
+    )
+  }
+}
+
+/**
+ * Holds if `t` is not an `IntegralType`, or if the range computed for
+ * `me` does not prove that it stays within `exprMinVal`/`exprMaxVal`.
+ */
+pragma[inline]
+predicate overflows(MulExpr me, Type t) {
+  // trivially holds when `t` is not an integral type
+  not t instanceof IntegralType
+  or
+  // otherwise one end of the computed range must exceed a bound of `me`
+  me.(MulAnalyzableExpr).minValue() < exprMinVal(me)
+  or
+  me.(MulAnalyzableExpr).maxValue() > exprMaxVal(me)
+}
+
+from MulExpr mul, Type narrow, Type wide
+where
+  narrow = mul.getType().getUnderlyingType() and
+  wide = mul.getConversion().getType().getUnderlyingType() and
+  narrow.getSize() < wide.getSize() and
+  (
+    narrow.getUnspecifiedType() instanceof FloatingPointType and
+    wide.getUnspecifiedType() instanceof FloatingPointType
+    or
+    narrow.getUnspecifiedType() instanceof IntegralType and
+    wide.getUnspecifiedType() instanceof IntegralType
+  ) and
+  // explicit conversions are not reported
+  mul.getConversion().isCompilerGenerated() and
+  // The multiplication must have at least two operands that are
+  // not likely small. The intuition is that the product of two
+  // unknowns is much more likely to need significantly more bits
+  // than its operands did, and thus to require a larger
+  // type.
+  getEffectiveMulOperands(mul) >= 2 and
+  // promotions of varargs arguments are not reported
+  not exists(FunctionCall call, int argIndex |
+    argIndex >= call.getTarget().getNumberOfParameters() and
+    call.getArgument(argIndex) = mul
+  ) and
+  // Cases where the type was made bigger by a literal are not
+  // reported: compared to other cases such as assignment, this is
+  // more likely to be a trivial accident than a sign that a
+  // larger type is needed for the result.
+  not exists(Expr sibling, Expr candidate |
+    sibling = mul.getParent().(BinaryOperation).getAnOperand() and
+    not sibling = mul and
+    (
+      candidate = sibling or
+      candidate = sibling.(BinaryOperation).getAnOperand*()
+    ) and
+    candidate.(Literal).getType().getSize() = wide.getSize()
+  ) and
+  // Only report if we cannot prove that the result of the
+  // multiplication will be less (resp. greater) than the
+  // maximum (resp. minimum) number we can compute.
+  overflows(mul, narrow)
+select mul,
+  "Multiplication result may overflow '" + mul.getType().toString() + "' before it is converted to '"
+    + mul.getFullyConverted().getType().toString() + "'."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessComparison.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessComparison.ql
new file mode 100644
index 00000000000..e2fe02be867
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessComparison.ql
@@ -0,0 +1,62 @@
+/**
+ * @name Comparison result is always the same
+ * @description When a comparison operation, such as x < y, always
+ * returns the same result, it means that the comparison
+ * is redundant and may mask a bug because a different
+ * check was intended.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/constant-comparison
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+private import semmle.code.cpp.commons.Exclusions
+private import semmle.code.cpp.rangeanalysis.PointlessComparison
+private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+import UnsignedGEZero
+
+// Trivial comparisons of the form 1 > 0 usually come from macro expansion.
+// For example:
+//
+//   #define PRINTMSG(val,msg) { if (val >= PRINTLEVEL) printf(msg); }
+//
+// To reduce the number of false positives, a comparison inside a macro
+// expansion is therefore not reported. The same applies to template
+// instantiations.
+from ComparisonOperation cmp, SmallSide ss, float lval, float rval, boolean value, string reason
+where
+  reachablePointlessComparison(cmp, lval, rval, value, ss) and
+  not functionContainsDisabledCode(cmp.getEnclosingFunction()) and
+  not cmp.isFromTemplateInstantiation(_) and
+  not cmp.isInMacroExpansion() and
+  // Results which have already been reported by UnsignedGEZero are skipped.
+  not unsignedGEZero(cmp, _) and
+  // a comparison between an enum and zero is always valid because whether
+  // the underlying type of an enum is signed is compiler-dependent
+  not exists(Expr enumOperand, ConstantZero zero |
+    enumOperand.getUnderlyingType() instanceof Enum and
+    relOpWithSwap(cmp, enumOperand.getFullyConverted(), zero, _, _)
+  ) and
+  // Construct a reason for the message. Something like: x >= 5 and 3 >= y.
+  exists(string op, string lhsReason, string rhsReason |
+    (
+      ss = RightIsSmaller() and op = " >= "
+      or
+      ss = LeftIsSmaller() and op = " <= "
+    ) and
+    lhsReason = cmp.getLeftOperand().toString() + op + lval.toString() and
+    rhsReason = rval.toString() + op + cmp.getRightOperand().toString() and
+    // A constant operand is left out of the reason.
+    (
+      if cmp.getRightOperand().isConstant()
+      then not cmp.getLeftOperand().isConstant() and reason = lhsReason
+      else
+        if cmp.getLeftOperand().isConstant()
+        then reason = rhsReason
+        else reason = lhsReason + " and " + rhsReason
+    )
+  )
+select cmp, "Comparison is always " + value.toString() + " because " + reason + "."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessSelfComparison.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessSelfComparison.ql
new file mode 100644
index 00000000000..04ef1af44ae
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessSelfComparison.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Self comparison
+ * @description Comparing a variable to itself always produces the
+ * same result, unless the purpose is to check for
+ * integer overflow or floating point NaN.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/comparison-of-identical-expressions
+ * @tags readability
+ * maintainability
+ */
+
+import cpp
+import PointlessSelfComparison
+import semmle.code.cpp.commons.Exclusions
+
+from ComparisonOperation selfCmp
+where
+  not isFromMacroDefinition(selfCmp) and
+  not selfCmp.isFromTemplateInstantiation(_) and
+  not overflowTest(selfCmp) and
+  not nanTest(selfCmp) and
+  pointlessSelfComparison(selfCmp)
+select selfCmp, "Self comparison."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessSelfComparison.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessSelfComparison.qll
new file mode 100644
index 00000000000..0504aad0642
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/PointlessSelfComparison.qll
@@ -0,0 +1,74 @@
+/**
+ * Provides the implementation of the PointlessSelfComparison query. The
+ * query is implemented as a library, so that we can avoid producing
+ * duplicate results in other similar queries.
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+/**
+ * Holds if `cmp` compares a variable with itself, for example:
+ *
+ *     x == x
+ *     (char)x != x
+ *     x < (int)x
+ *
+ * Usually, the intention of such a comparison is to detect whether the
+ * value of `x` overflows when it is cast to a smaller type. However, if
+ * overflow is impossible then the comparison is either always true or
+ * always false, depending on the type of comparison (`==`, `!=`, `<`, `>`,
+ * `<=`, `>=`).
+ */
+predicate pointlessSelfComparison(ComparisonOperation cmp) {
+  exists(Variable target, VariableAccess left, VariableAccess right |
+    left = cmp.getLeftOperand() and
+    left = target.getAnAccess() and
+    not exists(left.getQualifier()) and // Avoid structure fields
+    not convertedExprMightOverflow(left) and
+    right = cmp.getRightOperand() and
+    right = target.getAnAccess() and
+    not exists(right.getQualifier()) and // Avoid structure fields
+    not convertedExprMightOverflow(right)
+  )
+}
+
+/**
+ * Holds if `cmp` is a pointless self comparison using `==` or `!=`:
+ *
+ *     x == x
+ *     x != x
+ *
+ * whose left operand has a floating point type or a template parameter type.
+ * For a floating point `x`, such comparisons can be used to detect if `x` is
+ * NaN, so they should not be reported by the PointlessSelfComparison query.
+ */
+predicate nanTest(EqualityOperation cmp) {
+  pointlessSelfComparison(cmp) and
+  (
+    cmp.getLeftOperand().getUnspecifiedType() instanceof FloatingPointType or
+    cmp.getLeftOperand().getUnspecifiedType() instanceof TemplateParameter
+  )
+}
+
+/**
+ * Holds if `cmp` looks like a test to see whether a value would fit in a
+ * smaller type. The type may not be smaller on the platform where the code
+ * was extracted, but it could be smaller on a different platform.
+ *
+ * For example, `cmp` might be `x == (long)x`. If `x` already has type `long`,
+ * and `long` does not come from a macro expansion that could be
+ * platform-dependent, then `cmp` is _not_ an overflow test.
+ */
+predicate overflowTest(EqualityOperation cmp) {
+  exists(Cast explicitCast |
+    not explicitCast.isCompilerGenerated() and
+    explicitCast = cmp.getAnOperand().getConversion+() and
+    (
+      explicitCast.isAffectedByMacro()
+      or
+      explicitCast.getExpr().getType() != explicitCast.getType()
+    )
+  ) and
+  pointlessSelfComparison(cmp)
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/SignedOverflowCheck.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/SignedOverflowCheck.ql
new file mode 100644
index 00000000000..7911049599a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/SignedOverflowCheck.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Signed overflow check
+ * @description Testing for overflow by adding a value to a variable
+ * to see if it "wraps around" works only for
+ * unsigned integer values.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision high
+ * @id cpp/signed-overflow-check
+ * @tags correctness
+ * security
+ * external/cwe/cwe-128
+ * external/cwe/cwe-190
+ */
+
+import cpp
+private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+private import semmle.code.cpp.commons.Exclusions
+
+from RelationalOperation rel, AddExpr sum, Expr addend, Expr compared
+where
+  rel.getAnOperand() = sum and
+  rel.getAnOperand() = compared and
+  sum.getAnOperand() = addend and
+  globalValueNumber(addend) = globalValueNumber(compared) and
+  sum.getUnspecifiedType().(IntegralType).isSigned() and
+  exprMightOverflowPositively(sum) and
+  not isFromMacroDefinition(rel) and
+  // some compilation of the file passes neither of these two flags
+  exists(Compilation comp | comp.getAFileCompiled() = rel.getFile() |
+    not comp.getAnArgument() = ["-fwrapv", "-fno-strict-overflow"]
+  )
+select rel, "Testing for signed overflow may produce undefined results."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/UnsignedGEZero.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/UnsignedGEZero.ql
new file mode 100644
index 00000000000..7f89569c3ba
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/UnsignedGEZero.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Unsigned comparison to zero
+ * @description An unsigned value is always non-negative, even if it has been
+ * assigned a negative number, so the comparison is redundant and
+ * may mask a bug because a different check was intended.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/unsigned-comparison-zero
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+import UnsignedGEZero
+
+from UnsignedGEZero comparison, string message
+where unsignedGEZero(comparison, message)
+select comparison, message
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/UnsignedGEZero.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/UnsignedGEZero.qll
new file mode 100644
index 00000000000..45a50db6905
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Arithmetic/UnsignedGEZero.qll
@@ -0,0 +1,63 @@
+/**
+ * Provides classes and predicates implementing the UnsignedGEZero query.
+ * This library is also used by the PointlessComparison query,
+ * so that we can avoid reporting the same result twice. (PointlessComparison
+ * is a newer and more general query which also finds instances of
+ * the UnsignedGEZero pattern.)
+ */
+
+import cpp
+
+class ConstantZero extends Expr {
+  ConstantZero() {
+    // a constant expression whose value is the string "0"
+    this.getValue() = "0" and this.isConstant()
+  }
+}
+
+/**
+ * Holds if an alert should be raised at `ge` in the case that the
+ * expression `candidate` turns out to be unsigned.
+ */
+private predicate lookForUnsignedAt(RelationalOperation ge, Expr candidate) {
+  // Recursive case: `...(largerType)candidate >= 0`
+  exists(Conversion widening |
+    candidate = widening.getExpr() and
+    candidate.getType().getSize() < widening.getType().getSize() and
+    lookForUnsignedAt(ge, widening)
+  )
+  or
+  // Base case: `candidate >= 0` (or `0 <= candidate`)
+  (
+    ge instanceof LEExpr or
+    ge instanceof GEExpr
+  ) and
+  candidate = ge.getGreaterOperand().getFullyConverted() and
+  ge.getLesserOperand() instanceof ConstantZero and
+  // the greater operand must be a signed or unsigned IntegralType before
+  // conversions (not a pointer, checking a pointer >= 0 is an entirely
+  // different mistake; not an enum, as the fully converted type of an enum is
+  // compiler dependent, so checking an enum >= 0 is always reasonable)
+  ge.getGreaterOperand().getUnderlyingType() instanceof IntegralType
+}
+
+class UnsignedGEZero extends ComparisonOperation {
+  UnsignedGEZero() {
+    exists(Expr candidate | lookForUnsignedAt(this, candidate) |
+      // at least one candidate has an unsigned integral underlying type
+      candidate.getUnderlyingType().(IntegralType).isUnsigned()
+    )
+  }
+}
+
+predicate unsignedGEZero(UnsignedGEZero ugez, string msg) {
+  msg = "Pointless comparison of unsigned value to zero." and
+  not ugez.isFromTemplateInstantiation(_) and
+  not exists(MacroInvocation invocation |
+    // `ugez` is in `invocation`
+    invocation.getAnExpandedElement() = ugez and
+    // and `ugez` was apparently not passed in as a macro parameter
+    invocation.getLocation().getStartColumn() = ugez.getLocation().getStartColumn() and
+    invocation.getLocation().getStartLine() = ugez.getLocation().getStartLine()
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/ContinueInFalseLoop.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ContinueInFalseLoop.ql
new file mode 100644
index 00000000000..293595d60d8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ContinueInFalseLoop.ql
@@ -0,0 +1,48 @@
+/**
+ * @name Continue statement that does not continue
+ * @description A 'continue' statement only re-runs the loop if the loop-condition is true. Therefore
+ * using 'continue' in a loop with a constant false condition is misleading and usually
+ * a bug.
+ * @kind problem
+ * @id cpp/continue-in-false-loop
+ * @problem.severity warning
+ * @precision high
+ * @tags correctness
+ */
+
+import cpp
+
+/**
+ * Gets a `do` ... `while` loop whose condition has the value `0` and is not affected by a macro.
+ */
+DoStmt getAFalseLoop() {
+  not result.getControllingExpr().isAffectedByMacro() and
+  result.getControllingExpr().getValue() = "0"
+}
+
+/**
+ * Gets the `do` ... `while` loop that is the innermost loop surrounding `s`.
+ * The search is blocked by a `switch` statement, since a `continue` inside a
+ * `switch` inside a loop may be justified (`continue` breaks out of the loop
+ * whereas `break` only escapes the `switch`).
+ */
+DoStmt enclosingLoop(Stmt s) {
+  exists(Stmt outer | outer = s.getParent() |
+    // the parent is itself a loop: the search stops here (and only
+    // succeeds if that loop is a `do` ... `while` loop)
+    outer instanceof Loop and
+    outer = result
+    or
+    // otherwise keep climbing, unless the parent is a `switch`
+    not outer instanceof SwitchStmt and
+    not outer instanceof Loop and
+    enclosingLoop(outer) = result
+  )
+}
+
+from DoStmt falseLoop, ContinueStmt cont
+where
+  falseLoop = enclosingLoop(cont) and
+  falseLoop = getAFalseLoop()
+select cont, "This 'continue' never re-runs the loop - the $@ is always false.",
+  falseLoop.getControllingExpr(), "loop condition"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ArrayArgSizeMismatch.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ArrayArgSizeMismatch.ql
new file mode 100644
index 00000000000..924b5d87f90
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ArrayArgSizeMismatch.ql
@@ -0,0 +1,28 @@
+/**
+ * @name Array argument size mismatch
+ * @description Finds function calls where the size of an array being passed is smaller than the array size of the declared parameter.
+ * This could lead to accesses to memory locations beyond the parameter's array bounds.
+ * @kind problem
+ * @id cpp/array-arg-size-mismatch
+ * @problem.severity warning
+ * @precision high
+ * @tags reliability
+ */
+
+import cpp
+import semmle.code.cpp.commons.Buffer
+
+from Function callee, FunctionCall call, int pos, ArrayType argType, ArrayType paramType, int argLen, int paramLen
+where
+  callee = call.getTarget() and
+  argType = call.getArgument(pos).getType() and
+  paramType = callee.getParameter(pos).getType() and
+  argLen = argType.getArraySize() and
+  paramLen = paramType.getArraySize() and
+  argLen < paramLen and
+  argType.getBaseType().getSize() = paramType.getBaseType().getSize() and
+  // results for inconsistent declarations are filtered out
+  strictcount(callee.getParameter(pos).getType().getSize()) = 1 and
+  not memberMayBeVarSize(_, call.getArgument(pos).(VariableAccess).getTarget())
+select call.getArgument(pos),
+  "Array of size " + argLen + " passed to $@ which expects an array of size " + paramLen + ".", callee, callee.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/CastArrayPointerArithmetic.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/CastArrayPointerArithmetic.ql
new file mode 100644
index 00000000000..9032919da44
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/CastArrayPointerArithmetic.ql
@@ -0,0 +1,59 @@
+/**
+ * @name Upcast array used in pointer arithmetic
+ * @description An array with elements of a derived struct type is cast to a
+ * pointer to the base type of the struct. If pointer arithmetic or
+ * an array dereference is done on the resulting pointer, it will
+ * use the width of the base type, leading to misaligned reads.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision high
+ * @id cpp/upcast-array-pointer-arithmetic
+ * @tags correctness
+ * reliability
+ * security
+ * external/cwe/cwe-119
+ * external/cwe/cwe-843
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+class CastToPointerArithFlow extends DataFlow::Configuration {
+  CastToPointerArithFlow() { this = "CastToPointerArithFlow" }
+
+  override predicate isSource(DataFlow::Node node) {
+    introducesNewField(node.asExpr().getType().(DerivedType).getBaseType(),
+      node.asExpr().getConversion*().getType().(DerivedType).getBaseType()) and
+    not node.asExpr() instanceof Conversion
+  }
+
+  override predicate isSink(DataFlow::Node node) {
+    node.asExpr() = any(ArrayExpr indexing).getArrayBase() or
+    node.asExpr() = any(PointerAddExpr addition).getAnOperand()
+  }
+}
+
+/**
+ * Holds if `derived` has a (possibly indirect) base class of `base`, and at least
+ * one new field has been introduced in the inheritance chain after `base`.
+ */
+predicate introducesNewField(Class derived, Class base) {
+  // recursive case: the property already holds for a direct base
+  // class of `derived`
+  introducesNewField(derived.getABaseClass(), base)
+  or
+  // base case: `derived` itself declares a field and has `base` as a
+  // (possibly indirect) base class
+  derived.getABaseClass+() = base and
+  exists(Field declared | declared.getDeclaringType() = derived)
+}
+
+from DataFlow::PathNode src, DataFlow::PathNode snk, CastToPointerArithFlow config
+where
+  src.getNode().asExpr().getFullyConverted().getUnspecifiedType() =
+    snk.getNode().asExpr().getFullyConverted().getUnspecifiedType() and
+  config.hasFlowPath(src, snk)
+select snk, src, snk,
+  "Pointer arithmetic here may be done with the wrong type because of the cast $@.", src, "here"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ConversionChangesSign.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ConversionChangesSign.ql
new file mode 100644
index 00000000000..c89c71fb50b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ConversionChangesSign.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Conversion changes sign
+ * @description Finds conversions from unsigned to signed.
+ * @kind problem
+ * @id cpp/conversion-changes-sign
+ * @problem.severity warning
+ * @tags reliability
+ */
+
+import cpp
+
+from Expr operand, Cast conv, IntegralType fromType, IntegralType toType
+where
+  conv = operand.getConversion() and
+  conv.isImplicit() and
+  fromType = operand.getUnderlyingType() and
+  toType = conv.getUnderlyingType() and
+  (
+    fromType.isSigned() and toType.isUnsigned()
+    or
+    fromType.isUnsigned() and toType.isSigned() and toType.getSize() <= fromType.getSize()
+  ) and
+  not operand.isConstant() and
+  not ( // NOTE(review): already implied by `not operand.isConstant()` above
+    operand.isConstant() and
+    operand.getValue().toInt() >= 0 and
+    operand.getValue().toInt() <= ((toType.getSize() * 8 - 1) * 2.log()).exp()
+  )
+select operand,
+  "Conversion between signed and unsigned types " + fromType.toString() + " and " + toType.toString() + "."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ImplicitDowncastFromBitfield.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ImplicitDowncastFromBitfield.ql
new file mode 100644
index 00000000000..c2e7e4a6b90
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/ImplicitDowncastFromBitfield.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Implicit downcast from bitfield
+ * @description A bitfield is implicitly downcast to a smaller integer type.
+ * This could lead to loss of upper bits of the bitfield.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/implicit-bitfield-downcast
+ * @tags reliability
+ * correctness
+ * types
+ */
+
+import cpp
+
+from BitField field, VariableAccess access, Type target
+where
+  access.getTarget() = field and
+  (
+    if access.getFullyConverted().getType() instanceof ReferenceType
+    then target = access.getFullyConverted().getType().(ReferenceType).getBaseType()
+    else target = access.getFullyConverted().getType()
+  ) and
+  not target.getUnspecifiedType() instanceof BoolType and
+  field.getNumBits() > target.getSize() * 8 and
+  access.getExplicitlyConverted().getType().getSize() > target.getSize()
+select access, "Implicit downcast of bitfield $@", field, field.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql
new file mode 100644
index 00000000000..2a8aba6a961
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql
@@ -0,0 +1,53 @@
+/**
+ * @name Lossy function result cast
+ * @description Finds function calls whose result type is a floating point type, and which are casted to an integral type.
+ * Includes only expressions with implicit cast and excludes function calls to ceil, floor and round.
+ * @kind problem
+ * @id cpp/lossy-function-result-cast
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+
+predicate whitelist(Function f) {
+  f.getName().matches("__builtin_%") or
+  f.getName() =
+    [
+      "ceil", "ceilf", "ceill", "floor", "floorf", "floorl", "nearbyint", "nearbyintf",
+      "nearbyintl", "rint", "rintf", "rintl", "round", "roundf", "roundl", "trunc", "truncf",
+      "truncl"
+    ]
+}
+
+predicate whitelistPow(FunctionCall fc) {
+  exists(float firstArg |
+    (firstArg.floor() - firstArg).abs() < 0.001 and
+    firstArg = fc.getArgument(0).getValue().toFloat()
+  ) and
+  fc.getTarget().getName() = ["pow", "powf", "powl"]
+}
+
+predicate whiteListWrapped(FunctionCall fc) {
+  exists(ReturnStmt ret, Expr wrapped |
+    fc.getTarget() = ret.getEnclosingFunction() and
+    DataFlow::localExprFlow(wrapped, ret.getExpr()) and
+    whiteListWrapped(wrapped)
+  )
+  or
+  whitelistPow(fc)
+  or
+  whitelist(fc.getTarget())
+}
+
+from FunctionCall call, FloatingPointType returned, IntegralType converted
+where
+  call.hasImplicitConversion() and
+  returned = call.getTarget().getType().getUnderlyingType() and
+  converted = call.getActualType() and
+  not whiteListWrapped(call)
+select call,
+  "Return value of type " + returned.toString() + " is implicitly converted to " +
+    converted.toString() + " here."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/LossyPointerCast.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/LossyPointerCast.ql
new file mode 100644
index 00000000000..ce130c6f199
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/LossyPointerCast.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Lossy pointer cast
+ * @description A pointer type is converted to a smaller integer type. This may
+ * lead to loss of information in the variable and is highly
+ * non-portable.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/lossy-pointer-cast
+ * @tags reliability
+ * correctness
+ * types
+ */
+
+import cpp
+
+predicate lossyPointerCast(Expr e, PointerType pt, IntegralType it) {
+  pt = e.getType().getUnderlyingType() and
+  it = e.getConversion().getType().getUnderlyingType() and
+  pt.getSize() > it.getSize() and
+  not it instanceof BoolType and
+  // low bits of pointers are sometimes used to store flags
+  not e = any(BitwiseAndExpr mask).getAnOperand() and
+  not e.isInMacroExpansion()
+}
+
+from Expr cast, PointerType ptrType, IntegralType intType
+where lossyPointerCast(cast, ptrType, intType)
+select cast, "Converted from " + ptrType.getName() + " to smaller type " + intType.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/NonzeroValueCastToPointer.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/NonzeroValueCastToPointer.ql
new file mode 100644
index 00000000000..87f52198b0d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Conversion/NonzeroValueCastToPointer.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Non-zero value cast to pointer
+ * @description A constant value other than zero is converted to a pointer type. This is a dangerous practice, since the meaning of the numerical value of pointers is platform dependent.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision medium
+ * @id cpp/cast-to-pointer
+ * @tags reliability
+ * correctness
+ * types
+ */
+
+import cpp
+
+predicate commonErrorCode(string value) {
+  value = ["0", "1", "-1"]
+  or
+  value =
+    [
+      "18446744073709551615", // 2^64-1, i.e. -1 as an unsigned int64
+      "4294967295", // 2^32-1, i.e. -1 as an unsigned int32
+      "3735928559", "3735929054", "3405691582" // 0xdeadbeef, 0xdeadc0de, 0xcafebabe
+    ]
+}
+
+from Expr constant
+where
+  constant.isConstant() and
+  constant.getFullyConverted().getType() instanceof PointerType and
+  not constant.getType() instanceof PointerType and
+  not constant.getType() instanceof ArrayType and
+  not constant.isInMacroExpansion() and
+  not commonErrorCode(constant.getValue())
+select constant, "Nonzero value " + constant.getValueText() + " cast to pointer."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql
new file mode 100644
index 00000000000..f00dfa2213b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql
@@ -0,0 +1,143 @@
+/**
+ * @name Non-constant format string
+ * @description Passing a non-constant 'format' string to a printf-like function can lead
+ * to a mismatch between the number of arguments defined by the 'format' and the number
+ * of arguments actually passed to the function. If the format string ultimately stems
+ * from an untrusted source, this can be used for exploits.
+ * @kind problem
+ * @problem.severity recommendation
+ * @security-severity 9.3
+ * @precision high
+ * @id cpp/non-constant-format
+ * @tags maintainability
+ * correctness
+ * security
+ * external/cwe/cwe-134
+ */
+
+import semmle.code.cpp.dataflow.TaintTracking
+import semmle.code.cpp.commons.Printf
+
+// For the following `...gettext` functions, we assume that
+// all translations preserve the type and order of `%` specifiers
+// (and hence are safe to use as format strings). This
+// assumption is hard-coded into the query.
+predicate whitelistFunction(Function f, int arg) {
+ // basic variations of gettext
+ f.getName() = "_" and arg = 0
+ or
+ f.getName() = "gettext" and arg = 0
+ or
+ f.getName() = "dgettext" and arg = 1
+ or
+ f.getName() = "dcgettext" and arg = 1
+ or
+ // plural variations of gettext that take one format string for singular and another for plural form
+ f.getName() = "ngettext" and
+ (arg = 0 or arg = 1)
+ or
+ f.getName() = "dngettext" and
+ (arg = 1 or arg = 2)
+ or
+ f.getName() = "dcngettext" and
+ (arg = 1 or arg = 2)
+}
+
+// Every expansion of the `_` macro is assumed to
+// produce a constant string literal.
+predicate underscoreMacro(Expr e) {
+  exists(MacroInvocation invocation |
+    e = invocation.getExpr() and
+    invocation.getMacroName() = "_"
+  )
+}
+
+/**
+ * Holds if a value of type `t` cannot hold a character array, either directly or indirectly.
+ */
+predicate cannotContainString(Type t) {
+  exists(Type unspecified | unspecified = t.getUnspecifiedType() |
+    unspecified instanceof BuiltInType or unspecified instanceof IntegralOrEnumType
+  )
+}
+
+predicate isNonConst(DataFlow::Node node) {
+  exists(Expr e | e = node.asExpr() |
+    // a call to a function that has no definition and is not whitelisted
+    exists(FunctionCall fc | fc = e.(FunctionCall) |
+      not (
+        whitelistFunction(fc.getTarget(), _) or
+        fc.getTarget().hasDefinition()
+      )
+    )
+    or
+    // an access to a pointer-typed parameter of a function named `main`
+    exists(Parameter p | p = e.(VariableAccess).getTarget().(Parameter) |
+      p.getFunction().getName() = "main" and p.getType() instanceof PointerType
+    )
+    or
+    e instanceof CrementOperation
+    or
+    e instanceof AddressOfExpr
+    or
+    e instanceof ReferenceToExpr
+    or
+    e instanceof AssignPointerAddExpr
+    or
+    e instanceof AssignPointerSubExpr
+    or
+    e instanceof PointerArithmeticOperation
+    or
+    e instanceof FieldAccess
+    or
+    e instanceof PointerDereferenceExpr
+    or
+    e instanceof ExprCall
+    or
+    e instanceof NewArrayExpr
+    or
+    e instanceof AssignExpr
+    or
+    exists(Variable v | v = e.(VariableAccess).getTarget() |
+      v.getType().(ArrayType).getBaseType() instanceof CharType and
+      exists(AssignExpr ae |
+        ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
+      )
+    )
+  )
+  or
+  node instanceof DataFlow::DefinitionByReferenceNode
+}
+
+pragma[noinline]
+predicate isSanitizerNode(DataFlow::Node node) {
+ underscoreMacro(node.asExpr())
+ or
+ cannotContainString(node.getType())
+}
+
+class NonConstFlow extends TaintTracking::Configuration {
+ NonConstFlow() { this = "NonConstFlow" }
+
+ override predicate isSource(DataFlow::Node source) {
+ isNonConst(source) and
+ not cannotContainString(source.getType())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(FormattingFunctionCall fc | sink.asExpr() = fc.getArgument(fc.getFormatParameterIndex()))
+ }
+
+ override predicate isSanitizer(DataFlow::Node node) { isSanitizerNode(node) }
+}
+
+from FormattingFunctionCall call, Expr formatString
+where
+ call.getArgument(call.getFormatParameterIndex()) = formatString and
+ exists(NonConstFlow cf, DataFlow::Node source, DataFlow::Node sink |
+ cf.hasFlow(source, sink) and
+ sink.asExpr() = formatString
+ )
+select formatString,
+ "The format string argument to " + call.getTarget().getName() +
+ " should be constant to prevent security issues and other potential errors."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/SnprintfOverflow.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/SnprintfOverflow.ql
new file mode 100644
index 00000000000..7da8db7f226
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/SnprintfOverflow.ql
@@ -0,0 +1,115 @@
+/**
+ * @name Potentially overflowing call to snprintf
+ * @description Using the return value from snprintf without proper checks can cause overflow.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision high
+ * @id cpp/overflowing-snprintf
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-253
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+/**
+ * Holds if there is a dataflow path from `source` to `sink`
+ * with no bounds checks along the way. `pathMightOverflow` is
+ * true if there is an arithmetic operation on the path that
+ * might overflow.
+ */
+predicate flowsToExpr(Expr source, Expr sink, boolean pathMightOverflow) {
+ // Might the current expression overflow?
+ exists(boolean otherMightOverflow | flowsToExprImpl(source, sink, otherMightOverflow) |
+ if convertedExprMightOverflow(sink)
+ then pathMightOverflow = true
+ else pathMightOverflow = otherMightOverflow
+ )
+}
+
+/**
+ * Implementation of `flowsToExpr`. Does everything except
+ * checking whether the current expression might overflow.
+ */
+predicate flowsToExprImpl(Expr source, Expr sink, boolean pathMightOverflow) {
+ source = sink and
+ pathMightOverflow = false and
+ source.(FunctionCall).getTarget().(Snprintf).returnsFullFormatLength()
+ or
+ exists(RangeSsaDefinition def, StackVariable v |
+ flowsToDef(source, def, v, pathMightOverflow) and
+ sink = def.getAUse(v)
+ )
+ or
+ flowsToExpr(source, sink.(UnaryArithmeticOperation).getOperand(), pathMightOverflow)
+ or
+ flowsToExpr(source, sink.(BinaryArithmeticOperation).getAnOperand(), pathMightOverflow)
+ or
+ flowsToExpr(source, sink.(Assignment).getRValue(), pathMightOverflow)
+ or
+ flowsToExpr(source, sink.(AssignOperation).getLValue(), pathMightOverflow)
+ or
+ exists(FormattingFunctionCall call |
+ sink = call and
+ flowsToExpr(source, call.getArgument(call.getTarget().getSizeParameterIndex()),
+ pathMightOverflow)
+ )
+}
+
+/**
+ * Holds if there is a dataflow path from `source` to the SSA
+ * definition `(def,v)`, with no bounds checks along the way.
+ * `pathMightOverflow` is true if there is an arithmetic operation
+ * on the path that might overflow.
+ */
+predicate flowsToDef(Expr source, RangeSsaDefinition def, StackVariable v, boolean pathMightOverflow) {
+ // Might the current definition overflow?
+ exists(boolean otherMightOverflow | flowsToDefImpl(source, def, v, otherMightOverflow) |
+ if defMightOverflow(def, v)
+ then pathMightOverflow = true
+ else pathMightOverflow = otherMightOverflow
+ )
+}
+
+/**
+ * Implementation of `flowsToDef`. Does everything except
+ * checking whether the current definition might overflow.
+ *
+ * Note: RangeSsa is used to exclude paths that include a bounds check.
+ * RangeSsa inserts extra definitions after conditions like `if (x < 10)`.
+ * Such definitions are ignored here, so the path will terminate when
+ * a bounds check is encountered. Of course it isn't super accurate
+ * because useless checks such as `if (x >= 0)` will also terminate
+ * the path. But it is a good way to reduce the number of false positives.
+ */
+predicate flowsToDefImpl(
+ Expr source, RangeSsaDefinition def, StackVariable v, boolean pathMightOverflow
+) {
+ // Assignment or initialization: `v = e;`
+ exists(Expr e |
+ e = def.getDefiningValue(v) and
+ flowsToExpr(source, e, pathMightOverflow)
+ )
+ or
+ // `x++`
+ exists(CrementOperation crem |
+ def = crem and
+ crem.getOperand() = v.getAnAccess() and
+ flowsToExpr(source, crem.getOperand(), pathMightOverflow)
+ )
+ or
+ // Phi definition.
+ flowsToDef(source, def.getAPhiInput(v), v, pathMightOverflow)
+}
+
+from FormattingFunctionCall call, Expr sink
+where
+ flowsToExpr(call, sink, true) and
+ sink = call.getArgument(call.getTarget().getSizeParameterIndex())
+select call,
+ "The $@ of this snprintf call is derived from its return value, which may exceed the size of the buffer and overflow.",
+ sink, "size argument"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/TooManyFormatArguments.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/TooManyFormatArguments.ql
new file mode 100644
index 00000000000..f9aa87c684a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/TooManyFormatArguments.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Too many arguments to formatting function
+ * @description A printf-like function called with too many arguments will
+ * ignore the excess arguments and output less than might
+ * have been intended.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/too-many-format-arguments
+ * @tags reliability
+ * correctness
+ */
+
+import cpp
+
+from FormatLiteral fl, FormattingFunctionCall ffc, int expected, int given
+where
+ ffc = fl.getUse() and
+ expected = fl.getNumArgNeeded() and
+ given = ffc.getNumFormatArgument() and
+ expected < given and
+ fl.specsAreKnown()
+select ffc, "Format expects " + expected.toString() + " arguments but given " + given.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/WrongNumberOfFormatArguments.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/WrongNumberOfFormatArguments.ql
new file mode 100644
index 00000000000..cc3510ee5eb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/WrongNumberOfFormatArguments.ql
@@ -0,0 +1,26 @@
+/**
+ * @name Too few arguments to formatting function
+ * @description Calling a printf-like function with too few arguments can be
+ * a source of security issues.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 5.0
+ * @precision high
+ * @id cpp/wrong-number-format-arguments
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-234
+ * external/cwe/cwe-685
+ */
+
+import cpp
+
+from FormatLiteral fl, FormattingFunctionCall ffc, int expected, int given
+where
+ ffc = fl.getUse() and
+ expected = fl.getNumArgNeeded() and
+ given = ffc.getNumFormatArgument() and
+ expected > given and
+ fl.specsAreKnown()
+select ffc, "Format expects " + expected.toString() + " arguments but given " + given.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/WrongTypeFormatArguments.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/WrongTypeFormatArguments.ql
new file mode 100644
index 00000000000..cb5a5209ffe
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Format/WrongTypeFormatArguments.ql
@@ -0,0 +1,176 @@
+/**
+ * @name Wrong type of arguments to formatting function
+ * @description Calling a printf-like function with the wrong type of arguments causes unpredictable
+ * behavior.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision high
+ * @id cpp/wrong-type-format-argument
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-686
+ */
+
+import cpp
+
+/**
+ * Holds if the argument corresponding to the `pos` conversion specifier
+ * of `ffc` is expected to have type `expected`.
+ */
+private predicate formattingFunctionCallExpectedType(
+ FormattingFunctionCall ffc, int pos, Type expected
+) {
+ ffc.getFormat().(FormatLiteral).getConversionType(pos) = expected
+}
+
+/**
+ * Holds if the argument corresponding to the `pos` conversion specifier
+ * of `ffc` could alternatively have type `expected`, for example on a different
+ * platform.
+ */
+private predicate formattingFunctionCallAlternateType(
+ FormattingFunctionCall ffc, int pos, Type expected
+) {
+ ffc.getFormat().(FormatLiteral).getConversionTypeAlternate(pos) = expected
+}
+
+/**
+ * Holds if the argument corresponding to the `pos` conversion specifier
+ * of `ffc` is `arg` and has type `actual`.
+ */
+pragma[noopt]
+predicate formattingFunctionCallActualType(
+ FormattingFunctionCall ffc, int pos, Expr arg, Type actual
+) {
+ exists(Expr argConverted |
+ ffc.getConversionArgument(pos) = arg and
+ argConverted = arg.getFullyConverted() and
+ actual = argConverted.getType()
+ )
+}
+
+/**
+ * Holds if the argument corresponding to the `pos` conversion specifier
+ * of `ffc` is expected to have a width or precision argument of type
+ * `expected` and the corresponding argument `arg` has type `actual`.
+ */
+pragma[noopt]
+predicate formatOtherArgType(
+ FormattingFunctionCall ffc, int pos, Type expected, Expr arg, Type actual
+) {
+ exists(Expr argConverted |
+ (arg = ffc.getMinFieldWidthArgument(pos) or arg = ffc.getPrecisionArgument(pos)) and
+ argConverted = arg.getFullyConverted() and
+ actual = argConverted.getType() and
+ exists(IntType it | it instanceof IntType and it.isImplicitlySigned() and expected = it)
+ )
+}
+
+/**
+ * A type that may be expected by a printf format parameter, or that may
+ * be pointed to by such a type (e.g. `wchar_t`, from `wchar_t *`).
+ */
+class ExpectedType extends Type {
+ ExpectedType() {
+ exists(Type t |
+ (
+ formattingFunctionCallExpectedType(_, _, t) or
+ formattingFunctionCallAlternateType(_, _, t) or
+ formatOtherArgType(_, _, t, _, _)
+ ) and
+ this = t.getUnspecifiedType()
+ )
+ }
+}
+
+/**
+ * Holds if it is safe to display a value of type `actual` when `printf`
+ * expects a value of type `expected`.
+ *
+ * Note that variadic arguments undergo default argument promotions before
+ * they reach `printf`, notably `bool`, `char`, `short` and `enum` types
+ * are promoted to `int` (or `unsigned int`, as appropriate) and `float`s
+ * are converted to `double`.
+ */
+predicate trivialConversion(ExpectedType expected, Type actual) {
+ exists(Type exp, Type act |
+ (
+ formattingFunctionCallExpectedType(_, _, exp) or
+ formattingFunctionCallAlternateType(_, _, exp)
+ ) and
+ formattingFunctionCallActualType(_, _, _, act) and
+ expected = exp.getUnspecifiedType() and
+ actual = act.getUnspecifiedType()
+ ) and
+ (
+ // allow a pointer type to be displayed with `%p`
+ expected instanceof VoidPointerType and actual instanceof PointerType
+ or
+ // allow a function pointer type to be displayed with `%p`
+ expected instanceof VoidPointerType and
+ actual instanceof FunctionPointerType and
+ expected.getSize() = actual.getSize()
+ or
+ // allow an `enum` type to be displayed with `%i`, `%c` etc
+ expected instanceof IntegralType and actual instanceof Enum
+ or
+ // allow any `char *` type to be displayed with `%s`
+ expected instanceof CharPointerType and actual instanceof CharPointerType
+ or
+ // allow `wchar_t *`, or any pointer to an integral type of the same size, to be displayed
+ // with `%ws`
+ expected.(PointerType).getBaseType().hasName("wchar_t") and
+ exists(Wchar_t t |
+ actual.getUnspecifiedType().(PointerType).getBaseType().(IntegralType).getSize() = t.getSize()
+ )
+ or
+ // allow an `int` (or anything promoted to `int`) to be displayed with `%c`
+ expected instanceof CharType and actual instanceof IntType
+ or
+ // allow an `int` (or anything promoted to `int`) to be displayed with `%wc`
+ expected instanceof Wchar_t and actual instanceof IntType
+ or
+ expected instanceof UnsignedCharType and actual instanceof IntType
+ or
+ // allow any integral type of the same size
+ // (this permits signedness changes)
+ expected.(IntegralType).getSize() = actual.(IntegralType).getSize()
+ or
+ // allow a pointer to any integral type of the same size
+ // (this permits signedness changes)
+ expected.(PointerType).getBaseType().(IntegralType).getSize() =
+ actual.(PointerType).getBaseType().(IntegralType).getSize()
+ or
+ expected = actual
+ )
+}
+
+/**
+ * Gets the size reported by `getSize()` for the `int` type.
+ */
+int sizeof_IntType() { result = any(IntType intType).getSize() }
+
+from FormattingFunctionCall ffc, int n, Expr arg, Type expected, Type actual
+where
+ (
+ formattingFunctionCallExpectedType(ffc, n, expected) and
+ formattingFunctionCallActualType(ffc, n, arg, actual) and
+ not exists(Type anyExpected |
+ (
+ formattingFunctionCallExpectedType(ffc, n, anyExpected) or
+ formattingFunctionCallAlternateType(ffc, n, anyExpected)
+ ) and
+ trivialConversion(anyExpected.getUnspecifiedType(), actual.getUnspecifiedType())
+ )
+ or
+ formatOtherArgType(ffc, n, expected, arg, actual) and
+ not actual.getUnspecifiedType().(IntegralType).getSize() = sizeof_IntType()
+ ) and
+ not arg.isAffectedByMacro() and
+ not arg.isFromUninstantiatedTemplate(_) and
+ not actual.getUnspecifiedType() instanceof ErroneousType
+select arg,
+ "This argument should be of type '" + expected.getName() + "' but is of type '" +
+ actual.getUnspecifiedType().getName() + "'"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/InconsistentCallOnResult.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/InconsistentCallOnResult.ql
new file mode 100644
index 00000000000..719a4cf48fa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/InconsistentCallOnResult.ql
@@ -0,0 +1,74 @@
+/**
+ * @name Inconsistent operation on return value
+ * @description A function is called, and the same operation is usually performed on the return value - for example, free, delete, close etc. However, in some cases it is not performed. These unusual cases may indicate misuse of the API and could cause resource leaks.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/inconsistent-call-on-result
+ * @tags reliability
+ * correctness
+ * statistical
+ * non-attributable
+ * external/cwe/cwe-252
+ */
+
+import cpp
+
+predicate exclude(Function f) {
+  // names starting with `get` (in any letter case) or with `strto`
+  f.getName().toLowerCase().matches("get%")
+  or
+  f.getName().matches("strto%")
+}
+
+predicate checkExpr(Expr e, string operation, Variable v) {
+ exists(FunctionCall fc | fc = e and not exclude(fc.getTarget()) |
+ fc.getTarget().getName() = operation and
+ (fc.getAnArgument() = v.getAnAccess() or fc.getQualifier() = v.getAnAccess())
+ )
+ or
+ exists(DeleteExpr del | del = e |
+ del.getExpr() = v.getAnAccess() and
+ operation = "delete"
+ )
+ or
+ exists(DeleteArrayExpr del | del = e |
+ del.getExpr() = v.getAnAccess() and
+ operation = "delete[]"
+ )
+}
+
+predicate checkedFunctionCall(FunctionCall fc, string operation) {
+ relevantFunctionCall(fc, _) and
+ exists(Variable v, Expr check | v.getAnAssignedValue() = fc |
+ checkExpr(check, operation, v) and
+ check != fc
+ )
+}
+
+predicate relevantFunctionCall(FunctionCall fc, Function f) {
+ fc.getTarget() = f and
+ exists(Variable v | v.getAnAssignedValue() = fc) and
+ not okToIgnore(fc)
+}
+
+predicate okToIgnore(FunctionCall fc) { fc.isInMacroExpansion() }
+
+predicate functionStats(Function f, string operation, int used, int total, int percentage) {
+ exists(PointerType pt | pt.getATypeNameUse() = f.getADeclarationEntry()) and
+ used = strictcount(FunctionCall fc | checkedFunctionCall(fc, operation) and f = fc.getTarget()) and
+ total = strictcount(FunctionCall fc | relevantFunctionCall(fc, f)) and
+ percentage = used * 100 / total
+}
+
+from FunctionCall unchecked, Function f, string operation, int percent
+where
+ relevantFunctionCall(unchecked, f) and
+ not checkedFunctionCall(unchecked, operation) and
+ functionStats(f, operation, _, _, percent) and
+ percent >= 70 and
+ unchecked.getFile().getAbsolutePath().matches("%fbcode%") and // NOTE(review): hard-coded filter; only files whose path contains "fbcode" are reported - confirm this is intended
+ not unchecked.getFile().getAbsolutePath().matches("%\\_build%")
+select unchecked,
+ "After " + percent.toString() + "% of calls to " + f.getName() + " there is a call to " +
+ operation + " on the return value. The call may be missing in this case."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/InconsistentCheckReturnNull.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/InconsistentCheckReturnNull.ql
new file mode 100644
index 00000000000..63dc3cd460a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/InconsistentCheckReturnNull.ql
@@ -0,0 +1,249 @@
+/**
+ * @name Inconsistent nullness check
+ * @description The result value of a function is often checked for nullness,
+ * but not always. Since the value is mostly checked, it is likely
+ * that the function can return null values in some cases, and
+ * omitting the check could crash the program.
+ * @kind problem
+ * @problem.severity error
+ * @precision medium
+ * @id cpp/inconsistent-null-check
+ * @tags reliability
+ * correctness
+ * statistical
+ * non-attributable
+ * external/cwe/cwe-476
+ */
+
+import cpp
+
+predicate assertMacro(Macro m) { m.getHead().toLowerCase().matches("%assert%") }
+
+predicate assertInvocation(File f, int line) {
+ exists(MacroInvocation i, Location l | assertMacro(i.getMacro()) and l = i.getLocation() |
+ l.getStartLine() = line and l.getEndLine() = line and f = l.getFile()
+ )
+}
+
+class InterestingExpr extends Expr {
+ InterestingExpr() { nullCheckInCondition(this, _, _) }
+}
+
+predicate nullCheckAssert(InterestingExpr e, Variable v, Declaration qualifier) {
+ exists(File f, int i |
+ e.getLocation().getStartLine() = i and
+ e.getFile() = f and
+ assertInvocation(f, i) and
+ nullCheckInCondition(e, v, qualifier)
+ )
+}
+
+VariableAccess qualifiedAccess(Variable v, Declaration qualifier) {
+ result = v.getAnAccess() and
+ (
+ result.getQualifier().(VariableAccess).getTarget() = qualifier
+ or
+ exists(PointerDereferenceExpr e, VariableAccess va | result.getQualifier() = e |
+ e.getOperand() = va and va.getTarget() = qualifier
+ )
+ or
+ not exists(result.getQualifier()) and qualifier = result.getEnclosingFunction()
+ or
+ result.getQualifier() instanceof ThisExpr and qualifier = result.getEnclosingFunction()
+ )
+}
+
+predicate nullCheckInCondition(Expr e, Variable v, Declaration qualifier) {
+ // if(v)
+ exists(FunctionCall fc |
+ relevantFunctionCall(fc, _) and fc = assignedValueForVariableAndQualifier(v, qualifier)
+ |
+ e = qualifiedAccess(v, qualifier)
+ )
+ or
+ exists(AssignExpr a | a = e and a.getLValue() = qualifiedAccess(v, qualifier))
+ or
+ // if(v == NULL), if(v != NULL), if(NULL != v), if(NULL == v)
+ exists(EqualityOperation eq |
+ eq = e and
+ nullCheckInCondition(eq.getAnOperand(), v, qualifier) and
+ eq.getAnOperand().getValue() = "0"
+ )
+ or
+ // if(v && something)
+ exists(LogicalAndExpr exp | exp = e and nullCheckInCondition(exp.getAnOperand(), v, qualifier))
+ or
+ // if(v || something)
+ exists(LogicalOrExpr exp | exp = e and nullCheckInCondition(exp.getAnOperand(), v, qualifier))
+ or
+ // if(!v)
+ exists(NotExpr exp | exp = e and nullCheckInCondition(exp.getAnOperand(), v, qualifier))
+ or
+ exists(FunctionCall c |
+ c = e and
+ nullCheckInCondition(c.getAnArgument(), v, qualifier) and
+ c.getTarget().getName() = "__builtin_expect"
+ )
+ or
+ exists(ConditionDeclExpr d | d = e and nullCheckInCondition(d.getVariableAccess(), v, qualifier))
+}
+
+predicate hasNullCheck(Function enclosing, Variable v, Declaration qualifier) {
+ exists(Expr exp |
+ nullCheckInCondition(exp, v, qualifier) and exp.getEnclosingFunction() = enclosing
+ |
+ exists(ControlStructure s | exp = s.getControllingExpr())
+ or
+ exists(ConditionalExpr e | exp = e.getCondition())
+ or
+ exists(ReturnStmt s | exp = s.getExpr() and not exp instanceof VariableAccess)
+ or
+ exists(AssignExpr e | exp = e.getRValue() and not exp instanceof VariableAccess)
+ or
+ exists(AggregateLiteral al | exp = al.getAChild() and not exp instanceof VariableAccess)
+ or
+ exists(Variable other |
+ exp = other.getInitializer().getExpr() and not exp instanceof VariableAccess
+ )
+ )
+}
+
+Expr assignedValueForVariableAndQualifier(Variable v, Declaration qualifier) {
+ result = v.getInitializer().getExpr() and qualifier = result.getEnclosingFunction()
+ or
+ exists(AssignExpr e | e.getLValue() = qualifiedAccess(v, qualifier) and result = e.getRValue())
+}
+
+predicate checkedFunctionCall(FunctionCall fc) {
+ relevantFunctionCall(fc, _) and
+ exists(Variable v, Declaration qualifier |
+ fc = assignedValueForVariableAndQualifier(v, qualifier)
+ |
+ hasNullCheck(fc.getEnclosingFunction(), v, qualifier)
+ )
+}
+
+predicate uncheckedFunctionCall(FunctionCall fc) {
+ relevantFunctionCall(fc, _) and
+ not checkedFunctionCall(fc) and
+ not exists(File f, int line | f = fc.getFile() and line = fc.getLocation().getEndLine() |
+ assertInvocation(f, line + 1) or assertInvocation(f, line)
+ ) and
+ not exists(Variable v, Declaration qualifier |
+ fc = assignedValueForVariableAndQualifier(v, qualifier)
+ |
+ nullCheckAssert(_, v, qualifier)
+ ) and
+ not exists(ControlStructure s | callResultNullCheckInCondition(s.getControllingExpr(), fc)) and
+ not exists(FunctionCall other, Variable v, Declaration qualifier, Expr arg |
+ fc = assignedValueForVariableAndQualifier(v, qualifier)
+ |
+ arg = other.getAnArgument() and
+ nullCheckInCondition(arg, v, qualifier) and
+ not arg instanceof VariableAccess
+ )
+}
+
+Declaration functionQualifier(FunctionCall fc) {
+ fc.getQualifier().(VariableAccess).getTarget() = result
+ or
+ exists(PointerDereferenceExpr e, VariableAccess va |
+ fc.getQualifier() = e and e.getOperand() = va and va.getTarget() = result
+ )
+ or
+ not exists(fc.getQualifier()) and result = fc.getEnclosingFunction()
+ or
+ fc.getQualifier() instanceof ThisExpr and result = fc.getEnclosingFunction()
+}
+
+predicate callTargetAndEnclosing(FunctionCall fc, Function target, Function enclosing) {
+ target = fc.getTarget() and enclosing = fc.getEnclosingFunction()
+}
+
+predicate callArgumentVariable(FunctionCall fc, Variable v, int i) {
+ fc.getArgument(i) = v.getAnAccess()
+}
+
+predicate callResultNullCheckInCondition(Expr e, FunctionCall fc) {
+ // if(f(...)): `e` is another call to the same target as `fc`, in the same enclosing function
+ exists(FunctionCall other |
+ e = other and
+ relevantFunctionCall(fc, _) and
+ not checkedFunctionCall(fc) and
+ exists(Function called, Function enclosing |
+ callTargetAndEnclosing(fc, called, enclosing) and
+ callTargetAndEnclosing(other, called, enclosing)
+ ) and
+ forall(Variable v, int i | callArgumentVariable(fc, v, i) | callArgumentVariable(other, v, i)) and
+ (
+ functionQualifier(fc) = functionQualifier(other)
+ or
+ not exists(functionQualifier(fc)) and not exists(functionQualifier(other))
+ )
+ )
+ or
+ // if(v == NULL), if(v != NULL), if(NULL != v), if(NULL == v)
+ exists(EqualityOperation eq |
+ eq = e and
+ callResultNullCheckInCondition(eq.getAnOperand(), fc) and
+ eq.getAnOperand().getValue() = "0"
+ )
+ or
+ // if(v && something)
+ exists(LogicalAndExpr exp | exp = e and callResultNullCheckInCondition(exp.getAnOperand(), fc))
+ or
+ // if(v || something)
+ exists(LogicalOrExpr exp | exp = e and callResultNullCheckInCondition(exp.getAnOperand(), fc))
+ or
+ // if(!v)
+ exists(NotExpr exp | exp = e and callResultNullCheckInCondition(exp.getAnOperand(), fc))
+}
+
+predicate dereferenced(Variable v, Declaration qualifier, Function f) {
+ exists(PointerDereferenceExpr e |
+ e.getOperand() = qualifiedAccess(v, qualifier) and
+ e.getEnclosingFunction() = f and
+ not exists(SizeofExprOperator s | s.getExprOperand() = e)
+ )
+ or
+ exists(FunctionCall c |
+ c.getQualifier() = qualifiedAccess(v, qualifier) and
+ c.getEnclosingFunction() = f
+ )
+ or
+ exists(VariableAccess va |
+ va.getQualifier() = qualifiedAccess(v, qualifier) and
+ va.getEnclosingFunction() = f
+ )
+}
+
+predicate relevantFunctionCall(FunctionCall fc, Function f) {
+ fc.getTarget() = f and
+ exists(Variable v, Declaration qualifier |
+ fc = assignedValueForVariableAndQualifier(v, qualifier)
+ |
+ dereferenced(v, qualifier, fc.getEnclosingFunction())
+ ) and
+ not okToIgnore(fc)
+}
+
+predicate okToIgnore(FunctionCall fc) { fc.isInMacroExpansion() }
+
+predicate functionStats(Function f, int percentage) {
+ exists(int used, int total |
+ exists(PointerType pt | pt.getATypeNameUse() = f.getADeclarationEntry()) and
+ used = strictcount(FunctionCall fc | checkedFunctionCall(fc) and f = fc.getTarget()) and
+ total = strictcount(FunctionCall fc | relevantFunctionCall(fc, f)) and
+ percentage = used * 100 / total
+ )
+}
+
+from FunctionCall unchecked, Function f, int percent
+where
+ relevantFunctionCall(unchecked, f) and
+ uncheckedFunctionCall(unchecked) and
+ functionStats(f, percent) and
+ percent >= 70
+select unchecked,
+ "The result of this call to " + f.getName() + " is not checked for null, but " + percent +
+ "% of calls to " + f.getName() + " check for null."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/JapaneseEra/ConstructorOrMethodWithExactEraDate.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/JapaneseEra/ConstructorOrMethodWithExactEraDate.ql
new file mode 100644
index 00000000000..fa468c74218
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/JapaneseEra/ConstructorOrMethodWithExactEraDate.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Hard-coded Japanese era start date in call
+ * @description Japanese era changes can lead to code behaving differently. Avoid hard-coding Japanese era start dates.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/japanese-era/constructor-or-method-with-exact-era-date
+ * @precision medium
+ * @tags reliability
+ * japanese-era
+ * @deprecated This query is deprecated, use
+ * Hard-coded Japanese era start date (`cpp/japanese-era/exact-era-date`)
+ * instead.
+ */
+
+import cpp
+
+from Call cc, int i
+where
+ cc.getArgument(i).getValue().toInt() = 1989 and
+ cc.getArgument(i + 1).getValue().toInt() = 1 and
+ cc.getArgument(i + 2).getValue().toInt() = 8
+select cc, "Call that appears to have hard-coded Japanese era start date as parameter."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/JapaneseEra/StructWithExactEraDate.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/JapaneseEra/StructWithExactEraDate.ql
new file mode 100644
index 00000000000..f05a0351a12
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/JapaneseEra/StructWithExactEraDate.ql
@@ -0,0 +1,47 @@
+/**
+ * @name Hard-coded Japanese era start date in struct
+ * @description Japanese era changes can lead to code behaving differently. Avoid hard-coding Japanese era start dates.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/japanese-era/struct-with-exact-era-date
+ * @precision medium
+ * @tags reliability
+ * japanese-era
+ * @deprecated This query is deprecated, use
+ * Hard-coded Japanese era start date (`cpp/japanese-era/exact-era-date`)
+ * instead.
+ */
+
+import cpp
+import semmle.code.cpp.commons.DateTime
+
+predicate assignedYear(Struct s, YearFieldAccess year, int value) {
+ exists(Operation yearAssignment |
+ s.getAField().getAnAccess() = year and
+ yearAssignment.getAnOperand() = year and
+ yearAssignment.getAnOperand().getValue().toInt() = value
+ )
+}
+
+predicate assignedMonth(Struct s, MonthFieldAccess month, int value) {
+ exists(Operation monthAssignment |
+ s.getAField().getAnAccess() = month and
+ monthAssignment.getAnOperand() = month and
+ monthAssignment.getAnOperand().getValue().toInt() = value
+ )
+}
+
+predicate assignedDay(Struct s, DayFieldAccess day, int value) {
+ exists(Operation dayAssignment |
+ s.getAField().getAnAccess() = day and
+ dayAssignment.getAnOperand() = day and
+ dayAssignment.getAnOperand().getValue().toInt() = value
+ )
+}
+
+from StructLikeClass s, YearFieldAccess year, MonthFieldAccess month, DayFieldAccess day
+where
+ assignedYear(s, year, 1989) and
+ assignedMonth(s, month, 1) and
+ assignedDay(s, day, 8)
+select year, "A time struct that is initialized with exact Japanese calendar era start date."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/Adding365DaysPerYear.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/Adding365DaysPerYear.ql
new file mode 100644
index 00000000000..7171185a11c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/Adding365DaysPerYear.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Arithmetic operation assumes 365 days per year
+ * @description When an arithmetic operation modifies a date by a constant
+ * value of 365, it may be a sign that leap years are not taken
+ * into account.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/leap-year/adding-365-days-per-year
+ * @precision medium
+ * @tags leap-year
+ * correctness
+ */
+
+import cpp
+import LeapYear
+import semmle.code.cpp.dataflow.DataFlow
+
+from Expr source, Expr sink, PossibleYearArithmeticOperationCheckConfiguration config
+where config.hasFlow(DataFlow::exprNode(source), DataFlow::exprNode(sink))
+select sink,
+ "This arithmetic operation $@ uses a constant value of 365 ends up modifying the date/time located at $@, without considering leap year scenarios.",
+ source, source.toString(), sink, sink.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/LeapYear.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/LeapYear.qll
new file mode 100644
index 00000000000..23b66bd94a6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/LeapYear.qll
@@ -0,0 +1,287 @@
+/**
+ * Provides a library for helping create leap year related queries.
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.commons.DateTime
+
+/**
+ * Gets a `BinaryOperation` enclosing the expression `e`, directly or through any number of enclosing expressions.
+ */
+private BinaryOperation getATopLevelBinaryOperationExpression(Expr e) {
+ result = e.getEnclosingElement().(BinaryOperation)
+ or
+ result = getATopLevelBinaryOperationExpression(e.getEnclosingElement())
+}
+
+/**
+ * Holds if a binary logical operation enclosing expression `e` contains an arithmetic operation whose operator is `operation` and which has an operand with the value `valueToCheck`.
+ */
+private predicate additionalLogicalCheck(Expr e, string operation, int valueToCheck) {
+ exists(BinaryLogicalOperation bo | bo = getATopLevelBinaryOperationExpression(e) |
+ exists(BinaryArithmeticOperation bao | bao = bo.getAChild*() |
+ bao.getAnOperand().getValue().toInt() = valueToCheck and
+ bao.getOperator() = operation
+ )
+ )
+}
+
+/**
+ * A modulus-4 arithmetic operation that is accompanied by modulus-100 and modulus-400 checks.
+ */
+class CheckForLeapYearOperation extends Expr {
+  CheckForLeapYearOperation() {
+    exists(BinaryArithmeticOperation mod4 | this = mod4 |
+      mod4.getOperator() = "%" and
+      mod4.getAnOperand().getValue().toInt() = 4
+    ) and
+    additionalLogicalCheck(this.getEnclosingElement(), "%", 400) and
+    additionalLogicalCheck(this.getEnclosingElement(), "%", 100)
+  }
+}
+
+/**
+ * A `YearFieldAccess` that would represent an access to a year field on a struct and is used for arguing about leap year calculations.
+ */
+abstract class LeapYearFieldAccess extends YearFieldAccess {
+  /**
+   * Holds if the field access is a modification,
+   * and it involves an arithmetic operation.
+   */
+  predicate isModifiedByArithmeticOperation() {
+    this.isModified() and
+    exists(Operation op |
+      op.getAnOperand() = this and
+      (
+        op instanceof AssignArithmeticOperation or
+        exists(BinaryArithmeticOperation bao | bao = op.getAnOperand()) or
+        op instanceof CrementOperation
+      )
+    )
+  }
+
+  /**
+   * Holds if the field access is a modification,
+   * and it involves an arithmetic operation.
+   * In order to avoid false positives, operations that involve values that are normal
+   * for year normalization are not matched:
+   *
+   * 1900 - `struct tm` counts years since 1900
+   * 2000 - also excluded; presumably 2-digit year conversions
+   * 1980/80 - FAT32 epoch
+   * 100*value - transforming a marshaled 2-digit year
+   */
+  predicate isModifiedByArithmeticOperationNotForNormalization() {
+    this.isModified() and
+    exists(Operation op |
+      op.getAnOperand() = this and
+      (
+        // Compound assignment, e.g. `theTime.wYear += value;`
+        op instanceof AssignArithmeticOperation and
+        not (
+          // Constants that only normalize the year representation
+          // (see the list in the documentation of this predicate).
+          op.getAChild().getValue().toInt() = [1900, 2000, 1980, 80]
+          or
+          // Special case for transforming marshaled 2-digit year date:
+          // theTime.wYear += 100*value;
+          exists(MulExpr mulBy100 | mulBy100 = op.getAChild() |
+            mulBy100.getAChild().getValue().toInt() = 100
+          )
+        )
+        or
+        // `op` has a binary arithmetic operation among its operands.
+        exists(BinaryArithmeticOperation bao |
+          bao = op.getAnOperand() and
+          // we're specifically interested in calculations that update the existing
+          // value (like `x = x + 1`), so look for a child `YearFieldAccess`.
+          bao.getAChild*() instanceof YearFieldAccess and
+          not (
+            bao.getAChild().getValue().toInt() = [1900, 2000, 1980, 80]
+            or
+            // The same 2-digit year special case, written without a compound assignment:
+            // theTime.wYear = theTime.wYear + 100*value;
+            // Here the multiplication is a child of `bao`, not of `op`. A multiplication
+            // that is a direct child of `op` is still accepted so that everything that
+            // was excluded before remains excluded.
+            exists(MulExpr mulBy100 | mulBy100 = bao.getAChild() or mulBy100 = op.getAChild() |
+              mulBy100.getAChild().getValue().toInt() = 100
+            )
+          )
+        )
+        or
+        // Increment or decrement of the year field.
+        op instanceof CrementOperation
+      )
+    )
+  }
+
+  /**
+   * Holds if a binary logical operation enclosing this access includes a modulus operator with an operand specified by `valueToCheck`.
+   */
+  predicate additionalModulusCheckForLeapYear(int valueToCheck) {
+    additionalLogicalCheck(this, "%", valueToCheck)
+  }
+
+  /**
+   * Holds if a binary logical operation enclosing this access includes an addition or subtraction operator with an operand specified by `valueToCheck`.
+   */
+  predicate additionalAdditionOrSubstractionCheckForLeapYear(int valueToCheck) {
+    additionalLogicalCheck(this, "+", valueToCheck) or
+    additionalLogicalCheck(this, "-", valueToCheck)
+  }
+
+  /**
+   * Holds if this access is an operand of a modulus 4 operation, which would likely indicate the start of a leap year check.
+   * The access itself must not be a modification.
+   */
+  predicate isUsedInMod4Operation() {
+    not this.isModified() and
+    exists(BinaryArithmeticOperation bo |
+      bo.getAnOperand() = this and
+      bo.getAnOperand().getValue().toInt() = 4 and
+      bo.getOperator() = "%"
+    )
+  }
+
+  /**
+   * Holds if this object seems to be used in a valid Gregorian calendar leap year check.
+   */
+  predicate isUsedInCorrectLeapYearCheck() {
+    // The Gregorian leap year rule is:
+    // Every year that is exactly divisible by four is a leap year,
+    // except for years that are exactly divisible by 100,
+    // but these centurial years are leap years if they are exactly divisible by 400
+    //
+    // https://aa.usno.navy.mil/faq/docs/calendars.php
+    this.isUsedInMod4Operation() and
+    additionalModulusCheckForLeapYear(400) and
+    additionalModulusCheckForLeapYear(100)
+  }
+}
+
+/**
+ * `YearFieldAccess` for the `SYSTEMTIME` struct.
+ */
+class StructSystemTimeLeapYearFieldAccess extends LeapYearFieldAccess {
+ StructSystemTimeLeapYearFieldAccess() { this.getTarget().getName() = "wYear" }
+}
+
+/**
+ * A `LeapYearFieldAccess` whose target field is named `tm_year` (`struct tm`).
+ */
+class StructTmLeapYearFieldAccess extends LeapYearFieldAccess {
+  StructTmLeapYearFieldAccess() { "tm_year" = this.getTarget().getName() }
+
+  override predicate isUsedInCorrectLeapYearCheck() {
+    // tm_year represents years since 1900
+    (
+      // converting from/to Unix epoch
+      additionalAdditionOrSubstractionCheckForLeapYear(1970)
+      or
+      // some systems may use 2000 for 2-digit year conversions
+      additionalAdditionOrSubstractionCheckForLeapYear(2000)
+      or
+      additionalAdditionOrSubstractionCheckForLeapYear(1900)
+    ) and
+    additionalModulusCheckForLeapYear(100) and
+    additionalModulusCheckForLeapYear(400) and
+    this.isUsedInMod4Operation()
+  }
+}
+
+/**
+ * `Function` that includes an operation that is checking for leap year.
+ */
+class ChecksForLeapYearFunction extends Function {
+ ChecksForLeapYearFunction() { this = any(CheckForLeapYearOperation clyo).getEnclosingFunction() }
+}
+
+/**
+ * `FunctionCall` that includes an operation that is checking for leap year.
+ */
+class ChecksForLeapYearFunctionCall extends FunctionCall {
+ ChecksForLeapYearFunctionCall() { this.getTarget() instanceof ChecksForLeapYearFunction }
+}
+
+/**
+ * `DataFlow::Configuration` for finding a variable access that would flow into
+ * a function call that includes an operation to check for leap year.
+ */
+class LeapYearCheckConfiguration extends DataFlow::Configuration {
+ LeapYearCheckConfiguration() { this = "LeapYearCheckConfiguration" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(VariableAccess va | va = source.asExpr())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(ChecksForLeapYearFunctionCall fc | sink.asExpr() = fc.getAnArgument())
+ }
+}
+
+/**
+ * `DataFlow::Configuration` for finding an operation with hardcoded 365 that will flow into a `FILEINFO` field.
+ */
+class FiletimeYearArithmeticOperationCheckConfiguration extends DataFlow::Configuration {
+ FiletimeYearArithmeticOperationCheckConfiguration() {
+ this = "FiletimeYearArithmeticOperationCheckConfiguration"
+ }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(Expr e, Operation op | e = source.asExpr() |
+ op.getAChild*().getValue().toInt() = 365 and
+ op.getAChild*() = e
+ )
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(StructLikeClass dds, FieldAccess fa, AssignExpr aexpr, Expr e | e = sink.asExpr() |
+ dds instanceof PackedTimeType and
+ fa.getQualifier().getUnderlyingType() = dds and
+ fa.isModified() and
+ aexpr.getAChild() = fa and
+ aexpr.getChild(1).getAChild*() = e
+ )
+ }
+}
+
+/**
+ * `DataFlow::Configuration` for finding an operation with hardcoded 365 that will flow into any known date/time field.
+ */
+class PossibleYearArithmeticOperationCheckConfiguration extends DataFlow::Configuration {
+ PossibleYearArithmeticOperationCheckConfiguration() {
+ this = "PossibleYearArithmeticOperationCheckConfiguration"
+ }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(Operation op | op = source.asExpr() |
+ op.getAChild*().getValue().toInt() = 365 and
+ not op.getParent() instanceof Expr
+ )
+ }
+
+ override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+ // flow from anything on the RHS of an assignment to a time/date structure to that
+ // assignment.
+ exists(StructLikeClass dds, FieldAccess fa, AssignExpr aexpr, Expr e |
+ e = node1.asExpr() and
+ aexpr = node2.asExpr()
+ |
+ (dds instanceof PackedTimeType or dds instanceof UnpackedTimeType) and
+ fa.getQualifier().getUnderlyingType() = dds and
+ aexpr.getLValue() = fa and
+ aexpr.getRValue().getAChild*() = e
+ )
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(StructLikeClass dds, FieldAccess fa, AssignExpr aexpr | aexpr = sink.asExpr() |
+ (dds instanceof PackedTimeType or dds instanceof UnpackedTimeType) and
+ fa.getQualifier().getUnderlyingType() = dds and
+ fa.isModified() and
+ aexpr.getLValue() = fa
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UncheckedLeapYearAfterYearModification.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UncheckedLeapYearAfterYearModification.ql
new file mode 100644
index 00000000000..7668ca71463
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UncheckedLeapYearAfterYearModification.ql
@@ -0,0 +1,64 @@
+/**
+ * @name Year field changed using an arithmetic operation without checking for leap year
+ * @description A field that represents a year is being modified by an arithmetic operation, but no proper check for leap years can be detected afterwards.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/leap-year/unchecked-after-arithmetic-year-modification
+ * @precision medium
+ * @tags leap-year
+ * correctness
+ */
+
+import cpp
+import LeapYear
+
+from Variable var, LeapYearFieldAccess yfa
+where
+ exists(VariableAccess va |
+ yfa.getQualifier() = va and
+ var.getAnAccess() = va and
+ // The year is modified with an arithmetic operation. Avoid values that are likely false positives
+ yfa.isModifiedByArithmeticOperationNotForNormalization() and
+ // Avoid false positives
+ not (
+ // If there is a local check for leap year after the modification
+ exists(LeapYearFieldAccess yfacheck |
+ yfacheck.getQualifier() = var.getAnAccess() and
+ yfacheck.isUsedInCorrectLeapYearCheck() and
+ yfacheck.getBasicBlock() = yfa.getBasicBlock().getASuccessor*()
+ )
+ or
+ // If there is a data flow from the variable that was modified to a function that seems to check for leap year
+ exists(
+ VariableAccess source, ChecksForLeapYearFunctionCall fc, LeapYearCheckConfiguration config
+ |
+ source = var.getAnAccess() and
+ config.hasFlow(DataFlow::exprNode(source), DataFlow::exprNode(fc.getAnArgument()))
+ )
+ or
+ // If there is a data flow from the field that was modified to a function that seems to check for leap year
+ exists(
+ VariableAccess vacheck, YearFieldAccess yfacheck, ChecksForLeapYearFunctionCall fc,
+ LeapYearCheckConfiguration config
+ |
+ vacheck = var.getAnAccess() and
+ yfacheck.getQualifier() = vacheck and
+ config.hasFlow(DataFlow::exprNode(yfacheck), DataFlow::exprNode(fc.getAnArgument()))
+ )
+ or
+ // If there is a successor or predecessor that sets the month = 1
+ exists(MonthFieldAccess mfa, AssignExpr ae |
+ mfa.getQualifier() = var.getAnAccess() and
+ mfa.isModified() and
+ (
+ mfa.getBasicBlock() = yfa.getBasicBlock().getASuccessor*() or
+ yfa.getBasicBlock() = mfa.getBasicBlock().getASuccessor+()
+ ) and
+ ae = mfa.getEnclosingElement() and
+ ae.getAnOperand().getValue().toInt() = 1
+ )
+ )
+ )
+select yfa,
+ "Field $@ on variable $@ has been modified, but no appropriate check for LeapYear was found.",
+ yfa.getTarget(), yfa.getTarget().toString(), var, var.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UncheckedReturnValueForTimeFunctions.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UncheckedReturnValueForTimeFunctions.ql
new file mode 100644
index 00000000000..af02a2814a2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UncheckedReturnValueForTimeFunctions.ql
@@ -0,0 +1,108 @@
+/**
+ * @name Unchecked return value for time conversion function
+ * @description When the return value of a fallible time conversion function is
+ * not checked for failure, its output parameters may contain
+ * invalid dates.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/leap-year/unchecked-return-value-for-time-conversion-function
+ * @precision medium
+ * @tags leap-year
+ * correctness
+ */
+
+import cpp
+import LeapYear
+
+/**
+ * A `YearFieldAccess` that is modifying the year by any arithmetic operation.
+ *
+ * NOTE:
+ * To change this class to work for general purpose date transformations that do not check the return value,
+ * make the following changes:
+ * - change `extends LeapYearFieldAccess` to `extends FieldAccess`.
+ * - change `this.isModifiedByArithmeticOperation()` to `this.isModified()`.
+ * Expect a lower precision for a general purpose version.
+ */
+class DateStructModifiedFieldAccess extends LeapYearFieldAccess {
+ DateStructModifiedFieldAccess() {
+ exists(Field f, StructLikeClass struct |
+ f.getAnAccess() = this and
+ struct.getAField() = f and
+ struct.getUnderlyingType() instanceof UnpackedTimeType and
+ this.isModifiedByArithmeticOperation()
+ )
+ }
+}
+
+/**
+ * This is a list of APIs that will get the system time, and therefore guarantee that the value is valid.
+ */
+class SafeTimeGatheringFunction extends Function {
+ SafeTimeGatheringFunction() {
+ this.getQualifiedName() = ["GetFileTime", "GetSystemTime", "NtQuerySystemTime"]
+ }
+}
+
+/**
+ * This list of APIs should check for the return value to detect problems during the conversion.
+ */
+class TimeConversionFunction extends Function {
+ TimeConversionFunction() {
+ this.getQualifiedName() =
+ [
+ "FileTimeToSystemTime", "SystemTimeToFileTime", "SystemTimeToTzSpecificLocalTime",
+ "SystemTimeToTzSpecificLocalTimeEx", "TzSpecificLocalTimeToSystemTime",
+ "TzSpecificLocalTimeToSystemTimeEx", "RtlLocalTimeToSystemTime",
+ "RtlTimeToSecondsSince1970", "_mkgmtime"
+ ]
+ }
+}
+
+from FunctionCall fcall, TimeConversionFunction trf, Variable var
+where
+ fcall = trf.getACallToThisFunction() and
+ fcall instanceof ExprInVoidContext and
+ var.getUnderlyingType() instanceof UnpackedTimeType and
+ (
+ exists(AddressOfExpr aoe |
+ aoe = fcall.getAnArgument() and
+ aoe.getAddressable() = var
+ )
+ or
+ exists(VariableAccess va |
+ fcall.getAnArgument() = va and
+ var.getAnAccess() = va
+ )
+ ) and
+ exists(DateStructModifiedFieldAccess dsmfa, VariableAccess modifiedVarAccess |
+ modifiedVarAccess = var.getAnAccess() and
+ modifiedVarAccess = dsmfa.getQualifier() and
+ modifiedVarAccess = fcall.getAPredecessor*()
+ ) and
+ // Remove false positives
+ not (
+ // Remove any instance where the predecessor is a SafeTimeGatheringFunction and no change to the data happened in between
+ exists(FunctionCall pred |
+ pred = fcall.getAPredecessor*() and
+ exists(SafeTimeGatheringFunction stgf | pred = stgf.getACallToThisFunction()) and
+ not exists(DateStructModifiedFieldAccess dsmfa, VariableAccess modifiedVarAccess |
+ modifiedVarAccess = var.getAnAccess() and
+ modifiedVarAccess = dsmfa.getQualifier() and
+ modifiedVarAccess = fcall.getAPredecessor*() and
+ modifiedVarAccess = pred.getASuccessor*()
+ )
+ )
+ or
+ // Remove any instance where the year is changed, but the month is set to 1 (year wrapping)
+ exists(MonthFieldAccess mfa, AssignExpr ae |
+ mfa.getQualifier() = var.getAnAccess() and
+ mfa.isModified() and
+ mfa = fcall.getAPredecessor*() and
+ ae = mfa.getEnclosingElement() and
+ ae.getAnOperand().getValue().toInt() = 1
+ )
+ )
+select fcall,
+ "Return value of $@ function should be verified to check for any error because variable $@ is not guaranteed to be safe.",
+ trf, trf.getQualifiedName().toString(), var, var.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UnsafeArrayForDaysOfYear.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UnsafeArrayForDaysOfYear.ql
new file mode 100644
index 00000000000..b27db937b57
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Leap Year/UnsafeArrayForDaysOfYear.ql
@@ -0,0 +1,41 @@
+/**
+ * @name Unsafe array for days of the year
+ * @description An array of 365 items typically indicates one entry per day of the year, but without considering leap years, which would be 366 days.
+ * An access on a leap year could result in buffer overflow bugs.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/leap-year/unsafe-array-for-days-of-the-year
+ * @precision low
+ * @tags security
+ * leap-year
+ */
+
+import cpp
+
+class LeapYearUnsafeDaysOfTheYearArrayType extends ArrayType {
+ LeapYearUnsafeDaysOfTheYearArrayType() { this.getArraySize() = 365 }
+}
+
+from Element element, string allocType
+where
+ exists(NewArrayExpr nae |
+ element = nae and
+ nae.getAllocatedType() instanceof LeapYearUnsafeDaysOfTheYearArrayType and
+ allocType = "an array allocation"
+ )
+ or
+ exists(Variable var |
+ var = element and
+ var.getType() instanceof LeapYearUnsafeDaysOfTheYearArrayType and
+ allocType = "an array allocation"
+ )
+ or
+ exists(ConstructorCall cc |
+ element = cc and
+ cc.getTarget().hasName("vector") and
+ cc.getArgument(0).getValue().toInt() = 365 and
+ allocType = "a std::vector allocation"
+ )
+select element,
+ "There is " + allocType +
+ " with a hard-coded set of 365 elements, which may indicate the number of days in a year without considering leap year scenarios."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/AssignWhereCompareMeant.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/AssignWhereCompareMeant.ql
new file mode 100644
index 00000000000..e60b763bc53
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/AssignWhereCompareMeant.ql
@@ -0,0 +1,119 @@
+/**
+ * @name Assignment where comparison was intended
+ * @description The '=' operator may have been used accidentally, where '=='
+ * was intended.
+ * @kind problem
+ * @problem.severity error
+ * @precision high
+ * @id cpp/assign-where-compare-meant
+ * @tags reliability
+ * correctness
+ * external/cwe/cwe-481
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.StackVariableReachability
+
+class UndefReachability extends StackVariableReachability {
+ UndefReachability() { this = "UndefReachability" }
+
+ override predicate isSource(ControlFlowNode node, StackVariable v) {
+ candidateVariable(v) and
+ node = v.getParentScope() and
+ not v instanceof Parameter and
+ not v.hasInitializer()
+ }
+
+ override predicate isSink(ControlFlowNode node, StackVariable v) {
+ candidateVariable(v) and
+ node = v.getAnAccess()
+ }
+
+ override predicate isBarrier(ControlFlowNode node, StackVariable v) {
+ node.(AssignExpr).getLValue() = v.getAnAccess()
+ }
+}
+
+abstract class BooleanControllingAssignment extends AssignExpr {
+ abstract predicate isWhitelisted();
+}
+
+/**
+ * Gets an operand of a logical operation expression (we need the restriction
+ * to BinaryLogicalOperation expressions to get the correct transitive closure).
+ */
+Expr getComparisonOperand(BinaryLogicalOperation op) { result = op.getAnOperand() }
+
+class BooleanControllingAssignmentInExpr extends BooleanControllingAssignment {
+ BooleanControllingAssignmentInExpr() {
+ this.getParent() instanceof UnaryLogicalOperation or
+ this.getParent() instanceof BinaryLogicalOperation or
+ exists(ConditionalExpr c | c.getCondition() = this)
+ }
+
+ override predicate isWhitelisted() {
+ this.getConversion().(ParenthesisExpr).isParenthesised()
+ or
+ // Allow this assignment if all comparison operations in the expression that this
+ // assignment is part of, are not parenthesized. In that case it seems like the programmer
+ // is fine with unparenthesized comparison operands to binary logical operators, and
+ // the parentheses around this assignment were used to call it out as an assignment.
+ this.isParenthesised() and
+ forex(ComparisonOperation op | op = getComparisonOperand*(this.getParent+()) |
+ not op.isParenthesised()
+ )
+ or
+ // Match a pattern like:
+ // ```
+ // if((a = b) && use_value(a)) { ... }
+ // ```
+ // where the assignment is meant to update the value of `a` before it's used in some other boolean
+ // subexpression that is guaranteed to be evaluated _after_ the assignment.
+ this.isParenthesised() and
+ exists(LogicalAndExpr parent, Variable var, VariableAccess access |
+ var = this.getLValue().(VariableAccess).getTarget() and
+ access = var.getAnAccess() and
+ not access.isUsedAsLValue() and
+ parent.getRightOperand() = access.getParent*() and
+ parent.getLeftOperand() = this.getParent*()
+ )
+ }
+}
+
+class BooleanControllingAssignmentInStmt extends BooleanControllingAssignment {
+ BooleanControllingAssignmentInStmt() {
+ exists(IfStmt i | i.getCondition() = this) or
+ exists(ForStmt f | f.getCondition() = this) or
+ exists(WhileStmt w | w.getCondition() = this) or
+ exists(DoStmt d | d.getCondition() = this)
+ }
+
+ override predicate isWhitelisted() { this.isParenthesised() }
+}
+
+/**
+ * Holds if `ae` assigns a constant that is not a string literal and is not whitelisted,
+ * i.e. it is a potential result of this query before undef reachability is checked.
+ */
+predicate candidateResult(BooleanControllingAssignment ae) {
+  not ae.getRValue() instanceof StringLiteral and
+  not ae.isWhitelisted() and
+  ae.getRValue().isConstant()
+}
+
+/**
+ * Holds if `v` is the target of the left-hand side of some candidate result of this query.
+ */
+predicate candidateVariable(Variable v) {
+  exists(BooleanControllingAssignment assign |
+    v = assign.getLValue().(VariableAccess).getTarget() and
+    candidateResult(assign)
+  )
+}
+
+from BooleanControllingAssignment ae, UndefReachability undef
+where
+ candidateResult(ae) and
+ not ae.isFromUninstantiatedTemplate(_) and
+ not undef.reaches(_, ae.getLValue().(VariableAccess).getTarget(), ae.getLValue())
+select ae, "Use of '=' where '==' may have been intended."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/BoolValueInBitOp.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/BoolValueInBitOp.ql
new file mode 100644
index 00000000000..3ece1f844d3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/BoolValueInBitOp.ql
@@ -0,0 +1,47 @@
+/**
+ * @name Boolean value in bitwise operation
+ * @description A Boolean value (i.e. something that has been coerced to have
+ * a value of either 0 or 1) is used in a bitwise operation.
+ * This commonly indicates missing parentheses or mistyping
+ * logical operators as bitwise operators.
+ * @kind problem
+ * @id cpp/bool-value-in-bit-op
+ * @problem.severity warning
+ * @tags reliability
+ */
+
+import cpp
+
+class BitwiseOperation extends Expr {
+ BitwiseOperation() {
+ this instanceof BinaryBitwiseOperation or
+ this instanceof UnaryBitwiseOperation
+ }
+}
+
+class LogicalOperation extends Expr {
+ LogicalOperation() {
+ this instanceof BinaryLogicalOperation or
+ this instanceof UnaryLogicalOperation or
+ this instanceof ComparisonOperation
+ }
+}
+
+/**
+ * It's common in some projects to use "non-short-circuit logic", i.e. to
+ * apply the bitwise and, or and xor operators to Boolean values. Such use,
+ * while considered bad practice, is usually not incorrect.
+ */
+predicate nonShortCircuitLogic2(BinaryBitwiseOperation op) {
+ (op instanceof BitwiseAndExpr or op instanceof BitwiseOrExpr or op instanceof BitwiseXorExpr) and
+ (op.getLeftOperand() instanceof LogicalOperation or nonShortCircuitLogic2(op.getLeftOperand())) and
+ (op.getRightOperand() instanceof LogicalOperation or nonShortCircuitLogic2(op.getRightOperand()))
+}
+
+from LogicalOperation o
+where
+ o.getParent() instanceof BitwiseOperation and
+ not nonShortCircuitLogic2(o.getParent()) and
+ not o.getParent().isInMacroExpansion() and // It's ok if o itself is in a macro expansion.
+ not o.getParent().(LShiftExpr).getLeftOperand() = o // Common pattern for producing bit masks: "(a && b) << 16".
+select o, "The result of this expression is Boolean, but it is used in a bitwise context."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/CompareWhereAssignMeant.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/CompareWhereAssignMeant.ql
new file mode 100644
index 00000000000..402f26d875e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/CompareWhereAssignMeant.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Comparison where assignment was intended
+ * @description The '==' operator may have been used accidentally, where '='
+ * was intended, resulting in a useless test.
+ * @kind problem
+ * @problem.severity error
+ * @precision high
+ * @id cpp/compare-where-assign-meant
+ * @tags reliability
+ * correctness
+ * external/cwe/cwe-482
+ */
+
+import cpp
+
+from ExprInVoidContext op
+where
+ not op.isUnevaluated() and
+ (
+ op instanceof EQExpr
+ or
+ op.(FunctionCall).getTarget().hasName("operator==")
+ )
+select op, "This '==' operator has no effect. The assignment ('=') operator was probably intended."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/DubiousNullCheck.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/DubiousNullCheck.ql
new file mode 100644
index 00000000000..819ecbae8ec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/DubiousNullCheck.ql
@@ -0,0 +1,47 @@
+/**
+ * @name Dubious NULL check
+ * @description The address of a field (except the first) will never be NULL,
+ * so it is misleading, at best, to check for that case.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/dubious-null-check
+ * @tags reliability
+ * readability
+ */
+
+import cpp
+
+predicate zeroComparison(EqualityOperation e) {
+  exists(Expr operand |
+    (e.getRightOperand() = operand or e.getLeftOperand() = operand) and
+    operand.getValue() = "0"
+  )
+}
+
+predicate inNullContext(AddressOfExpr e) {
+  exists(EqualityOperation cmp | zeroComparison(cmp) |
+    cmp.getRightOperand() = e or
+    cmp.getLeftOperand() = e
+  )
+  or
+  exists(ControlStructure c | e = c.getControllingExpr())
+  or
+  e.getFullyConverted().getUnderlyingType() instanceof BoolType
+}
+
+FieldAccess chainedFields(FieldAccess fa) {
+  result = chainedFields(fa.getQualifier()) or
+  result = fa
+}
+
+from AddressOfExpr addrof, FieldAccess fa, Variable v, int offset
+where
+ fa = addrof.getOperand() and
+ inNullContext(addrof) and
+ not addrof.isInMacroExpansion() and
+ v.getAnAccess() = chainedFields(fa).getQualifier() and
+ not v instanceof MemberVariable and
+ offset = strictsum(chainedFields(fa).getTarget().getByteOffset()) and
+ offset != 0
+select addrof, "This will only be NULL if " + v.getName() + " == -" + offset + "."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/ExprHasNoEffect.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/ExprHasNoEffect.ql
new file mode 100644
index 00000000000..d3a7dcb8939
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/ExprHasNoEffect.ql
@@ -0,0 +1,104 @@
+/**
+ * @name Expression has no effect
+ * @description A pure expression whose value is ignored is likely to be the
+ * result of a typo.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/useless-expression
+ * @tags maintainability
+ * correctness
+ * external/cwe/cwe-561
+ */
+
+import cpp
+private import semmle.code.cpp.commons.Exclusions
+
+class PureExprInVoidContext extends ExprInVoidContext {
+ PureExprInVoidContext() { this.isPure() }
+}
+
+// an access that is the expression of the initialization statement of a `for` loop
+predicate accessInInitOfForStmt(Expr e) {
+  exists(ForStmt loop, ExprStmt init |
+    init = loop.getInitialization() and
+    e = init.getExpr()
+  ) and
+  e instanceof Access
+}
+
+/**
+ * Holds if the function `f`, or a function called by it, contains
+ * code excluded by the preprocessor.
+ */
+predicate functionContainsDisabledCodeRecursive(Function f) {
+ functionContainsDisabledCode(f)
+ or
+ // recurse into function calls
+ exists(FunctionCall fc |
+ fc.getEnclosingFunction() = f and
+ functionContainsDisabledCodeRecursive(fc.getTarget())
+ )
+}
+
+/**
+ * Holds if the function `f`, or a function called by it, is inside a
+ * preprocessor branch that may have code in another arm
+ */
+predicate functionDefinedInIfDefRecursive(Function f) {
+ functionDefinedInIfDef(f)
+ or
+ // recurse into function calls
+ exists(FunctionCall fc |
+ fc.getEnclosingFunction() = f and
+ functionDefinedInIfDefRecursive(fc.getTarget())
+ )
+}
+
+/**
+ * Holds if `call` has the form `B::f()` or `q.B::f()`, where `B` is a base
+ * class of the class containing `call`.
+ *
+ * This is most often used for calling base-class functions from within
+ * overrides. Those functions may have no side effect in the current
+ * implementation, but we should not advise callers to rely on this. That would
+ * break encapsulation.
+ */
+predicate baseCall(FunctionCall call) {
+ call.getNameQualifier().getQualifyingElement() =
+ call.getEnclosingFunction().getDeclaringType().(Class).getABaseClass+()
+}
+
+from PureExprInVoidContext peivc, Locatable parent, Locatable info, string info_text, string tail
+where
+ // EQExprs are covered by CompareWhereAssignMeant.ql
+ not peivc instanceof EQExpr and
+ // as is operator==
+ not peivc.(FunctionCall).getTarget().hasName("operator==") and
+ not baseCall(peivc) and
+ not accessInInitOfForStmt(peivc) and
+ not peivc.isCompilerGenerated() and
+ not peivc.getEnclosingFunction().isDefaulted() and
+ not exists(Macro m | peivc = m.getAnInvocation().getAnExpandedElement()) and
+ not peivc.isFromTemplateInstantiation(_) and
+ not peivc.isFromUninstantiatedTemplate(_) and
+ parent = peivc.getParent() and
+ not parent.isInMacroExpansion() and
+ not peivc.isUnevaluated() and
+ not parent instanceof PureExprInVoidContext and
+ not peivc.getEnclosingFunction().isCompilerGenerated() and
+ not peivc.getType() instanceof UnknownType and
+ not functionContainsDisabledCodeRecursive(peivc.(FunctionCall).getTarget()) and
+ not functionDefinedInIfDefRecursive(peivc.(FunctionCall).getTarget()) and
+ if peivc instanceof FunctionCall
+ then
+ exists(Function target |
+ target = peivc.(FunctionCall).getTarget() and
+ info = target and
+ info_text = target.getName() and
+ tail = " (because $@ has no external side effects)."
+ )
+ else (
+ tail = "." and info = peivc and info_text = ""
+ )
+select peivc, "This expression has no effect" + tail, info, info_text
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/FutileConditional.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/FutileConditional.ql
new file mode 100644
index 00000000000..bf724de3d70
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/FutileConditional.ql
@@ -0,0 +1,60 @@
+/**
+ * @name Futile conditional
+ * @description An if-statement with an empty then-branch and no else-branch
+ * may indicate that the code is incomplete.
+ * @kind problem
+ * @problem.severity recommendation
+ * @precision high
+ * @id cpp/empty-if
+ * @tags reliability
+ * readability
+ */
+
+import cpp
+
+predicate macroUse(Locatable l) {
+ l instanceof PreprocessorDirective or l instanceof MacroInvocation
+}
+
+predicate macroUseLocation(File f, int start, int end) {
+ exists(Locatable l, Location loc |
+ macroUse(l) and
+ loc = l.getLocation() and
+ f = loc.getFile() and
+ start = loc.getStartLine() and
+ end = loc.getEndLine()
+ )
+}
+
+pragma[noopt]
+predicate emptyIf(IfStmt s, BlockStmt b, File f, int start, int end) {
+ s instanceof IfStmt and
+ not exists(s.getElse()) and
+ b = s.getThen() and
+ b instanceof BlockStmt and
+ not exists(b.getAChild()) and
+ f = b.getFile() and
+ exists(Location l |
+ l = b.getLocation() and
+ start = l.getStartLine() and
+ end = l.getEndLine()
+ )
+}
+
+pragma[noopt]
+predicate query(IfStmt s, BlockStmt b) {
+ exists(File f, int blockStart, int blockEnd |
+ emptyIf(s, b, f, blockStart, blockEnd) and
+ not exists(int macroStart, int macroEnd |
+ macroUseLocation(f, macroStart, macroEnd) and
+ macroStart > blockStart and
+ macroEnd < blockEnd
+ )
+ )
+}
+
+from IfStmt s, BlockStmt b
+where
+ query(s, b) and
+ not b.isInMacroExpansion()
+select s, "If-statement with an empty then-branch and no else-branch."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/IncorrectNotOperatorUsage.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/IncorrectNotOperatorUsage.ql
new file mode 100644
index 00000000000..30664869adc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/IncorrectNotOperatorUsage.ql
@@ -0,0 +1,36 @@
+/**
+ * @name Incorrect 'not' operator usage
+ * @description Usage of a logical-not (!) operator as an operand for a bit-wise operation.
+ * This commonly indicates the usage of an incorrect operator instead of the bit-wise not (~) operator,
+ * also known as ones' complement operator.
+ * @kind problem
+ * @id cpp/incorrect-not-operator-usage
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @tags security
+ * external/cwe/cwe-480
+ * external/microsoft/c6317
+ */
+
+import cpp
+
+/**
+ * It's common in some projects to use "a double negation" to normalize the boolean
+ * result to either 1 or 0.
+ * This predicate is intended to filter explicit usage of a double negation as it typically
+ * indicates the explicit purpose to normalize the result for bit-wise or arithmetic purposes.
+ */
+predicate doubleNegationNormalization(NotExpr notexpr) { notexpr.getAnOperand() instanceof NotExpr }
+
+from BinaryBitwiseOperation binbitwop
+where
+ exists(NotExpr notexpr |
+ binbitwop.getAnOperand() = notexpr and
+ not doubleNegationNormalization(notexpr) and
+ (
+ binbitwop instanceof BitwiseAndExpr or
+ binbitwop instanceof BitwiseOrExpr
+ )
+ )
+select binbitwop, "Usage of a logical not (!) expression as a bitwise operator."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/LogicalExprCouldBeSimplified.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/LogicalExprCouldBeSimplified.ql
new file mode 100644
index 00000000000..313fc2c6b1c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/LogicalExprCouldBeSimplified.ql
@@ -0,0 +1,56 @@
+/**
+ * @name Logical expression could be simplified
+ * @description When a logical expression can be easily simplified, there may
+ * be an opportunity to improve readability by doing so, or it may
+ * indicate that the code contains a typo.
+ * @kind problem
+ * @id cpp/logical-expr-could-be-simplified
+ * @problem.severity warning
+ * @tags maintainability
+ */
+
+import cpp
+
+/**
+ * A simple literal (i.e. not a macro expansion, enum constant
+ * or template argument).
+ */
+predicate simple(Literal l) {
+ l instanceof OctalLiteral or
+ l instanceof HexLiteral or
+ l instanceof CharLiteral or
+ l.getValueText() = "true" or
+ l.getValueText() = "false" or
+ // Parsing doubles is too slow...
+ //exists(l.getValueText().toFloat())
+ // Instead, check whether the literal starts with a letter.
+ not l.getValueText().regexpMatch("[a-zA-Z_].*")
+}
+
+predicate booleanLiteral(Literal l) {
+ simple(l) and
+ (l.getValue() = "0" or l.getValue() = "1" or l.getValue() = "true" or l.getValue() = "false")
+}
+
+string boolLiteralInLogicalOp(Literal literal) {
+ booleanLiteral(literal) and
+ literal.getParent() instanceof BinaryLogicalOperation and
+ result =
+ "Literal value " + literal.getValueText() +
+ " is used in a logical expression; simplify or use a constant."
+}
+
+string comparisonOnLiterals(ComparisonOperation op) {
+ simple(op.getLeftOperand()) and
+ simple(op.getRightOperand()) and
+ not op.getAnOperand().isInMacroExpansion() and
+ if exists(op.getValue())
+ then result = "This comparison involves two literals and is always " + op.getValue() + "."
+ else result = "This comparison involves two literals and should be simplified."
+}
+
+from Expr e, string msg
+where
+ (msg = boolLiteralInLogicalOp(e) or msg = comparisonOnLiterals(e)) and
+ not e.isInMacroExpansion()
+select e, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/MissingEnumCaseInSwitch.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/MissingEnumCaseInSwitch.ql
new file mode 100644
index 00000000000..554937127c0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/MissingEnumCaseInSwitch.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Missing enum case in switch
+ * @description A switch statement over an enum type is missing a case for some enum constant
+ * and does not have a default case. This may cause logic errors.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/missing-case-in-switch
+ * @tags reliability
+ * correctness
+ * external/cwe/cwe-478
+ */
+
+import cpp
+
+from EnumSwitch es, float missing, float total
+where
+ not es.hasDefaultCase() and
+ missing = count(es.getAMissingCase()) and
+ total = missing + count(es.getASwitchCase()) and
+ missing / total < 0.3
+select es, "Switch statement is missing case for " + es.getAMissingCase().getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/ShortCircuitBitMask.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/ShortCircuitBitMask.ql
new file mode 100644
index 00000000000..a1b5d7b8f35
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/ShortCircuitBitMask.ql
@@ -0,0 +1,57 @@
+/**
+ * @name Short-circuiting operator applied to flag
+ * @description A short-circuiting logical operator is applied to what looks like a flag.
+ * This may be a typo for a bitwise operator.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/logical-operator-applied-to-flag
+ * @tags reliability
+ * correctness
+ * external/cwe/cwe-480
+ */
+
+import cpp
+
+/**
+ * Gets the constant value of `e`, where `e` is a candidate for a violation.
+ * We look for constant operands of binary logical operations other than 0 and 1.
+ */
+float candidateExpr(Expr e) {
+ exists(BinaryLogicalOperation blo |
+ e = blo.getAnOperand() and
+ e.isConstant() and
+ result = e.getValue().toFloat() and
+ // exclusions
+ not e.isFromTemplateInstantiation(_) and
+ not e instanceof SizeofOperator and
+ not inMacroExpansion(blo) and
+ // exclude values 0 and 1
+ result != 0.0 and
+ result != 1.0
+ )
+}
+
+from Expr e, float v, int l, string msg
+where
+ v = candidateExpr(e) and
+ // before reporting an error, we check that the candidate is either a hex/octal
+ // literal, or its value is a power of two.
+ l = v.log2().floor() and
+ if v = 2.pow(l)
+ then
+ msg =
+ "Operand to short-circuiting operator looks like a flag (" + v + " = 2 ^ " + l +
+ "), may be typo for bitwise operator."
+ else
+ exists(string kind |
+ (
+ e instanceof HexLiteral and kind = "a hexadecimal literal"
+ or
+ e instanceof OctalLiteral and kind = "an octal literal"
+ ) and
+ msg =
+ "Operand to short-circuiting operator is " + kind +
+ ", and therefore likely a flag; a bitwise operator may be intended."
+ )
+select e, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/UsingStrcpyAsBoolean.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/UsingStrcpyAsBoolean.ql
new file mode 100644
index 00000000000..074c82bc03b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/UsingStrcpyAsBoolean.ql
@@ -0,0 +1,87 @@
+/**
+ * @name Use of string copy function in a condition
+ * @description The return value for strcpy, strncpy, or related string copy
+ * functions have no reserved return value to indicate an error.
+ * Using them in a condition is likely to be a logic error.
+ * @kind problem
+ * @problem.severity error
+ * @precision high
+ * @id cpp/string-copy-return-value-as-boolean
+ * @tags external/microsoft/C6324
+ * correctness
+ */
+
+import cpp
+import semmle.code.cpp.models.implementations.Strcpy
+import semmle.code.cpp.dataflow.DataFlow
+
+/**
+ * A string copy function that returns a string rather than an error code, recognized by its
+ * unspecified return type being a pointer type (for example, `strcpy` returns a string, whereas
+ * `strcpy_s` returns an error code).
+ */
+class InterestingStrcpyFunction extends StrcpyFunction {
+ InterestingStrcpyFunction() { this.getType().getUnspecifiedType() instanceof PointerType }
+}
+
+predicate isBoolean(Expr e1) {
+ exists(Type t1 |
+ t1 = e1.getType() and
+ (t1.hasName("bool") or t1.hasName("BOOL") or t1.hasName("_Bool"))
+ )
+}
+
+predicate isStringCopyCastedAsBoolean(FunctionCall func, Expr expr1, string msg) {
+ DataFlow::localExprFlow(func, expr1) and
+ isBoolean(expr1.getConversion*()) and
+ func.getTarget() instanceof InterestingStrcpyFunction and
+ msg = "Return value of " + func.getTarget().getName() + " used as a Boolean."
+}
+
+predicate isStringCopyUsedInLogicalOperationOrCondition(FunctionCall func, Expr expr1, string msg) {
+ func.getTarget() instanceof InterestingStrcpyFunction and
+ (
+ (
+ // it is being used in an equality or logical operation
+ exists(EqualityOperation eop |
+ eop = expr1 and
+ func = eop.getAnOperand()
+ )
+ or
+ exists(UnaryLogicalOperation ule |
+ expr1 = ule and
+ func = ule.getOperand()
+ )
+ or
+ exists(BinaryLogicalOperation ble |
+ expr1 = ble and
+ func = ble.getAnOperand()
+ )
+ ) and
+ msg = "Return value of " + func.getTarget().getName() + " used in a logical operation."
+ or
+ // or the string copy function is used directly as the conditional expression
+ (
+ exists(ConditionalStmt condstmt |
+ func = condstmt.getControllingExpr() and
+ expr1 = func
+ )
+ or
+ exists(ConditionalExpr ce |
+ expr1 = ce and
+ func = ce.getCondition()
+ )
+ ) and
+ msg =
+ "Return value of " + func.getTarget().getName() +
+ " used directly in a conditional expression."
+ )
+}
+
+from FunctionCall func, Expr expr1, string msg
+where
+ isStringCopyCastedAsBoolean(func, expr1, msg) and
+ not isStringCopyUsedInLogicalOperationOrCondition(func, _, _)
+ or
+ isStringCopyUsedInLogicalOperationOrCondition(func, expr1, msg)
+select expr1, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/inconsistentLoopDirection.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/inconsistentLoopDirection.ql
new file mode 100644
index 00000000000..5e3af347821
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Likely Typos/inconsistentLoopDirection.ql
@@ -0,0 +1,141 @@
+/**
+ * @name Inconsistent direction of for loop
+ * @description A for-loop iteration expression goes backward with respect of the initialization statement and condition expression.
+ * @kind problem
+ * @problem.severity error
+ * @precision high
+ * @id cpp/inconsistent-loop-direction
+ * @tags correctness
+ * external/cwe/cwe-835
+ * external/microsoft/6293
+ * @msrc.severity important
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.dataflow.DataFlow
+
+/**
+ * Holds if `forStmt` has update `update`, a crement operation on `v`, and condition `rel`.
+ */
+predicate candidateForStmt(
+ ForStmt forStmt, Variable v, CrementOperation update, RelationalOperation rel
+) {
+ update = forStmt.getUpdate() and
+ update.getAnOperand() = v.getAnAccess() and
+ rel = forStmt.getCondition()
+}
+
+pragma[noinline]
+predicate candidateDecrForStmt(
+ ForStmt forStmt, Variable v, VariableAccess lesserOperand, Expr terminalCondition
+) {
+ exists(DecrementOperation update, RelationalOperation rel |
+ candidateForStmt(forStmt, v, update, rel) and
+ // condition is `v < terminalCondition`
+ terminalCondition = rel.getGreaterOperand() and
+ lesserOperand = rel.getLesserOperand() and
+ v.getAnAccess() = lesserOperand
+ )
+}
+
+predicate illDefinedDecrForStmt(
+ ForStmt forstmt, Variable v, Expr initialCondition, Expr terminalCondition
+) {
+ exists(VariableAccess lesserOperand |
+ // decrementing for loop
+ candidateDecrForStmt(forstmt, v, lesserOperand, terminalCondition) and
+ // `initialCondition` is a value of `v` in the for loop
+ v.getAnAssignedValue() = initialCondition and
+ DataFlow::localFlowStep(DataFlow::exprNode(initialCondition), DataFlow::exprNode(lesserOperand)) and
+ // `initialCondition` < `terminalCondition`
+ (
+ upperBound(initialCondition) < lowerBound(terminalCondition) and
+ (
+ // exclude cases where the loop counter is `unsigned` (where wrapping behaviour can be used deliberately)
+ v.getUnspecifiedType().(IntegralType).isSigned() or
+ initialCondition.getValue().toInt() = 0
+ )
+ or
+ (forstmt.conditionAlwaysFalse() or forstmt.conditionAlwaysTrue())
+ )
+ )
+}
+
+pragma[noinline]
+predicate candidateIncrForStmt(
+ ForStmt forStmt, Variable v, VariableAccess greaterOperand, Expr terminalCondition
+) {
+ exists(IncrementOperation update, RelationalOperation rel |
+ candidateForStmt(forStmt, v, update, rel) and
+ // condition is `v > terminalCondition`
+ terminalCondition = rel.getLesserOperand() and
+ greaterOperand = rel.getGreaterOperand() and
+ v.getAnAccess() = greaterOperand
+ )
+}
+
+predicate illDefinedIncrForStmt(
+ ForStmt forstmt, Variable v, Expr initialCondition, Expr terminalCondition
+) {
+ exists(VariableAccess greaterOperand |
+ // incrementing for loop
+ candidateIncrForStmt(forstmt, v, greaterOperand, terminalCondition) and
+ // `initialCondition` is a value of `v` in the for loop
+ v.getAnAssignedValue() = initialCondition and
+ DataFlow::localFlowStep(DataFlow::exprNode(initialCondition), DataFlow::exprNode(greaterOperand)) and
+ // `terminalCondition` < `initialCondition`
+ (
+ upperBound(terminalCondition) < lowerBound(initialCondition)
+ or
+ (forstmt.conditionAlwaysFalse() or forstmt.conditionAlwaysTrue())
+ )
+ )
+}
+
+predicate illDefinedForStmtWrongDirection(
+ ForStmt forstmt, Variable v, Expr initialCondition, Expr terminalCondition, boolean isIncr
+) {
+ illDefinedDecrForStmt(forstmt, v, initialCondition, terminalCondition) and isIncr = false
+ or
+ illDefinedIncrForStmt(forstmt, v, initialCondition, terminalCondition) and isIncr = true
+}
+
+bindingset[b]
+private string forLoopdirection(boolean b) {
+ if b = true then result = "upward" else result = "downward"
+}
+
+bindingset[b]
+private string forLoopTerminalConditionRelationship(boolean b) {
+ if b = true then result = "lower" else result = "higher"
+}
+
+/** Holds if `for` satisfies `illDefinedForStmtWrongDirection`, with `message` describing why. */
+predicate illDefinedForStmt(ForStmt for, string message) {
+ exists(
+ boolean isIncr, Variable v, Expr initialCondition, Expr terminalCondition, string outcome
+ |
+ illDefinedForStmtWrongDirection(for, v, initialCondition, terminalCondition, isIncr) and
+ // Only the tail of the message depends on what is known about the loop condition.
+ (
+ for.conditionAlwaysFalse() and
+ outcome = "always false."
+ or
+ not for.conditionAlwaysFalse() and
+ for.conditionAlwaysTrue() and outcome = "always true."
+ or
+ not for.conditionAlwaysFalse() and
+ not for.conditionAlwaysTrue() and
+ outcome = forLoopTerminalConditionRelationship(isIncr) + " (" + terminalCondition + ")."
+ ) and
+ message =
+ "Ill-defined for-loop: a loop using variable \"" + v + "\" counts " +
+ forLoopdirection(isIncr) + " from a value (" + initialCondition +
+ "), but the terminal condition is " + outcome
+ )
+}
+
+from ForStmt forstmt, string message
+where illDefinedForStmt(forstmt, message)
+select forstmt, message
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql
new file mode 100644
index 00000000000..61d7a266d86
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql
@@ -0,0 +1,332 @@
+/**
+ * @name Call to alloca in a loop
+ * @description Using alloca in a loop can lead to a stack overflow
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision high
+ * @id cpp/alloca-in-loop
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-770
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+import semmle.code.cpp.dataflow.DataFlow
+
+/** Gets a loop that contains `e`. */
+Loop getAnEnclosingLoopOfExpr(Expr e) { result = getAnEnclosingLoopOfStmt(e.getEnclosingStmt()) }
+
+/** Gets a loop that contains `s`. */
+Loop getAnEnclosingLoopOfStmt(Stmt s) {
+ result = s.getParent*() and
+ not s = result.(ForStmt).getInitialization()
+ or
+ result = getAnEnclosingLoopOfExpr(s.getParent*())
+}
+
+/** A call to `alloca` in one of its forms. */
+class AllocaCall extends FunctionCall {
+ AllocaCall() {
+ this.getTarget().getName() = "__builtin_alloca"
+ or
+ (this.getTarget().getName() = "_alloca" or this.getTarget().getName() = "_malloca") and
+ this.getTarget().getADeclarationEntry().getFile().getBaseName() = "malloc.h"
+ }
+}
+
+/**
+ * A loop that contains an `alloca` call.
+ */
+class LoopWithAlloca extends Stmt {
+ LoopWithAlloca() { this = getAnEnclosingLoopOfExpr(any(AllocaCall ac)) }
+
+ /** Get an `alloca` call inside this loop. It may be in a nested loop. */
+ AllocaCall getAnAllocaCall() { this = getAnEnclosingLoopOfExpr(result) }
+
+ /**
+ * Holds if the condition of this loop will only be true if `e` is `truth`.
+ * For example, if the loop condition is `a == 0 && b`, then
+ * `conditionRequires(a, false)` and `conditionRequires(b, true)`.
+ */
+ private predicate conditionRequires(Expr e, boolean truth) {
+ e = this.(Loop).getCondition() and
+ truth = true
+ or
+ // `e == 0`
+ exists(EQExpr eq |
+ conditionRequires(eq, truth.booleanNot()) and
+ eq.getAnOperand().getValue().toInt() = 0 and
+ e = eq.getAnOperand() and
+ not exists(e.getValue())
+ )
+ or
+ // `e != 0`
+ exists(NEExpr eq |
+ conditionRequires(eq, truth) and
+ eq.getAnOperand().getValue().toInt() = 0 and
+ e = eq.getAnOperand() and
+ not exists(e.getValue())
+ )
+ or
+ // `(bool)e == true`
+ exists(EQExpr eq |
+ conditionRequires(eq, truth) and
+ eq.getAnOperand().getValue().toInt() = 1 and
+ e = eq.getAnOperand() and
+ e.getUnspecifiedType() instanceof BoolType and
+ not exists(e.getValue())
+ )
+ or
+ // `(bool)e != true`
+ exists(NEExpr eq |
+ conditionRequires(eq, truth.booleanNot()) and
+ eq.getAnOperand().getValue().toInt() = 1 and
+ e = eq.getAnOperand() and
+ e.getUnspecifiedType() instanceof BoolType and
+ not exists(e.getValue())
+ )
+ or
+ exists(NotExpr notExpr |
+ conditionRequires(notExpr, truth.booleanNot()) and
+ e = notExpr.getOperand()
+ )
+ or
+ // If the condition of `this` requires `andExpr` to be true, then it
+ // requires both of its operands to be true as well.
+ exists(LogicalAndExpr andExpr |
+ truth = true and
+ conditionRequires(andExpr, truth) and
+ e = andExpr.getAnOperand()
+ )
+ or
+ // Dually, if the condition of `this` requires `orExpr` to be false, then
+ // it requires both of its operands to be false as well.
+ exists(LogicalOrExpr orExpr |
+ truth = false and
+ conditionRequires(orExpr, truth) and
+ e = orExpr.getAnOperand()
+ )
+ }
+
+ /**
+ * Holds if the condition of this loop will only be true if `e` relates to
+ * `value` as `dir`. We don't keep track of whether the inequality is strict
+ * since this predicate is only used to heuristically determine whether
+ * there's a reasonably tight upper bound on the number of loop iterations.
+ *
+ * For example, if the loop condition is `a < 2 && b`, then
+ * `conditionRequiresInequality(a, 2, Lesser())`.
+ */
+ private predicate conditionRequiresInequality(Expr e, int value, RelationDirection dir) {
+ exists(RelationalOperation rel, Expr constant, boolean branch |
+ this.conditionRequires(rel, branch) and
+ relOpWithSwapAndNegate(rel, e.getFullyConverted(), constant, dir, _, branch) and
+ value = constant.getValue().toInt() and
+ not exists(e.getValue())
+ )
+ or
+ // Because we're not worried about off-by-one, it's not important whether
+ // the `CrementOperation` is a {pre,post}-{inc,dec}rement.
+ exists(CrementOperation inc |
+ this.conditionRequiresInequality(inc, value, dir) and
+ e = inc.getOperand()
+ )
+ }
+
+ /**
+ * Gets a variable with an access that is restricted by `conditionRequires` or
+ * `conditionRequiresInequality` for this loop.
+ */
+ private Variable getAControllingVariable() {
+ this.conditionRequires(result.getAnAccess(), _)
+ or
+ this.conditionRequiresInequality(result.getAnAccess(), _, _)
+ }
+
+ /**
+ * Gets a `VariableAccess` that changes `var` inside the loop body, where
+ * `var` is a controlling variable of this loop.
+ */
+ private VariableAccess getAControllingVariableUpdate(Variable var) {
+ var = result.getTarget() and
+ var = this.getAControllingVariable() and
+ this = getAnEnclosingLoopOfExpr(result) and
+ result.isUsedAsLValue()
+ }
+
+ /**
+ * Holds if there is a control-flow path from the condition of this loop to
+ * `node` that doesn't update `var`, where `var` is a controlling variable of
+ * this loop. The path has to stay within the loop. The path will start at
+ * the successor of the loop condition. If the path reaches all the way back
+ * to the loop condition, then it's possible to go around the loop without
+ * updating `var`.
+ */
+ private predicate conditionReachesWithoutUpdate(Variable var, ControlFlowNode node) {
+ // Don't leave the loop. It might cause us to leave the scope of `var`
+ (node instanceof Stmt implies this = getAnEnclosingLoopOfStmt(node)) and
+ (
+ node = this.(Loop).getCondition().getASuccessor() and
+ var = this.getAControllingVariable()
+ or
+ this.conditionReachesWithoutUpdate(var, node.getAPredecessor()) and
+ not node = this.getAControllingVariableUpdate(var)
+ )
+ }
+
+ /**
+ * Holds if all paths around the loop will update `var`, where `var` is a
+ * controlling variable of this loop.
+ */
+ private predicate hasMandatoryUpdate(Variable var) {
+ not this.conditionReachesWithoutUpdate(var, this.(Loop).getCondition())
+ }
+
+ /**
+ * Gets a definition that may be the most recent definition of the
+ * controlling variable `var` before this loop.
+ */
+ private DataFlow::Node getAPrecedingDef(Variable var) {
+ exists(VariableAccess va |
+ va = var.getAnAccess() and
+ this.conditionRequiresInequality(va, _, _) and
+ DataFlow::localFlow(result, DataFlow::exprNode(va)) and
+ // A source is outside the loop if it's not inside the loop
+ not exists(Expr e |
+ e = result.asExpr()
+ or
+ e = result.asDefiningArgument()
+ |
+ this = getAnEnclosingLoopOfExpr(e)
+ )
+ )
+ }
+
+ /**
+ * Gets a number that may be the most recent value assigned to the
+ * controlling variable `var` before this loop.
+ */
+ private int getAControllingVarInitialValue(Variable var, DataFlow::Node source) {
+ source = this.getAPrecedingDef(var) and
+ result = source.asExpr().getValue().toInt()
+ }
+
+ /**
+ * Holds if the most recent definition of `var` before this loop may assign a
+ * value that is not a compile-time constant.
+ */
+ private predicate controllingVarHasUnknownInitialValue(Variable var) {
+ // A definition without a constant value was reached
+ exists(DataFlow::Node source |
+ source = this.getAPrecedingDef(var) and
+ not exists(this.getAControllingVarInitialValue(var, source))
+ )
+ }
+
+ /**
+ * Gets the least possible value that the controlling variable `var` may have before this
+ * loop, if one can be deduced: the smallest constant initial value, or 0 for an unsigned `var`.
+ */
+ private int getMinPrecedingDef(Variable var) {
+ not this.controllingVarHasUnknownInitialValue(var) and
+ result = min(this.getAControllingVarInitialValue(var, _))
+ or
+ this.controllingVarHasUnknownInitialValue(var) and
+ var.getUnspecifiedType().(IntegralType).isUnsigned() and // sees through typedefs/specifiers
+ result = 0
+ }
+
+ /**
+ * Gets the greatest possible value that the controlling variable `var` may
+ * have before this loop, if such a value can be deduced.
+ */
+ private int getMaxPrecedingDef(Variable var) {
+ not this.controllingVarHasUnknownInitialValue(var) and
+ result = max(this.getAControllingVarInitialValue(var, _))
+ }
+
+ /**
+ * Holds if this loop has a "small" number of iterations. The meaning of
+ * "small" should be such that the loop wouldn't be unreasonably large if
+ * manually unrolled.
+ */
+ predicate isTightlyBounded() {
+ exists(Variable var | this.hasMandatoryUpdate(var) |
+ this.conditionRequires(var.getAnAccess(), false) and
+ forall(VariableAccess update | update = this.getAControllingVariableUpdate(var) |
+ exists(AssignExpr assign |
+ assign.getLValue() = update and
+ assign.getRValue().getValue().toInt() != 0
+ )
+ )
+ or
+ this.conditionRequires(var.getAnAccess(), true) and
+ forall(VariableAccess update | update = this.getAControllingVariableUpdate(var) |
+ exists(AssignExpr assign |
+ assign.getLValue() = update and
+ assign.getRValue().getValue().toInt() = 0
+ )
+ )
+ or
+ exists(int bound |
+ this.conditionRequiresInequality(var.getAnAccess(), bound, Lesser()) and
+ bound - this.getMinPrecedingDef(var) <= 16 and
+ forall(VariableAccess update | update = this.getAControllingVariableUpdate(var) |
+ // var++;
+ // ++var;
+ exists(IncrementOperation inc | inc.getOperand() = update)
+ or
+ // var += positive_number;
+ exists(AssignAddExpr aa |
+ aa.getLValue() = update and
+ aa.getRValue().getValue().toInt() > 0
+ )
+ or
+ // var = var + positive_number;
+ // var = positive_number + var;
+ exists(AssignExpr assign, AddExpr add |
+ assign.getLValue() = update and
+ assign.getRValue() = add and
+ add.getAnOperand() = var.getAnAccess() and
+ add.getAnOperand().getValue().toInt() > 0
+ )
+ )
+ )
+ or
+ exists(int bound |
+ this.conditionRequiresInequality(var.getAnAccess(), bound, Greater()) and
+ this.getMaxPrecedingDef(var) - bound <= 16 and
+ forall(VariableAccess update | update = this.getAControllingVariableUpdate(var) |
+ // var--;
+ // --var;
+ exists(DecrementOperation inc | inc.getOperand() = update)
+ or
+ // var -= positive_number;
+ exists(AssignSubExpr aa |
+ aa.getLValue() = update and
+ aa.getRValue().getValue().toInt() > 0
+ )
+ or
+ // var = var - positive_number;
+ exists(AssignExpr assign, SubExpr add |
+ assign.getLValue() = update and
+ assign.getRValue() = add and
+ add.getLeftOperand() = var.getAnAccess() and
+ add.getRightOperand().getValue().toInt() > 0
+ )
+ )
+ )
+ )
+ }
+}
+
+from LoopWithAlloca l, AllocaCall alloc
+where
+ not l.(DoStmt).getCondition().getValue() = "0" and
+ not l.isTightlyBounded() and
+ alloc = l.getAnAllocaCall() and
+ alloc.getASuccessor*() = l.(Loop).getStmt()
+select alloc, "Stack allocation is inside a $@ loop.", l, l.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Buffer.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Buffer.qll
new file mode 100644
index 00000000000..bb4fafb92f7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Buffer.qll
@@ -0,0 +1,14 @@
+import cpp
+
+// an expression of the form sizeof(e) or strlen(e)
+class BufferSizeExpr extends Expr {
+ BufferSizeExpr() {
+ this instanceof SizeofExprOperator or
+ this instanceof StrlenCall
+ }
+
+ Expr getArg() {
+ result = this.(SizeofExprOperator).getExprOperand() or
+ result = this.(StrlenCall).getStringExpr()
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ImproperNullTermination.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ImproperNullTermination.ql
new file mode 100644
index 00000000000..5c92b0a3db7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ImproperNullTermination.ql
@@ -0,0 +1,64 @@
+/**
+ * @name Potential improper null termination
+ * @description Using a string that may not be null terminated as an argument
+ * to a string function can result in buffer overflow or buffer over-read.
+ * @kind problem
+ * @id cpp/improper-null-termination
+ * @problem.severity warning
+ * @precision medium
+ * @security-severity 7.8
+ * @tags security
+ * external/cwe/cwe-170
+ * external/cwe/cwe-665
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.StackVariableReachability
+import semmle.code.cpp.commons.NullTermination
+
+/**
+ * A declaration of a local variable that leaves the variable uninitialized.
+ */
+DeclStmt declWithNoInit(LocalVariable v) {
+ result.getADeclaration() = v and
+ not exists(v.getInitializer())
+}
+
+class ImproperNullTerminationReachability extends StackVariableReachabilityWithReassignment {
+ ImproperNullTerminationReachability() { this = "ImproperNullTerminationReachability" }
+
+ override predicate isSourceActual(ControlFlowNode node, StackVariable v) {
+ node = declWithNoInit(v)
+ or
+ exists(Call c, int bufferArg, int sizeArg |
+ c = node and
+ (
+ c.getTarget().hasName("readlink") and bufferArg = 1 and sizeArg = 2
+ or
+ c.getTarget().hasName("readlinkat") and bufferArg = 2 and sizeArg = 3
+ ) and
+ c.getArgument(bufferArg).(VariableAccess).getTarget() = v and
+ (
+ // buffer size parameter likely matches the full buffer size
+ c.getArgument(sizeArg) instanceof SizeofOperator or
+ c.getArgument(sizeArg).getValue().toInt() = v.getType().getSize()
+ )
+ )
+ }
+
+ override predicate isSinkActual(ControlFlowNode node, StackVariable v) {
+ node.(VariableAccess).getTarget() = v and
+ variableMustBeNullTerminated(node)
+ }
+
+ override predicate isBarrier(ControlFlowNode node, StackVariable v) {
+ exprDefinition(v, node, _) or
+ mayAddNullTerminator(node, v.getAnAccess()) or
+ node.(AddressOfExpr).getOperand() = v.getAnAccess() or // address taken
+ isSinkActual(node, v) // only report first use
+ }
+}
+
+from ImproperNullTerminationReachability r, LocalVariable v, VariableAccess va
+where r.reaches(_, v, va)
+select va, "Variable $@ may not be null terminated.", v, v.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/NtohlArrayNoBound.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/NtohlArrayNoBound.ql
new file mode 100644
index 00000000000..d6d0a55d148
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/NtohlArrayNoBound.ql
@@ -0,0 +1,17 @@
+/**
+ * @id cpp/network-to-host-function-as-array-bound
+ * @name Untrusted network-to-host usage
+ * @description Using the result of a network-to-host byte order function, such as ntohl, as an
+ * array bound or length value without checking it may result in buffer overflows or
+ * other vulnerabilities.
+ * @kind problem
+ * @problem.severity error
+ */
+
+import cpp
+import NtohlArrayNoBound
+import semmle.code.cpp.dataflow.DataFlow
+
+from NetworkToBufferSizeConfiguration bufConfig, DataFlow::Node source, DataFlow::Node sink
+where bufConfig.hasFlow(source, sink)
+select sink, "Unchecked use of data from network function $@", source, source.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/NtohlArrayNoBound.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/NtohlArrayNoBound.qll
new file mode 100644
index 00000000000..9606fb968ec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/NtohlArrayNoBound.qll
@@ -0,0 +1,154 @@
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.controlflow.Guards
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/**
+ * An access (read or write) to a buffer, provided as a pair of
+ * a pointer to the buffer and the length of data to be read or written.
+ * Extend this class to support different kinds of buffer access.
+ */
+abstract class BufferAccess extends Locatable {
+ /** Gets the pointer to the buffer being accessed. */
+ abstract Expr getPointer();
+
+ /** Gets the length of the data being read or written by this buffer access. */
+ abstract Expr getAccessedLength();
+}
+
+/**
+ * A buffer access through an array expression.
+ */
+class ArrayBufferAccess extends BufferAccess, ArrayExpr {
+ override Expr getPointer() { result = this.getArrayBase() }
+
+ override Expr getAccessedLength() { result = this.getArrayOffset() }
+}
+
+/**
+ * A buffer access through an overloaded array expression.
+ */
+class OverloadedArrayBufferAccess extends BufferAccess, OverloadedArrayExpr {
+ override Expr getPointer() { result = this.getQualifier() }
+
+ override Expr getAccessedLength() { result = this.getAnArgument() }
+}
+
+/**
+ * A buffer access through pointer arithmetic.
+ */
+class PointerArithmeticAccess extends BufferAccess, Expr {
+ PointerArithmeticOperation p;
+
+ PointerArithmeticAccess() {
+ this = p and
+ p.getAnOperand().getType().getUnspecifiedType() instanceof IntegralType and
+ not p.getParent() instanceof ComparisonOperation
+ }
+
+ override Expr getPointer() {
+ result = p.getAnOperand() and
+ result.getType().getUnspecifiedType() instanceof PointerType
+ }
+
+ override Expr getAccessedLength() {
+ result = p.getAnOperand() and
+ result.getType().getUnspecifiedType() instanceof IntegralType
+ }
+}
+
+/**
+ * A pair of buffer accesses through a call to memcpy.
+ */
+class MemCpy extends BufferAccess, FunctionCall {
+  MemCpy() { this.getTarget().hasName("memcpy") }
+
+  // memcpy(dest, src, count): the destination and the source are both
+  // reported as pointers accessed by this call.
+  override Expr getPointer() { result = this.getArgument(0) or result = this.getArgument(1) }
+
+  // The length is the third argument (index 2).
+  override Expr getAccessedLength() { result = this.getArgument(2) }
+}
+
+/**
+ * A pair of buffer accesses through a call to strncpy.
+ */
+class StrncpySizeExpr extends BufferAccess, FunctionCall {
+  StrncpySizeExpr() { this.getTarget().hasName("strncpy") }
+
+  override Expr getPointer() { result = this.getArgument(0) or result = this.getArgument(1) }
+
+  override Expr getAccessedLength() { result = this.getArgument(2) }
+}
+
+/** A buffer access through a call to recv: pointer at index 1, length at index 2. */
+class RecvSizeExpr extends BufferAccess, FunctionCall {
+  RecvSizeExpr() { this.getTarget().hasName("recv") }
+
+  override Expr getPointer() { result = this.getArgument(1) }
+  override Expr getAccessedLength() { result = this.getArgument(2) }
+}
+
+/** A buffer access through a call to send: pointer at index 1, length at index 2. */
+class SendSizeExpr extends BufferAccess, FunctionCall {
+  SendSizeExpr() { this.getTarget().hasName("send") }
+
+  override Expr getPointer() { result = this.getArgument(1) }
+  override Expr getAccessedLength() { result = this.getArgument(2) }
+}
+
+/** A buffer access through a call to snprintf: pointer at index 0, length at index 1. */
+class SnprintfSizeExpr extends BufferAccess, FunctionCall {
+  SnprintfSizeExpr() { this.getTarget().hasName("snprintf") }
+
+  override Expr getPointer() { result = this.getArgument(0) }
+  override Expr getAccessedLength() { result = this.getArgument(1) }
+}
+
+/** A pair of buffer accesses through a call to memcmp. */
+class MemcmpSizeExpr extends BufferAccess, FunctionCall {
+  // C identifiers are case-sensitive and the library function is spelled `memcmp`,
+  // so matching "Memcmp" would never select calls to it.
+  MemcmpSizeExpr() { this.getTarget().hasName("memcmp") }
+
+  override Expr getPointer() { result = this.getArgument(0) or result = this.getArgument(1) }
+
+  override Expr getAccessedLength() { result = this.getArgument(2) }
+}
+
+/** A call to malloc, modeled as a buffer access that has a length but no pointer. */
+class MallocSizeExpr extends BufferAccess, FunctionCall {
+  MallocSizeExpr() { this.getTarget().hasName("malloc") }
+
+  override Expr getPointer() { none() }
+  override Expr getAccessedLength() { result = this.getArgument(0) }
+}
+
+/** A call to a network-to-host byte order conversion function (the ntoh* family). */
+class NetworkFunctionCall extends FunctionCall {
+  NetworkFunctionCall() {
+    exists(string name | this.getTarget().hasName(name) |
+      name = "ntohd" or name = "ntohf" or name = "ntohl" or
+      name = "ntohll" or name = "ntohs"
+    )
+  }
+}
+
+class NetworkToBufferSizeConfiguration extends DataFlow::Configuration {
+  NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" }
+
+  override predicate isSource(DataFlow::Node node) { node.asExpr() instanceof NetworkFunctionCall }
+
+  override predicate isSink(DataFlow::Node node) {
+    exists(BufferAccess access | access.getAccessedLength() = node.asExpr())
+  }
+
+  override predicate isBarrier(DataFlow::Node node) {
+    // Barrier: a guard mentioning an equal-valued expression controls this node's block.
+    exists(GuardCondition guard, Expr e | e = node.asExpr() |
+      guard.getAChild*() = globalValueNumber(e).getAnExpr() and
+      guard.controls(e.getBasicBlock(), _)
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/More64BitWaste.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/More64BitWaste.ql
new file mode 100644
index 00000000000..2948ffb00d1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/More64BitWaste.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Padding increased in 64-bit migration
+ * @description Highlights structs for which the amount of padding would increase when migrating from a 32-bit architecture to 64-bit.
+ * @kind problem
+ * @id cpp/more-64-bit-waste
+ * @problem.severity warning
+ * @tags maintainability
+ * portability
+ */
+
+import semmle.code.cpp.padding.Padding
+
+from PaddedType t, ILP32 ilp32, LP64 lp64, int w32, int w64
+where
+ w32 = t.wastedSpace(ilp32) - t.trailingPadding(ilp32) and
+ w64 = t.wastedSpace(lp64) - t.trailingPadding(lp64) and
+ w64 > w32 and
+ t.isPrecise()
+select t,
+ t.getName() + " includes " + w32 + " bits of padding on ILP32, but " + w64 + " bits on LP64."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/NonPortablePrintf.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/NonPortablePrintf.ql
new file mode 100644
index 00000000000..8eb3ba12235
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/NonPortablePrintf.ql
@@ -0,0 +1,95 @@
+/**
+ * @name Non-portable call to printf
+ * @description When using a format specifier like %d ("int"), on a 32-bit architecture it's acceptable to pass a long since it's of the same size;
+ * when migrating to a 64-bit architecture this becomes problematic. Similar problems exist when printing pointers using 32-bit-wide format specifiers.
+ * @kind problem
+ * @id cpp/non-portable-printf
+ * @problem.severity warning
+ * @tags maintainability
+ * portability
+ */
+
+import cpp
+import semmle.code.cpp.padding.Padding
+
+/**
+ * Used to avoid reporting conflicts between a char
+ * pointer type with specified signedness and an unspecified
+ * char pointer (whose signedness is compiler-dependent).
+ */
+class SignedOrUnsignedCharPointerType extends CharPointerType {
+ SignedOrUnsignedCharPointerType() {
+ this.getBaseType().(CharType).isUnsigned() or
+ this.getBaseType().(CharType).isSigned()
+ }
+}
+
+pragma[noopt]
+private predicate formattingFunctionCallExpectedType(
+ FormattingFunctionCall ffc, int pos, Type expected
+) {
+ exists(FormattingFunction f, int i, FormatLiteral fl |
+ ffc.getTarget() = f and
+ ffc instanceof FormattingFunctionCall and
+ f.getFormatParameterIndex() = i and
+ ffc.getArgument(i) = fl and
+ fl.getConversionType(pos) = expected
+ )
+}
+
+pragma[noopt]
+predicate formatArgType(FormattingFunctionCall ffc, int pos, Type expected, Expr arg, Type actual) {
+ formattingFunctionCallExpectedType(ffc, pos, expected) and
+ ffc.getConversionArgument(pos) = arg and
+ exists(Type t | t = arg.getActualType() and t.getUnspecifiedType() = actual)
+}
+
+pragma[noopt]
+predicate formatOtherArgType(
+ FormattingFunctionCall ffc, int pos, Type expected, Expr arg, Type actual
+) {
+ (arg = ffc.getMinFieldWidthArgument(pos) or arg = ffc.getPrecisionArgument(pos)) and
+ actual = arg.getActualType() and
+ exists(IntType it | it instanceof IntType and it.isImplicitlySigned() and expected = it)
+}
+
+predicate trivialConversion(Type expected, Type actual) {
+ formatArgType(_, _, expected, _, actual) and
+ (
+ expected instanceof VoidPointerType and actual instanceof PointerType
+ or
+ expected instanceof IntegralType and actual instanceof Enum
+ or
+ expected instanceof CharPointerType and actual instanceof SignedOrUnsignedCharPointerType
+ or
+ expected instanceof SignedOrUnsignedCharPointerType and actual instanceof CharPointerType
+ or
+ expected instanceof CharType and actual instanceof IntType
+ or
+ expected instanceof UnsignedCharType and actual instanceof IntType
+ or
+ expected.(IntegralType).getUnsigned() = actual.(IntegralType).getUnsigned()
+ or
+ expected = actual
+ )
+}
+
+from
+ FormattingFunctionCall ffc, int n, Expr arg, Type expected, Type actual, ILP32 ilp32, LP64 lp64,
+ int size32, int size64
+where
+ (
+ formatArgType(ffc, n, expected, arg, actual) and
+ not trivialConversion(expected, actual)
+ or
+ formatOtherArgType(ffc, n, expected, arg, actual) and
+ not actual instanceof IntType
+ ) and
+ not arg.isAffectedByMacro() and
+ size32 = ilp32.paddedSize(actual) and
+ size64 = lp64.paddedSize(actual) and
+ size64 != size32 and
+ not actual instanceof ErroneousType
+select arg,
+ "This argument should be of type '" + expected.getName() + "' but is of type '" + actual.getName()
+ + "' (which changes size from " + size32 + " to " + size64 + " on 64-bit systems)."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/Suboptimal64BitType.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/Suboptimal64BitType.ql
new file mode 100644
index 00000000000..f2cf4367d86
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/Padding/Suboptimal64BitType.ql
@@ -0,0 +1,26 @@
+/**
+ * @name Suboptimal type definition
+ * @description Highlights structs whose members are not laid out optimally, in the sense
+ * that by reordering them one could reduce the amount of internal padding on a 64-bit architecture.
+ * @kind problem
+ * @id cpp/suboptimal-64-bit-type
+ * @problem.severity recommendation
+ * @tags efficiency
+ */
+
+import semmle.code.cpp.padding.Padding
+
+from
+ PaddedType t, Architecture arch, WideCharType wc, int holes, int size, int percentage, int optimum
+where
+ arch.pointerSize() = 64 and // Select 64-bit architecture
+ arch.wideCharSize() = (wc.getSize() * 8) and // Select Windows (sizeof(wchar_t) == 2) or non-Windows (sizeof(wchar_t) == 4)
+ t.isPrecise() and
+ optimum = t.optimalSize(arch) and
+ size = arch.paddedSize(t) and
+ holes = size - optimum and
+ holes > 0 and
+ percentage = (holes * 100.0 / size.(float)).ceil()
+select t,
+ t.getName() + " could be optimized to save " + holes + "/" + t.wastedSpace(arch) +
+ " bits of padding (or " + percentage + "% of its size)."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/PointerOverflow.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/PointerOverflow.ql
new file mode 100644
index 00000000000..e11d114d1fb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/PointerOverflow.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Pointer overflow check
+ * @description Adding a value to a pointer to check if it overflows relies
+ * on undefined behavior and may lead to memory corruption.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 2.1
+ * @precision high
+ * @id cpp/pointer-overflow-check
+ * @tags reliability
+ * security
+ * external/cwe/cwe-758
+ */
+
+import cpp
+private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+private import semmle.code.cpp.commons.Exclusions
+
+from RelationalOperation ro, PointerAddExpr add, Expr expr1, Expr expr2
+where
+ ro.getAnOperand() = add and
+ add.getAnOperand() = expr1 and
+ ro.getAnOperand() = expr2 and
+ globalValueNumber(expr1) = globalValueNumber(expr2) and
+ // Exclude macros but not their arguments
+ not isFromMacroDefinition(ro) and
+ // There must be a compilation of this file without a flag that makes pointer
+ // overflow well defined.
+ exists(Compilation c | c.getAFileCompiled() = ro.getFile() |
+ not c.getAnArgument() = "-fwrapv-pointer" and
+ not c.getAnArgument() = "-fno-strict-overflow"
+ )
+select ro, "Range check relying on pointer overflow."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql
new file mode 100644
index 00000000000..23cf7e8364b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Potential buffer overflow
+ * @description Using a library function that does not check buffer bounds
+ * requires the surrounding program to be very carefully written
+ * to avoid buffer overflows.
+ * @kind problem
+ * @id cpp/potential-buffer-overflow
+ * @problem.severity warning
+ * @security-severity 10.0
+ * @tags reliability
+ * security
+ * external/cwe/cwe-676
+ * @deprecated This query is deprecated, use
+ * Potentially overrunning write (`cpp/overrunning-write`) and
+ * Potentially overrunning write with float to string conversion
+ * (`cpp/overrunning-write-with-float`) instead.
+ */
+
+import cpp
+import semmle.code.cpp.commons.Buffer
+
+class SprintfCall extends FunctionCall {
+ SprintfCall() { this.getTarget().hasName("sprintf") or this.getTarget().hasName("vsprintf") }
+
+ int getBufferSize() { result = getBufferSize(this.getArgument(0), _) }
+
+ int getMaxConvertedLength() {
+ result = this.getArgument(1).(FormatLiteral).getMaxConvertedLength()
+ }
+
+ predicate isDangerous() { this.getMaxConvertedLength() > this.getBufferSize() }
+
+ string getDescription() {
+ result =
+ "This conversion may yield a string of length " + this.getMaxConvertedLength().toString() +
+ ", which exceeds the allocated buffer size of " + this.getBufferSize().toString()
+ }
+}
+
+from SprintfCall c
+where c.isDangerous()
+select c, c.getDescription()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ReturnCstrOfLocalStdString.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ReturnCstrOfLocalStdString.ql
new file mode 100644
index 00000000000..3888e9a4ef5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ReturnCstrOfLocalStdString.ql
@@ -0,0 +1,102 @@
+/**
+ * @name Return c_str of local std::string
+ * @description Returning the c_str of a locally allocated std::string
+ * could cause the program to crash or behave non-deterministically
+ * because the memory is deallocated when the std::string goes out of
+ * scope.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/return-c-str-of-std-string
+ * @tags reliability
+ * correctness
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.SSA
+import semmle.code.cpp.dataflow.DataFlow
+
+/** The `std::string` class. */
+class StdString extends Class {
+ StdString() {
+ // `std::string` is usually a typedef and the actual class
+ // is called something like `string std::__cxx11::basic_string`.
+ exists(Type stdstring, Namespace std |
+ stdstring.getName() = "string" and
+ this = stdstring.getUnspecifiedType() and
+ // Make sure that the class is in the `std` namespace.
+ std = this.getNamespace().getParentNamespace*() and
+ std.getName() = "std" and
+ std.getParentNamespace() instanceof GlobalNamespace
+ )
+ }
+}
+
+/**
+ * Holds if `e` is a direct or indirect reference to a locally
+ * allocated `std::string`.
+ */
+predicate refToStdString(Expr e, ConstructorCall source) {
+ exists(StdString stdstring |
+ stdstring.getAMemberFunction() = source.getTarget() and
+ not exists(LocalVariable v |
+ source = v.getInitializer().getExpr() and
+ v.isStatic()
+ ) and
+ e = source
+ )
+ or
+ // Indirect use.
+ exists(Expr prev |
+ refToStdString(prev, source) and
+ DataFlow::localFlowStep(DataFlow::exprNode(prev), DataFlow::exprNode(e))
+ )
+}
+
+/**
+ * Holds if the function takes a C-style string as one of its arguments and
+ * includes the pointer in its result without making a copy of the
+ * string. An example of this is the method `JNIEnv::NewStringUTF()` (from
+ * Java's JNI), which returns a `jstring` containing a pointer to the
+ * C-style string. If the C-style string is deallocated then the `jstring`
+ * will also become invalid.
+ */
+predicate flowFunction(Function fcn, int argIndex) {
+ fcn.hasQualifiedName("", "_JNIEnv", "NewStringUTF") and argIndex = 0
+ or
+ fcn.hasQualifiedName("art", "JNI", "NewStringUTF") and argIndex = 1
+ or
+ fcn.hasQualifiedName("art", "CheckJNI", "NewStringUTF") and argIndex = 1
+ // Add other functions that behave like NewStringUTF here.
+}
+
+/**
+ * Holds if `e` is a direct or indirect reference to the result of calling
+ * `c_str` on a locally allocated `std::string`.
+ */
+predicate refToCStr(Expr e, ConstructorCall source) {
+ exists(MemberFunction f, FunctionCall call |
+ f.getName() = "c_str" and
+ call = e and
+ call.getTarget() = f and
+ refToStdString(call.getQualifier(), source)
+ )
+ or
+ // Indirect use.
+ exists(Expr prev |
+ refToCStr(prev, source) and
+ DataFlow::localFlowStep(DataFlow::exprNode(prev), DataFlow::exprNode(e))
+ )
+ or
+ // Some functions, such as `JNIEnv::NewStringUTF()` (from Java's JNI),
+ // return a structure that embeds a reference to the C-style string.
+ exists(Function f, int argIndex |
+ flowFunction(f, argIndex) and
+ f = e.(Call).getTarget() and
+ refToCStr(e.(Call).getArgument(argIndex), source)
+ )
+}
+
+from ReturnStmt r, ConstructorCall source
+where refToCStr(r.getExpr(), source)
+select r, "Return value may contain a dangling pointer to $@.", source, "this local std::string"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ReturnStackAllocatedMemory.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ReturnStackAllocatedMemory.ql
new file mode 100644
index 00000000000..f5dda53d484
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/ReturnStackAllocatedMemory.ql
@@ -0,0 +1,70 @@
+/**
+ * @name Returning stack-allocated memory
+ * @description A function returns a pointer to a stack-allocated region of
+ * memory. This memory is deallocated at the end of the function,
+ * which may lead the caller to dereference a dangling pointer.
+ * @kind problem
+ * @id cpp/return-stack-allocated-memory
+ * @problem.severity warning
+ * @precision high
+ * @tags reliability
+ * external/cwe/cwe-825
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.EscapesTree
+import semmle.code.cpp.models.interfaces.PointerWrapper
+import semmle.code.cpp.dataflow.DataFlow
+
+/**
+ * Holds if `n1` may flow to `n2`, ignoring flow through fields because these
+ * are currently modeled as an overapproximation that assumes all objects may
+ * alias.
+ */
+predicate conservativeDataFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
+ DataFlow::localFlowStep(n1, n2) and
+ not n2.asExpr() instanceof FieldAccess and
+ not hasNontrivialConversion(n2.asExpr())
+}
+
+/**
+ * Holds if `e` has a conversion that changes it from lvalue to pointer or
+ * back. As the data-flow library does not support conversions, we cannot track
+ * data flow through such expressions.
+ */
+predicate hasNontrivialConversion(Expr e) {
+ e instanceof Conversion and
+ not (
+ e instanceof Cast
+ or
+ e instanceof ParenthesisExpr
+ )
+ or
+ // A smart pointer can be stack-allocated while the data it points to is heap-allocated,
+ // so this predicate also holds for calls to a pointer wrapper's unwrapper function.
+ e = any(PointerWrapper wrapper).getAnUnwrapperFunction().getACallToThisFunction()
+ or
+ hasNontrivialConversion(e.getConversion())
+}
+
+from StackVariable var, VariableAccess va, ReturnStmt r
+where
+ not var.getUnspecifiedType() instanceof ReferenceType and
+ not r.isFromUninstantiatedTemplate(_) and
+ va = var.getAnAccess() and
+ (
+ // To check if the address escapes directly from `e` in `return e`, we need
+ // to check the fully-converted `e` in case there are implicit
+ // array-to-pointer conversions or reference conversions.
+ variableAddressEscapesTree(va, r.getExpr().getFullyConverted())
+ or
+ // The data flow library doesn't support conversions, so here we check that
+ // the address escapes into some expression `pointerToLocal`, which flows
+ // in one or more steps to a returned expression.
+ exists(Expr pointerToLocal |
+ variableAddressEscapesTree(va, pointerToLocal.getFullyConverted()) and
+ not hasNontrivialConversion(pointerToLocal) and
+ conservativeDataFlowStep+(DataFlow::exprNode(pointerToLocal), DataFlow::exprNode(r.getExpr()))
+ )
+ )
+select r, "May return stack-allocated memory from $@.", va, va.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/StackAddressEscapes.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/StackAddressEscapes.ql
new file mode 100644
index 00000000000..f9a4e3b8a7a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/StackAddressEscapes.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Local variable address stored in non-local memory
+ * @description Storing the address of a local variable in non-local
+ * memory can cause a dangling pointer bug if the address
+ * is used after the function returns.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/stack-address-escape
+ * @tags reliability
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.StackAddress
+
+/**
+ * Find assignments where the rhs might be a stack pointer and the lhs is
+ * not a stack variable. Such assignments might allow a stack address to
+ * escape.
+ */
+predicate stackAddressEscapes(AssignExpr assignExpr, Expr source, boolean isLocal) {
+ stackPointerFlowsToUse(assignExpr.getRValue(), _, source, isLocal) and
+ not stackReferenceFlowsToUse(assignExpr.getLValue(), _, _, _)
+}
+
+from Expr use, Expr source, boolean isLocal, string msg, string srcStr
+where
+ stackAddressEscapes(use, source, isLocal) and
+ if isLocal = true
+ then (
+ msg = "A stack address ($@) may be assigned to a non-local variable." and
+ srcStr = "source"
+ ) else (
+ msg = "A stack address which arrived via a $@ may be assigned to a non-local variable." and
+ srcStr = "parameter"
+ )
+select use, msg, source, srcStr
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql
new file mode 100644
index 00000000000..8e7bc5bfcf4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/StrncpyFlippedArgs.ql
@@ -0,0 +1,130 @@
+/**
+ * @name Possibly wrong buffer size in string copy
+ * @description Calling 'strncpy' with the size of the source buffer
+ * as the third argument may result in a buffer overflow.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision medium
+ * @id cpp/bad-strncpy-size
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-676
+ * external/cwe/cwe-119
+ * external/cwe/cwe-251
+ */
+
+import cpp
+import Buffer
+private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+predicate isSizePlus(Expr e, BufferSizeExpr baseSize, int plus) {
+ // baseSize
+ e = baseSize and plus = 0
+ or
+ exists(AddExpr ae, Expr operand1, Expr operand2, int plusSub |
+ // baseSize + n or n + baseSize
+ ae = e and
+ operand1 = ae.getAnOperand() and
+ operand2 = ae.getAnOperand() and
+ operand1 != operand2 and
+ isSizePlus(operand1, baseSize, plusSub) and
+ plus = plusSub + operand2.getValue().toInt()
+ )
+ or
+ exists(SubExpr se, int plusSub |
+ // baseSize - n
+ se = e and
+ isSizePlus(se.getLeftOperand(), baseSize, plusSub) and
+ plus = plusSub - se.getRightOperand().getValue().toInt()
+ )
+}
+
+/**
+ * Holds if `f` is a bounded string copy function whose destination, source and
+ * size limit are the parameters at indices `argDest`, `argSrc` and `argLimit`.
+ */
+predicate strncpyFunction(Function f, int argDest, int argSrc, int argLimit) {
+  exists(string name | name = f.getName() |
+    (
+      name = "strcpy_s" or // strcpy_s(dst, max_amount, src)
+      name = "wcscpy_s" or // wcscpy_s(dst, max_amount, src)
+      name = "_mbscpy_s" // _mbscpy_s(dst, max_amount, src)
+    ) and
+    argDest = 0 and argSrc = 2 and argLimit = 1
+    or
+    (
+      name = "strncpy" or // strncpy(dst, src, max_amount)
+      name = "strncpy_l" or name = "_strncpy_l" or // [_]strncpy_l(dst, src, max_amount, locale)
+      name = "wcsncpy" or // wcsncpy(dst, src, max_amount)
+      name = "_wcsncpy_l" or // _wcsncpy_l(dst, src, max_amount, locale)
+      name = "_mbsncpy" or // _mbsncpy(dst, src, max_amount)
+      name = "_mbsncpy_l" // _mbsncpy_l(dst, src, max_amount, locale)
+    ) and
+    argDest = 0 and argSrc = 1 and argLimit = 2
+  )
+}
+
+/**
+ * Gets the ordinal word ("first", "second" or "third") for the index `num` in 0..2.
+ */
+string nthString(int num) {
+  num = 0 and result = "first"
+  or
+  num = 1 and result = "second"
+  or
+  num = 2 and result = "third"
+}
+
+/**
+ * Gets the size of the expression, if it is initialized
+ * with a fixed size array.
+ */
+int arrayExprFixedSize(Expr e) {
+ result = e.getUnspecifiedType().(ArrayType).getSize()
+ or
+ result = e.(NewArrayExpr).getAllocatedType().(ArrayType).getSize()
+ or
+ exists(SsaDefinition def, LocalVariable v |
+ not e.getUnspecifiedType() instanceof ArrayType and
+ e = def.getAUse(v) and
+ result = arrayExprFixedSize(def.getDefiningValue(v))
+ )
+}
+
+from
+ Function f, FunctionCall fc, int argDest, int argSrc, int argLimit, int charSize, Access copyDest,
+ Access copySource, string name, string nth
+where
+ f = fc.getTarget() and
+ strncpyFunction(f, argDest, argSrc, argLimit) and
+ copyDest = fc.getArgument(argDest) and
+ copySource = fc.getArgument(argSrc) and
+ // Some of the functions operate on a larger char type, like `wchar_t`, so we
+ // need to take this into account in the fixed size case.
+ charSize = f.getParameter(argDest).getUnspecifiedType().(PointerType).getBaseType().getSize() and
+ (
+ if exists(fc.getArgument(argLimit).getValue().toInt())
+ then
+ // Fixed sized case
+ exists(int size |
+ size = arrayExprFixedSize(copyDest) and
+ size < charSize * fc.getArgument(argLimit).getValue().toInt() and
+ size != 0 // if the array has zero size, something special is going on
+ )
+ else
+ exists(Access takenSizeOf, BufferSizeExpr sizeExpr, int plus |
+ // Variable sized case
+ sizeExpr = fc.getArgument(argLimit).getAChild*() and
+ isSizePlus(fc.getArgument(argLimit), sizeExpr, plus) and
+ plus >= 0 and
+ takenSizeOf = sizeExpr.getArg() and
+ globalValueNumber(copySource) = globalValueNumber(takenSizeOf) and // e.g. strncpy(x, y, strlen(y))
+ globalValueNumber(copyDest) != globalValueNumber(takenSizeOf) // e.g. strncpy(y, y, strlen(y))
+ )
+ ) and
+ name = fc.getTarget().getName() and
+ nth = nthString(argLimit)
+select fc,
+ "Potentially unsafe call to " + name + "; " + nth + " argument should be size of destination."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousCallToMemset.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousCallToMemset.ql
new file mode 100644
index 00000000000..8e41b414794
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousCallToMemset.ql
@@ -0,0 +1,138 @@
+/**
+ * @name Suspicious call to memset
+ * @description Use of memset where the size argument is computed as the size of
+ * some non-struct type. When initializing a buffer, you should specify
+ * its size as (number of elements) * (size of one element) to ensure
+ * portability.
+ * @kind problem
+ * @id cpp/suspicious-call-to-memset
+ * @problem.severity recommendation
+ * @security-severity 10.0
+ * @precision medium
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-676
+ */
+
+import cpp
+
+/**
+ * Holds if `e` is a `sizeof` expression on type `t`, with
+ * optional multiplication by a constant.
+ */
+predicate sizeOfExpr(Expr e, Type t) {
+ t = e.(SizeofTypeOperator).getTypeOperand()
+ or
+ t = e.(SizeofExprOperator).getExprOperand().getType()
+ or
+ sizeOfExpr(e.(MulExpr).getAnOperand(), t) and
+ e.(MulExpr).getAnOperand() instanceof Literal
+}
+
+/**
+ * Gets the type `t` with typedefs, array types, references, specifiers and decltypes removed.
+ *
+ * This is similar to `Type.stripType` except that it doesn't remove
+ * a `PointerType`.
+ */
+Type stripType(Type t) {
+ result = stripType(t.(TypedefType).getBaseType())
+ or
+ result = stripType(t.(ArrayType).getBaseType())
+ or
+ result = stripType(t.(ReferenceType).getBaseType())
+ or
+ result = stripType(t.(SpecifiedType).getBaseType())
+ or
+ result = stripType(t.(Decltype).getBaseType())
+ or
+ not t instanceof TypedefType and
+ not t instanceof ArrayType and
+ not t instanceof ReferenceType and
+ not t instanceof SpecifiedType and
+ not t instanceof Decltype and
+ result = t
+}
+
+/**
+ * Holds if `t` points to `base` via a specified number of levels of pointer
+ * indirection. Intermediate typedefs and array types are allowed. Note that
+ * `base` is a stripped type (via `stripType`).
+ */
+predicate pointerIndirection(Type t, int indirection, Type base) {
+ base = stripType(t) and
+ not base instanceof PointerType and
+ indirection = 0
+ or
+ pointerIndirection(stripType(t).(PointerType).getBaseType(), indirection - 1, base)
+}
+
+/**
+ * Holds if `t` points to a non-pointer, non-array type via a specified number
+ * of levels of pointer indirection. Intermediate typedefs and array types are
+ * allowed.
+ */
+predicate pointerIndirection2(Type t, int indirection) {
+ not stripType(t) instanceof PointerType and
+ indirection = 0
+ or
+ pointerIndirection2(stripType(t).(PointerType).getBaseType(), indirection - 1)
+}
+
+/**
+ * Holds if `memset(dataArg, _, sizeArg)`, where `sizeArg` has the form
+ * `sizeof(type)`, could be reasonable.
+ */
+predicate reasonableMemset(FunctionCall fc) {
+ exists(Expr dataArg, Expr sizeArg |
+ dataArg = fc.getArgument(0) and
+ sizeArg = fc.getArgument(2) and
+ exists(Type dataType, Type sizeOfType |
+ dataType = dataArg.getType() and
+ sizeOfExpr(sizeArg, sizeOfType) and
+ exists(int i |
+ exists(Type base |
+ // memset(&t, _, sizeof(t))
+ pointerIndirection(dataType, i + 1, base) and
+ pointerIndirection(sizeOfType, i, base)
+ )
+ or
+ exists(Type base |
+ // memset(t[n], _, sizeof(t))
+ pointerIndirection(dataType.getUnspecifiedType().(ArrayType), i, base) and
+ pointerIndirection(sizeOfType, i, base)
+ )
+ or
+ exists(VoidType vt |
+ // memset(void *, _, sizeof(t))
+ pointerIndirection(dataType, i + 1, vt) and
+ pointerIndirection2(sizeOfType, i)
+ )
+ or
+ exists(Type ct |
+ // memset(char *, _, sizeof(t)) and similar
+ ct.getSize() = 1 and
+ pointerIndirection(dataType, i + 1, ct) and
+ pointerIndirection2(sizeOfType, i)
+ )
+ or
+ exists(Type ct |
+ // memset(char [], _, sizeof(t)) and similar
+ ct.getSize() = 1 and
+ pointerIndirection(dataType.getUnspecifiedType().(ArrayType), i, ct) and
+ pointerIndirection2(sizeOfType, i)
+ )
+ )
+ )
+ )
+}
+
+from FunctionCall fc, Type t
+where
+ fc.getTarget().hasName("memset") and
+ sizeOfExpr(fc.getArgument(2), t) and
+ not reasonableMemset(fc)
+select fc,
+ "The size of the memory area set by memset should not be the size of the type " + t.getName() +
+ "."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousCallToStrncat.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousCallToStrncat.ql
new file mode 100644
index 00000000000..644c48622a2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousCallToStrncat.ql
@@ -0,0 +1,69 @@
+/**
+ * @name Potentially unsafe call to strncat
+ * @description Calling 'strncat' with an incorrect size argument may result in a buffer overflow.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision medium
+ * @id cpp/unsafe-strncat
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-788
+ * external/cwe/cwe-676
+ * external/cwe/cwe-119
+ * external/cwe/cwe-251
+ */
+
+import cpp
+import Buffer
+import semmle.code.cpp.models.implementations.Strcat
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/**
+ * Holds if `call` is a call to `strncat` such that `sizeArg` and `destArg` are the size and
+ * destination arguments, respectively.
+ */
+predicate interestringCallWithArgs(Call call, Expr sizeArg, Expr destArg) {
+ exists(StrcatFunction strcat |
+ strcat = call.getTarget() and
+ sizeArg = call.getArgument(strcat.getParamSize()) and
+ destArg = call.getArgument(strcat.getParamDest())
+ )
+}
+
+/**
+ * Holds if `fc` is a call to `strncat` with size argument `sizeArg` and destination
+ * argument `destArg`, and `sizeArg` is the size of the buffer pointed to by `destArg`.
+ */
+predicate case1(FunctionCall fc, Expr sizeArg, VariableAccess destArg) {
+ interestringCallWithArgs(fc, sizeArg, destArg) and
+ exists(VariableAccess va |
+ va = sizeArg.(BufferSizeExpr).getArg() and
+ destArg.getTarget() = va.getTarget()
+ )
+}
+
+/**
+ * Holds if `fc` is a call to `strncat` with size argument `sizeArg` and destination
+ * argument `destArg`, and `sizeArg` computes the value `sizeof (dest) - strlen (dest)`.
+ */
+predicate case2(FunctionCall fc, Expr sizeArg, VariableAccess destArg) {
+ interestringCallWithArgs(fc, sizeArg, destArg) and
+ exists(SubExpr sub, int n |
+ // The destination buffer is an array of size n
+ destArg.getUnspecifiedType().(ArrayType).getSize() = n and
+ // The size argument is equivalent to a subtraction
+ globalValueNumber(sizeArg).getAnExpr() = sub and
+ // ... where the left side of the subtraction is the constant n
+ globalValueNumber(sub.getLeftOperand()).getAnExpr().getValue().toInt() = n and
+ // ... and the right side of the subtraction is a call to `strlen` where the argument is the
+ // destination buffer.
+ globalValueNumber(sub.getRightOperand()).getAnExpr().(StrlenCall).getStringExpr() =
+ globalValueNumber(destArg).getAnExpr()
+ )
+}
+
+from FunctionCall fc, Expr sizeArg, Expr destArg
+where case1(fc, sizeArg, destArg) or case2(fc, sizeArg, destArg)
+select fc, "Potentially unsafe call to strncat."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousSizeof.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousSizeof.ql
new file mode 100644
index 00000000000..a80af562bda
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/SuspiciousSizeof.ql
@@ -0,0 +1,38 @@
+/**
+ * @name Suspicious 'sizeof' use
+ * @description Taking 'sizeof' of an array parameter is often mistakenly thought
+ * to yield the size of the underlying array, but it always yields
+ * the machine pointer size.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @precision medium
+ * @id cpp/suspicious-sizeof
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-467
+ */
+
+import cpp
+
+class CandidateParameter extends Parameter {
+ CandidateParameter() {
+ // an array parameter
+ getUnspecifiedType() instanceof ArrayType
+ or
+ // a pointer parameter
+ getUnspecifiedType() instanceof PointerType and
+ // whose address is never taken (rules out common
+ // false positive patterns)
+ not exists(AddressOfExpr aoe | aoe.getAddressable() = this)
+ }
+}
+
+from SizeofExprOperator seo, VariableAccess va
+where
+ seo.getExprOperand() = va and
+ va.getTarget() instanceof CandidateParameter and
+ not va.isAffectedByMacro() and
+ not va.isCompilerGenerated()
+select seo, "This evaluates to the size of the pointer type, which may not be what you want."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/UninitializedLocal.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/UninitializedLocal.ql
new file mode 100644
index 00000000000..dd87b7883a0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/UninitializedLocal.ql
@@ -0,0 +1,84 @@
+/**
+ * @name Potentially uninitialized local variable
+ * @description Reading from a local variable that has not been assigned to
+ * will typically yield garbage.
+ * @kind problem
+ * @id cpp/uninitialized-local
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision medium
+ * @tags security
+ * external/cwe/cwe-665
+ * external/cwe/cwe-457
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.StackVariableReachability
+
+/**
+ * Auxiliary predicate: Types that don't require initialization
+ * before they are used, since they're stack-allocated.
+ */
+predicate allocatedType(Type t) {
+ /* Arrays: "int foo[1]; foo[0] = 42;" is ok. */
+ t instanceof ArrayType
+ or
+ /* Structs: "struct foo bar; bar.baz = 42" is ok. */
+ t instanceof Class
+ or
+ /* Typedefs to other allocated types are fine. */
+ allocatedType(t.(TypedefType).getUnderlyingType())
+ or
+ /* Type specifiers don't affect whether or not a type is allocated. */
+ allocatedType(t.getUnspecifiedType())
+}
+
+/**
+ * A declaration of a local variable that leaves the
+ * variable uninitialized.
+ */
+DeclStmt declWithNoInit(LocalVariable v) {
+ result.getADeclaration() = v and
+ not exists(v.getInitializer()) and
+ /* The type of the variable is not stack-allocated. */
+ exists(Type t | t = v.getType() | not allocatedType(t))
+}
+
+class UninitialisedLocalReachability extends StackVariableReachability {
+ UninitialisedLocalReachability() { this = "UninitialisedLocal" }
+
+ override predicate isSource(ControlFlowNode node, StackVariable v) { node = declWithNoInit(v) }
+
+ override predicate isSink(ControlFlowNode node, StackVariable v) { useOfVarActual(v, node) }
+
+ override predicate isBarrier(ControlFlowNode node, StackVariable v) {
+ // only report the _first_ possibly uninitialized use
+ useOfVarActual(v, node) or
+ definitionBarrier(v, node)
+ }
+}
+
+pragma[noinline]
+predicate containsInlineAssembly(Function f) { exists(AsmStmt s | s.getEnclosingFunction() = f) }
+
+/**
+ * Auxiliary predicate: List common exceptions or false positives
+ * for this check to exclude them.
+ */
+VariableAccess commonException() {
+ // If the uninitialized use we've found is in a macro expansion, it's
+ // typically something like va_start(), and we don't want to complain.
+ result.getParent().isInMacroExpansion()
+ or
+ result.getParent() instanceof BuiltInOperation
+ or
+ // Finally, exclude functions that contain assembly blocks. It's
+ // anyone's guess what happens in those.
+ containsInlineAssembly(result.getEnclosingFunction())
+}
+
+from UninitialisedLocalReachability r, LocalVariable v, VariableAccess va
+where
+ r.reaches(_, v, va) and
+ not va = commonException()
+select va, "The variable $@ may not be initialized here.", v, v.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql
new file mode 100644
index 00000000000..d0b0f7f1e71
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Memory Management/UnsafeUseOfStrcat.ql
@@ -0,0 +1,49 @@
+/**
+ * @name Potentially unsafe use of strcat
+ * @description Using 'strcat' without checking the size of the source string
+ * may result in a buffer overflow
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 9.8
+ * @precision medium
+ * @id cpp/unsafe-strcat
+ * @tags reliability
+ * correctness
+ * security
+ * external/cwe/cwe-676
+ * external/cwe/cwe-120
+ * external/cwe/cwe-251
+ */
+
+import cpp
+import Buffer
+
+/**
+ * An access to a variable that is initialized by a constant
+ * expression, and is never used as an lvalue anywhere else.
+ */
+predicate isEffectivelyConstAccess(VariableAccess a) {
+ exists(Variable v |
+ a.getTarget() = v and
+ v.getInitializer().getExpr().isConstant() and
+ not v.getAnAccess().isUsedAsLValue()
+ )
+}
+
+class StrcatSource extends VariableAccess {
+ FunctionCall strcat;
+
+ StrcatSource() {
+ strcat.getTarget().hasName("strcat") and
+ this = strcat.getArgument(1)
+ }
+
+ FunctionCall getStrcatCall() { result = strcat }
+}
+
+from StrcatSource src
+where
+ not src.getType() instanceof ArrayType and
+ not exists(BufferSizeExpr bse | bse.getArg().(VariableAccess).getTarget() = src.getTarget()) and
+ not isEffectivelyConstAccess(src)
+select src.getStrcatCall(), "Always check the size of the source buffer when using strcat."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/NestedLoopSameVar.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/NestedLoopSameVar.ql
new file mode 100644
index 00000000000..7947472259e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/NestedLoopSameVar.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Nested loops with same variable
+ * @description When a nested loop uses the same iteration variable as its outer loop, the
+ * behavior of the outer loop easily becomes difficult to understand as the
+ * inner loop will affect its control flow. It is likely to be a typo.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/nested-loops-with-same-variable
+ * @tags maintainability
+ * correctness
+ */
+
+import NestedLoopSameVar
+
+from ForStmt inner, Variable iteration, ForStmt outer
+where nestedForViolation(inner, iteration, outer)
+select inner.getCondition(), "Nested for statement uses loop variable $@ of enclosing $@.",
+ iteration, iteration.getName(), outer, "for statement"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/NestedLoopSameVar.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/NestedLoopSameVar.qll
new file mode 100644
index 00000000000..09ac2055fa2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/NestedLoopSameVar.qll
@@ -0,0 +1,36 @@
+/**
+ * Provides the implementation of the query 'Nested loops with same variable'.
+ */
+
+import cpp
+
+/**
+ * An access to a field of the form `object.field`.
+ */
+predicate simpleFieldAccess(Variable object, Variable field, VariableAccess access) {
+ access.getTarget() = field and
+ access.getQualifier().(VariableAccess).getTarget() = object
+}
+
+/**
+ * Holds if `inner` and `outer` are nested for statements that
+ * use the same loop variable `iteration`.
+ */
+predicate nestedForViolation(ForStmt inner, Variable iteration, ForStmt outer) {
+ // same variable
+ iteration = inner.getAnIterationVariable() and
+ iteration = outer.getAnIterationVariable() and
+ // field accesses must have the same object
+ (
+ iteration instanceof Field
+ implies
+ exists(Variable obj |
+ simpleFieldAccess(obj, iteration, inner.getCondition().getAChild*()) and
+ simpleFieldAccess(obj, iteration, outer.getCondition().getAChild*())
+ )
+ ) and
+ // ordinary nested loops
+ exists(inner.getInitialization()) and
+ inner.getParent+() = outer and
+ inner.getASuccessor+() = outer.getCondition()
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/IncorrectConstructorDelegation.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/IncorrectConstructorDelegation.ql
new file mode 100644
index 00000000000..38cd86a594d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/IncorrectConstructorDelegation.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Incorrect constructor delegation
+ * @description A constructor in C++ cannot delegate part of the object
+ * initialization to another by calling it. This is likely to
+ * leave part of the object uninitialized.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/constructor-delegation
+ * @tags maintainability
+ * readability
+ * language-features
+ */
+
+import cpp
+
+from FunctionCall call
+where
+ call.getTarget() = call.getEnclosingFunction().(Constructor).getDeclaringType().getAConstructor() and
+ call.getParent() instanceof ExprStmt
+select call,
+ "The constructor " + call.getTarget().getName() +
+ " may leave the instance uninitialized, as it tries to delegate to another constructor."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/NonVirtualDestructor.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/NonVirtualDestructor.ql
new file mode 100644
index 00000000000..e5c364bc855
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/NonVirtualDestructor.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Non-virtual destructor
+ * @description When a class and its derived class both define non-virtual
+ * destructors, the destructor of the derived class may not
+ * always be called.
+ * @kind problem
+ * @id cpp/non-virtual-destructor
+ * @problem.severity warning
+ * @tags reliability
+ * @deprecated This query is deprecated, and replaced by
+ * No virtual destructor (`cpp/jsf/av-rule-78`), which has far
+ * fewer false positives on typical code.
+ */
+
+import cpp
+
+from Class base, Destructor d1, Class derived, Destructor d2
+where
+ derived.getABaseClass+() = base and
+ d1.getDeclaringType() = base and
+ not d1.isVirtual() and
+ d2.getDeclaringType() = derived
+select d1, "This destructor should probably be virtual."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/NonVirtualDestructorInBaseClass.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/NonVirtualDestructorInBaseClass.ql
new file mode 100644
index 00000000000..7a223f56f57
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/NonVirtualDestructorInBaseClass.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Non-virtual destructor in base class
+ * @description All base classes with a virtual function should define a virtual destructor. If an application attempts to delete a derived class object through a base class pointer, the result is undefined if the base class destructor is non-virtual.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/virtual-destructor
+ * @tags reliability
+ * readability
+ * language-features
+ */
+
+import cpp
+
+/*
+ * Find classes with virtual functions that have a non-virtual destructor and for which there exists a derived class.
+ * Deleting a derived class object through a pointer to such a base class is undefined behavior.
+ */
+
+from Class c
+where
+ exists(VirtualFunction f | f.getDeclaringType() = c) and
+ exists(Destructor d |
+ d.getDeclaringType() = c and
+ // Ignore non-public destructors, which prevent an object of the declaring class from being deleted
+ // directly (except from within the class itself). This is a common pattern in real-world code.
+ d.hasSpecifier("public") and
+ not d.isVirtual() and
+ not d.isDeleted() and
+ not d.isCompilerGenerated()
+ ) and
+ exists(ClassDerivation d | d.getBaseClass() = c)
+select c, "A base class with a virtual function should define a virtual destructor."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/SelfAssignmentCheck.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/SelfAssignmentCheck.ql
new file mode 100644
index 00000000000..9ddf445f4bf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/SelfAssignmentCheck.ql
@@ -0,0 +1,26 @@
+/**
+ * @name Self assignment check
+ * @description Copy assignment operators should guard against self assignment;
+ * otherwise, self assignment is likely to cause memory
+ * corruption.
+ * @kind problem
+ * @id cpp/self-assignment-check
+ * @problem.severity warning
+ * @security-severity 7.0
+ * @tags reliability
+ * security
+ * external/cwe/cwe-826
+ */
+
+import cpp
+
+// find copy assignment operators that deallocate memory but do not check for self assignment
+from CopyAssignmentOperator cao
+where
+ exists(DestructorCall d | d.getEnclosingFunction() = cao) and
+ not exists(EqualityOperation eq |
+ eq.getEnclosingFunction() = cao and
+ eq.getAChild() instanceof ThisExpr and
+ eq.getAChild().(AddressOfExpr).getAddressable() = cao.getParameter(0)
+ )
+select cao, "Copy assignment operator does not check for self assignment."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/ThrowInDestructor.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/ThrowInDestructor.ql
new file mode 100644
index 00000000000..75252eb4509
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/ThrowInDestructor.ql
@@ -0,0 +1,75 @@
+/**
+ * @name Exception thrown in destructor
+ * @description Throwing an exception from a destructor may cause immediate
+ * program termination.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/throw-in-destructor
+ * @tags reliability
+ * readability
+ * language-features
+ */
+
+import cpp
+
+// This predicate finds the catch block enclosing a rethrow expression.
+predicate bindEnclosingCatch(ReThrowExpr te, CatchBlock cb) {
+ te.getEnclosingBlock().getEnclosingBlock*() = cb and
+ not exists(CatchBlock other |
+ te.getEnclosingBlock().getEnclosingBlock*() = other and other.getEnclosingBlock+() = cb
+ )
+}
+
+// This predicate strips references from types, i.e. T -> T, T* -> T*, T& -> T.
+predicate bindStrippedReferenceType(Type qualified, Type unqualified) {
+ not qualified instanceof ReferenceType and unqualified = qualified
+ or
+ unqualified = qualified.(ReferenceType).getBaseType()
+}
+
+// This predicate determines (to a first approximation) the type thrown by a throw or rethrow expression.
+predicate bindThrownType(ThrowExpr te, Type thrown) {
+ // For normal throws, the thrown type is easily determined as the type of the throw expression.
+ not te instanceof ReThrowExpr and thrown = te.getActualType()
+ or
+ // For rethrows, we use the unqualified version of the type caught by the enclosing catch block.
+ // Note that this is not precise, but is a reasonable first approximation.
+ exists(CatchBlock cb |
+ bindEnclosingCatch(te, cb) and
+ bindStrippedReferenceType(cb.getParameter().getUnspecifiedType(), thrown)
+ )
+}
+
+// This predicate determines the catch blocks that can catch the exceptions thrown by each throw expression.
+pragma[inline]
+predicate canCatch(ThrowExpr te, CatchBlock cb) {
+ exists(Type thrown, Type caught |
+ bindThrownType(te, thrown) and
+ caught = cb.getParameter().getUnspecifiedType() and
+ not bindEnclosingCatch(te, cb) and
+ (
+ // Catching primitives by value or reference
+ bindStrippedReferenceType(caught, thrown)
+ or
+ // Catching class types by value or reference
+ exists(Class c | c = thrown and bindStrippedReferenceType(caught, c.getABaseClass*()))
+ )
+ )
+}
+
+// Find throw expressions such that there is a path in the control flow graph from the expression to
+// the end of the destructor without an intervening catch block that can catch the type thrown.
+from Destructor d, ThrowExpr te
+where
+ te.getEnclosingFunction() = d and
+ not exists(CatchBlock cb |
+ te.getASuccessor+() = cb and
+ cb.getASuccessor+() = d
+ |
+ canCatch(te, cb)
+ or
+ // Catch anything -- written as `catch(...)`.
+ not exists(cb.getParameter())
+ )
+select te, "Exception thrown in destructor."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/UnsafeUseOfThis.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/UnsafeUseOfThis.ql
new file mode 100644
index 00000000000..04325e8497e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/UnsafeUseOfThis.ql
@@ -0,0 +1,214 @@
+/**
+ * @name Unsafe use of this in constructor
+ * @description A call to a pure virtual function using a 'this'
+ * pointer of an object that is under construction
+ * may lead to undefined behavior.
+ * @kind path-problem
+ * @id cpp/unsafe-use-of-this
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision very-high
+ * @tags correctness
+ * language-features
+ * security
+ * external/cwe/cwe-670
+ */
+
+import cpp
+// We don't actually use the global value numbering library in this query, but without it we end up
+// recomputing the IR.
+private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+private import semmle.code.cpp.ir.IR
+
+bindingset[n, result]
+int unbind(int n) { result >= n and result <= n }
+
+/** Holds if `p` is the `n`'th parameter of the non-virtual function `f`. */
+predicate parameterOf(Parameter p, Function f, int n) {
+ not f.isVirtual() and f.getParameter(n) = p
+}
+
+/**
+ * Holds if `instr` is the `n`'th argument to a call to the non-virtual function `f`, and
+ * `init` is the corresponding initialization instruction that receives the value of `instr` in `f`.
+ */
+predicate flowIntoParameter(
+ CallInstruction call, Instruction instr, Function f, int n, InitializeParameterInstruction init
+) {
+ not f.isVirtual() and
+ call.getPositionalArgument(n) = instr and
+ f = call.getStaticCallTarget() and
+ getEnclosingNonVirtualFunctionInitializeParameter(init, f) and
+ init.getParameter().getIndex() = unbind(n)
+}
+
+/**
+ * Holds if `instr` is an argument to a call to the function `f`, and `init` is the
+ * corresponding initialization instruction that receives the value of `instr` in `f`.
+ */
+pragma[noinline]
+predicate getPositionalArgumentInitParam(
+ CallInstruction call, Instruction instr, InitializeParameterInstruction init, Function f
+) {
+ exists(int n |
+ parameterOf(_, f, n) and
+ flowIntoParameter(call, instr, f, unbind(n), init)
+ )
+}
+
+/**
+ * Holds if `instr` is the qualifier to a call to the non-virtual function `f`, and
+ * `init` is the corresponding initialization instruction that receives the value of
+ * `instr` in `f`.
+ */
+pragma[noinline]
+predicate getThisArgumentInitParam(
+ CallInstruction call, Instruction instr, InitializeParameterInstruction init, Function f
+) {
+ not f.isVirtual() and
+ call.getStaticCallTarget() = f and
+ getEnclosingNonVirtualFunctionInitializeParameter(init, f) and
+ call.getThisArgument() = instr and
+ init.getIRVariable() instanceof IRThisVariable
+}
+
+/** Holds if `instr` is a `this` pointer used by the call instruction `call`. */
+predicate isSink(Instruction instr, CallInstruction call) {
+ exists(PureVirtualFunction func |
+ call.getStaticCallTarget() = func and
+ call.getThisArgument() = instr and
+ // Weed out implicit calls to destructors of a base class
+ not func instanceof Destructor
+ )
+}
+
+/** Holds if `init` initializes the `this` pointer in class `c`. */
+predicate isSource(InitializeParameterInstruction init, string msg, Class c) {
+ (
+ exists(Constructor func |
+ not func instanceof CopyConstructor and
+ not func instanceof MoveConstructor and
+ func = init.getEnclosingFunction() and
+ msg = "construction"
+ )
+ or
+ init.getEnclosingFunction() instanceof Destructor and msg = "destruction"
+ ) and
+ init.getIRVariable() instanceof IRThisVariable and
+ init.getEnclosingFunction().getDeclaringType() = c
+}
+
+/**
+ * Holds if `instr` flows to a sink (which is a use of the value of `instr` as a `this` pointer).
+ */
+predicate flowsToSink(Instruction instr, Instruction sink) {
+ flowsFromSource(instr) and
+ (
+ isSink(instr, _) and instr = sink
+ or
+ exists(Instruction mid |
+ successor(instr, mid) and
+ flowsToSink(mid, sink)
+ )
+ )
+}
+
+/** Holds if `instr` flows from a source. */
+predicate flowsFromSource(Instruction instr) {
+ isSource(instr, _, _)
+ or
+ exists(Instruction mid |
+ successor(mid, instr) and
+ flowsFromSource(mid)
+ )
+}
+
+/** Holds if `f` is the enclosing non-virtual function of `init`. */
+predicate getEnclosingNonVirtualFunctionInitializeParameter(
+ InitializeParameterInstruction init, Function f
+) {
+ not f.isVirtual() and
+ init.getEnclosingFunction() = f
+}
+
+/** Holds if `f` is the enclosing non-virtual function of `init`. */
+predicate getEnclosingNonVirtualFunctionInitializeIndirection(
+ InitializeIndirectionInstruction init, Function f
+) {
+ not f.isVirtual() and
+ init.getEnclosingFunction() = f
+}
+
+/**
+ * Holds if `instr` is an argument (or argument indirection) to a call, and
+ * `succ` is the corresponding initialization instruction in the call target.
+ */
+predicate flowThroughCallable(Instruction instr, Instruction succ) {
+ // Flow from an argument to a parameter
+ exists(CallInstruction call, InitializeParameterInstruction init | init = succ |
+ getPositionalArgumentInitParam(call, instr, init, call.getStaticCallTarget())
+ or
+ getThisArgumentInitParam(call, instr, init, call.getStaticCallTarget())
+ )
+ or
+ // Flow from argument indirection to parameter indirection
+ exists(
+ CallInstruction call, ReadSideEffectInstruction read, InitializeIndirectionInstruction init
+ |
+ init = succ and
+ read.getPrimaryInstruction() = call and
+ getEnclosingNonVirtualFunctionInitializeIndirection(init, call.getStaticCallTarget())
+ |
+ exists(int n |
+ read.getSideEffectOperand().getAnyDef() = instr and
+ read.getIndex() = n and
+ init.getParameter().getIndex() = unbind(n)
+ )
+ or
+ call.getThisArgument() = instr and
+ init.getIRVariable() instanceof IRThisVariable
+ )
+}
+
+/** Holds if `instr` flows to `succ`. */
+predicate successor(Instruction instr, Instruction succ) {
+ succ.(CopyInstruction).getSourceValue() = instr or
+ succ.(CheckedConvertOrNullInstruction).getUnary() = instr or
+ succ.(ChiInstruction).getTotal() = instr or
+ succ.(ConvertInstruction).getUnary() = instr or
+ succ.(InheritanceConversionInstruction).getUnary() = instr or
+ flowThroughCallable(instr, succ)
+}
+
+/**
+ * Holds if:
+ * - `source` is an initialization of a `this` pointer of type `sourceClass`, and
+ * - `sink` is a use of the `this` pointer, and
+ * - `call` invokes a pure virtual function using `sink` as the `this` pointer, and
+ * - `msg` is a string describing whether `source` is from a constructor or destructor.
+ */
+predicate flows(
+ Instruction source, string msg, Class sourceClass, Instruction sink, CallInstruction call
+) {
+ isSource(source, msg, sourceClass) and
+ flowsToSink(source, sink) and
+ isSink(sink, call)
+}
+
+query predicate edges(Instruction a, Instruction b) { successor(a, b) and flowsToSink(b, _) }
+
+query predicate nodes(Instruction n, string key, string val) {
+ flowsToSink(n, _) and
+ key = "semmle.label" and
+ val = n.toString()
+}
+
+from Instruction source, Instruction sink, CallInstruction call, string msg, Class sourceClass
+where
+ flows(source, msg, sourceClass, sink, call) and
+ // Only raise an alert if there is no override of the pure virtual function in any base class.
+ not exists(Class c | c = sourceClass.getABaseClass*() |
+ c.getAMemberFunction().getAnOverriddenFunction() = call.getStaticCallTarget()
+ )
+select call.getUnconvertedResultExpression(), source, sink,
+ "Call to pure virtual function during " + msg
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/VirtualCallInStructor.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/VirtualCallInStructor.ql
new file mode 100644
index 00000000000..f27cad6098d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/OO/VirtualCallInStructor.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Virtual call in constructor or destructor
+ * @description Calling a virtual function from a constructor or destructor
+ * rarely has the intended effect. It is likely to either cause a
+ * bug or confuse readers.
+ * @kind problem
+ * @id cpp/virtual-call-in-structor
+ * @problem.severity warning
+ * @tags reliability
+ */
+
+import cpp
+
+class Structor extends MemberFunction {
+ Structor() {
+ this instanceof Constructor or
+ this instanceof Destructor
+ }
+}
+
+from Structor s, FunctionCall c, VirtualFunction vf
+where
+ c.getEnclosingFunction() = s and
+ vf = c.getTarget() and
+ exists(VirtualFunction vff |
+ vff.overrides(vf) and
+ vff.getDeclaringType().getABaseClass+() = s.getDeclaringType()
+ )
+select c, "Virtual call in constructor or destructor."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Protocols/TlsSettingsMisconfiguration.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Protocols/TlsSettingsMisconfiguration.ql
new file mode 100644
index 00000000000..04b3d13a3f7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Protocols/TlsSettingsMisconfiguration.ql
@@ -0,0 +1,94 @@
+/**
+ * @name Boost_asio TLS Settings Misconfiguration
+ * @description Using the TLS or SSLv23 protocol from the boost::asio library, but not disabling deprecated protocols, or disabling minimum-recommended protocols.
+ * @kind problem
+ * @problem.severity error
+ * @id cpp/boost/tls-settings-misconfiguration
+ * @tags security
+ */
+
+import cpp
+import semmle.code.cpp.security.boostorg.asio.protocols
+
+class ExistsAnyFlowConfig extends DataFlow::Configuration {
+ ExistsAnyFlowConfig() { this = "ExistsAnyFlowConfig" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(BoostorgAsio::SslContextClass c | c.getAContructorCall() = source.asExpr())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(BoostorgAsio::SslSetOptionsFunction f, FunctionCall fcSetOptions |
+ f.getACallToThisFunction() = fcSetOptions and
+ fcSetOptions.getQualifier() = sink.asExpr()
+ )
+ }
+}
+
+bindingset[flag]
+predicate isOptionSet(ConstructorCall cc, int flag, FunctionCall fcSetOptions) {
+ exists(ExistsAnyFlowConfig anyFlowConfig, VariableAccess contextSetOptions |
+ anyFlowConfig.hasFlow(DataFlow::exprNode(cc), DataFlow::exprNode(contextSetOptions)) and
+ exists(BoostorgAsio::SslSetOptionsFunction f | f.getACallToThisFunction() = fcSetOptions |
+ contextSetOptions = fcSetOptions.getQualifier() and
+ forall(
+ Expr optionArgument, BoostorgAsio::SslOptionConfig optionArgConfig,
+ Expr optionArgumentSource
+ |
+ optionArgument = fcSetOptions.getArgument(0) and
+ optionArgConfig
+ .hasFlow(DataFlow::exprNode(optionArgumentSource), DataFlow::exprNode(optionArgument))
+ |
+ optionArgument.getValue().toInt().bitShiftRight(16).bitAnd(flag) = flag
+ )
+ )
+ )
+}
+
+bindingset[flag]
+predicate isOptionNotSet(ConstructorCall cc, int flag) {
+ not exists(FunctionCall fcSetOptions | isOptionSet(cc, flag, fcSetOptions))
+}
+
+from
+ BoostorgAsio::SslContextCallTlsProtocolConfig configConstructor, Expr protocolSource,
+ Expr protocolSink, ConstructorCall cc, Expr e, string msg
+where
+ configConstructor.hasFlow(DataFlow::exprNode(protocolSource), DataFlow::exprNode(protocolSink)) and
+ cc.getArgument(0) = protocolSink and
+ (
+ BoostorgAsio::isExprSslV23BoostProtocol(protocolSource) and
+ not (
+ isOptionSet(cc, BoostorgAsio::getShiftedSslOptionsNoSsl3(), _) and
+ isOptionSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1(), _) and
+ isOptionSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1_1(), _) and
+ isOptionNotSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1_2())
+ )
+ or
+ BoostorgAsio::isExprTlsBoostProtocol(protocolSource) and
+ not BoostorgAsio::isExprSslV23BoostProtocol(protocolSource) and
+ not (
+ isOptionSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1(), _) and
+ isOptionSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1_1(), _) and
+ isOptionNotSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1_2())
+ )
+ ) and
+ (
+ BoostorgAsio::isExprSslV23BoostProtocol(protocolSource) and
+ isOptionNotSet(cc, BoostorgAsio::getShiftedSslOptionsNoSsl3()) and
+ e = cc and
+ msg = "no_sslv3 has not been set"
+ or
+ isOptionNotSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1()) and
+ e = cc and
+ msg = "no_tlsv1 has not been set"
+ or
+ isOptionNotSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1_1()) and
+ e = cc and
+ msg = "no_tlsv1_1 has not been set"
+ or
+ isOptionSet(cc, BoostorgAsio::getShiftedSslOptionsNoTls1_2(), e) and
+ msg = "no_tlsv1_2 was set"
+ )
+select cc, "Usage of $@ with protocol $@ is not configured correctly: The option $@.", cc,
+ "boost::asio::ssl::context::context", protocolSource, protocolSource.toString(), e, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Protocols/UseOfDeprecatedHardcodedProtocol.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Protocols/UseOfDeprecatedHardcodedProtocol.ql
new file mode 100644
index 00000000000..b3693ead656
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Protocols/UseOfDeprecatedHardcodedProtocol.ql
@@ -0,0 +1,27 @@
+/**
+ * @name boost::asio Use of deprecated hardcoded Protocol
+ * @description Using a deprecated hard-coded protocol using the boost::asio library.
+ * @kind problem
+ * @problem.severity error
+ * @id cpp/boost/use-of-deprecated-hardcoded-security-protocol
+ * @tags security
+ */
+
+import cpp
+import semmle.code.cpp.security.boostorg.asio.protocols
+
+from
+  ConstructorCall cc, Expr protocolSource, Expr protocolSink,
+  BoostorgAsio::SslContextCallConfig config
+where
+  cc.getArgument(0) = protocolSink and // the sink is the first constructor argument
+  config.hasFlow(DataFlow::exprNode(protocolSource), DataFlow::exprNode(protocolSink)) and
+  exists(BoostorgAsio::SslContextCallBannedProtocolConfig banned |
+    banned.hasFlow(DataFlow::exprNode(protocolSource), DataFlow::exprNode(protocolSink))
+  ) and
+  not exists(BoostorgAsio::SslContextCallTlsProtocolConfig tls |
+    tls.hasFlow(DataFlow::exprNode(protocolSource), DataFlow::exprNode(protocolSink))
+  )
+select protocolSink, "Usage of $@ specifying a deprecated hardcoded protocol $@ in function $@.",
+  cc, "boost::asio::ssl::context::context", protocolSource, protocolSource.toString(),
+  cc.getEnclosingFunction(), cc.getEnclosingFunction().toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/RedundantNullCheckSimple.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/RedundantNullCheckSimple.ql
new file mode 100644
index 00000000000..65ba665dff2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/RedundantNullCheckSimple.ql
@@ -0,0 +1,71 @@
+/**
+ * @name Redundant null check due to previous dereference
+ * @description Checking a pointer for nullness after dereferencing it is
+ * likely to be a sign that either the check can be removed, or
+ * it should be moved before the dereference.
+ * @kind problem
+ * @problem.severity error
+ * @id cpp/redundant-null-check-simple
+ * @tags reliability
+ * correctness
+ * external/cwe/cwe-476
+ */
+
+/*
+ * Note: this query is not assigned a precision yet because we don't want it on
+ * LGTM until its performance is well understood.
+ */
+
+import cpp
+import semmle.code.cpp.ir.IR
+import semmle.code.cpp.ir.ValueNumbering
+
+class NullInstruction extends ConstantValueInstruction {
+  NullInstruction() {
+    this.getResultIRType() instanceof IRAddressType and // an address-typed constant ...
+    this.getValue() = "0" // ... whose value is the string "0"
+  }
+}
+
+predicate explicitNullTestOfInstruction(Instruction checked, Instruction bool) {
+  // Case 1: `bool` converts the address-typed `checked` to a boolean.
+  exists(ConvertInstruction conv | bool = conv |
+    conv.getUnary() = checked and
+    checked.getResultIRType() instanceof IRAddressType and
+    conv.getResultIRType() instanceof IRBooleanType
+  )
+  or
+  // Case 2: `bool` is a CompareEQ/CompareNE of `checked` against a `NullInstruction`.
+  exists(CompareInstruction cmp, NullInstruction nullConst | bool = cmp |
+    (
+      cmp instanceof CompareNEInstruction or
+      cmp instanceof CompareEQInstruction
+    ) and
+    (
+      cmp.getLeft() = checked and cmp.getRight() = nullConst
+      or
+      cmp.getLeft() = nullConst and cmp.getRight() = checked
+    )
+  )
+}
+
+pragma[noinline]
+predicate candidateResult(LoadInstruction checked, ValueNumber value, IRBlock dominator) {
+  value.getAnInstruction() = checked and
+  dominator.dominates(checked.getBlock()) and
+  explicitNullTestOfInstruction(checked, _) and // `checked` is the subject of some null test
+  not checked.getAST().isInMacroExpansion()
+}
+
+from LoadInstruction checked, LoadInstruction deref, ValueNumber sourceValue, IRBlock dominator
+where
+  // The dereference lives in `dominator`, a block that dominates the check. The
+  // two blocks may coincide, in which case the check might even precede the
+  // dereference; that is still reported, because within one basic block the
+  // dereference is unavoidable whatever the check concluded. In full generality
+  // an alert should also be given when `check` post-dominates `deref`.
+  deref.getBlock() = dominator and
+  deref.getSourceAddress() = sourceValue.getAnInstruction() and
+  candidateResult(checked, sourceValue, dominator)
+select checked, "This null check is redundant because the value is $@ in any case", deref,
+  "dereferenced here"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstType.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstType.ql
new file mode 100644
index 00000000000..f4b7b859d49
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstType.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Constant return type
+ * @description A 'const' modifier on a function return type is useless and should be removed for clarity.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/non-member-const-no-effect
+ * @tags maintainability
+ * readability
+ * language-features
+ */
+
+import ReturnConstTypeCommon
+
+from Function f
+where
+  not f instanceof MemberFunction and // member functions are excluded from this query
+  hasSuperfluousConstReturn(f)
+select f, "The 'const' modifier has no effect on a return type and can be removed."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstTypeCommon.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstTypeCommon.qll
new file mode 100644
index 00000000000..6d81caed391
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstTypeCommon.qll
@@ -0,0 +1,21 @@
+import cpp
+
+private predicate mightHaveConstMethods(Type t) {
+  t instanceof TemplateParameter or
+  t instanceof Class
+}
+
+predicate hasSuperfluousConstReturn(Function f) {
+  // Skip instantiations of templates whose return type involves a template parameter.
+  not exists(TemplateFunction tmpl | tmpl.getAnInstantiation() = f |
+    tmpl.getType().involvesTemplateParameter()
+  ) and
+  exists(Type retType | retType = f.getType() |
+    // The return type carries a "const" specifier and is not `affectedByMacro` ...
+    retType instanceof SpecifiedType and
+    retType.hasSpecifier("const") and
+    not affectedByMacro(retType) and
+    // ... and its unspecified type is neither a class nor a template parameter.
+    not mightHaveConstMethods(retType.getUnspecifiedType())
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstTypeMember.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstTypeMember.ql
new file mode 100644
index 00000000000..661abfd07e9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ReturnConstTypeMember.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Constant return type on member
+ * @description A 'const' modifier on a member function return type is useless. It is usually a typo or misunderstanding, since the syntax for a 'const' function is 'int foo() const', not 'const int foo()'.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/member-const-no-effect
+ * @tags maintainability
+ * readability
+ * language-features
+ */
+
+import ReturnConstTypeCommon
+
+from MemberFunction f, string message
+where
+  hasSuperfluousConstReturn(f) and
+  (
+    (f.hasSpecifier("const") or f.isStatic()) and
+    message = "The 'const' modifier has no effect on return types. The 'const' modifying the return type can be removed."
+    or
+    not (f.hasSpecifier("const") or f.isStatic()) and
+    message = "The 'const' modifier has no effect on return types. For a const function, the 'const' should go after the parameter list."
+  )
+select f, message
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/ShortLoopVarName.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ShortLoopVarName.ql
new file mode 100644
index 00000000000..87dee3c8e30
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/ShortLoopVarName.ql
@@ -0,0 +1,52 @@
+/**
+ * @name Error-prone name of loop variable
+ * @description The iteration variable of a nested loop should have a descriptive name: short names like i, j, or k can cause confusion except in very simple loops.
+ * @kind problem
+ * @id cpp/short-loop-var-name
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+predicate short(Variable v) { 1 = v.getName().length() } // name is exactly one character long
+
+predicate forStmtAncestor(Stmt child, ForStmt parent) {
+  forStmtAncestor(child.getParent(), parent) or parent = child.getParent()
+}
+
+/**
+ * Gets an `ArrayExpr` whose array base is the `ArrayExpr` `ae`.
+ */
+ArrayExpr getANestedArrayExpr(ArrayExpr ae) { ae = result.getArrayBase() }
+
+/**
+ * Holds if distinct variables `a` and `b` are both accessed in the offsets of
+ * one (possibly nested) array expression, like a coordinate pair. For example:
+ * ```
+ * arr[x][y]
+ * arr[(y * width) + x]
+ * ```
+ */
+predicate coordinatePair(Variable a, Variable b) {
+  a != b and
+  exists(ArrayExpr ae |
+    a.getAnAccess() = getANestedArrayExpr*(ae).getArrayOffset().getAChild*() and
+    b.getAnAccess() = getANestedArrayExpr*(ae).getArrayOffset().getAChild*()
+  )
+}
+
+from ForStmt outer, ForStmt inner, Variable iterationVar, Variable innerVar
+where
+  iterationVar = outer.getAnIterationVariable() and
+  short(iterationVar) and
+  innerVar = inner.getAnIterationVariable() and
+  short(innerVar) and
+  forStmtAncestor(inner, outer) and
+  not coordinatePair(iterationVar, innerVar)
+select iterationVar,
+  "Iteration variable " + iterationVar.getName() +
+    " for $@ should have a descriptive name, since there is $@.", outer, "this loop", inner,
+  "a nested loop"
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/ImplicitFunctionDeclaration.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/ImplicitFunctionDeclaration.ql
new file mode 100644
index 00000000000..a361a3401f3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/ImplicitFunctionDeclaration.ql
@@ -0,0 +1,48 @@
+/**
+ * @name Implicit function declaration
+ * @description An implicitly declared function is assumed to take no
+ * arguments and return an integer. If this assumption does not hold, it
+ * may lead to unpredictable behavior.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id cpp/implicit-function-declaration
+ * @tags correctness
+ * maintainability
+ */
+
+import cpp
+import MistypedFunctionArguments
+import TooFewArguments
+import TooManyArguments
+import semmle.code.cpp.commons.Exclusions
+
+predicate locInfo(Locatable e, File file, int line, int col) {
+  file = e.getFile() and
+  line = e.getLocation().getStartLine() and
+  col = e.getLocation().getStartColumn()
+}
+
+predicate sameLocation(FunctionDeclarationEntry fde, FunctionCall fc) {
+  exists(File file, int line, int col |
+    locInfo(fc, file, line, col) and
+    locInfo(fde, file, line, col)
+  )
+}
+
+predicate isCompiledAsC(File f) {
+  exists(File includer | includer.getAnIncludedFile() = f | isCompiledAsC(includer))
+  or
+  f.compiledAsC()
+}
+
+from FunctionDeclarationEntry fdeIm, FunctionCall fc
+where
+  fdeIm.isImplicit() and
+  sameLocation(fdeIm, fc) and
+  isCompiledAsC(fdeIm.getFile()) and
+  not isFromMacroDefinition(fc) and
+  not tooManyArguments(fc, _) and
+  not tooFewArguments(fc, _) and
+  not mistypedFunctionArguments(fc, _, _)
+select fc, "Function call implicitly declares '" + fdeIm.getName() + "'."
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/MistypedFunctionArguments.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/MistypedFunctionArguments.ql
new file mode 100644
index 00000000000..cbdf0eab50d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/MistypedFunctionArguments.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Call to a function with one or more incompatible arguments
+ * @description When the type of a function argument is not compatible
+ * with the type of the corresponding parameter, it may lead to
+ * unpredictable behavior.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id cpp/mistyped-function-arguments
+ * @tags correctness
+ * maintainability
+ */
+
+import cpp
+import MistypedFunctionArguments
+
+from FunctionCall fc, Function f, Parameter p
+where mistypedFunctionArguments(fc, f, p)
+select fc, "Calling $@: argument $@ of type $@ is incompatible with parameter $@.", f, f.toString(),
+ fc.getArgument(p.getIndex()) as arg, arg.toString(), // `arg`: the call argument at `p`'s index
+ arg.getExplicitlyConverted().getUnspecifiedType() as atype, atype.toString(), p, p.getTypedName()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/MistypedFunctionArguments.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/MistypedFunctionArguments.qll
new file mode 100644
index 00000000000..2dced5d8d84
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/MistypedFunctionArguments.qll
@@ -0,0 +1,97 @@
+/**
+ * Provides the implementation of the MistypedFunctionArguments query. The
+ * query is implemented as a library, so that we can avoid producing
+ * duplicate results in other similar queries.
+ */
+
+import cpp
+
+pragma[inline]
+private predicate arithTypesMatch(Type arg, Type parm) {
+  arg = parm
+  or
+  (
+    arg instanceof FloatingPointType and
+    parm instanceof FloatingPointType
+    or
+    arg instanceof IntegralOrEnumType and
+    parm instanceof IntegralOrEnumType
+  ) and
+  parm.getSize() = arg.getSize()
+}
+
+pragma[inline]
+private predicate nestedPointerArgTypeMayBeUsed(Type arg, Type parm) {
+  // a `void` type on either side is always accepted
+  parm instanceof VoidType
+  or
+  arg instanceof VoidType
+  or
+  // otherwise the two types must match as arithmetic types
+  arithTypesMatch(arg, parm)
+}
+
+pragma[inline]
+private predicate pointerArgTypeMayBeUsed(Type arg, Type parm) {
+  nestedPointerArgTypeMayBeUsed(arg, parm)
+  or
+  // nested pointers: compare base types when `parm` is itself a pointer
+  exists(Type parmBase | parmBase = parm.(PointerType).getBaseType().getUnspecifiedType() |
+    nestedPointerArgTypeMayBeUsed(arg.(PointerType).getBaseType().getUnspecifiedType(), parmBase)
+    or
+    nestedPointerArgTypeMayBeUsed(arg.(ArrayType).getBaseType().getUnspecifiedType(), parmBase)
+  )
+}
+
+pragma[inline]
+private predicate argTypeMayBeUsed(Type arg, Type parm) {
+  // arithmetic types
+  arithTypesMatch(arg, parm)
+  or
+  // pointer or array argument against a pointer or (C11) array parameter
+  exists(Type argBase, Type parmBase |
+    (
+      argBase = arg.(PointerType).getBaseType().getUnspecifiedType() or
+      argBase = arg.(ArrayType).getBaseType().getUnspecifiedType()
+    ) and
+    (
+      parmBase = parm.(PointerType).getBaseType().getUnspecifiedType() or
+      parmBase = parm.(ArrayType).getBaseType().getUnspecifiedType()
+    )
+  |
+    pointerArgTypeMayBeUsed(argBase, parmBase)
+  )
+}
+
+// Holds if expression `arg` may be used to initialize function parameter `parm`
+// without need for run-time conversion; this is decided by `argTypeMayBeUsed`
+pragma[inline]
+private predicate argMayBeUsed(Expr arg, Parameter parm) {
+ argTypeMayBeUsed(arg.getFullyConverted().getUnspecifiedType(), parm.getUnspecifiedType())
+}
+
+// Holds if `f` has a zero-parameter declaration that is neither (void) nor a definition
+private predicate hasZeroParamDecl(Function f) {
+  exists(FunctionDeclarationEntry entry | f.getADeclarationEntry() = entry |
+    entry.getNumberOfParameters() = 0 and not entry.isDefinition() and not entry.hasVoidParamList()
+  )
+}
+
+// Holds if `f` is compiled as C, or is included by a file for which this predicate holds
+private predicate isCompiledAsC(File f) {
+  exists(File includer | includer.getAnIncludedFile() = f | isCompiledAsC(includer))
+  or
+  f.compiledAsC()
+}
+
+predicate mistypedFunctionArguments(FunctionCall fc, Function f, Parameter p) {
+  fc.getTarget() = f and
+  f.getAParameter() = p and
+  p.getIndex() < fc.getNumberOfArguments() and
+  // The call argument at `p`'s index must not be usable for `p` as it stands
+  not argMayBeUsed(fc.getArgument(p.getIndex()), p) and
+  not f instanceof BuiltInFunction and
+  not f.isVarargs() and
+  isCompiledAsC(f.getFile()) and
+  hasZeroParamDecl(f)
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooFewArguments.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooFewArguments.ql
new file mode 100644
index 00000000000..bc53015c905
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooFewArguments.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Call to function with fewer arguments than declared parameters
+ * @description A function call is passing fewer arguments than the number of
+ * declared parameters of the function. This may indicate
+ * that the code does not follow the author's intent. It is also
+ * a vulnerability, since the function is likely to operate on
+ * undefined data.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 5.0
+ * @precision very-high
+ * @id cpp/too-few-arguments
+ * @tags correctness
+ * maintainability
+ * security
+ * external/cwe/cwe-234
+ * external/cwe/cwe-685
+ */
+
+import cpp
+import TooFewArguments
+
+from FunctionCall fc, Function f
+where tooFewArguments(fc, f)
+select fc, "This call has fewer arguments than required by $@.", f, f.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooFewArguments.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooFewArguments.qll
new file mode 100644
index 00000000000..6f3f4d43e9a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooFewArguments.qll
@@ -0,0 +1,55 @@
+/**
+ * Provides the implementation of the TooFewArguments query. The
+ * query is implemented as a library, so that we can avoid producing
+ * duplicate results in other similar queries.
+ */
+
+import cpp
+
+/**
+ * Holds if `fde` is a definition, has a `(void)` parameter list, or declares at
+ * least one parameter, so that it is clear on the minimum number of
+ * parameters. This essentially excludes only `()`-declarations.
+ */
+private predicate hasDefiniteNumberOfParameters(FunctionDeclarationEntry fde) {
+  fde.isDefinition()
+  or
+  fde.getNumberOfParameters() > 0
+  or
+  fde.hasVoidParamList()
+}
+
+/* Holds if some declaration entry of `f` leaves the parameter count open (a `()`-declaration). */
+private predicate hasZeroParamDecl(Function f) {
+  exists(FunctionDeclarationEntry entry |
+    entry = f.getADeclarationEntry() and not hasDefiniteNumberOfParameters(entry)
+  )
+}
+
+/* Holds if `f` is compiled as C, or is included by a file for which this predicate holds. */
+private predicate isCompiledAsC(File f) {
+  exists(File includer | includer.getAnIncludedFile() = f | isCompiledAsC(includer))
+  or
+  f.compiledAsC()
+}
+
+/** Holds if `fc` is a call to `f` with too few arguments. */
+predicate tooFewArguments(FunctionCall fc, Function f) {
+  fc.getTarget() = f and
+  not f instanceof BuiltInFunction and
+  not f.isVarargs() and
+  // Results are only expected for C (not C++) functions with a `()` parameter
+  // list among their declarations. A result on any other function probably
+  // means that the extractor only saw a partial compilation.
+  isCompiledAsC(f.getFile()) and
+  hasZeroParamDecl(f) and
+  // At least one declaration must be authoritative on the parameter count, and
+  // every such declaration must declare more parameters than the call has
+  // arguments.
+  forex(FunctionDeclarationEntry entry |
+    hasDefiniteNumberOfParameters(entry) and
+    entry = f.getADeclarationEntry()
+  |
+    fc.getNumberOfArguments() < entry.getNumberOfParameters()
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooManyArguments.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooManyArguments.ql
new file mode 100644
index 00000000000..4e7f85097b8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooManyArguments.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Call to function with extraneous arguments
+ * @description A function call is passing more arguments than there are
+ * declared parameters of the function. This may indicate
+ * that the code does not follow the author's intent.
+ * @kind problem
+ * @problem.severity warning
+ * @precision very-high
+ * @id cpp/futile-params
+ * @tags correctness
+ * maintainability
+ */
+
+import cpp
+import TooManyArguments
+
+from FunctionCall fc, Function f
+where tooManyArguments(fc, f)
+select fc, "This call has more arguments than required by $@.", f, f.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooManyArguments.qll b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooManyArguments.qll
new file mode 100644
index 00000000000..7fba78b5550
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/Underspecified Functions/TooManyArguments.qll
@@ -0,0 +1,38 @@
+/**
+ * Provides the implementation of the TooManyArguments query. The
+ * query is implemented as a library, so that we can avoid producing
+ * duplicate results in other similar queries.
+ */
+
+import cpp
+
+// Holds if some declaration entry of `f` is not implicit, declares zero
+// parameters without a `(void)` parameter list, and is not a definition.
+private predicate hasZeroParamDecl(Function f) {
+  exists(FunctionDeclarationEntry entry | f.getADeclarationEntry() = entry |
+    entry.getNumberOfParameters() = 0 and
+    not entry.isDefinition() and
+    not entry.hasVoidParamList() and
+    not entry.isImplicit()
+  )
+}
+
+// Holds if `f` is compiled as C, or is included by a file for which this predicate holds
+private predicate isCompiledAsC(File f) {
+  exists(File includer | includer.getAnIncludedFile() = f | isCompiledAsC(includer))
+  or
+  f.compiledAsC()
+}
+
+predicate tooManyArguments(FunctionCall fc, Function f) {
+  fc.getTarget() = f and
+  exists(f.getBlock()) and
+  isCompiledAsC(f.getFile()) and
+  hasZeroParamDecl(f) and
+  not f.isVarargs() and
+  // No declaration of `f` may declare at least as many parameters as the
+  // call passes arguments.
+  not exists(FunctionDeclarationEntry entry | f.getADeclarationEntry() = entry |
+    fc.getNumberOfArguments() <= entry.getNumberOfParameters()
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Likely Bugs/UseInOwnInitializer.ql b/repo-tests/codeql/cpp/ql/src/Likely Bugs/UseInOwnInitializer.ql
new file mode 100644
index 00000000000..6bb411b7844
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Likely Bugs/UseInOwnInitializer.ql
@@ -0,0 +1,43 @@
+/**
+ * @name Variable used in its own initializer
+ * @id cpp/use-in-own-initializer
+ * @description Loading from a variable in its own initializer may lead to undefined behavior.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @tags maintainability
+ * correctness
+ */
+
+import cpp
+
+class VariableAccessInInitializer extends VariableAccess {
+  Variable var; // the variable declared by `init`
+  Initializer init; // an initializer whose expression contains this access
+
+  VariableAccessInInitializer() {
+    this = init.getExpr().getAChild*() and
+    var = init.getDeclaration()
+  }
+
+  predicate initializesItself(Variable v, Initializer i) {
+    this.getTarget() = var and v = var and i = init
+  }
+}
+
+from Initializer init, Variable v, VariableAccessInInitializer va
+where
+  va.initializesItself(v, init) and
+  not v.isConst() and
+  not va.isUnevaluated() and
+  (
+    va = any(CrementOperation crement).getAnOperand() or
+    va = any(Assignment assn).getLValue() or
+    va.hasLValueToRValueConversion()
+  ) and
+  not va.getEnclosingStmt().isInMacroExpansion() and
+  not (
+    va = any(MacroInvocation mi).getExpr() and
+    va.getParent() = init
+  )
+select va, v.getName() + " is used in its own initializer."
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CAfferentCoupling.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CAfferentCoupling.ql
new file mode 100644
index 00000000000..1da834bfdce
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CAfferentCoupling.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Incoming dependencies per class
+ * @description The number of classes that depend on a class.
+ * @kind treemap
+ * @id cpp/afferent-coupling-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c, c.getMetrics().getAfferentCoupling() as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CEfferentCoupling.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CEfferentCoupling.ql
new file mode 100644
index 00000000000..25ba3cad7bb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CEfferentCoupling.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Outgoing dependencies per class
+ * @description The number of classes on which a class depends.
+ * @kind treemap
+ * @id cpp/outgoing-type-dependencies
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags testability
+ * modularity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c, c.getMetrics().getEfferentCoupling() as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadBugs.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadBugs.ql
new file mode 100644
index 00000000000..b8467b6b51d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadBugs.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Halstead bug measure
+ * @description Measures the expected number of delivered defects.
+ * The Halstead bug count is known to be an underestimate.
+ * @kind treemap
+ * @id cpp/halstead-bugs-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricClass mc
+select mc, mc.getHalsteadDeliveredBugs()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadDifficulty.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadDifficulty.ql
new file mode 100644
index 00000000000..817e1f5334e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadDifficulty.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Halstead difficulty
+ * @description Measures the error proneness of implementing the program
+ * @kind treemap
+ * @id cpp/halstead-difficulty-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricClass mc
+select mc, mc.getHalsteadDifficulty()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadEffort.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadEffort.ql
new file mode 100644
index 00000000000..af6f72e9b77
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadEffort.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Halstead effort
+ * @description Measures the effort to implement the program
+ * @kind treemap
+ * @id cpp/halstead-effort-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricClass mc
+select mc, mc.getHalsteadEffort()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadLength.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadLength.ql
new file mode 100644
index 00000000000..88d179be9ea
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadLength.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Halstead length
+ * @description Total number of operands and operators
+ * @kind treemap
+ * @id cpp/halstead-length-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricClass mc
+select mc, mc.getHalsteadLength()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadVocabulary.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadVocabulary.ql
new file mode 100644
index 00000000000..b351f918e71
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadVocabulary.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Halstead vocabulary
+ * @description Number of distinct operands and operators used
+ * @kind treemap
+ * @id cpp/halstead-vocabulary-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricClass mc
+select mc, mc.getHalsteadVocabulary()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadVolume.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadVolume.ql
new file mode 100644
index 00000000000..7fc17ed221d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CHalsteadVolume.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Halstead volume
+ * @description The information contents of the program
+ * @kind treemap
+ * @id cpp/halstead-volume-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricClass mc
+select mc, mc.getHalsteadVolume()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CInheritanceDepth.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CInheritanceDepth.ql
new file mode 100644
index 00000000000..e6ae8bb62c7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CInheritanceDepth.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Inheritance depth per class
+ * @description The depth of a class in the inheritance hierarchy.
+ * @kind treemap
+ * @id cpp/inheritance-depth-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags modularity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c, c.getMetrics().getInheritanceDepth() as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLackOfCohesionCK.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLackOfCohesionCK.ql
new file mode 100644
index 00000000000..b41d8f9392b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLackOfCohesionCK.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Lack of cohesion per class (LCOM-CK)
+ * @description Lack of cohesion for a class as defined by Chidamber
+ * and Kemerer.
+ * @kind treemap
+ * @id cpp/lack-of-cohesion-ck
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags modularity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c, c.getMetrics().getLackOfCohesionCK() as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLackOfCohesionHS.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLackOfCohesionHS.ql
new file mode 100644
index 00000000000..484213e113a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLackOfCohesionHS.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Lack of cohesion per class (LCOM-HS)
+ * @description Lack of cohesion for a class as defined by Henderson-Sellers.
+ * @kind treemap
+ * @id cpp/lack-of-cohesion-hs
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags modularity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c, c.getMetrics().getLackOfCohesionHS() as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLinesOfCode.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLinesOfCode.ql
new file mode 100644
index 00000000000..c516aede711
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CLinesOfCode.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Lines of code per class
+ * @description The number of lines of code in a class.
+ * @kind treemap
+ * @id cpp/lines-of-code-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from Class c, int n
+where
+  c.fromSource() and
+  n =
+    sum(Function f | f = c.getACanonicalMemberFunction() | f.getMetrics().getNumberOfLinesOfCode()) +
+      c.getMetrics().getNumberOfMembers()
+select c, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfFields.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfFields.ql
new file mode 100644
index 00000000000..9baf194735b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfFields.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Number of fields per class
+ * @description The number of fields in a class.
+ * @kind treemap
+ * @id cpp/fields-per-type
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from Class c
+where c.fromSource()
+select c, c.getMetrics().getNumberOfFields() as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfFunctions.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfFunctions.ql
new file mode 100644
index 00000000000..cdc6e12e9d4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfFunctions.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Number of functions per class
+ * @description The number of member functions in a class.
+ * @kind treemap
+ * @id cpp/number-of-functions-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from Class c, int n
+where c.fromSource() and n = c.getMetrics().getNumberOfMemberFunctions()
+select c, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfStatements.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfStatements.ql
new file mode 100644
index 00000000000..d7fd8adc2fc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CNumberOfStatements.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Number of statements per class
+ * @description The number of statements in the member functions of a class.
+ * For template functions, only the statements in the template
+ * itself, not in the instantiations, are counted.
+ * @kind treemap
+ * @id cpp/statements-per-type
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from Class c, int n
+where
+  n = count(Stmt stmt | exists(Function fn | fn = c.getACanonicalMemberFunction() | fn = stmt.getEnclosingFunction())) and
+  c.fromSource()
+select c, n
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CPercentageOfComplexCode.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CPercentageOfComplexCode.ql
new file mode 100644
index 00000000000..dafe176d3a3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CPercentageOfComplexCode.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Percentage of complex code per class
+ * @description The percentage of the code in a class that is part of
+ * a complex member function.
+ * @kind treemap
+ * @id cpp/percentage-of-complex-code-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags complexity
+ */
+
+import cpp
+
+from Class c, int ccLoc, int loc
+where
+  c.fromSource() and
+  loc =
+    c.getMetrics().getNumberOfMembers() +
+      sum(Function fn | fn = c.getACanonicalMemberFunction() | fn.getMetrics().getNumberOfLinesOfCode()) and
+  not loc = 0 and // the select clause divides by loc
+  // Only member functions whose cyclomatic complexity exceeds 18 count as complex.
+  ccLoc =
+    sum(Function fn |
+      fn = c.getACanonicalMemberFunction() and fn.getMetrics().getCyclomaticComplexity() > 18
+    |
+      fn.getMetrics().getNumberOfLinesOfCode()
+    )
+select c, (ccLoc * 100).(float) / loc as n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CResponse.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CResponse.ql
new file mode 100644
index 00000000000..cccaa058efb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CResponse.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Response per class
+ * @description The number of different member functions or
+ * constructors that can be executed by a class.
+ * @kind treemap
+ * @id cpp/response-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags maintainability
+ * complexity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource() // keep only classes for which `fromSource()` holds
+select c, c.getMetrics().getResponse() // each class paired with its response metric
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CSizeOfAPI.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CSizeOfAPI.ql
new file mode 100644
index 00000000000..060c19aa122
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CSizeOfAPI.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Size of API per class
+ * @description The number of public member functions in a public class.
+ * @kind treemap
+ * @id cpp/size-of-api-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg sum max
+ * @tags modularity
+ */
+
+import cpp
+
+from Class c, int n
+where
+  n = count(Function publicFn | publicFn = c.getAPublicCanonicalMember()) and
+  c.fromSource()
+select c, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CSpecialisation.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CSpecialisation.ql
new file mode 100644
index 00000000000..be71f1f0269
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Classes/CSpecialisation.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Specialization per class
+ * @description The extent to which a subclass refines the behavior
+ * of its superclasses.
+ * @kind treemap
+ * @id cpp/specialisation-per-class
+ * @treemap.warnOn highValues
+ * @metricType reftype
+ * @metricAggregate avg max
+ * @tags modularity
+ */
+
+import cpp
+
+from Class c
+where c.fromSource() // keep only classes for which `fromSource()` holds
+select c, c.getMetrics().getSpecialisationIndex() // each class paired with its specialisation index
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependencies.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependencies.ql
new file mode 100644
index 00000000000..a4e9c925119
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependencies.ql
@@ -0,0 +1,16 @@
+/**
+ * @deprecated
+ * @name External dependencies
+ * @description Count the number of dependencies a C/C++ source file has on external libraries.
+ * @kind treemap
+ * @treemap.warnOn highValues
+ * @metricType externalDependency
+ * @id cpp/external-dependencies
+ * @tags modularity
+ */
+
+import ExternalDependencies
+
+from string encodedDependency, int num
+where encodedDependencies(_, encodedDependency, num)
+select encodedDependency, num order by num desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependencies.qll b/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependencies.qll
new file mode 100644
index 00000000000..352cf86ddd5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependencies.qll
@@ -0,0 +1,108 @@
+/**
+ * Support for ExternalDependencies.ql query.
+ *
+ * This performs a "technology inventory" by associating each source file
+ * with the libraries it uses.
+ */
+
+import cpp
+import semmle.code.cpp.commons.Dependency
+
+/** An `Element` that is to be considered a library. */
+abstract class LibraryElement extends Element {
+ /** Gets the name of this library. */
+ abstract string getName();
+
+ /** Gets the version of this library. */
+ abstract string getVersion();
+ /** Gets a file belonging to this library. */
+ abstract File getAFile();
+}
+
+/**
+ * Anything that is to be considered a library: either a `LibraryElement`
+ * or an external package recorded in the database.
+ */
+private newtype LibraryT =
+  LibraryTElement(LibraryElement lib, string name, string version) {
+    name = lib.getName() and
+    version = lib.getVersion()
+  } or
+  LibraryTExternalPackage(@external_package ep, string name, string version) {
+    // The namespace column of `external_packages` is not needed here,
+    // so it is ignored.
+    external_packages(ep, _, name, version)
+  }
+
+/**
+ * A library that can have dependencies on it.
+ */
+class Library extends LibraryT {
+  string name;
+  string version;
+
+  Library() {
+    this = LibraryTElement(_, name, version) or
+    this = LibraryTExternalPackage(_, name, version)
+  }
+
+  /** Gets the name of this library. */
+  string getName() { result = name }
+
+  /** Gets the version reported for this library, which is always "unknown". */
+  string getVersion() {
+    // For a C/C++ dependency the recorded version is merely whatever happened
+    // to be installed on the machine that ran the build. Reporting it would
+    // likely be misunderstood, both by people reading the results and by the
+    // vulnerability checker of lgtm.
+    result = "unknown"
+  }
+
+  /** Gets a textual representation of this library: its name, a dash, its reported version. */
+  string toString() { result = this.getName() + "-" + this.getVersion() }
+
+  /** Gets a file that belongs to this library. */
+  File getAFile() {
+    exists(LibraryElement lib | this = LibraryTElement(lib, _, _) | result = lib.getAFile())
+    or
+    exists(@external_package ep | this = LibraryTExternalPackage(ep, _, _) |
+      header_to_external_package(unresolveElement(result), ep)
+    )
+  }
+}
+
+/**
+ * Holds if `sourceFile`, which is not itself part of `destLib`, has `num`
+ * dependencies on elements located in files of `destLib`.
+ */
+predicate libDependencies(File sourceFile, Library destLib, int num) {
+  num =
+    strictcount(Element source, Element dest, File destFile |
+      // `source` lives in `sourceFile`, `dest` lives in `destFile`
+      source.getFile() = sourceFile and
+      dest.getFile() = destFile and
+      dependsOnSimple(source, dest) and
+      // template instantiations may depend back on types in the using code,
+      // so dependencies originating from them are skipped
+      not source.isFromTemplateInstantiation(_) and
+      // the target file belongs to the library, the source file does not
+      destLib.getAFile() = destFile and
+      not destLib.getAFile() = sourceFile and
+      // very common libraries are left out
+      not exists(string libName | libName = destLib.getName() |
+        libName = "linux" or libName = "glibc" or libName.regexpMatch("gcc-[0-9]+")
+      )
+    )
+}
+
+/**
+ * Holds if `source` has `num` dependencies on a library that is described by
+ * `encodedDependency`, a string of the form `/<relative path><|><name><|><version>`.
+ */
+predicate encodedDependencies(File source, string encodedDependency, int num) {
+  exists(Library destLib, string libPart |
+    libPart = destLib.getName() + "<|>" + destLib.getVersion() and
+    encodedDependency = "/" + source.getRelativePath() + "<|>" + libPart and
+    libDependencies(source, destLib, num)
+  )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependenciesSourceLinks.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependenciesSourceLinks.ql
new file mode 100644
index 00000000000..cf305ab0aea
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Dependencies/ExternalDependenciesSourceLinks.ql
@@ -0,0 +1,23 @@
+/**
+ * @deprecated
+ * @name External dependency source links
+ * @kind source-link
+ * @metricType externalDependency
+ * @id cpp/dependency-source-links
+ * @tags modularity
+ */
+
+import ExternalDependencies
+
+/*
+ * This query creates the source links for the ExternalDependencies.ql query.
+ * Although the entities in question are of the form '/file/path<|>dependency<|>version',
+ * the /file/path is a bare string relative to the root of the source archive, and not
+ * tied to a particular revision. We need the File entity (the second column here) to
+ * recover that information once we are in the dashboard database, using the
+ * ExternalEntity.getASourceLink() method.
+ */
+
+from File file, string encodedDependency
+where encodedDependencies(file, encodedDependency, _)
+select encodedDependency, file
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/External/FileCompilationDisplayStrings.ql b/repo-tests/codeql/cpp/ql/src/Metrics/External/FileCompilationDisplayStrings.ql
new file mode 100644
index 00000000000..3c57afa49c2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/External/FileCompilationDisplayStrings.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Display strings for file compilations
+ * @kind display-string
+ * @id cpp/file-compilation-display-strings
+ * @metricType fileCompilation
+ */
+
+import cpp
+
+from Compilation c, int i
+select c.toString() + ":" + i.toString(), // key: compilation text, a colon, then the index
+ c.toString() + ":" + i.toString() + ":" + c.getFileCompiled(i) // the same key, a colon, then the file compiled at that index
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/External/FileCompilationSourceLinks.ql b/repo-tests/codeql/cpp/ql/src/Metrics/External/FileCompilationSourceLinks.ql
new file mode 100644
index 00000000000..6d0137c77d7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/External/FileCompilationSourceLinks.ql
@@ -0,0 +1,11 @@
+/**
+ * @name Source links for file compilations
+ * @kind source-link
+ * @id cpp/file-compilation-source-links
+ * @metricType fileCompilation
+ */
+
+import cpp
+
+from Compilation c, int i
+select c.toString() + ":" + i.toString(), c.getFileCompiled(i) // key (compilation text, colon, index) and the file compiled at that index
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/AutogeneratedLOC.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/AutogeneratedLOC.ql
new file mode 100644
index 00000000000..7c6ef5c0f10
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/AutogeneratedLOC.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Autogenerated lines of code
+ * @kind treemap
+ * @id cpp/autogenerated-loc
+ * @description Measures the number of lines in autogenerated files that
+ * contain code (rather than lines that only contain
+ * comments or are blank).
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate sum
+ * @tags maintainability
+ */
+
+import cpp
+import semmle.code.cpp.AutogeneratedFile
+
+from AutogeneratedFile af, int n
+where af.fromSource() and n = af.getMetrics().getNumberOfLinesOfCode()
+select af, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/ConditionalSegmentConditions.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/ConditionalSegmentConditions.ql
new file mode 100644
index 00000000000..ea2f6be5cb6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/ConditionalSegmentConditions.ql
@@ -0,0 +1,38 @@
+/**
+ * @name Number of distinct conditions used in #if, #ifdef, #ifndef etc. per file
+ * @description For each file, the number of unique conditions used by
+ * `#if`, `#ifdef`, and `#ifndef`.
+ * @kind treemap
+ * @id cpp/conditional-segment-conditions
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+/** Holds if `d` is an `#if`, `#ifdef` or `#ifndef` directive. */
+predicate preprocessorOpenCondition(PreprocessorDirective d) {
+  d instanceof PreprocessorIfndef or d instanceof PreprocessorIfdef or
+  d instanceof PreprocessorIf
+}
+
+predicate headerGuard(PreprocessorIfndef notdef) {
+  exists(string head | head = notdef.getHead() |
+    head.regexpMatch(".*_H") or head.regexpMatch(".*_H_.*") // head looks like a header guard name
+  )
+}
+
+from File f
+where f.fromSource()
+select f,
+  count(string condition |
+    // distinct condition texts in `f`, ignoring directives that look like header guards
+    condition =
+      any(PreprocessorDirective open |
+        open.getFile() = f and
+        preprocessorOpenCondition(open) and not headerGuard(open)
+      ).getHead()
+  )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/ConditionalSegmentLines.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/ConditionalSegmentLines.ql
new file mode 100644
index 00000000000..9d20b869801
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/ConditionalSegmentLines.ql
@@ -0,0 +1,125 @@
+/**
+ * @name Number of conditionally compiled lines
+ * @description The number of lines that are subject to conditional
+ * compilation constraints defined using `#if`, `#ifdef`,
+ * and `#ifndef`.
+ * @kind treemap
+ * @id cpp/conditional-segment-lines
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+import semmle.code.cpp.headers.MultipleInclusion
+
+/** Holds if `d` is an `#if`, `#ifdef` or `#ifndef` directive starting on line `line` of `f`. */
+predicate preprocessorOpenCondition(PreprocessorDirective d, File f, int line) {
+  exists(Location loc | loc = d.getLocation() |
+    loc.getFile() = f and loc.getStartLine() = line
+  ) and
+  // any of the three opening directive kinds
+  (d instanceof PreprocessorIf or d instanceof PreprocessorIfdef or d instanceof PreprocessorIfndef)
+}
+
+/** Holds if `d` is a `PreprocessorEndif` directive starting on line `line` of `f`. */
+predicate preprocessorCloseCondition(PreprocessorDirective d, File f, int line) {
+  exists(Location loc | loc = d.(PreprocessorEndif).getLocation() | loc.getFile() = f and loc.getStartLine() = line)
+}
+
+/** Holds if an opening or closing conditional directive starts on line `line` of `f`. */
+private predicate relevantLine(File f, int line) {
+  exists(PreprocessorDirective d | preprocessorCloseCondition(d, f, line) or preprocessorOpenCondition(d, f, line))
+}
+
+/** Holds if `d` is an opening or closing conditional directive starting on line `line` of `f`. */
+predicate relevantDirective(PreprocessorDirective d, File f, int line) {
+  preprocessorCloseCondition(d, f, line) or preprocessorOpenCondition(d, f, line)
+}
+
+private predicate relevantLineWithRank(File f, int rnk, int line) {
+  line = rank[rnk](int candidate | relevantLine(f, candidate) | candidate) // the `rnk`-th smallest relevant line of `f`
+}
+
+/** Gets a relevant directive on the relevant line of the same file that directly follows the line of `ppd`. */
+private PreprocessorDirective next(PreprocessorDirective ppd) {
+  exists(File file, int pos, int here, int there |
+    relevantLineWithRank(file, pos, here) and relevantLineWithRank(file, pos + 1, there)
+  |
+    relevantDirective(ppd, file, here) and relevantDirective(result, file, there)
+  )
+}
+
+/**
+ * Gets the nesting level at `ppd`: 0 for a relevant directive that has no predecessor,
+ * otherwise the predecessor's level plus one (after an opener) or minus one (after a closer).
+ */
+private int level(PreprocessorDirective ppd) {
+  relevantDirective(ppd, _, _) and not ppd = next(_) and
+  result = 0
+  or
+  exists(PreprocessorDirective previous, int delta | ppd = next(previous) |
+    result = level(previous) + delta and
+    (
+      preprocessorOpenCondition(previous, _, _) and delta = 1
+      or
+      preprocessorCloseCondition(previous, _, _) and delta = -1
+    )
+  )
+}
+
+private predicate openWithDepth(int depth, File f, PreprocessorDirective open, int line) {
+  level(open) = depth and
+  not depth >= 2 and // deeper nesting is of no interest
+  preprocessorOpenCondition(open, f, line)
+}
+
+private predicate closeWithDepth(int depth, File f, PreprocessorDirective close, int line) {
+  level(close) - 1 = depth and // the level at a closer still counts the block it ends
+  not depth >= 2 and // deeper nesting is of no interest
+  preprocessorCloseCondition(close, f, line)
+}
+
+/**
+ * Holds if `length` lines lie strictly between `open` and the first closing
+ * directive of the same depth that follows it in the same file.
+ */
+predicate length(PreprocessorDirective open, int length) {
+  exists(int depth, File f, int start, int end |
+    openWithDepth(depth, f, open, start) and
+    end =
+      min(int closeLine | closeWithDepth(depth, f, _, closeLine) and start < closeLine | closeLine)
+  |
+    length = end - start - 1
+  )
+}
+
+predicate headerGuard(PreprocessorDirective notdef, File f) {
+  notdef.getFile() = f and notdef = any(CorrectIncludeGuard guard).getIfndef()
+}
+
+/** Holds if `open` is an opening directive at depth 1 in a file for which `headerGuard` holds. */
+predicate headerGuardChild(PreprocessorDirective open) {
+  exists(File guarded | headerGuard(_, guarded) |
+    openWithDepth(1, guarded, open, _)
+  )
+}
+
+predicate topLevelOpen(PreprocessorDirective open) {
+  headerGuardChild(open) // directly inside a header guard
+  or
+  not headerGuard(open, _) and openWithDepth(0, _, open, _) // outermost, and not itself a header guard
+}
+
+from File f
+where f.fromSource()
+select f,
+  // total length of all top-level conditional segments that open in `f`
+  sum(PreprocessorDirective open, int segmentLength |
+    topLevelOpen(open) and
+    length(open, segmentLength) and open.getFile() = f
+  |
+    segmentLength
+  )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FAfferentCoupling.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FAfferentCoupling.ql
new file mode 100644
index 00000000000..e3c91035f58
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FAfferentCoupling.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Incoming dependencies per file
+ * @description The number of files that depend on a file.
+ * @kind treemap
+ * @id cpp/afferent-coupling-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from File f, int n
+where f.fromSource() and n = f.getMetrics().getAfferentCoupling()
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FCommentRatio.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FCommentRatio.ql
new file mode 100644
index 00000000000..578c725116e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FCommentRatio.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Percentage of comments
+ * @description The percentage of lines that contain comments.
+ * @kind treemap
+ * @id cpp/comment-ratio-per-file
+ * @treemap.warnOn lowValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * documentation
+ */
+
+import cpp
+
+from File f, int comments, int total
+where numlines(unresolveElement(f), total, _, comments) and 0 < total and f.fromSource()
+select f, 100.0 * (comments.(float) / total.(float)) as ratio order by ratio desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FCyclomaticComplexity.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FCyclomaticComplexity.ql
new file mode 100644
index 00000000000..8aae5042ca4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FCyclomaticComplexity.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Average cyclomatic complexity of files
+ * @description The average cyclomatic complexity of the functions in a file.
+ * @kind treemap
+ * @id cpp/average-cyclomatic-complexity-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags testability
+ * complexity
+ */
+
+import cpp
+
+from File f, float complexity, float loc
+where
+  f.fromSource() and
+  loc = sum(FunctionDeclarationEntry entry | entry.getFile() = f | entry.getNumberOfLines()).(float) and
+  (
+    not loc > 0 and complexity = 0
+    or
+    // Weighted average of complexity by function length
+    loc > 0 and
+    complexity =
+      sum(FunctionDeclarationEntry entry | entry.getFile() = f |
+        entry.getNumberOfLines() * entry.getCyclomaticComplexity()
+      ).(float) / loc
+  )
+select f, complexity
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FDirectIncludes.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FDirectIncludes.ql
new file mode 100644
index 00000000000..35ae06ff9c3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FDirectIncludes.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Includes per file
+ * @description The number of files directly included by this file using
+ * `#include`.
+ * @kind treemap
+ * @id cpp/direct-includes-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from File f, int n
+where
+  n = count(Include inc | f = inc.getFile()) and
+  f.fromSource()
+select f, n
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FEfferentCoupling.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FEfferentCoupling.ql
new file mode 100644
index 00000000000..4bf9ba1555f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FEfferentCoupling.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Outgoing dependencies per file
+ * @description The number of files that a file depends on.
+ * @kind treemap
+ * @id cpp/efferent-coupling-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags testability
+ * modularity
+ * maintainability
+ */
+
+import cpp
+
+from File f, int n
+where f.fromSource() and n = f.getMetrics().getEfferentCoupling()
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadBugs.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadBugs.ql
new file mode 100644
index 00000000000..6a782a6466a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadBugs.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Halstead bug measure
+ * @description Measures the expected number of delivered bugs. The
+ * Halstead bug count is known to be an underestimate.
+ * @kind treemap
+ * @id cpp/halstead-bugs-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricFile mc
+where mc.fromSource() // keep only files for which `fromSource()` holds
+select mc, mc.getHalsteadDeliveredBugs() // each file paired with its Halstead delivered-bugs value
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadDifficulty.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadDifficulty.ql
new file mode 100644
index 00000000000..c2a69b7f24c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadDifficulty.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Halstead difficulty
+ * @description Measures the error proneness of implementing the program.
+ * @kind treemap
+ * @id cpp/halstead-difficulty-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricFile mc
+where mc.fromSource() // keep only files for which `fromSource()` holds
+select mc, mc.getHalsteadDifficulty() // each file paired with its Halstead difficulty value
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadEffort.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadEffort.ql
new file mode 100644
index 00000000000..5ee2cad6b89
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadEffort.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Halstead effort
+ * @description Measures the effort to implement the program.
+ * @kind treemap
+ * @id cpp/halstead-effort-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricFile mc
+where mc.fromSource() // keep only files for which `fromSource()` holds
+select mc, mc.getHalsteadEffort() // each file paired with its Halstead effort value
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadLength.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadLength.ql
new file mode 100644
index 00000000000..55a6293a3d8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadLength.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Halstead length
+ * @description Total number of operands and operators
+ * @kind treemap
+ * @id cpp/halstead-length-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricFile mc
+where mc.fromSource() // keep only files for which `fromSource()` holds
+select mc, mc.getHalsteadLength() // each file paired with its Halstead length value
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadVocabulary.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadVocabulary.ql
new file mode 100644
index 00000000000..93237473499
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadVocabulary.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Halstead vocabulary
+ * @description Number of distinct operands and operators used.
+ * @kind treemap
+ * @id cpp/halstead-vocabulary-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricFile mc
+where mc.fromSource() // keep only files for which `fromSource()` holds
+select mc, mc.getHalsteadVocabulary() // each file paired with its Halstead vocabulary value
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadVolume.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadVolume.ql
new file mode 100644
index 00000000000..26df8ffbead
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FHalsteadVolume.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Halstead volume
+ * @description The information contents of the program.
+ * @kind treemap
+ * @id cpp/halstead-volume-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from MetricFile mc
+where mc.fromSource() // keep only files for which `fromSource()` holds
+select mc, mc.getHalsteadVolume() // each file paired with its Halstead volume value
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLines.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLines.ql
new file mode 100644
index 00000000000..a4c5895d4c1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLines.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Number of lines
+ * @description The number of lines in each file.
+ * @kind treemap
+ * @id cpp/lines-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from File f, int n
+where f.fromSource() and n = f.getMetrics().getNumberOfLines()
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfCode.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfCode.ql
new file mode 100644
index 00000000000..11b3eac2977
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfCode.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Lines of code in files
+ * @kind treemap
+ * @description Measures the number of lines in a file that contain
+ * code (rather than lines that only contain comments
+ * or are blank)
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @id cpp/lines-of-code-in-files
+ * @tags maintainability
+ * complexity
+ */
+
+import cpp
+
+from File f, int n
+where f.fromSource() and n = f.getMetrics().getNumberOfLinesOfCode()
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfCommentedOutCode.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfCommentedOutCode.ql
new file mode 100644
index 00000000000..13ff54e5525
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfCommentedOutCode.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Lines of commented-out code in files
+ * @description The number of lines of commented-out code in a file.
+ * @kind treemap
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @id cpp/lines-of-commented-out-code-in-files
+ * @tags documentation
+ */
+
+import Documentation.CommentedOutCode
+
+from File f, int n
+where sum(CommentedOutCode block | f = block.getFile() | block.numCodeLines()) = n
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfComments.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfComments.ql
new file mode 100644
index 00000000000..2372f5cc375
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfComments.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Lines of comments in files
+ * @description Measures the number of lines which contain a comment
+ * or part of a comment (that is, which are part of a
+ * multi-line comment).
+ * @kind treemap
+ * @treemap.warnOn lowValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @id cpp/lines-of-comments-in-files
+ * @tags maintainability
+ * documentation
+ */
+
+import cpp
+
+from File f, int n
+where f.fromSource() and n = f.getMetrics().getNumberOfLinesOfComments()
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfDuplicatedCode.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfDuplicatedCode.ql
new file mode 100644
index 00000000000..8b46df05adc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FLinesOfDuplicatedCode.ql
@@ -0,0 +1,27 @@
+/**
+ * @deprecated
+ * @name Duplicated lines in files
+ * @description The number of lines in a file, including code, comment
+ * and whitespace lines, which are duplicated in at least
+ * one other place.
+ * @kind treemap
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @id cpp/duplicated-lines-in-files
+ * @tags testability
+ * modularity
+ */
+
+import external.CodeDuplication
+
+from File f, int n
+where
+  n =
+    count(int line |
+      not whitelistedLineForDuplication(f, line) and
+      exists(DuplicateBlock dup | f = dup.sourceFile() |
+        line = [dup.sourceStartLine() .. dup.sourceEndLine()]
+      )
+    )
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FMacroRatio.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FMacroRatio.ql
new file mode 100644
index 00000000000..6c07c34d964
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FMacroRatio.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Usage of macros
+ * @description The percentage of source lines in each file that contain
+ * use of macros.
+ * @kind treemap
+ * @id cpp/macro-ratio-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+/** Holds if some macro invocation located in `f` starts on line `line`. */
+predicate macroLines(File f, int line) {
+  exists(MacroInvocation invocation | f = invocation.getFile() |
+    line = invocation.getLocation().getStartLine()
+  )
+}
+
+predicate macroLineCount(File f, int num) { count(int macroLine | macroLines(f, macroLine)) = num }
+
+from MetricFile f, int macroLines, int loc
+where
+  macroLineCount(f, macroLines) and
+  f.getNumberOfLinesOfCode() = loc and
+  not loc <= 0 and // guards the division in the select clause
+  f.fromSource()
+select f, 100.0 * (macroLines.(float) / loc.(float)) as ratio order by ratio desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FNumberOfClasses.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FNumberOfClasses.ql
new file mode 100644
index 00000000000..845ff5d0994
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FNumberOfClasses.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Number of classes per file
+ * @kind treemap
+ * @id cpp/number-of-classes-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from File f, int n
+where
+  n = count(Class cls | f = cls.getAFile()) and
+  f.fromSource()
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FNumberOfTests.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FNumberOfTests.ql
new file mode 100644
index 00000000000..c7ec5983253
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FNumberOfTests.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Number of tests
+ * @description The number of test methods defined in a file.
+ * @kind treemap
+ * @treemap.warnOn lowValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @id cpp/tests-in-files
+ * @tags maintainability
+ */
+
+import cpp
+
+Expr getTest() {
+  exists(Function target | target = result.(FunctionCall).getTarget() |
+    // cppunit tests; https://freedesktop.org/wiki/Software/cppunit/
+    target.hasQualifiedName("CppUnit", _, "addTest")
+    or
+    // boost tests (http://www.boost.org/) and googletest tests (https://github.com/google/googletest/)
+    target.hasQualifiedName("boost::unit_test", "make_test_case") or
+    target.hasQualifiedName("testing::internal", "MakeAndRegisterTestInfo")
+  )
+}
+
+from File f, int n
+where strictcount(Expr testCall | f = testCall.getFile() and testCall = getTest()) = n
+select f, n order by n desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTimeInFrontend.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTimeInFrontend.ql
new file mode 100644
index 00000000000..fd6527ee184
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTimeInFrontend.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Compilation time
+ * @description Measures the amount of time (in milliseconds) spent
+ * compiling a C/C++ file, including time spent processing
+ * all files included by the pre-processor.
+ * @kind treemap
+ * @id cpp/time-in-frontend-per-file
+ * @metricType fileCompilation
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * testability
+ */
+
+import cpp
+
+from string x, float t
+where
+ exists(int idx, Compilation comp |
+ comp.getFileCompiled(idx).fromSource() and
+ t = 1000 * comp.getFrontendCpuSeconds(idx) and // seconds scaled by 1000
+ x = comp.toString() + ":" + idx.toString() // label: compilation, colon, file index
+ )
+select x, t order by t desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTodoComments.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTodoComments.ql
new file mode 100644
index 00000000000..ef92f9f4796
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTodoComments.ql
@@ -0,0 +1,26 @@
+/**
+ * @name Number of todo/fixme comments per file
+ * @description The number of TODO or FIXME comments in a file.
+ * @kind treemap
+ * @id cpp/todo-comments-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * documentation
+ */
+
+import cpp
+
+from File f, int n
+where
+ f.fromSource() and // keep only files for which `fromSource()` holds
+ n =
+ count(Comment c |
+ c.getFile() = f and
+ (
+ c.getContents().matches("%TODO%") or // `%` matches any run of characters
+ c.getContents().matches("%FIXME%")
+ )
+ )
+select f, n order by n desc // highest counts first; this metric warns on high values
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTransitiveIncludes.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTransitiveIncludes.ql
new file mode 100644
index 00000000000..2cde128e4bd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTransitiveIncludes.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Indirect includes per file
+ * @description The number of files included by the
+ * pre-processor - either directly by an `#include`
+ * directive, or indirectly (by being included by an
+ * included file).
+ * @kind treemap
+ * @id cpp/transitive-includes-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from File f, int n
+where
+ n = count(File reached | reached = f.getAnIncludedFile+()) and // `+`: one or more steps
+ f.fromSource()
+select f, n
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTransitiveSourceIncludes.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTransitiveSourceIncludes.ql
new file mode 100644
index 00000000000..d480699b8c4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FTransitiveSourceIncludes.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Indirect source includes per file
+ * @description The number of source files included by the
+ * pre-processor - either directly by an `#include`
+ * directive, or indirectly (by being included by an
+ * included file). This metric excludes included files
+ * that aren't part of the main code base (like system
+ * headers).
+ * @kind treemap
+ * @id cpp/transitive-source-includes-per-file
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+/** Holds if the absolute path of `f` begins with a `sourceLocationPrefix` value. */
+predicate isInCodebase(File f) {
+ exists(string root | sourceLocationPrefix(root) |
+ root = f.getAbsolutePath().prefix(root.length()))
+}
+
+from File f, int n
+where
+ n = count(File reached | isInCodebase(reached) and reached = f.getAnIncludedFile+()) and
+ f.fromSource()
+select f, n
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/FunctionLength.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FunctionLength.ql
new file mode 100644
index 00000000000..d753e5b5ddc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/FunctionLength.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Function length
+ * @description The average number of lines in functions in each file.
+ * @kind treemap
+ * @id cpp/function-length
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags maintainability
+ * readability
+ */
+
+import cpp
+
+from File f
+where f.fromSource()
+select f,
+ avg(MetricFunction func |
+ not func instanceof MemberFunction and // member functions are left out
+ f = func.getFile()
+ |
+ func.getNumberOfLinesOfCode()
+ )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfFunctions.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfFunctions.ql
new file mode 100644
index 00000000000..3fa9387935b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfFunctions.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Functions per file
+ * @description The total number of functions in each file.
+ * @kind treemap
+ * @id cpp/number-of-functions
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from File f
+where f.fromSource()
+select f,
+ count(Function func |
+ not func instanceof MemberFunction and // member functions are left out
+ f = func.getFile()
+ )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfGlobals.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfGlobals.ql
new file mode 100644
index 00000000000..36ebfcf4d8a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfGlobals.ql
@@ -0,0 +1,44 @@
+/**
+ * @name Global variables
+ * @description The total number of global variables in each file.
+ * @kind treemap
+ * @id cpp/number-of-globals
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+/** Holds if some macro invocation located in `f` starts on `startLine` and ends on `endLine`. */
+predicate macroLocation(File f, int startLine, int endLine) {
+ exists(Location loc |
+ loc = any(MacroInvocation inv).getLocation() and
+ f = loc.getFile() and
+ startLine = loc.getStartLine() and endLine = loc.getEndLine()
+ )
+}
+
+/** Gets the location of `v`. */
+pragma[nomagic]
+Location getVariableLocation(Variable v) { v.getLocation() = result }
+
+/** Holds if the location of `gv` has the absolute path of `f` and spans `startLine`..`endLine`. */
+predicate globalLocation(GlobalVariable gv, File f, int startLine, int endLine) {
+ getVariableLocation(gv)
+ .hasLocationInfo(f.getAbsolutePath(), startLine, _, endLine, _)
+}
+
+/** Holds if the line range of `gv` lies within the line range of a macro invocation in its file. */
+predicate inMacro(GlobalVariable gv) {
+ exists(File file, int mStart, int mEnd, int vStart, int vEnd |
+ globalLocation(gv, file, vStart, vEnd) and
+ macroLocation(file, mStart, mEnd) and
+ mStart <= vStart and vEnd <= mEnd
+ )
+}
+
+from File f
+where f.fromSource()
+select f, count(GlobalVariable g | not inMacro(g) and f = g.getFile()) // skips globals inside macros
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfParameters.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfParameters.ql
new file mode 100644
index 00000000000..447c2af0e88
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfParameters.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Parameters per function
+ * @description The average number of parameters of functions in each file.
+ * @kind treemap
+ * @id cpp/number-of-parameters
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg max
+ * @tags testability
+ * complexity
+ */
+
+import cpp
+
+from File f
+where f.fromSource()
+select f,
+ avg(Function func |
+ not func instanceof MemberFunction and f = func.getFile() // non-member functions of `f`
+ |
+ func.getNumberOfParameters()
+ )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfPublicFunctions.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfPublicFunctions.ql
new file mode 100644
index 00000000000..f3cfee52e93
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfPublicFunctions.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Public functions per file
+ * @description The total number of public (non-static) functions in
+ * each file.
+ * @kind treemap
+ * @id cpp/number-of-public-functions
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from File f
+where f.fromSource()
+select f,
+ count(Function func |
+ not func.isStatic() and // static functions are left out
+ not func instanceof MemberFunction and // so are member functions
+ f = func.getFile()
+ )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfPublicGlobals.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfPublicGlobals.ql
new file mode 100644
index 00000000000..953f4a4366e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Files/NumberOfPublicGlobals.ql
@@ -0,0 +1,51 @@
+/**
+ * @name Public global variables
+ * @description The total number of global variables in each file with
+ * external (public) visibility.
+ * @kind treemap
+ * @id cpp/number-of-public-globals
+ * @treemap.warnOn highValues
+ * @metricType file
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+/** Holds if some macro invocation located in `f` starts on `startLine` and ends on `endLine`. */
+predicate macroLocation(File f, int startLine, int endLine) {
+ exists(Location loc |
+ loc = any(MacroInvocation inv).getLocation() and
+ f = loc.getFile() and
+ startLine = loc.getStartLine() and endLine = loc.getEndLine()
+ )
+}
+
+/** Gets the location of `v`. */
+pragma[nomagic]
+Location getVariableLocation(Variable v) { v.getLocation() = result }
+
+/** Holds if the location of `gv` has the absolute path of `f` and spans `startLine`..`endLine`. */
+predicate globalLocation(GlobalVariable gv, File f, int startLine, int endLine) {
+ getVariableLocation(gv)
+ .hasLocationInfo(f.getAbsolutePath(), startLine, _, endLine, _)
+}
+
+/** Holds if the line range of `gv` lies within the line range of a macro invocation in its file. */
+predicate inMacro(GlobalVariable gv) {
+ exists(File file, int mStart, int mEnd, int vStart, int vEnd |
+ globalLocation(gv, file, vStart, vEnd) and
+ macroLocation(file, mStart, mEnd) and
+ mStart <= vStart and vEnd <= mEnd
+ )
+}
+
+from File f
+where f.fromSource()
+select f,
+ count(GlobalVariable g |
+ not inMacro(g) and // globals inside a macro invocation's lines are left out
+ not g.isStatic() and // so are static globals
+ f = g.getFile()
+ )
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunCyclomaticComplexity.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunCyclomaticComplexity.ql
new file mode 100644
index 00000000000..44b64a46809
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunCyclomaticComplexity.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Cyclomatic complexity of functions
+ * @description The Cyclomatic complexity (an indication of how many
+ * tests are necessary, based on the number of branching
+ * statements) per function.
+ * @kind treemap
+ * @id cpp/cyclomatic-complexity-per-function
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg max sum
+ * @tags testability
+ * maintainability
+ * complexity
+ */
+
+import cpp
+
+from Function f
+where 1 = strictcount(f.getEntryPoint()) // exactly one entry point
+select f, f.getMetrics().getCyclomaticComplexity()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunIterationNestingDepth.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunIterationNestingDepth.ql
new file mode 100644
index 00000000000..bc1cb095fd1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunIterationNestingDepth.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Iteration depth
+ * @description The maximum number of nested loops in each function.
+ * @kind treemap
+ * @id cpp/iteration-nesting-depth-per-function
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg max
+ * @tags maintainability
+ * complexity
+ */
+
+import cpp
+
+/** Gets the number of `Loop`s among `l` and its transitive parents. */
+int iterationDepth(Stmt l) { result = count(Loop enclosing | enclosing = l.getParent*()) }
+
+from Function f, int depth
+where 1 = strictcount(f.getEntryPoint()) and
+ depth = max(Stmt inner | f = inner.getEnclosingFunction() | iterationDepth(inner))
+select f, depth order by depth desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunLinesOfCode.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunLinesOfCode.ql
new file mode 100644
index 00000000000..979640a9626
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunLinesOfCode.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Lines of code per function
+ * @description Measures the number of lines in a function that contain
+ * code (rather than lines that only contain comments or
+ * are blank).
+ * @kind treemap
+ * @id cpp/lines-of-code-per-function
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from Function f
+where 1 = strictcount(f.getEntryPoint()) // exactly one entry point
+select f, f.getMetrics().getNumberOfLinesOfCode()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunLinesOfComments.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunLinesOfComments.ql
new file mode 100644
index 00000000000..283c2085a41
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunLinesOfComments.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Lines of comments per function
+ * @description Measures the number of lines in a function that contain
+ * a comment or part of a comment (that is, which are part
+ * of a multi-line comment).
+ * @kind treemap
+ * @id cpp/lines-of-comments-per-function
+ * @treemap.warnOn lowValues
+ * @metricType callable
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * documentation
+ */
+
+import cpp
+
+from Function f
+where 1 = strictcount(f.getEntryPoint()) // exactly one entry point
+select f, f.getMetrics().getNumberOfLinesOfComments()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfCalls.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfCalls.ql
new file mode 100644
index 00000000000..a36bf9a074a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfCalls.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Number of function calls per function
+ * @description The number of C/C++ function calls per function.
+ * @kind treemap
+ * @id cpp/number-of-calls-per-function
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ * complexity
+ */
+
+import cpp
+
+/** Holds if the target of `fc` is an `Operator` or a `ConversionOperator`. */
+predicate callToOperator(FunctionCall fc) {
+ exists(Function t | t = fc.getTarget() | t instanceof ConversionOperator or t instanceof Operator)
+}
+
+from Function f, int n, int o
+where
+ o =
+ count(FunctionCall call |
+ not callToOperator(call) and
+ not call.isCompilerGenerated() and
+ not call.isInMacroExpansion() and
+ f = call.getEnclosingFunction()
+ ) and
+ n = o / count(f.getBlock()) and // integer division by the number of blocks of `f`
+ 1 = strictcount(f.getEntryPoint())
+select f, n
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfParameters.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfParameters.ql
new file mode 100644
index 00000000000..2e8f8de50ac
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfParameters.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Number of parameters per function
+ * @description The number of formal parameters for each function.
+ * @kind treemap
+ * @id cpp/number-of-parameters-per-function
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg max
+ * @tags testability
+ * maintainability
+ */
+
+import cpp
+
+from Function f
+where 1 = strictcount(f.getEntryPoint()) // exactly one entry point
+select f, f.getMetrics().getNumberOfParameters()
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfStatements.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfStatements.ql
new file mode 100644
index 00000000000..565afe71c82
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunNumberOfStatements.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Number of statements per function
+ * @description The number of C/C++ statements per function.
+ * @kind treemap
+ * @id cpp/statements-per-function
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg sum max
+ * @tags maintainability
+ */
+
+import cpp
+
+from Function f, int n
+where
+ n = count(Stmt stmt | f = stmt.getEnclosingFunction()) and // statements enclosed by `f`
+ 1 = strictcount(f.getEntryPoint()) // exactly one entry point
+select f, n
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunPercentageOfComments.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunPercentageOfComments.ql
new file mode 100644
index 00000000000..f00f8590055
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/FunPercentageOfComments.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Comment ratio per function
+ * @description The ratio of comment lines to the total number of lines
+ * in a function.
+ * @kind treemap
+ * @id cpp/percentage-of-comments-per-function
+ * @treemap.warnOn lowValues
+ * @metricType callable
+ * @metricAggregate avg max
+ * @tags maintainability
+ * documentation
+ */
+
+import cpp
+
+from MetricFunction f
+where 1 = strictcount(f.getEntryPoint()) and 0 < f.getNumberOfLines() // nonzero divisor below
+select f, 100.0 * (f.getNumberOfLinesOfComments().(float) / f.getNumberOfLines().(float))
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Functions/StatementNestingDepth.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/StatementNestingDepth.ql
new file mode 100644
index 00000000000..7b5dda4f7b0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Functions/StatementNestingDepth.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Nesting depth
+ * @description The maximum number of nested statements (for example,
+ * `if`, `for`, `while`, etc.). Blocks are not counted.
+ * @kind treemap
+ * @id cpp/statement-nesting-depth
+ * @treemap.warnOn highValues
+ * @metricType callable
+ * @metricAggregate avg max
+ * @tags maintainability
+ * complexity
+ */
+
+import cpp
+
+from MetricFunction f, int depth
+where
+ 1 = strictcount(f.getEntryPoint()) and // exactly one entry point
+ f.getNestingDepth() = depth
+select f, depth order by depth desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableDisplayStrings.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableDisplayStrings.ql
new file mode 100644
index 00000000000..9923ebc26a4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableDisplayStrings.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Display strings of functions
+ * @kind display-string
+ * @id cpp/callable-display-strings
+ * @metricType callable
+ */
+
+import cpp
+
+from Function f
+where f.fromSource() // keep only functions for which `fromSource()` holds
+select f, f.getName() // each function paired with its name
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableExtents.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableExtents.ql
new file mode 100644
index 00000000000..7a376c6da72
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableExtents.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Extents of functions
+ * @kind extent
+ * @id cpp/callable-extents
+ * @metricType callable
+ */
+
+import cpp
+
+/**
+ * A `Function` whose `hasLocationInfo` starts where the function's own
+ * location starts and ends where its block ends (or, without a block,
+ * at column 1 of the line after the start line).
+ */
+class RangeFunction extends Function {
+ /**
+ * Holds if this function is at the specified location.
+ * The location spans column `sc` of line `sl` to
+ * column `ec` of line `el` in file `path`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
+ (
+ not exists(this.getBlock()) and ec = 1 and el = sl + 1
+ or
+ this.getBlock().getLocation().hasLocationInfo(path, _, _, el, ec)
+ ) and
+ super.getLocation().hasLocationInfo(path, sl, sc, _, _)
+ }
+}
+
+from RangeFunction f
+where f.fromSource() // keep only functions for which `fromSource()` holds
+select f.getLocation(), f // location first, then the function itself
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableSourceLinks.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableSourceLinks.ql
new file mode 100644
index 00000000000..7430aa30a07
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/CallableSourceLinks.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Source links of functions
+ * @kind source-link
+ * @id cpp/callable-source-links
+ * @metricType callable
+ */
+
+import cpp
+
+from Function f
+where f.fromSource() // keep only functions for which `fromSource()` holds
+select f, f.getFile() // each function paired with its file
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Internal/DiagnosticsSumElapsedTimes.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/DiagnosticsSumElapsedTimes.ql
new file mode 100644
index 00000000000..530811c0801
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/DiagnosticsSumElapsedTimes.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Sum of frontend and extractor time
+ * @description The sum of elapsed frontend time, and the sum of elapsed extractor time.
+ * This query is for internal use only and may change without notice.
+ * @kind table
+ * @id cpp/frontend-and-extractor-time
+ */
+
+import cpp
+
+select sum(Compilation comp, float secs | compilation_time(comp, _, 2, secs) | secs) as sum_frontend_elapsed_seconds,
+ sum(Compilation comp, float secs | compilation_time(comp, _, 4, secs) | secs) as sum_extractor_elapsed_seconds
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Internal/ReftypeDisplayStrings.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/ReftypeDisplayStrings.ql
new file mode 100644
index 00000000000..68c966467f2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/ReftypeDisplayStrings.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Display strings of classes
+ * @kind display-string
+ * @id cpp/reference-type-display-strings
+ * @metricType reftype
+ */
+
+import cpp
+
+from Class c
+where c.fromSource() // keep only classes for which `fromSource()` holds
+select c, c.getName() // each class paired with its name
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Internal/ReftypeSourceLinks.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/ReftypeSourceLinks.ql
new file mode 100644
index 00000000000..1d03a2b6e64
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Internal/ReftypeSourceLinks.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Source links of classes
+ * @kind source-link
+ * @id cpp/reference-type-source-links
+ * @metricType reftype
+ */
+
+import cpp
+
+from Class c
+where c.fromSource() // keep only classes for which `fromSource()` holds
+select c, c.getFile() // each class paired with its file
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/AbstractNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/AbstractNamespaces.ql
new file mode 100644
index 00000000000..2d1440813f4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/AbstractNamespaces.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Abstract namespaces
+ * @description Finds namespaces that have an abstractness greater than 0.20.
+ * @kind chart
+ * @id cpp/abstract-namespaces
+ * @chart.type bar
+ * @tags maintainability
+ */
+
+import cpp
+
+from Namespace ns, float abstractness
+where
+ abstractness = ns.getMetrics().getAbstractness() and
+ 0.2 < abstractness and // threshold stated in the query description
+ ns.fromSource()
+select ns as Namespace, abstractness as Abstractness order by Abstractness desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/ConcreteNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/ConcreteNamespaces.ql
new file mode 100644
index 00000000000..c71f654f275
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/ConcreteNamespaces.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Concrete namespaces
+ * @description Finds namespaces that have an abstractness equal to 0.
+ * @kind tree
+ * @id cpp/concrete-namespaces
+ * @tags maintainability
+ */
+
+import cpp
+
+from Namespace ns, float abstractness
+where
+ abstractness = ns.getMetrics().getAbstractness() and
+ abstractness = 0 and // only namespaces whose abstractness is exactly zero
+ ns.fromSource()
+select ns as Namespace, abstractness as Abstractness order by Abstractness desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighAfferentCouplingNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighAfferentCouplingNamespaces.ql
new file mode 100644
index 00000000000..89dae1a3404
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighAfferentCouplingNamespaces.ql
@@ -0,0 +1,18 @@
+/**
+ * @name High afferent coupling namespaces
+ * @description Finds namespaces that have an afferent coupling greater
+ * than 20.
+ * @kind chart
+ * @id cpp/high-afferent-coupling-namespaces
+ * @chart.type bar
+ * @tags maintainability
+ */
+
+import cpp
+
+from Namespace ns, int coupling
+where
+ coupling = ns.getMetrics().getAfferentCoupling() and
+ 20 < coupling and // threshold stated in the query description
+ ns.fromSource()
+select ns as Namespace, coupling as AfferentCoupling order by AfferentCoupling desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighDistanceFromMainLineNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighDistanceFromMainLineNamespaces.ql
new file mode 100644
index 00000000000..ce5553749d2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighDistanceFromMainLineNamespaces.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Namespaces far from main line
+ * @description Finds namespaces that do not have a good balance between
+ * abstractness and stability.
+ * @kind chart
+ * @id cpp/high-distance-from-main-line-namespaces
+ * @chart.type bar
+ * @tags maintainability
+ */
+
+import cpp
+
+from Namespace ns, float distance
+where
+ distance = ns.getMetrics().getDistanceFromMain() and
+ 0.7 < distance and // reported only above this distance
+ ns.fromSource()
+select ns as Namespace, distance as DistanceFromMainline order by DistanceFromMainline desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighEfferentCouplingNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighEfferentCouplingNamespaces.ql
new file mode 100644
index 00000000000..1de659f1384
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/HighEfferentCouplingNamespaces.ql
@@ -0,0 +1,18 @@
+/**
+ * @name High efferent coupling namespaces
+ * @description Finds namespaces that have an efferent coupling greater than 20.
+ * @kind chart
+ * @id cpp/high-efferent-coupling-namespaces
+ * @chart.type bar
+ * @tags maintainability
+ * modularity
+ */
+
+import cpp
+
+from Namespace ns, int coupling
+where
+ coupling = ns.getMetrics().getEfferentCoupling() and
+ 20 < coupling and // threshold stated in the query description
+ ns.fromSource()
+select ns as Namespace, coupling as EfferentCoupling order by EfferentCoupling desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/StableNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/StableNamespaces.ql
new file mode 100644
index 00000000000..3fd17ee4066
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/StableNamespaces.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Stable namespaces
+ * @description Finds namespaces that have an instability lower than 0.2.
+ * @kind chart
+ * @id cpp/stable-namespaces
+ * @chart.type bar
+ * @tags maintainability
+ */
+
+import cpp
+
+from Namespace ns, float instability
+where
+ instability = ns.getMetrics().getInstability() and
+ 0.2 > instability and // threshold stated in the query description
+ ns.fromSource()
+select ns as Namespace, instability as Instability order by Instability desc
diff --git a/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/UnstableNamespaces.ql b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/UnstableNamespaces.ql
new file mode 100644
index 00000000000..8e150d62f03
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Metrics/Namespaces/UnstableNamespaces.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Unstable namespaces
+ * @description Finds namespaces that have an instability higher than 0.8.
+ * @kind chart
+ * @id cpp/unstable-namespaces
+ * @chart.type bar
+ * @tags maintainability
+ */
+
+import cpp
+
+from Namespace n, float c
+where
+ n.fromSource() and // keep only namespaces for which `fromSource()` holds
+ c = n.getMetrics().getInstability() and
+ c > 0.8 // threshold stated in the query description
+select n as Namespace, c as Instability order by Instability desc
diff --git a/repo-tests/codeql/cpp/ql/src/Microsoft/CallWithNullSAL.ql b/repo-tests/codeql/cpp/ql/src/Microsoft/CallWithNullSAL.ql
new file mode 100644
index 00000000000..d5c74547c3e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Microsoft/CallWithNullSAL.ql
@@ -0,0 +1,22 @@
+/**
+ * @name SAL requires non-null argument
+ * @description When null is passed to a function that is SAL-annotated to
+ * forbid this, undefined behavior may result.
+ * @kind problem
+ * @id cpp/call-with-null-sal
+ * @problem.severity warning
+ * @tags reliability
+ */
+
+import cpp
+import SAL
+
+from Parameter p, Call c, Expr arg
+where
+ p = any(SALNotNull annot).getDeclaration() and // parameter carries a not-null annotation
+ p.getFunction() = c.getTarget() and
+ nullValue(arg) and
+ c.getArgument(p.getIndex()) = arg // the argument passed in the position of `p`
+select arg,
+ "Argument (" + arg.toString() + ") for parameter $@ in call to " + c.getTarget().getName() +
+ " may be null, but a SAL annotation forbids this.", p, p.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Microsoft/IgnoreReturnValueSAL.ql b/repo-tests/codeql/cpp/ql/src/Microsoft/IgnoreReturnValueSAL.ql
new file mode 100644
index 00000000000..bf52b2bb615
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Microsoft/IgnoreReturnValueSAL.ql
@@ -0,0 +1,24 @@
+/**
+ * @name SAL requires inspecting return value
+ * @description When a return value is discarded even though the SAL annotation
+ * requires inspecting it, a recoverable error may turn into a
+ * whole-program crash.
+ * @kind problem
+ * @id cpp/ignore-return-value-sal
+ * @problem.severity warning
+ * @tags reliability
+ * external/cwe/cwe-573
+ * external/cwe/cwe-252
+ * @microsoft.severity Important
+ */
+
+import SAL
+
+from Function f, FunctionCall call
+where
+ f = any(SALCheckReturn annot).getDeclaration() and // callee carries a check-return annotation
+ f = call.getTarget() and
+ not getOptions().okToIgnoreReturnValue(call) and
+ call instanceof ExprInVoidContext
+select call, "Return value of $@ discarded although a SAL annotation " + "requires inspecting it.",
+ f, f.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Microsoft/InconsistentSAL.ql b/repo-tests/codeql/cpp/ql/src/Microsoft/InconsistentSAL.ql
new file mode 100644
index 00000000000..e5965e62b49
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Microsoft/InconsistentSAL.ql
@@ -0,0 +1,41 @@
+/**
+ * @name Inconsistent SAL annotation
+ * @description Annotations are different between declaration and definition.
+ * @kind problem
+ * @id cpp/inconsistent-sal
+ * @problem.severity warning
+ * @tags reliability
+ */
+
+import SAL
+
+/** Holds if a SAL annotation whose macro is named `name` annotates `e`. */
+predicate hasAnnotation(DeclarationEntry e, string name) {
+ exists(SALAnnotation annot |
+ e = annot.getDeclarationEntry() and
+ name = annot.getMacro().getName()
+ )
+}
+
+/** Holds if `e`, or the function entry that `e` is a parameter of, uses `_Use_decl_annotations_`. */
+predicate inheritsDeclAnnotations(DeclarationEntry e) {
+ // Base case: a definition that is itself annotated with the macro
+ exists(SALAnnotation annot |
+ e = annot.getDeclarationEntry() and
+ annot.getMacro().getName() = "_Use_decl_annotations_"
+ ) and e.isDefinition()
+ or
+ // Recursive case: a parameter entry of a function entry for which this holds
+ inheritsDeclAnnotations(e.(ParameterDeclarationEntry).getFunctionDeclarationEntry())
+}
+
+from DeclarationEntry e1, DeclarationEntry e2, string name
+where
+ hasAnnotation(e1, name) and
+ name != "_Use_decl_annotations_" and
+ e2.getDeclaration() = e1.getDeclaration() and // two entries of the same declaration
+ not inheritsDeclAnnotations(e2) and
+ not hasAnnotation(e2, name) // `e2` lacks the annotation that `e1` has
+select e2,
+ "Missing SAL annotation " + name + " in " + e2.toString() + " although it is present on $@.", e1,
+ e1.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Microsoft/SAL.qll b/repo-tests/codeql/cpp/ql/src/Microsoft/SAL.qll
new file mode 100644
index 00000000000..46fedbb5d80
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Microsoft/SAL.qll
@@ -0,0 +1,218 @@
+/**
+ * Provides classes for identifying and reasoning about Microsoft source code
+ * annotation language (SAL) macros.
+ */
+
+import cpp
+
+/**
+ * A macro with a SAL-style name that is defined in `sal.h` or a similar header file.
+ */
+class SALMacro extends Macro {
+  SALMacro() {
+    exists(string name | name = this.getName() |
+      // Dialect for Windows 8 and above
+      name.matches("\\_%\\_")
+      or
+      // Dialect for Windows 7
+      name.matches("\\_\\_%")
+    ) and
+    this.getFile().getBaseName() =
+      ["sal.h", "specstrings_strict.h", "specstrings.h", "w32p.h", "minwindef.h"]
+  }
+}
+
+pragma[noinline] // Holds if `ma` has no parent invocation, i.e. it is not nested inside another macro invocation.
+private predicate isTopLevelMacroAccess(MacroAccess ma) { not exists(ma.getParentInvocation()) }
+
+/**
+ * An invocation of a SAL macro that has no parent invocation, i.e. is not nested inside another macro.
+ */
+class SALAnnotation extends MacroInvocation {
+  SALAnnotation() {
+    isTopLevelMacroAccess(this) and
+    this.getMacro() instanceof SALMacro
+  }
+
+  /** Gets the `Declaration` annotated by `this`; types (typedefs) are excluded. */
+  Declaration getDeclaration() {
+    not result instanceof Type and
+    annotatesAt(this, result.getADeclarationEntry(), _, _)
+  }
+
+  /** Gets the `DeclarationEntry` annotated by `this`; type declaration entries are excluded. */
+  DeclarationEntry getDeclarationEntry() {
+    not result instanceof TypeDeclarationEntry and
+    annotatesAt(this, result, _, _)
+  }
+}
+
+/**
+ * A SAL annotation indicating that the return value of a function should
+ * always be checked (`_Check_return_` or `_Must_inspect_result_`).
+ */
+class SALCheckReturn extends SALAnnotation {
+  SALCheckReturn() {
+    exists(SALMacro m | m = this.getMacro() | m.getName() = ["_Check_return_", "_Must_inspect_result_"])
+  }
+}
+
+/**
+ * A SAL annotation indicating that a pointer variable or pointer return value
+ * should not be `NULL`.
+ */
+class SALNotNull extends SALAnnotation {
+  SALNotNull() {
+    exists(string macroName | macroName = this.getMacro().(SALMacro).getName() |
+      (
+        macroName.matches("_In%") or
+        macroName.matches("_Out%") or
+        macroName = "_Ret_notnull_"
+      ) and
+      // macro names containing `_opt_` are excluded
+      not macroName.matches("%\\_opt\\_%")
+    ) and
+    exists(Declaration d | d = this.getDeclaration() |
+      d.(Variable).getType().getUnspecifiedType() instanceof PointerType
+      or
+      d.(Function).getType().getUnspecifiedType() instanceof PointerType
+    )
+  }
+}
+
+/**
+ * A SAL annotation indicating that the annotated value may be `NULL`.
+ */
+class SALMaybeNull extends SALAnnotation {
+  SALMaybeNull() {
+    exists(string macroName | macroName = this.getMacro().(SALMacro).getName() |
+      macroName = "_Result_nullonfailure_" or
+      macroName.matches("\\_Ret_maybenull\\_%") or
+      macroName.matches("%\\_opt\\_%")
+    )
+  }
+}
+
+/**
+ * A parameter annotated by one or more SAL annotations.
+ */
+class SALParameter extends Parameter {
+ /** One of this parameter's annotations. */
+ SALAnnotation a;
+
+ SALParameter() { annotatesAt(a, this.getADeclarationEntry(), _, _) }
+ /** Holds if the lower-cased macro name of annotation `a` contains `_in` (names containing `_inout` match too). */
+ predicate isIn() { a.getMacroName().toLowerCase().matches("%\\_in%") }
+ /** Holds if the lower-cased macro name of annotation `a` contains `_out`. */
+ predicate isOut() { a.getMacroName().toLowerCase().matches("%\\_out%") }
+ /** Holds if the lower-cased macro name of annotation `a` contains `_inout`. */
+ predicate isInOut() { a.getMacroName().toLowerCase().matches("%\\_inout%") }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Implementation details
+/**
+ * Holds if the start position of `a` is the `idx`th position in `file` that
+ * holds a SAL element, and that position annotates the declaration entry `d`.
+ */
+private predicate annotatesAt(SALAnnotation a, DeclarationEntry d, File file, int idx) {
+ annotatesAtPosition(a.(SALElement).getStartPosition(), d, file, idx)
+}
+
+/**
+ * Holds if `pos` is the `idx`th position in `file` that holds a SAL element,
+ * `pos` is the start of a SAL annotation, and the positions following `pos`
+ * lead to the start of `d` through annotation start positions only.
+ */
+// For performance reasons, do not mention the annotation itself here,
+// but compute with positions instead. This performs better on databases
+// with many annotations at the same position.
+private predicate annotatesAtPosition(SALPosition pos, DeclarationEntry d, File file, int idx) {
+ pos = salRelevantPositionAt(file, idx) and
+ salAnnotationPos(pos) and
+ (
+ // Base case: the next ranked position (`idx + 1`) is the start of `d`
+ d.(SALElement).getStartPosition() = salRelevantPositionAt(file, idx + 1)
+ or
+ // Recursive case: the next ranked position (`idx + 1`) itself annotates `d`
+ annotatesAtPosition(_, d, file, idx + 1)
+ )
+}
+
+/**
+ * A SAL element, that is, a SAL annotation or a declaration entry located in
+ * a file that contains at least one SAL annotation.
+ */
+library class SALElement extends Element {
+ SALElement() {
+ containsSALAnnotation(this.(DeclarationEntry).getFile()) or
+ this instanceof SALAnnotation
+ }
+ /** Holds if the location of this element starts at line `line`, column `col` of `file`. */
+ predicate hasStartPosition(File file, int line, int col) {
+ exists(Location loc | loc = this.getLocation() |
+ file = loc.getFile() and
+ line = loc.getStartLine() and
+ col = loc.getStartColumn()
+ )
+ }
+ /** Holds if the block of this function entry, or the initializer of this variable definition, ends at `line`, `col` in `file`. */
+ predicate hasEndPosition(File file, int line, int col) {
+ exists(Location loc |
+ loc = this.(FunctionDeclarationEntry).getBlock().getLocation()
+ or
+ this =
+ any(VariableDeclarationEntry vde |
+ vde.isDefinition() and
+ loc = vde.getVariable().getInitializer().getLocation()
+ )
+ |
+ file = loc.getFile() and
+ line = loc.getEndLine() and
+ col = loc.getEndColumn()
+ )
+ }
+ /** Gets the `SALPosition` built from the file, line and column at which this element starts. */
+ SALPosition getStartPosition() {
+ exists(File file, int line, int col |
+ this.hasStartPosition(file, line, col) and
+ result = MkSALPosition(file, line, col)
+ )
+ }
+}
+
+/** Holds if at least one SAL annotation is located in `file`. */
+pragma[noinline]
+private predicate containsSALAnnotation(File file) { exists(SALAnnotation a | file = a.getFile()) }
+
+/**
+ * A source-file position of a `SALElement`. Unlike a location, this denotes a
+ * single point (file, line, column) rather than a range. A position exists for
+ * every point reported by `hasStartPosition` or `hasEndPosition` of some
+ * `SALElement`.
+ */
+private newtype SALPosition =
+  MkSALPosition(File file, int line, int col) {
+    any(SALElement e).hasStartPosition(file, line, col)
+    or
+    any(SALElement e).hasEndPosition(file, line, col)
+  }
+
+/** Holds if some SAL annotation starts at position `pos`. */
+pragma[noinline]
+private predicate salAnnotationPos(SALPosition pos) {
+  exists(SALAnnotation a | pos = a.(SALElement).getStartPosition())
+}
+
+/**
+ * Gets the position ranked `idx` among all `SALPosition`s in `file`, where
+ * positions are ordered by line first and by column second.
+ */
+private SALPosition salRelevantPositionAt(File file, int idx) {
+  result =
+    rank[idx](SALPosition p, int ln, int column |
+      p = MkSALPosition(file, ln, column)
+    |
+      p order by ln, column
+    )
+}
diff --git a/repo-tests/codeql/cpp/ql/src/PointsTo/Debug.ql b/repo-tests/codeql/cpp/ql/src/PointsTo/Debug.ql
new file mode 100644
index 00000000000..c667af836ec
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/PointsTo/Debug.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Debug - find out what a particular function-pointer points to
+ * @description Query to help investigate mysterious results with ReturnStackAllocatedObject
+ * @kind table
+ * @id cpp/points-to/debug
+ * @deprecated This query is not suitable for production use and has been deprecated.
+ */
+
+import cpp
+import semmle.code.cpp.pointsto.PointsTo
+
+class FieldAccessPT extends PointsToExpr {
+  override predicate interesting() { this = any(FieldAccess access) }
+}
+
+from Function outer, FieldAccessPT fa
+where
+  fa.getEnclosingFunction() = outer and
+  outer.hasName("rtLnDeliverableMayContainDividends") and
+  fa.(FieldAccess).getTarget().hasName("pfFunction")
+select fa, fa.pointsTo()
diff --git a/repo-tests/codeql/cpp/ql/src/PointsTo/PreparedStagedPointsTo.ql b/repo-tests/codeql/cpp/ql/src/PointsTo/PreparedStagedPointsTo.ql
new file mode 100644
index 00000000000..14a15761053
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/PointsTo/PreparedStagedPointsTo.ql
@@ -0,0 +1,12 @@
+/**
+ * @name PrepareStagedPointsTo
+ * @description Query to force evaluation of staged points-to predicates
+ * @kind table
+ * @id cpp/points-to/prepared-staged-points-to
+ * @deprecated This query is not suitable for production use and has been deprecated.
+ */
+
+import semmle.code.cpp.pointsto.PointsTo
+
+select count(int set, Element location | setlocations(set, unresolveElement(location))), // number of (set, location) pairs
+ count(int set, Element element | pointstosets(set, unresolveElement(element))) // number of (set, element) pairs
diff --git a/repo-tests/codeql/cpp/ql/src/PointsTo/Stats.ql b/repo-tests/codeql/cpp/ql/src/PointsTo/Stats.ql
new file mode 100644
index 00000000000..605934f986e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/PointsTo/Stats.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Flow Statistics
+ * @description Count the number of points-to sets with 0 or 1 incoming flow edges, and the total number of points-to sets
+ * @kind table
+ * @id cpp/points-to/stats
+ * @deprecated This query is not suitable for production use and has been deprecated.
+ */
+
+import cpp
+import semmle.code.cpp.pointsto.PointsTo
+
+predicate inc(int set, int cnt) {
+  cnt = count(int i | i != set and setflow(i, set)) and
+  (setflow(_, set) or setflow(set, _))
+}
+
+select count(int set | inc(set, _)) as total, count(int set | inc(set, 0)) as nullary,
+ count(int set | inc(set, 1)) as unary, total - nullary - unary as rest // rest: sets with two or more incoming edges from other sets
diff --git a/repo-tests/codeql/cpp/ql/src/PointsTo/TaintedFormatStrings.ql b/repo-tests/codeql/cpp/ql/src/PointsTo/TaintedFormatStrings.ql
new file mode 100644
index 00000000000..1b671697573
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/PointsTo/TaintedFormatStrings.ql
@@ -0,0 +1,122 @@
+/**
+ * @name Taint test
+ * @kind table
+ * @id cpp/points-to/tainted-format-strings
+ * @deprecated This query is not suitable for production use and has been deprecated.
+ */
+
+import cpp
+import semmle.code.cpp.pointsto.PointsTo
+import semmle.code.cpp.pointsto.CallGraph
+
+/** Holds if argument index `arg` of a function named `function` is listed as an input argument. */
+predicate inputArgument(string function, int arg) {
+  function = "read" and arg = 1
+  or
+  function = ["fread", "fgets"] and
+  arg = 0
+  // ... add more
+}
+
+/** Holds if `e` is an argument of a call whose position is listed by `inputArgument` for the callee's name. */
+predicate inputBuffer(Expr e) {
+  exists(FunctionCall call, int idx |
+    inputArgument(call.getTarget().getName(), idx) and
+    e = call.getArgument(idx)
+  )
+}
+
+class InputBuffer extends PointsToExpr {
+ InputBuffer() { inputBuffer(this) }
+ /** Holds for every input buffer; presumably selects the expressions for which points-to results are computed (TODO confirm). */
+ override predicate interesting() { inputBuffer(this) }
+}
+
+/** Holds if argument index `i` of a function named `function` is listed as a format argument. */
+// Each disjunct below is one (function name, argument index) pair.
+predicate formatArgument(string function, int i) {
+  function = "printf" and i = 0
+  or
+  function = "fprintf" and i = 1
+  or
+  function = "sprintf" and i = 1
+  or
+  function = "snprintf" and i = 2
+  or
+  function = "d_printf" and i = 0
+  or
+  function = "talloc_asprintf" and i = 1
+  or
+  function = "fstr_sprintf" and i = 1
+  or
+  function = "talloc_asprintf_append" and i = 1
+  or
+  function = "d_fprintf" and i = 1
+  or
+  function = "asprintf" and i = 1
+  or
+  function = "talloc_asprintf_append_buffer" and i = 1
+  or
+  function = "fdprintf" and i = 1
+  or
+  function = "d_vfprintf" and i = 1
+  or
+  function = "smb_xvasprintf" and i = 1
+  or
+  function = "asprintf_strupper_m" and i = 1
+  or
+  function = "talloc_asprintf_strupper_m" and i = 1
+  or
+  function = "sprintf_append" and i = 4
+  or
+  function = "x_vfprintf" and i = 1
+  or
+  function = "x_fprintf" and i = 1
+  or
+  function = "vasprintf" and i = 1
+  or
+  function = "ldb_asprintf_errstring" and i = 1
+  or
+  function = "talloc_vasprintf" and i = 1
+  or
+  function = "fprintf_file" and i = 1
+  or
+  function = "vsnprintf" and i = 2
+  or
+  function = "talloc_vasprintf_append" and i = 1
+  or
+  function = "__talloc_vaslenprintf_append" and i = 2
+  or
+  function = "talloc_vasprintf_append_buffer" and i = 1
+  or
+  function = "fprintf_attr" and i = 2
+  or
+  function = "vprintf" and i = 0
+  or
+  function = "vsprintf" and i = 1
+}
+
+/** Holds if `e` is an argument of a call whose position is listed by `formatArgument` for the callee's name. */
+predicate formatBuffer(Expr e) {
+  exists(FunctionCall call, int idx |
+    formatArgument(call.getTarget().getName(), idx) and
+    e = call.getArgument(idx)
+  )
+}
+
+class FormatBuffer extends PointsToExpr {
+ FormatBuffer() { formatBuffer(this) }
+ /** Holds for every format buffer; presumably selects the expressions for which points-to results are computed (TODO confirm). */
+ override predicate interesting() { formatBuffer(this) }
+}
+
+predicate potentialViolation(InputBuffer source, FormatBuffer dest) {
+  not exists(FunctionCall fc | fc = dest and fc.getArgument(1) instanceof Literal |
+    fc.getTarget().hasName("lang_msg_rotate")
+  ) and
+  dest.pointsTo() = source.pointsTo()
+}
+
+from InputBuffer source, FormatBuffer dest
+where potentialViolation(source, dest)
+select dest.getFile() as File, dest as FormatString // columns: containing file, format buffer expression
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfGoto.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfGoto.ql
new file mode 100644
index 00000000000..e214b8d2fbb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfGoto.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Use of goto
+ * @description Using the goto statement complicates function control flow and hinders program understanding.
+ * @kind problem
+ * @id cpp/power-of-10/use-of-goto
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+from GotoStmt jump
+select jump, "The goto statement should not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfJmp.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfJmp.ql
new file mode 100644
index 00000000000..9a6d143bfb6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfJmp.ql
@@ -0,0 +1,28 @@
+/**
+ * @name Use of setjmp or longjmp
+ * @description Using the setjmp and longjmp functions complicates control flow and hinders program understanding.
+ * @kind problem
+ * @id cpp/power-of-10/use-of-jmp
+ * @problem.severity warning
+ * @tags correctness
+ * portability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+/**
+ * A function named `setjmp`, `longjmp`, `sigsetjmp` or `siglongjmp`, that is,
+ * one of the functions whose calls this query reports.
+ */
+class ForbiddenFunction extends Function {
+  ForbiddenFunction() {
+    this.getName() =
+      ["setjmp", "longjmp", "sigsetjmp", "siglongjmp"]
+  }
+}
+
+from FunctionCall call
+where exists(ForbiddenFunction forbidden | forbidden = call.getTarget())
+select call, "The " + call.getTarget().getName() + " function should not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfRecursion.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfRecursion.ql
new file mode 100644
index 00000000000..7d615f586e1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 1/UseOfRecursion.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Use of recursion
+ * @description Recursion makes the program call graph cyclic and hinders
+ * program understanding.
+ * @kind problem
+ * @id cpp/power-of-10/use-of-recursion
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+class RecursiveCall extends FunctionCall {
+  RecursiveCall() { exists(Function encl | encl = this.getEnclosingFunction() | this.getTarget().calls*(encl)) }
+}
+
+from RecursiveCall call, string msg
+where
+  call.getTarget() = call.getEnclosingFunction() and
+  msg = "This call directly invokes its containing function $@."
+  or
+  not call.getTarget() = call.getEnclosingFunction() and
+  msg =
+    "The function " + call.getEnclosingFunction() + " is indirectly recursive via this call to $@."
+select call, msg, call.getTarget(), call.getTarget().getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 2/BoundedLoopIterations.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 2/BoundedLoopIterations.ql
new file mode 100644
index 00000000000..8576b658588
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 2/BoundedLoopIterations.ql
@@ -0,0 +1,143 @@
+/**
+ * @name Unbounded loop
+ * @description All loops should have a fixed upper bound; the counter should also be incremented along all paths within the loop.
+ * This check excludes loops that are meant to be nonterminating (like schedulers).
+ * @kind problem
+ * @id cpp/power-of-10/bounded-loop-iterations
+ * @problem.severity recommendation
+ * @precision low
+ * @tags correctness
+ * reliability
+ * external/powerof10
+ */
+
+import cpp
+
+/** Holds if `var` is read in the condition of `loop` and is neither assigned nor incremented/decremented within `loop`. */
+predicate validVarForBound(Loop loop, Variable var) {
+  loop.getControllingExpr() = var.getAnAccess().getParent*() and
+  // no assignment to the variable within the loop
+  not inScope(loop, var.getAnAssignment().getEnclosingStmt()) and
+  // no increment/decrement of the variable within the loop
+  not inScope(loop, var.getAnAccess().getParent().(CrementOperation).getEnclosingStmt())
+}
+
+/** Holds if `checked` is the lesser operand of a relational operation in the condition of `loop` whose greater operand is a constant, a `const` variable, or a variable valid as a bound. */
+predicate upperBoundCheck(Loop loop, VariableAccess checked) {
+  exists(RelationalOperation rop, Expr bound | rop = loop.getControllingExpr().getAChild*() |
+    checked = rop.getLesserOperand() and
+    bound = rop.getGreaterOperand() and
+    not bound instanceof CharLiteral and
+    (
+      exists(bound.getValue()) or
+      bound.(VariableAccess).getTarget().isConst() or
+      validVarForBound(loop, bound.(VariableAccess).getTarget())
+    )
+  )
+}
+
+/** Holds if `checked` is the greater operand of a relational operation in the condition of `loop` whose lesser operand is a constant, a `const` variable, or a variable valid as a bound. */
+predicate lowerBoundCheck(Loop loop, VariableAccess checked) {
+  exists(RelationalOperation rop, Expr bound | rop = loop.getControllingExpr().getAChild*() |
+    checked = rop.getGreaterOperand() and
+    bound = rop.getLesserOperand() and
+    not bound instanceof CharLiteral and
+    (
+      exists(bound.getValue()) or
+      bound.(VariableAccess).getTarget().isConst() or
+      validVarForBound(loop, bound.(VariableAccess).getTarget())
+    )
+  )
+}
+
+/**
+ * Gets an access to `var` that is the operand of an increment operation, the target of `+= c`, or the target of `= a + b` where one operand accesses `var` and one is a constant `> 0`.
+ */
+VariableAccess getAnIncrement(Variable var) {
+  result.getTarget() = var and
+  (
+    result.getParent() instanceof IncrementOperation
+    or
+    exists(AssignAddExpr add | result = add.getLValue() | add.getRValue().getValue().toInt() > 0)
+    or
+    exists(AssignExpr assign, AddExpr sum | result = assign.getLValue() and sum = assign.getRValue() |
+      sum.getAnOperand() = var.getAnAccess() and
+      sum.getAnOperand().getValue().toInt() > 0
+    )
+  )
+}
+
+/**
+ * Gets an access to `var` that is the operand of a decrement operation, the target of `-= c`, or the target of `= var - c`, where `c` is a constant `> 0`.
+ */
+VariableAccess getADecrement(Variable var) {
+  result.getTarget() = var and
+  (
+    result.getParent() instanceof DecrementOperation
+    or
+    exists(AssignSubExpr sub | result = sub.getLValue() | sub.getRValue().getValue().toInt() > 0)
+    or
+    exists(AssignExpr assign, SubExpr diff | result = assign.getLValue() and diff = assign.getRValue() |
+      diff.getLeftOperand() = var.getAnAccess() and
+      diff.getRightOperand().getValue().toInt() > 0
+    )
+  )
+}
+
+predicate inScope(Loop l, Stmt s) { l.getAChild*() = s } // holds if `s` is `l` itself or a transitive child of `l`
+
+predicate reachesNoInc(VariableAccess source, ControlFlowNode target) {
+ upperBoundCheck(_, source) and source.getASuccessor() = target // base: direct successor of an upper-bound-checked access
+ or
+ exists(ControlFlowNode mid |
+ reachesNoInc(source, mid) and not mid = getAnIncrement(source.getTarget()) // `mid` is reached and is not an increment of the variable
+ |
+ target = mid.getASuccessor() and
+ inScope(source.getEnclosingStmt(), target.getEnclosingStmt()) // only follow successors inside the statement enclosing `source`
+ )
+}
+
+predicate reachesNoDec(VariableAccess source, ControlFlowNode target) {
+ lowerBoundCheck(_, source) and source.getASuccessor() = target // base: direct successor of a lower-bound-checked access
+ or
+ exists(ControlFlowNode mid |
+ reachesNoDec(source, mid) and not mid = getADecrement(source.getTarget()) // `mid` is reached and is not a decrement of the variable
+ |
+ target = mid.getASuccessor() and
+ inScope(source.getEnclosingStmt(), target.getEnclosingStmt()) // only follow successors inside the statement enclosing `source`
+ )
+}
+
+predicate hasSafeBound(Loop l) {
+  exists(l.getControllingExpr().getValue())
+  or
+  exists(VariableAccess bound | upperBoundCheck(l, bound) and not reachesNoInc(bound, bound))
+  or
+  exists(VariableAccess bound | lowerBoundCheck(l, bound) and not reachesNoDec(bound, bound))
+}
+
+from Loop loop, string msg
+where
+  // Only loops without a safe bound are reported
+  not hasSafeBound(loop) and
+  (
+    // No bound check at all
+    msg = "This loop does not have a fixed bound." and
+    not upperBoundCheck(loop, _) and
+    not lowerBoundCheck(loop, _)
+    or
+    // Upper bound check whose counter can reach itself again without an increment
+    exists(VariableAccess counter | upperBoundCheck(loop, counter) and reachesNoInc(counter, counter) |
+      msg =
+        "The loop counter " + counter.getTarget().getName() +
+          " is not always incremented in the loop body."
+    )
+    or
+    // Lower bound check whose counter can reach itself again without a decrement
+    exists(VariableAccess counter | lowerBoundCheck(loop, counter) and reachesNoDec(counter, counter) |
+      msg =
+        "The loop counter " + counter.getTarget().getName() +
+          " is not always decremented in the loop body."
+    )
+  )
+select loop, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 2/ExitPermanentLoop.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 2/ExitPermanentLoop.ql
new file mode 100644
index 00000000000..901dc1ba420
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 2/ExitPermanentLoop.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Exit from permanent loop
+ * @description Permanent loops (like "while(1) {..}") are typically meant to be non-terminating and should not be terminated by other means.
+ * @kind problem
+ * @id cpp/power-of-10/exit-permanent-loop
+ * @problem.severity recommendation
+ * @precision low
+ * @tags correctness
+ * external/powerof10
+ */
+
+import cpp
+
+/** Gets a statement nested in `l` that is a `return`, or a `break` whose target is neither `l` nor a descendant of `l`. */
+Stmt exitFrom(Loop l) {
+  result = l.getAChild+() and
+  (
+    result instanceof ReturnStmt or
+    result instanceof BreakStmt and not l.getAChild*() = result.(BreakStmt).getTarget()
+  )
+}
+
+from Loop l, Stmt exit
+where
+  exit = exitFrom(l) and
+  l.getControllingExpr().getValue().toInt() != 0
+select exit, "$@ should not be exited.", l, "This permanent loop"
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 3/DynamicAllocAfterInit.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 3/DynamicAllocAfterInit.ql
new file mode 100644
index 00000000000..f44ae6e3a7b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 3/DynamicAllocAfterInit.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Dynamic allocation after initialization
+ * @description Dynamic memory allocation (using malloc() or calloc()) should be confined to the initialization routines of a program.
+ * @kind problem
+ * @id cpp/power-of-10/dynamic-alloc-after-init
+ * @problem.severity recommendation
+ * @tags resources
+ * external/powerof10
+ */
+
+import cpp
+
+/** A function whose name starts with `init` (case-insensitively) or ends with `_init`. */
+class Initialization extends Function {
+  Initialization() {
+    // Adapt these name patterns to whatever counts as "initialization" in your codebase.
+    exists(string name | name = this.getName() |
+      name.matches("%\\_init") or name.toLowerCase().matches("init%")
+    )
+  }
+}
+
+/** A call to a function named `malloc` or `calloc`. */
+// No other allocation functions are matched by this class.
+class Allocation extends FunctionCall {
+  Allocation() { this.getTarget().getName() = ["malloc", "calloc"] }
+}
+
+from Allocation a, Function f
+where
+  f = a.getEnclosingFunction() and
+  not f instanceof Initialization
+select a, "Dynamic memory allocation is only allowed during initialization."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 4/FunctionTooLong.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 4/FunctionTooLong.ql
new file mode 100644
index 00000000000..701d56e6ce2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 4/FunctionTooLong.ql
@@ -0,0 +1,40 @@
+/**
+ * @name Function too long
+ * @description Function length should be limited to what can be printed on a single sheet of paper (60 logical lines).
+ * @kind problem
+ * @id cpp/power-of-10/function-too-long
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+/** A macro invocation without a parent invocation whose macro head contains `(`. */
+class MacroFunctionCall extends MacroInvocation {
+  MacroFunctionCall() {
+    this.getMacro().getHead().matches("%(%") and not exists(this.getParentInvocation())
+  }
+  /** Gets a declaration entry of a function enclosing a statement generated by this invocation. */
+  FunctionDeclarationEntry getFunction() {
+    this.getAGeneratedElement().(Stmt).getEnclosingFunction() = result.getFunction()
+  }
+}
+
+/** Gets the number of counted statements of `f` in its file plus the number of `MacroFunctionCall`s attributed to `f`. */
+int logicalLength(FunctionDeclarationEntry f) {
+  result =
+    count(MacroFunctionCall mf | mf.getFunction() = f) +
+      count(Stmt s |
+        s.getFile() = f.getFile() and
+        s.getEnclosingFunction() = f.getFunction() and
+        not (s instanceof BlockStmt or s instanceof EmptyStmt or s.isAffectedByMacro()) and
+        not s = any(ForStmt loop).getInitialization()
+      )
+}
+
+from FunctionDeclarationEntry f, int n
+where n = logicalLength(f) and n > 60
+select f.getFunction(),
+  "Function " + f.getName() + " has too many logical lines (" + n + ", while 60 are allowed)."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 4/OneStmtPerLine.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 4/OneStmtPerLine.ql
new file mode 100644
index 00000000000..7c66cdd41f4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 4/OneStmtPerLine.ql
@@ -0,0 +1,48 @@
+/**
+ * @name More than one statement per line
+ * @description Putting more than one statement on a single line hinders program understanding.
+ * @kind problem
+ * @id cpp/power-of-10/one-stmt-per-line
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+class OneLineStmt extends Stmt { // a non-block statement whose location starts and ends on the same line
+ OneLineStmt() {
+ this.getLocation().getStartLine() = this.getLocation().getEndLine() and
+ not this instanceof BlockStmt and
+ not exists(ForStmt for | this = for.getInitialization()) and
+ (
+ // Either this statement is not touched by a macro at all...
+ not this.isAffectedByMacro()
+ or
+ // ... or a macro invocation generates it but none of its transitive children (NOTE(review): originally described as "the top-level statement" — confirm).
+ exists(MacroInvocation mi | this = mi.getAGeneratedElement() |
+ not this.getAChild+() = mi.getAGeneratedElement()
+ )
+ )
+ }
+ /** Holds if this statement is in file `f` and starts on line `line`. */
+ predicate onLine(File f, int line) {
+ f = this.getFile() and line = this.getLocation().getStartLine()
+ }
+}
+
+int numStmt(File f, int line) { result = strictcount(OneLineStmt o | o.onLine(f, line)) } // `strictcount`: no result for lines without such statements
+
+from File f, int line, OneLineStmt o, int cnt
+where
+  cnt = numStmt(f, line) and cnt > 1 and
+  o.onLine(f, line) and
+  // report the statement(s) with the smallest start column on the line
+  o.getLocation().getStartColumn() =
+    min(OneLineStmt other |
+      other.onLine(f, line)
+    |
+      other.getLocation().getStartColumn()
+    )
+select o, "This line contains " + cnt + " statements; only one is allowed."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/AssertionDensity.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/AssertionDensity.ql
new file mode 100644
index 00000000000..3bb926b8a64
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/AssertionDensity.ql
@@ -0,0 +1,53 @@
+/**
+ * @name Too few assertions
+ * @description Each function over 20 logical lines should have at least two assertions.
+ * @kind problem
+ * @id cpp/power-of-10/assertion-density
+ * @problem.severity recommendation
+ * @tags maintainability
+ * reliability
+ * external/powerof10
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+/** A macro invocation without a parent invocation whose macro head contains `(`. */
+class MacroFunctionCall extends MacroInvocation {
+  MacroFunctionCall() {
+    this.getMacro().getHead().matches("%(%") and not exists(this.getParentInvocation())
+  }
+  /** Gets a declaration entry of a function enclosing a statement generated by this invocation. */
+  FunctionDeclarationEntry getFunction() {
+    this.getAGeneratedElement().(Stmt).getEnclosingFunction() = result.getFunction()
+  }
+}
+
+/** Gets the number of counted statements of `f` in its file plus the number of `MacroFunctionCall`s attributed to `f`. */
+int logicalLength(FunctionDeclarationEntry f) {
+  result =
+    count(MacroFunctionCall mf | mf.getFunction() = f) +
+      count(Stmt s |
+        s.getFile() = f.getFile() and
+        s.getEnclosingFunction() = f.getFunction() and
+        not (s instanceof BlockStmt or s instanceof EmptyStmt or s.isAffectedByMacro()) and
+        not s = any(ForStmt loop).getInitialization()
+      )
+}
+
+/** Gets the number of assertions in the file of `f` whose asserted expression is enclosed by the function of `f`. */
+int assertionCount(FunctionDeclarationEntry f) {
+  // both the file and the enclosing function have to match
+  result =
+    count(Assertion a | a.getFile() = f.getFile() and f.getFunction() = a.getAsserted().getEnclosingFunction())
+}
+
+from FunctionDeclarationEntry f, int numAsserts, int size, int minSize
+where
+  minSize = 20 and
+  size = logicalLength(f) and
+  size > minSize and
+  numAsserts = assertionCount(f) and
+  numAsserts < 2
+select f.getFunction(),
+  "Function " + f.getName() + " has " + size + " logical lines, but only " + numAsserts +
+    " assertion(s) -- minimum is 2 (for functions over " + minSize + " logical lines)."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/AssertionSideEffect.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/AssertionSideEffect.ql
new file mode 100644
index 00000000000..47b861186a4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/AssertionSideEffect.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Assertion with side effects
+ * @description When an assertion has side effects, disabling assertions will
+ * alter program behavior.
+ * @kind problem
+ * @id cpp/power-of-10/assertion-side-effect
+ * @problem.severity warning
+ * @tags correctness
+ * external/powerof10
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Assertion a
+where not a.getAsserted().isPure() // report assertions whose asserted expression is not pure
+select a.getAsserted(), "Assertions should not have side effects."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/ConstantAssertion.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/ConstantAssertion.ql
new file mode 100644
index 00000000000..08d0d3ce1b0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/ConstantAssertion.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Constant assertion
+ * @description Assertions should check dynamic properties of pre-/post-conditions and invariants. Assertions that either always succeed or always fail are an error.
+ * @kind problem
+ * @id cpp/power-of-10/constant-assertion
+ * @problem.severity warning
+ * @tags maintainability
+ * reliability
+ * external/powerof10
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Assertion a, string value, string msg
+where
+ value = a.getAsserted().getValue() and // only asserted expressions that have a constant value
+ if value.toInt() = 0
+ then msg = "This assertion is always false."
+ else msg = "This assertion is always true." // taken whenever `value.toInt() = 0` does not hold
+select a.getAsserted(), msg
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/NonBooleanAssertion.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/NonBooleanAssertion.ql
new file mode 100644
index 00000000000..7175d103c09
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 5/NonBooleanAssertion.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Non-Boolean assertion
+ * @description Assertions should be defined as Boolean tests, meaning "assert(p != NULL)" rather than "assert(p)".
+ * @kind problem
+ * @id cpp/power-of-10/non-boolean-assertion
+ * @problem.severity warning
+ * @tags correctness
+ * external/powerof10
+ */
+
+import semmle.code.cpp.commons.Assertions
+
+from Assertion a
+where a.getAsserted().getUnspecifiedType() instanceof PointerType // strip typedefs and specifiers before testing for a pointer
+select a.getAsserted(), "Assertions should be defined as Boolean tests."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 6/GlobalCouldBeStatic.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 6/GlobalCouldBeStatic.ql
new file mode 100644
index 00000000000..b72a00cc6e0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 6/GlobalCouldBeStatic.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Global could be static
+ * @description Global variables that are not accessed outside their own file could be made static to promote information hiding.
+ * @kind problem
+ * @id cpp/power-of-10/global-could-be-static
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ * modularity
+ * external/powerof10
+ */
+
+import cpp
+
+from GlobalVariable v
+where
+  not v.hasSpecifier("static") and
+  not v.getADeclarationEntry().getFile() instanceof HeaderFile and // intended to be accessed elsewhere
+  strictcount(v.getAnAccess().getEnclosingFunction()) > 1 and // If = 1, variable should be function-scope.
+  forex(VariableAccess va | va.getTarget() = v | v.getDefinitionLocation().getFile() = va.getFile())
+select v,
+  "The global variable " + v.getName() + " is not accessed outside of " + v.getFile().getBaseName() +
+    " and could be made static."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 6/VariableScopeTooLarge.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 6/VariableScopeTooLarge.ql
new file mode 100644
index 00000000000..86529e116e2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 6/VariableScopeTooLarge.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Variable scope too large
+ * @description Global and file-scope variables that are accessed by only one function should be scoped within that function.
+ * @kind problem
+ * @id cpp/power-of-10/variable-scope-too-large
+ * @problem.severity recommendation
+ * @precision low
+ * @tags maintainability
+ * external/powerof10
+ */
+
+import cpp
+
+from GlobalVariable v, Function f
+where
+  not v.getADeclarationEntry().getFile() instanceof HeaderFile and // intended to be accessed elsewhere
+  f = v.getAnAccess().getEnclosingFunction() and
+  forall(VariableAccess access | access = v.getAnAccess() | exists(access.getEnclosingFunction())) and
+  strictcount(v.getAnAccess().getEnclosingFunction()) = 1
+select v,
+  "The variable " + v.getName() + " is only accessed in $@ and should be scoped accordingly.", f,
+  f.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 7/CheckArguments.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 7/CheckArguments.ql
new file mode 100644
index 00000000000..f32631fe2ee
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 7/CheckArguments.ql
@@ -0,0 +1,28 @@
+/**
+ * @name Unchecked function argument
+ * @description Functions should check their arguments before their first use.
+ * @kind problem
+ * @id cpp/power-of-10/check-arguments
+ * @problem.severity warning
+ * @tags correctness
+ * reliability
+ * external/powerof10
+ */
+
+import cpp
+
+predicate flow(Parameter p, ControlFlowNode n) {
+  n = p.getFunction().getBlock() and exists(p.getAnAccess()) // start at the body of the function of `p`
+  or
+  exists(ControlFlowNode prev | flow(p, prev) and not prev = p.getAnAccess() |
+    n = prev.getASuccessor() // paths are not extended beyond an access to `p`
+  )
+}
+
+VariableAccess firstAccess(Parameter p) { result = p.getAnAccess() and flow(p, result) }
+
+from Parameter p, VariableAccess va
+where
+  firstAccess(p) = va and
+  not va = any(Expr cond | cond.isCondition()).getAChild*() // `va` is not part of any condition expression
+select va, "This use of parameter " + p.getName() + " has not been checked."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 7/CheckReturnValues.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 7/CheckReturnValues.ql
new file mode 100644
index 00000000000..5bca7123d52
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 7/CheckReturnValues.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Unchecked return value
+ * @description The return value of each non-void function call should be checked for error conditions.
+ * @kind problem
+ * @id cpp/power-of-10/check-return-values
+ * @problem.severity warning
+ * @tags correctness
+ * reliability
+ * external/powerof10
+ */
+
+import cpp
+
+/**
+ * Holds if calls to `f` are exempt from this query; by default it
+ * holds for no function. In its full generality the rule applies to
+ * every non-void function, including ones like 'printf' and 'close'
+ * that are routinely left unchecked because the behavior on success
+ * is the same as on failure. The recommendation is to add an explicit
+ * cast to void for such calls. For code bases that were not developed
+ * with this rule in mind, it may be better to add such commonly
+ * ignored functions as exceptions to this predicate.
+ */
+predicate whitelist(Function f) {
+ // Example of adding exceptions (place it in front of `none()`):
+ // f.hasName("printf") or f.hasName("close") or // ...
+ none()
+}
+
+from FunctionCall c, string msg
+where
+  (
+    c instanceof ExprInVoidContext and // the call is evaluated in a void context
+    msg = "The return value of non-void function $@ is not checked."
+    or
+    msg = "$@'s return value is stored but not checked." and
+    definition(_, c.getParent()) and // the parent of the call is a definition ...
+    not definitionUsePair(_, c.getParent(), _) // ... that has no definition-use pair
+  ) and
+  not whitelist(c.getTarget()) and
+  not c.getTarget().getType() instanceof VoidType
+select c, msg, c.getTarget() as f, f.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/AvoidConditionalCompilation.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/AvoidConditionalCompilation.ql
new file mode 100644
index 00000000000..d49d51e0b2a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/AvoidConditionalCompilation.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Conditional compilation
+ * @description The use of conditional compilation directives must be kept to a minimum -- e.g. for header guards only.
+ * @kind problem
+ * @id cpp/power-of-10/avoid-conditional-compilation
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+from PreprocessorDirective i
+where
+  not i.getFile() instanceof HeaderFile and // directives located in header files are not reported
+  (i instanceof PreprocessorIfndef or i instanceof PreprocessorIfdef or i instanceof PreprocessorIf)
+select i, "Use of conditional compilation must be kept to a minimum."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/PartialMacro.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/PartialMacro.ql
new file mode 100644
index 00000000000..2598118805c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/PartialMacro.ql
@@ -0,0 +1,30 @@
+/**
+ * @name Partial macro
+ * @description Macros must expand to complete syntactic units -- "#define MY_IF if(" is not legal.
+ * @kind problem
+ * @id cpp/power-of-10/partial-macro
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+predicate incomplete(Macro m) { // holds if the brackets in the body of `m` are unbalanced or mis-ordered
+ exists(string body | body = m.getBody() and not m.getBody().matches("%\\") | // NOTE(review): presumably skips bodies ending in a backslash -- confirm
+ body.regexpMatch("[^(]*\\).*") or // a `)` with no `(` before it
+ body.regexpMatch("[^\\[]*].*") or // a `]` with no `[` before it
+ body.regexpMatch("[^{]*}.*") or // a `}` with no `{` before it
+ body.regexpMatch(".*\\([^)]*") or // a `(` with no `)` after it
+ body.regexpMatch(".*\\[[^\\]]*") or // a `[` with no `]` after it
+ body.regexpMatch(".*\\{[^}]*") or // a `{` with no `}` after it
+ count(body.indexOf("(")) != count(body.indexOf(")")) or // unequal numbers of `(` and `)`
+ count(body.indexOf("[")) != count(body.indexOf("]")) or // unequal numbers of `[` and `]`
+ count(body.indexOf("{")) != count(body.indexOf("}")) // unequal numbers of `{` and `}`
+ )
+}
+
+from Macro m
+where incomplete(m) // report every macro for which `incomplete` holds
+select m, "The macro " + m.getHead() + " will not expand into a syntactic unit."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/RestrictPreprocessor.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/RestrictPreprocessor.ql
new file mode 100644
index 00000000000..9c1767c5569
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/RestrictPreprocessor.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Disallowed preprocessor use
+ * @description The use of the preprocessor must be limited to inclusion of header files and simple macro definitions.
+ * @kind problem
+ * @id cpp/power-of-10/restrict-preprocessor
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+from PreprocessorDirective p
+where
+  // Not reported: includes and macro definitions ...
+  not (p instanceof Include or p instanceof Macro) and
+  // ... nor any of the conditional-compilation directives.
+  not (
+    p instanceof PreprocessorIf or p instanceof PreprocessorIfdef or
+    p instanceof PreprocessorIfndef or p instanceof PreprocessorElif or
+    p instanceof PreprocessorElse or p instanceof PreprocessorEndif
+  )
+select p, "This preprocessor directive is not allowed."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/UndisciplinedMacro.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/UndisciplinedMacro.ql
new file mode 100644
index 00000000000..6874c4a87fb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 8/UndisciplinedMacro.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Undisciplined macro
+ * @description Macros are not allowed to use complex preprocessor features like variable argument lists and token pasting.
+ * @kind problem
+ * @id cpp/power-of-10/undisciplined-macro
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+from Macro m, string msg
+where
+  msg = "The macro " + m.getHead() + " is variadic, and hence not allowed." and
+  m.getHead().matches("%...%") // the macro head contains `...`
+  or
+  msg = "The macro " + m.getHead() + " uses token pasting and is not allowed." and
+  m.getBody().matches("%##%") // the macro body contains `##`
+select m, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/FunctionPointer.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/FunctionPointer.ql
new file mode 100644
index 00000000000..53fc88bccef
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/FunctionPointer.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Function pointer call
+ * @description Function pointers are not permitted -- they make it impossible for a tool to prove the absence of recursion.
+ * @kind problem
+ * @id cpp/power-of-10/function-pointer
+ * @problem.severity recommendation
+ * @tags maintainability
+ * testability
+ * external/powerof10
+ */
+
+import cpp
+
+from ExprCall e // there is no `where` clause: every `ExprCall` is reported
+select e, "Calls through function pointers are not permitted."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/HiddenPointerIndirection.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/HiddenPointerIndirection.ql
new file mode 100644
index 00000000000..411acf96e4b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/HiddenPointerIndirection.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Hidden pointer indirection
+ * @description Pointer indirection may not be hidden by typedefs -- "typedef int* IntPtr;" is not allowed.
+ * @kind problem
+ * @id cpp/power-of-10/hidden-pointer-indirection
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+from TypedefType t
+where exists(int depth | depth = t.getBaseType().getPointerIndirectionLevel() | depth > 0)
+select t, "The typedef " + t.getName() + " hides pointer indirection."
diff --git a/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/PointerNesting.ql b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/PointerNesting.ql
new file mode 100644
index 00000000000..e5715c44b12
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Power of 10/Rule 9/PointerNesting.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Pointer nesting too high
+ * @description No more than one level of pointer nesting/dereferencing should be used.
+ * @kind problem
+ * @id cpp/power-of-10/pointer-nesting
+ * @problem.severity recommendation
+ * @tags maintainability
+ * readability
+ * external/powerof10
+ */
+
+import cpp
+
+from Variable v, int n
+where n > 1 and v.getType().(PointerType).getPointerIndirectionLevel() = n
+select v, "The variable " + v.getName() + " uses " + n + " levels of pointer indirection."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-014/MemsetMayBeDeleted.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-014/MemsetMayBeDeleted.ql
new file mode 100644
index 00000000000..33c31972295
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-014/MemsetMayBeDeleted.ql
@@ -0,0 +1,82 @@
+/**
+ * @name Call to `memset` may be deleted
+ * @description Using the `memset` function to clear private data in a variable that has no subsequent use
+ * can make information-leak vulnerabilities easier to exploit because the compiler can remove the call.
+ * @kind problem
+ * @id cpp/memset-may-be-deleted
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision high
+ * @tags security
+ * external/cwe/cwe-14
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.EscapesTree
+import semmle.code.cpp.commons.Exclusions
+import semmle.code.cpp.models.interfaces.Alias
+
+/** A function named `memset`, `wmemset`, `bzero` or `__builtin_memset`. */
+class MemsetFunction extends Function {
+  MemsetFunction() {
+    // Matched purely by name; the accepted namespaces differ per function.
+    this.hasGlobalName(["bzero", "__builtin_memset"]) or
+    this.hasGlobalOrStdName("wmemset") or
+    this.hasGlobalOrStdOrBslName("memset")
+  }
+}
+
+/** Holds if `escaped` is an argument of a call to an `AliasFunction` through which it does not escape. */
+predicate isNonEscapingArgument(Expr escaped) {
+  exists(Call c, AliasFunction target, int idx |
+    escaped.getUnconverted() = c.getArgument(idx) and target = c.getTarget()
+  |
+    target.parameterNeverEscapes(idx)
+    or
+    // Escaping only via the return value is accepted when the call is in a void context or bool-converted.
+    (c.getConversion*() instanceof BoolConversion or c instanceof ExprInVoidContext) and
+    target.parameterEscapesOnlyViaReturn(idx)
+  )
+}
+
+pragma[noinline]
+predicate callToMemsetWithRelevantVariable(
+  LocalVariable v, VariableAccess acc, FunctionCall call, MemsetFunction memset
+) {
+  memset = call.getTarget() and
+  v.getAnAccess() = acc and
+  // The address of `v` escapes, via `acc`, into the first argument of the call.
+  variableAddressEscapesTree(acc, call.getArgument(0).getFullyConverted()) and
+  // `variableAddressEscapesTree` gives reference-typed variables special treatment, so this
+  // query leaves them out.
+  not v.getUnspecifiedType() instanceof ReferenceType and
+  not v.isStatic()
+}
+
+pragma[noinline]
+predicate relevantVariable(LocalVariable v, FunctionCall call, MemsetFunction memset) {
+  exists(VariableAccess memsetAcc, VariableAccess otherAcc |
+    callToMemsetWithRelevantVariable(v, memsetAcc, call, memset)
+  |
+    // Besides the access passed to `memset`, `v` has some other access that is evaluated.
+    otherAcc = v.getAnAccess() and
+    not otherAcc.isUnevaluated() and otherAcc != memsetAcc
+  )
+}
+
+from FunctionCall call, LocalVariable v, MemsetFunction memset
+where
+  relevantVariable(v, call, memset) and
+  // Calls that come from a macro definition are not reported.
+  not isFromMacroDefinition(call) and
+  // Some compilation of this file was not given the `-fno-builtin-memset` flag.
+  exists(Compilation comp | comp.getAFileCompiled() = call.getFile() |
+    not comp.getAnArgument() = "-fno-builtin-memset"
+  ) and
+  // No access to `v` is reachable from the call in the control flow.
+  not call.getASuccessor*() = v.getAnAccess() and
+  // Everywhere the address of `v` escapes, it does so as a non-escaping argument.
+  forall(Expr escape | variableAddressEscapesTree(v.getAnAccess(), escape) |
+    isNonEscapingArgument(escape)
+  )
+select call, "Call to " + memset.getName() + " may be deleted by the compiler."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/CountUntrustedDataToExternalAPI.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/CountUntrustedDataToExternalAPI.ql
new file mode 100644
index 00000000000..8c75e8da6e2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/CountUntrustedDataToExternalAPI.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Frequency counts for external APIs that are used with untrusted data
+ * @description This reports the external APIs that are used with untrusted data, along with how
+ * frequently the API is called, and how many unique sources of untrusted data flow
+ * to it.
+ * @id cpp/count-untrusted-data-external-api
+ * @kind table
+ * @tags security external/cwe/cwe-20
+ */
+
+import cpp
+import ExternalAPIs
+
+from ExternalAPIUsedWithUntrustedData externalAPI
+select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
+ externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
+ numberOfUntrustedSources desc // APIs reached by the most untrusted sources are listed first
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ExternalAPIs.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ExternalAPIs.qll
new file mode 100644
index 00000000000..29d5b20cfc4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ExternalAPIs.qll
@@ -0,0 +1,50 @@
+/**
+ * Definitions for reasoning about untrusted data used in APIs defined outside the
+ * database.
+ */
+
+private import cpp
+private import semmle.code.cpp.models.interfaces.DataFlow
+private import semmle.code.cpp.models.interfaces.Taint
+import ExternalAPIsSpecific
+
+/** An external API data node that is reached by flow under `UntrustedDataToExternalAPIConfig`. */
+class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
+  UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig cfg).hasFlow(_, this) }
+
+  /** Gets a source of untrusted data that flows to this external API data node. */
+  DataFlow::Node getAnUntrustedSource() {
+    exists(UntrustedDataToExternalAPIConfig cfg | cfg.hasFlow(result, this))
+  }
+}
+
+private newtype TExternalAPI =
+  /** An external function paired with the index at which it receives untrusted data. */
+  TExternalAPIParameter(Function f, int index) {
+    exists(UntrustedExternalAPIDataNode node |
+      node.getExternalFunction() = f and node.getIndex() = index
+    )
+  }
+
+/** An external function and argument index to which untrusted data is passed. */
+class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
+  /** Gets a data node through which possibly untrusted data is passed to this external API. */
+  UntrustedExternalAPIDataNode getUntrustedDataNode() {
+    exists(Function f, int index | this = TExternalAPIParameter(f, index) |
+      f = result.getExternalFunction() and index = result.getIndex()
+    )
+  }
+
+  /** Gets the number of distinct untrusted sources that flow to this external API. */
+  int getNumberOfUntrustedSources() {
+    result = strictcount(this.getUntrustedDataNode().getAnUntrustedSource())
+  }
+
+  /** Gets a textual representation of this element. */
+  string toString() {
+    exists(Function f, int index, string position | this = TExternalAPIParameter(f, index) |
+      (index = -1 and position = "qualifier" or index != -1 and position = "param " + index) and
+      result = f.toString() + " [" + position + "]"
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ExternalAPIsSpecific.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ExternalAPIsSpecific.qll
new file mode 100644
index 00000000000..9ca598f86d6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ExternalAPIsSpecific.qll
@@ -0,0 +1,56 @@
+/**
+ * Provides AST-specific definitions for use in the `ExternalAPI` library.
+ */
+
+import semmle.code.cpp.dataflow.TaintTracking
+import semmle.code.cpp.models.interfaces.FlowSource
+import semmle.code.cpp.models.interfaces.DataFlow
+import SafeExternalAPIFunction
+
+/** A data-flow node for an argument or qualifier of a call to an unmodeled external function. */
+class ExternalAPIDataNode extends DataFlow::Node {
+  Call call;
+  int i;
+
+  ExternalAPIDataNode() {
+    // Restrict to callees that are external, unmodeled and not known to be safe.
+    exists(Function callee | callee = call.getTarget() |
+      // The callee has no definition, i.e. it is defined outside the source archive.
+      not callee.hasDefinition() and
+      // It is not a known safe external API.
+      not callee instanceof SafeExternalAPIFunction and
+      // It is not already modeled as a dataflow or taint step.
+      not callee instanceof TaintFunction and
+      not callee instanceof DataFlowFunction
+    ) and
+    (
+      // This node is either the qualifier (index -1) or the argument at index `i`.
+      call.getQualifier() = this.asExpr() and i = -1
+      or
+      call.getArgument(i) = this.asExpr()
+    )
+  }
+
+  /** Gets the external `Function` that is called. */
+  Function getExternalFunction() { call.getTarget() = result }
+
+  /** Gets the argument index of this node in the call, or -1 if it is the qualifier. */
+  int getIndex() { i = result }
+
+  /** Gets a description of the called function. */
+  string getFunctionDescription() { result = this.getExternalFunction().toString() }
+}
+
+/** A taint-tracking configuration from remote flow source calls to `ExternalAPIDataNode`s. */
+class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
+  UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
+
+  /** Holds if `source` is a call to a function that has some remote flow source. */
+  override predicate isSource(DataFlow::Node source) {
+    exists(RemoteFlowSourceFunction fn | fn.hasRemoteFlowSource(_, _) |
+      source.asExpr().(Call).getTarget() = fn
+    )
+  }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/IRCountUntrustedDataToExternalAPI.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/IRCountUntrustedDataToExternalAPI.ql
new file mode 100644
index 00000000000..4d0c2174809
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/IRCountUntrustedDataToExternalAPI.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Frequency counts for external APIs that are used with untrusted data
+ * @description This reports the external APIs that are used with untrusted data, along with how
+ * frequently the API is called, and how many unique sources of untrusted data flow
+ * to it.
+ * @id cpp/count-untrusted-data-external-api-ir
+ * @kind table
+ * @tags security external/cwe/cwe-20
+ */
+
+import cpp
+import ir.ExternalAPIs
+
+from ExternalAPIUsedWithUntrustedData externalAPI
+select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
+ externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
+ numberOfUntrustedSources desc // APIs reached by the most untrusted sources are listed first
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/IRUntrustedDataToExternalAPI.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/IRUntrustedDataToExternalAPI.ql
new file mode 100644
index 00000000000..47a0bf14b7f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/IRUntrustedDataToExternalAPI.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Untrusted data passed to external API
+ * @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
+ * @id cpp/untrusted-data-to-external-api-ir
+ * @kind path-problem
+ * @precision low
+ * @problem.severity error
+ * @security-severity 7.8
+ * @tags security external/cwe/cwe-20
+ */
+
+import cpp
+import semmle.code.cpp.ir.dataflow.TaintTracking
+import ir.ExternalAPIs
+import semmle.code.cpp.security.FlowSources
+import DataFlow::PathGraph
+
+from UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink) // there is a flow path from `source` to `sink` under `config`
+select sink, source, sink,
+ "Call to " + sink.getNode().(ExternalAPIDataNode).getExternalFunction().toString() +
+ " with untrusted data from $@.", source, source.getNode().(RemoteFlowSource).getSourceType() // `$@` links to `source`, labeled with its source type
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/SafeExternalAPIFunction.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/SafeExternalAPIFunction.qll
new file mode 100644
index 00000000000..5eb0b23d914
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/SafeExternalAPIFunction.qll
@@ -0,0 +1,24 @@
+/**
+ * Provides a class for modeling external functions that are "safe" from a security perspective.
+ */
+
+private import cpp
+private import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A `Function` that is considered a "safe" external API from a security perspective.
+ * The class is abstract, so it contains exactly the functions matched by its subclasses. */
+abstract class SafeExternalAPIFunction extends Function { }
+
+/** The default "safe" external APIs: modeled functions with no write side effects on arguments. */
+private class DefaultSafeExternalAPIFunction extends SafeExternalAPIFunction {
+  DefaultSafeExternalAPIFunction() {
+    // Passing untrusted data to a function is considered safe when the function does
+    // not write to any of its arguments. String functions such as `strcmp` and `strlen`,
+    // and memory functions such as `memcmp`, are therefore considered safe.
+    // Expressed on the side-effect model: it has only specific write side effects,
+    // and no specific write side effect holds for it.
+    this.(SideEffectFunction).hasOnlySpecificWriteSideEffects() and
+    not this.(SideEffectFunction).hasSpecificWriteSideEffect(_, _, _)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.ql
new file mode 100644
index 00000000000..b85a5b26a7f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Untrusted data passed to external API
+ * @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
+ * @id cpp/untrusted-data-to-external-api
+ * @kind path-problem
+ * @precision low
+ * @problem.severity error
+ * @security-severity 7.8
+ * @tags security external/cwe/cwe-20
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import ExternalAPIs
+import DataFlow::PathGraph
+
+from UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink) // there is a flow path from `source` to `sink` under `config`
+select sink, source, sink,
+ "Call to " + sink.getNode().(ExternalAPIDataNode).getExternalFunction().toString() +
+ " with untrusted data from $@.", source, source.toString() // `$@` links to `source`, labeled with its `toString()`
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/ExternalAPIs.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/ExternalAPIs.qll
new file mode 100644
index 00000000000..29d5b20cfc4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/ExternalAPIs.qll
@@ -0,0 +1,50 @@
+/**
+ * Definitions for reasoning about untrusted data used in APIs defined outside the
+ * database.
+ */
+
+private import cpp
+private import semmle.code.cpp.models.interfaces.DataFlow
+private import semmle.code.cpp.models.interfaces.Taint
+import ExternalAPIsSpecific
+
+/** An external API data node that is reached by flow under `UntrustedDataToExternalAPIConfig`. */
+class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
+  UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig cfg).hasFlow(_, this) }
+
+  /** Gets a source of untrusted data that flows to this external API data node. */
+  DataFlow::Node getAnUntrustedSource() {
+    exists(UntrustedDataToExternalAPIConfig cfg | cfg.hasFlow(result, this))
+  }
+}
+
+private newtype TExternalAPI =
+  /** An external function paired with the index at which it receives untrusted data. */
+  TExternalAPIParameter(Function f, int index) {
+    exists(UntrustedExternalAPIDataNode node |
+      node.getExternalFunction() = f and node.getIndex() = index
+    )
+  }
+
+/** An external function and argument index to which untrusted data is passed. */
+class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
+  /** Gets a data node through which possibly untrusted data is passed to this external API. */
+  UntrustedExternalAPIDataNode getUntrustedDataNode() {
+    exists(Function f, int index | this = TExternalAPIParameter(f, index) |
+      f = result.getExternalFunction() and index = result.getIndex()
+    )
+  }
+
+  /** Gets the number of distinct untrusted sources that flow to this external API. */
+  int getNumberOfUntrustedSources() {
+    result = strictcount(this.getUntrustedDataNode().getAnUntrustedSource())
+  }
+
+  /** Gets a textual representation of this element. */
+  string toString() {
+    exists(Function f, int index, string position | this = TExternalAPIParameter(f, index) |
+      (index = -1 and position = "qualifier" or index != -1 and position = "param " + index) and
+      result = f.toString() + " [" + position + "]"
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/ExternalAPIsSpecific.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/ExternalAPIsSpecific.qll
new file mode 100644
index 00000000000..10d1728aa01
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/ExternalAPIsSpecific.qll
@@ -0,0 +1,51 @@
+/**
+ * Provides IR-specific definitions for use in the `ExternalAPI` library.
+ */
+
+import semmle.code.cpp.ir.dataflow.TaintTracking
+private import semmle.code.cpp.security.FlowSources
+private import semmle.code.cpp.models.interfaces.DataFlow
+import SafeExternalAPIFunction
+
+/** A data-flow node for an argument or qualifier of a call to an unmodeled external function. */
+class ExternalAPIDataNode extends DataFlow::Node {
+  Call call;
+  int i;
+
+  ExternalAPIDataNode() {
+    // Restrict to callees that are external, unmodeled and not known to be safe.
+    exists(Function callee | callee = call.getTarget() |
+      // The callee has no definition, i.e. it is defined outside the source archive.
+      not callee.hasDefinition() and
+      // It is not a known safe external API.
+      not callee instanceof SafeExternalAPIFunction and
+      // It is not already modeled as a dataflow or taint step.
+      not callee instanceof TaintFunction and
+      not callee instanceof DataFlowFunction
+    ) and
+    (
+      // This node is either the qualifier (index -1) or the argument at index `i`.
+      call.getQualifier() = this.asExpr() and i = -1
+      or
+      call.getArgument(i) = this.asExpr()
+    )
+  }
+
+  /** Gets the external `Function` that is called. */
+  Function getExternalFunction() { call.getTarget() = result }
+
+  /** Gets the argument index of this node in the call, or -1 if it is the qualifier. */
+  int getIndex() { i = result }
+
+  /** Gets a description of the called function. */
+  string getFunctionDescription() { result = this.getExternalFunction().toString() }
+}
+
+/** A taint-tracking configuration with `RemoteFlowSource`s as sources and `ExternalAPIDataNode`s as sinks. */
+class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
+ UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfigIR" }
+
+ override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+ override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/SafeExternalAPIFunction.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/SafeExternalAPIFunction.qll
new file mode 100644
index 00000000000..5eb0b23d914
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-020/ir/SafeExternalAPIFunction.qll
@@ -0,0 +1,24 @@
+/**
+ * Provides a class for modeling external functions that are "safe" from a security perspective.
+ */
+
+private import cpp
+private import semmle.code.cpp.models.interfaces.SideEffect
+
+/**
+ * A `Function` that is considered a "safe" external API from a security perspective.
+ * The class is abstract, so it contains exactly the functions matched by its subclasses. */
+abstract class SafeExternalAPIFunction extends Function { }
+
+/** The default "safe" external APIs: modeled functions with no write side effects on arguments. */
+private class DefaultSafeExternalAPIFunction extends SafeExternalAPIFunction {
+  DefaultSafeExternalAPIFunction() {
+    // Passing untrusted data to a function is considered safe when the function does
+    // not write to any of its arguments. String functions such as `strcmp` and `strlen`,
+    // and memory functions such as `memcmp`, are therefore considered safe.
+    // Expressed on the side-effect model: it has only specific write side effects,
+    // and no specific write side effect holds for it.
+    this.(SideEffectFunction).hasOnlySpecificWriteSideEffects() and
+    not this.(SideEffectFunction).hasSpecificWriteSideEffect(_, _, _)
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql
new file mode 100644
index 00000000000..5e22506d03a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql
@@ -0,0 +1,71 @@
+/**
+ * @name Uncontrolled data used in path expression
+ * @description Accessing paths influenced by users can allow an
+ * attacker to access unexpected resources.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id cpp/path-injection
+ * @tags security
+ * external/cwe/cwe-022
+ * external/cwe/cwe-023
+ * external/cwe/cwe-036
+ * external/cwe/cwe-073
+ */
+
+import cpp
+import semmle.code.cpp.security.FunctionWithWrappers
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+/**
+ * A function that opens a file, with the path given as its first argument.
+ */
+class FileFunction extends FunctionWithWrappers {
+  FileFunction() {
+    // a global function with one of these exact names
+    this.hasGlobalName([
+        "fopen", "_fopen", "_wfopen",
+        "open", "_open", "_wopen"
+      ])
+    or
+    this.hasQualifiedName("std", "fopen")
+    or
+    // a global function whose name starts with `CreateFile` (file creation on Windows)
+    exists(string winName | winName.matches("CreateFile%") | this.hasGlobalName(winName))
+    or
+    // either the constructor or the `open` method ...
+    (this instanceof Constructor or this.getName() = "open") and
+    // ... of one of the fstream classes, or of filebuf
+    exists(string className |
+      className =
+        ["basic_fstream", "basic_ifstream", "basic_ofstream", "basic_filebuf"]
+    |
+      this.getDeclaringType().hasQualifiedName("std", className)
+    )
+  }
+
+  /**
+   * Holds for argument 0 only: all of these functions take the path as the first parameter.
+   */
+  override predicate interestingArg(int arg) { arg = 0 }
+}
+
+class TaintedPathConfiguration extends TaintTrackingConfiguration {
+  override predicate isSink(Element tainted) {
+    any(FileFunction opener).outermostWrapperFunctionCall(tainted, _) // holds for some `FileFunction`
+  }
+}
+
+from
+  FileFunction fileFunction, Expr taintedArg, Expr taintSource, PathNode sourceNode,
+  PathNode sinkNode, string taintCause, string callChain
+where
+  isUserInput(taintSource, taintCause) and // `taintCause` is shown in the link text
+  taintedWithPath(taintSource, taintedArg, sourceNode, sinkNode) and
+  fileFunction.outermostWrapperFunctionCall(taintedArg, callChain) // `callChain` ends the message
+select taintedArg, sourceNode, sinkNode,
+  "This argument to a file access function is derived from $@ and then passed to " + callChain,
+  taintSource, "user input (" + taintCause + ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql
new file mode 100644
index 00000000000..26652d9c1da
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql
@@ -0,0 +1,219 @@
+/**
+ * @name Uncontrolled data used in OS command
+ * @description Using user-supplied data in an OS command, without
+ * neutralizing special elements, can make code vulnerable
+ * to command injection.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.8
+ * @precision high
+ * @id cpp/command-line-injection
+ * @tags security
+ * external/cwe/cwe-078
+ * external/cwe/cwe-088
+ */
+
+import cpp
+import semmle.code.cpp.security.CommandExecution
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.ir.IR
+import semmle.code.cpp.ir.dataflow.TaintTracking
+import semmle.code.cpp.ir.dataflow.TaintTracking2
+import semmle.code.cpp.security.FlowSources
+import semmle.code.cpp.models.implementations.Strcat
+
+/** Gets the unconverted expression defining the address used by the side-effect operand `sink`. */
+Expr sinkAsArgumentIndirection(DataFlow::Node sink) {
+  exists(SideEffectOperand sideEffect, Instruction addressDef |
+    sideEffect = sink.asOperand() and
+    addressDef = sideEffect.getAddressOperand().getAnyDef() and
+    result = addressDef.getUnconvertedResultExpression()
+  )
+}
+
+/**
+ * Holds if `fst` is a string that is used in a format or concatenation function resulting in `snd`,
+ * and is *not* placed at the start of the resulting string. This indicates that the author did not
+ * expect `fst` to control what program is run if the resulting string is eventually interpreted as
+ * a command line, for example as an argument to `system`.
+ */
+predicate interestingConcatenation(DataFlow::Node fst, DataFlow::Node snd) {
+ exists(FormattingFunctionCall call, int index, FormatLiteral literal | // formatting calls
+ sinkAsArgumentIndirection(fst) = call.getConversionArgument(index) and
+ snd.asDefiningArgument() = call.getOutputArgument(false) and
+ literal = call.getFormat() and
+ not literal.getConvSpecOffset(index) = 0 and // the conversion is not at offset 0 of the format
+ literal.getConversionChar(index) = ["s", "S"] // only `%s` / `%S` conversions
+ )
+ or
+ // strcat and friends: `fst` is read from the source argument, `snd` is the write to the destination
+ exists(StrcatFunction strcatFunc, CallInstruction call, ReadSideEffectInstruction rse |
+ call.getStaticCallTarget() = strcatFunc and
+ rse.getArgumentDef() = call.getArgument(strcatFunc.getParamSrc()) and
+ fst.asOperand() = rse.getSideEffectOperand() and
+ snd.asInstruction().(WriteSideEffectInstruction).getDestinationAddress() =
+ call.getArgument(strcatFunc.getParamDest())
+ )
+ or
+ exists(CallInstruction call, Operator op, ReadSideEffectInstruction rse | // `std::operator+`
+ call.getStaticCallTarget() = op and
+ op.hasQualifiedName("std", "operator+") and
+ op.getType().(UserType).hasQualifiedName("std", "basic_string") and
+ call.getArgument(1) = rse.getArgumentOperand().getAnyDef() and // argument 1: presumably the right-hand operand, not the left - TODO confirm
+ fst.asOperand() = rse.getSideEffectOperand() and
+ call = snd.asInstruction()
+ )
+}
+
+class TaintToConcatenationConfiguration extends TaintTracking::Configuration {
+  TaintToConcatenationConfiguration() { this = "TaintToConcatenationConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof FlowSource }
+
+  override predicate isSink(DataFlow::Node sink) { interestingConcatenation(sink, _) }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    exists(Type resultType | resultType = node.asInstruction().getResultType() |
+      resultType instanceof IntegralType or resultType instanceof FloatingPointType
+    )
+  }
+}
+
+/** Tracks the result of a tainted, interesting concatenation to a shell command argument. */
+class ExecTaintConfiguration extends TaintTracking2::Configuration {
+  ExecTaintConfiguration() { this = "ExecTaintConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) {
+    exists(DataFlow::Node taintedInput |
+      any(TaintToConcatenationConfiguration c).hasFlow(_, taintedInput) and
+      interestingConcatenation(taintedInput, source)
+    )
+  }
+
+  override predicate isSink(DataFlow::Node sink) {
+    shellCommand(sinkAsArgumentIndirection(sink), _)
+  }
+
+  // `shellCommand` will include wrappers, so stop at sinks to prevent duplicates along a call chain
+  override predicate isSanitizerOut(DataFlow::Node node) { this.isSink(node) }
+}
+
+module StitchedPathGraph {
+ // Each DataFlowImplN.qll declares its own PathNode class, so the PathGraph predicates of the
+ // two configurations cannot be combined directly. This newtype gives a single type that
+ // contains both sets of PathNodes: one branch per underlying PathNode class.
+ newtype TMergedPathNode =
+ TPathNode1(DataFlow::PathNode node) or
+ TPathNode2(DataFlow2::PathNode node)
+
+ // A merged path node. It forwards `toString`, `getNode` and `hasLocationInfo` to whichever
+ // underlying path node it wraps, so that the merged type can be used in a selection.
+ class MergedPathNode extends TMergedPathNode {
+ string toString() {
+ exists(DataFlow::PathNode n |
+ this = TPathNode1(n) and
+ result = n.toString()
+ )
+ or
+ exists(DataFlow2::PathNode n |
+ this = TPathNode2(n) and
+ result = n.toString()
+ )
+ }
+
+ DataFlow::Node getNode() {
+ exists(DataFlow::PathNode n |
+ this = TPathNode1(n) and
+ result = n.getNode()
+ )
+ or
+ exists(DataFlow2::PathNode n |
+ this = TPathNode2(n) and
+ result = n.getNode()
+ )
+ }
+
+ DataFlow::PathNode getPathNode1() { this = TPathNode1(result) }
+
+ DataFlow2::PathNode getPathNode2() { this = TPathNode2(result) }
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ exists(DataFlow::PathNode n |
+ this = TPathNode1(n) and
+ n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ )
+ or
+ exists(DataFlow2::PathNode n |
+ this = TPathNode2(n) and
+ n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ )
+ }
+ }
+
+ query predicate edges(MergedPathNode a, MergedPathNode b) {
+ exists(DataFlow::PathNode an, DataFlow::PathNode bn |
+ a = TPathNode1(an) and
+ b = TPathNode1(bn) and
+ DataFlow::PathGraph::edges(an, bn)
+ )
+ or
+ exists(DataFlow2::PathNode an, DataFlow2::PathNode bn |
+ a = TPathNode2(an) and
+ b = TPathNode2(bn) and
+ DataFlow2::PathGraph::edges(an, bn)
+ )
+ or
+ // This is where paths from the two configurations are connected: an extra edge leads from a
+ // first-configuration node to a second-configuration node whenever `interestingConcatenation`
+ // relates their data-flow nodes. It is the only query-specific part of this module.
+ interestingConcatenation(a.getNode(), b.getNode()) and
+ a instanceof TPathNode1 and
+ b instanceof TPathNode2
+ }
+
+ query predicate nodes(MergedPathNode mpn, string key, string val) {
+ // the union of the two underlying `nodes` predicates, lifted to the merged node type
+ exists(DataFlow::PathNode n |
+ mpn = TPathNode1(n) and
+ DataFlow::PathGraph::nodes(n, key, val)
+ )
+ or
+ exists(DataFlow2::PathNode n |
+ mpn = TPathNode2(n) and
+ DataFlow2::PathGraph::nodes(n, key, val)
+ )
+ }
+
+ query predicate subpaths(
+ MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
+ ) {
+ // Forward subpaths from each underlying library; all four nodes come from the same library.
+ // This might be slightly awkward when the concatenation is deep in a call chain.
+ DataFlow::PathGraph::subpaths(arg.getPathNode1(), par.getPathNode1(), ret.getPathNode1(),
+ out.getPathNode1())
+ or
+ DataFlow2::PathGraph::subpaths(arg.getPathNode2(), par.getPathNode2(), ret.getPathNode2(),
+ out.getPathNode2())
+ }
+}
+
+import StitchedPathGraph
+
+from
+  DataFlow::PathNode origin, DataFlow::PathNode concatIn, DataFlow2::PathNode concatOut,
+  DataFlow2::PathNode commandNode, string sourceType, string callChain
+where
+  // first leg: a flow source reaches the input of an interesting concatenation
+  any(TaintToConcatenationConfiguration c).hasFlowPath(origin, concatIn) and
+  sourceType = origin.getNode().(FlowSource).getSourceType() and
+  interestingConcatenation(concatIn.getNode(), concatOut.getNode()) and // this loses call context
+  // second leg: the concatenation result reaches a shell command argument
+  any(ExecTaintConfiguration c).hasFlowPath(concatOut, commandNode) and
+  shellCommand(sinkAsArgumentIndirection(commandNode.getNode()), callChain)
+select sinkAsArgumentIndirection(commandNode.getNode()), TPathNode1(origin).(MergedPathNode),
+  TPathNode2(commandNode).(MergedPathNode),
+  "This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to "
+    + callChain, origin, "user input (" + sourceType + ")", concatOut, concatOut.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql
new file mode 100644
index 00000000000..bb38609927e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql
@@ -0,0 +1,47 @@
+/**
+ * @name CGI script vulnerable to cross-site scripting
+ * @description Writing user input directly to a web page
+ * allows for a cross-site scripting vulnerability.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 6.1
+ * @precision high
+ * @id cpp/cgi-xss
+ * @tags security
+ * external/cwe/cwe-079
+ */
+
+import cpp
+import semmle.code.cpp.commons.Environment
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+/** A call to `puts` or `printf`, which print their arguments to `stdout`. */
+class PrintStdoutCall extends FunctionCall {
+  PrintStdoutCall() {
+    // matches the global declaration as well as the one in namespace `std`
+    this.getTarget().hasGlobalOrStdName(["puts", "printf"])
+  }
+}
+
+/** An environment read whose variable name is `QUERY_STRING`. */
+class QueryString extends EnvironmentRead {
+  QueryString() { this.getEnvironmentVariable() = "QUERY_STRING" }
+}
+
+/** Taint from `QUERY_STRING` reads to the arguments of calls that print to `stdout`. */
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSource(Expr source) { source instanceof QueryString }
+
+  override predicate isSink(Element tainted) { tainted = any(PrintStdoutCall c).getAnArgument() }
+
+  // besides the inherited barriers, expressions of integral type are barriers
+  override predicate isBarrier(Expr e) {
+    e.getUnspecifiedType() instanceof IntegralType or super.isBarrier(e)
+  }
+}
+
+from QueryString queryRead, Element output, PathNode sourceNode, PathNode sinkNode
+where taintedWithPath(queryRead, output, sourceNode, sinkNode)
+select output, sourceNode, sinkNode, "Cross-site scripting vulnerability due to $@.", queryRead,
+  "this query data"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql
new file mode 100644
index 00000000000..92c8b9a2bd5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql
@@ -0,0 +1,54 @@
+/**
+ * @name Uncontrolled data in SQL query
+ * @description Including user-supplied data in a SQL query without
+ * neutralizing special elements can make code vulnerable
+ * to SQL Injection.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 8.8
+ * @precision high
+ * @id cpp/sql-injection
+ * @tags security
+ * external/cwe/cwe-089
+ */
+
+import cpp
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.FunctionWithWrappers
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+class SQLLikeFunction extends FunctionWithWrappers {
+  SQLLikeFunction() { exists(string fn | fn = this.getName() and sqlArgument(fn, _)) }
+
+  override predicate interestingArg(int arg) { sqlArgument(this.getName(), arg) }
+}
+
+/** Taint configuration whose sinks are the arguments reported for a `SQLLikeFunction`. */
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element tainted) {
+    any(SQLLikeFunction runSql).outermostWrapperFunctionCall(tainted, _)
+  }
+
+  override predicate isBarrier(Expr e) {
+    super.isBarrier(e)
+    or
+    e.getUnspecifiedType() instanceof IntegralType
+    or
+    exists(SqlBarrierFunction barrierFn, int index |
+      e = barrierFn.getACallToThisFunction().getArgument(index) and
+      barrierFn.barrierSqlArgument(any(FunctionInput inp | inp.isParameterDeref(index)), _)
+    )
+  }
+}
+
+from
+  SQLLikeFunction runSql, Expr queryArg, Expr origin, PathNode sourceNode, PathNode sinkNode,
+  string cause, string chain
+where
+  isUserInput(origin, cause) and
+  taintedWithPath(origin, queryArg, sourceNode, sinkNode) and
+  runSql.outermostWrapperFunctionCall(queryArg, chain)
+select queryArg, sourceNode, sinkNode,
+  "This argument to a SQL query function is derived from $@ and then passed to " + chain,
+  origin, "user input (" + cause + ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql
new file mode 100644
index 00000000000..e75f62b0eb7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql
@@ -0,0 +1,38 @@
+/**
+ * @name Uncontrolled process operation
+ * @description Using externally controlled strings in a process
+ * operation can allow an attacker to execute malicious
+ * commands.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 8.2
+ * @precision medium
+ * @id cpp/uncontrolled-process-operation
+ * @tags security
+ * external/cwe/cwe-114
+ */
+
+import cpp
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+/** Holds if `arg` is passed, at a listed argument index, to a function named `processOperation`. */
+predicate isProcessOperationExplanation(Expr arg, string processOperation) {
+  exists(FunctionCall call, int index | isProcessOperationArgument(processOperation, index) |
+    call.getTarget().hasName(processOperation) and
+    arg = call.getArgument(index)
+  )
+}
+
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element arg) { exists(string op | isProcessOperationExplanation(arg, op)) }
+}
+
+from string operation, Expr arg, Expr origin, PathNode sourceNode, PathNode sinkNode
+where
+  taintedWithPath(origin, arg, sourceNode, sinkNode) and
+  isProcessOperationExplanation(arg, operation)
+select arg, sourceNode, sinkNode,
+  "The value of this argument may come from $@ and is being passed to " + operation, origin,
+  origin.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-119/OverflowBuffer.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-119/OverflowBuffer.ql
new file mode 100644
index 00000000000..1c903081baf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-119/OverflowBuffer.ql
@@ -0,0 +1,58 @@
+/**
+ * @name Call to memory access function may overflow buffer
+ * @description Incorrect use of a function that accesses a memory
+ * buffer may read or write data past the end of that
+ * buffer.
+ * @kind problem
+ * @id cpp/overflow-buffer
+ * @problem.severity recommendation
+ * @security-severity 9.3
+ * @tags security
+ * external/cwe/cwe-119
+ * external/cwe/cwe-121
+ * external/cwe/cwe-122
+ * external/cwe/cwe-126
+ */
+
+import semmle.code.cpp.security.BufferWrite
+import semmle.code.cpp.security.BufferAccess
+
+bindingset[num, singular, plural]
+string plural(int num, string singular, string plural) {
+  exists(string unit | if num = 1 then unit = singular else unit = plural | result = num + unit)
+}
+
+from
+  BufferAccess ba, string bufferDesc, int accessSize, int accessType, Element bufferAlloc,
+  int bufferSize, string message
+where
+  bufferSize = getBufferSize(ba.getBuffer(bufferDesc, accessType), bufferAlloc) and
+  accessSize = ba.getSize() and
+  // report accesses larger than the buffer, and type-3 accesses of non-positive size
+  (accessSize > bufferSize or accessType = 3 and accessSize <= 0) and
+  exists(string sizeNote |
+    sizeNote = " but the $@ is only " + plural(bufferSize, " byte", " bytes") + "."
+  |
+    accessType = 1 and
+    message =
+      "This '" + ba.getName() + "' operation accesses " + plural(accessSize, " byte", " bytes") +
+        sizeNote
+    or
+    accessType = 2 and
+    message =
+      "This '" + ba.getName() + "' operation may access " + plural(accessSize, " byte", " bytes") +
+        sizeNote
+    or
+    // every other access type is described as an array indexing operation
+    not accessType = [1, 2] and
+    (
+      if accessSize > 0
+      then
+        message = "This array indexing operation accesses byte offset " + (accessSize - 1) + sizeNote
+      else
+        message =
+          "This array indexing operation accesses a negative index " +
+            ((accessSize / ba.getActualType().getSize()) - 1) + " on the $@."
+    )
+  )
+select ba, message, bufferAlloc, bufferDesc
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/BadlyBoundedWrite.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/BadlyBoundedWrite.ql
new file mode 100644
index 00000000000..247606c683d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/BadlyBoundedWrite.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Badly bounded write
+ * @description Buffer write operations with a length parameter that
+ * does not match the size of the destination buffer may
+ * overflow.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 9.3
+ * @precision high
+ * @id cpp/badly-bounded-write
+ * @tags reliability
+ * security
+ * external/cwe/cwe-120
+ * external/cwe/cwe-787
+ * external/cwe/cwe-805
+ */
+
+import semmle.code.cpp.security.BufferWrite
+
+/*
+ * See CWE-120/UnboundedWrite.ql for a summary of CWE-120 alert cases.
+ */
+
+from BufferWrite write, int destSize
+where
+  destSize = getBufferSize(write.getDest(), _) and // the destination size is known
+  write.hasExplicitLimit() and // the write has an explicit size limit...
+  destSize < write.getExplicitLimit() // ...but it's larger than the destination
+select write,
+  "This '" + write.getBWDesc() + "' operation is limited to " + write.getExplicitLimit() +
+    " bytes but the destination is only " + destSize + " bytes."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/OverrunWrite.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/OverrunWrite.ql
new file mode 100644
index 00000000000..ac4144d1c6f
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/OverrunWrite.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Potentially overrunning write
+ * @description Buffer write operations that do not control the length
+ * of data written may overflow.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 9.3
+ * @precision medium
+ * @id cpp/overrunning-write
+ * @tags reliability
+ * security
+ * external/cwe/cwe-120
+ * external/cwe/cwe-787
+ * external/cwe/cwe-805
+ */
+
+import semmle.code.cpp.security.BufferWrite
+import semmle.code.cpp.commons.Alloc
+
+/*
+ * See CWE-120/UnboundedWrite.ql for a summary of CWE-120 alert cases.
+ */
+
+from BufferWrite write, int destSize
+where
+  // the destination has a known size and the write has no explicit size limit
+  destSize = getBufferSize(write.getDest(), _) and
+  not write.hasExplicitLimit() and
+  // we can deduce that too much data may be copied, even without
+  // long '%f' conversions
+  destSize < write.getMaxDataLimited()
+select write,
+  "This '" + write.getBWDesc() + "' operation requires " + write.getMaxData() +
+    " bytes but the destination is only " + destSize + " bytes."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/OverrunWriteFloat.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/OverrunWriteFloat.ql
new file mode 100644
index 00000000000..27adab9b06c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/OverrunWriteFloat.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Potentially overrunning write with float to string conversion
+ * @description Buffer write operations that do not control the length
+ * of data written may overflow when floating point inputs
+ * take extreme values.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 9.3
+ * @precision medium
+ * @id cpp/overrunning-write-with-float
+ * @tags reliability
+ * security
+ * external/cwe/cwe-120
+ * external/cwe/cwe-787
+ * external/cwe/cwe-805
+ */
+
+import semmle.code.cpp.security.BufferWrite
+
+/*
+ * See CWE-120/UnboundedWrite.ql for a summary of CWE-120 alert cases.
+ */
+
+from BufferWrite write, int destSize
+where
+  not write.hasExplicitLimit() and // has no explicit size limit
+  destSize = getBufferSize(write.getDest(), _) and
+  // we can deduce that too much data may be copied...
+  destSize < write.getMaxData() and
+  // ...but it would fit without long '%f' conversions
+  destSize >= write.getMaxDataLimited()
+select write,
+  "This '" + write.getBWDesc() + "' operation may require " + write.getMaxData() +
+    " bytes because of float conversions, but the target is only " + destSize + " bytes."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql
new file mode 100644
index 00000000000..b9922da9c75
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql
@@ -0,0 +1,103 @@
+/**
+ * @name Unbounded write
+ * @description Buffer write operations that do not control the length
+ * of data written may overflow.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.3
+ * @precision medium
+ * @id cpp/unbounded-write
+ * @tags reliability
+ * security
+ * external/cwe/cwe-120
+ * external/cwe/cwe-787
+ * external/cwe/cwe-805
+ */
+
+import semmle.code.cpp.security.BufferWrite
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+/*
+ * --- Summary of CWE-120 alerts ---
+ *
+ * The essence of CWE-120 is that string / buffer copies that are
+ * potentially unbounded, e.g. null terminated string copy,
+ * should be controlled e.g. by using strncpy instead of strcpy.
+ * In practice this is divided into several queries that
+ * handle slightly different sub-cases, exclude some acceptable uses,
+ * and produce reasonable messages to fit each issue.
+ *
+ * cases:
+ * hasExplicitLimit() exists(getMaxData()) exists(getBufferSize(bw.getDest(), _)) handled by
+ * NO NO either UnboundedWrite.ql isUnboundedWrite()
+ * NO YES NO UnboundedWrite.ql isMaybeUnboundedWrite()
+ * NO YES YES OverrunWrite.ql, OverrunWriteFloat.ql
+ * YES either YES BadlyBoundedWrite.ql
+ * YES either NO (assumed OK)
+ */
+
+/*
+ * --- CWE-120/UnboundedWrite ---
+ */
+
+/** Holds if `bw` has neither an explicit size limit nor a deducible maximum amount of data. */
+predicate isUnboundedWrite(BufferWrite bw) {
+  not (bw.hasExplicitLimit() or exists(bw.getMaxData()))
+}
+
+/*
+ * predicate isMaybeUnboundedWrite(BufferWrite bw)
+ * {
+ * not bw.hasExplicitLimit() // has no explicit size limit
+ * and exists(bw.getMaxData()) // and we can deduce an upper bound to the amount copied
+ * and (not exists(getBufferSize(bw.getDest(), _))) // but we can't work out the size of the destination to be sure
+ * }
+ */
+
+/**
+ * Holds if `e` is a source of the unbounded write `bw`, or is reached from
+ * such a source by following field-access qualifiers one or more times.
+ */
+predicate unboundedWriteSource(Expr e, BufferWrite bw) {
+  e = bw.getASource() and isUnboundedWrite(bw)
+  or
+  e = any(FieldAccess fa | unboundedWriteSource(fa, bw)).getQualifier()
+}
+
+/*
+ * --- user input reach ---
+ */
+
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element tainted) { unboundedWriteSource(tainted, any(BufferWrite w)) }
+
+  override predicate taintThroughGlobals() { any() } // holds unconditionally
+}
+
+/*
+ * --- put it together ---
+ */
+
+/*
+ * An unbounded write is, for example `strcpy(..., tainted)`. We're looking
+ * for a tainted source buffer of an unbounded write, where this source buffer
+ * is a sink in the taint-tracking analysis.
+ *
+ * In the case of `gets` and `scanf`, where the source buffer is implicit, the
+ * `BufferWrite` library reports the source buffer to be the same as the
+ * destination buffer. Since those destination-buffer arguments are also
+ * modeled in the taint-tracking library as being _sources_ of taint, they are
+ * in practice reported as being tainted because the `security.TaintTracking`
+ * library does not distinguish between taint going into an argument and out of
+ * an argument. Thus, we get the desired alerts.
+ */
+
+from BufferWrite write, Expr origin, Expr sourceBuffer, PathNode sourceNode, PathNode sinkNode
+where
+  unboundedWriteSource(sourceBuffer, write) and
+  taintedWithPath(origin, sourceBuffer, sourceNode, sinkNode)
+select write, sourceNode, sinkNode,
+  "This '" + write.getBWDesc() + "' with input from $@ may overflow the destination.", origin,
+  origin.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-121/UnterminatedVarargsCall.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-121/UnterminatedVarargsCall.ql
new file mode 100644
index 00000000000..842798102bd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-121/UnterminatedVarargsCall.ql
@@ -0,0 +1,84 @@
+/**
+ * @name Unterminated variadic call
+ * @description Calling a variadic function without a sentinel value
+ * may result in a buffer overflow if the function expects
+ * a specific value to terminate the argument list.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @precision medium
+ * @id cpp/unterminated-variadic-call
+ * @tags reliability
+ * security
+ * external/cwe/cwe-121
+ */
+
+import cpp
+
+/**
+ * Gets the textual value of `e` (as given by `Expr.getValue()`) with a trailing
+ * `".0"` removed, so that for example `-1` and `-1.0` yield the same result.
+ */
+string normalisedExprValue(Expr e) {
+  exists(string raw | raw = e.getValue() | result = raw.regexpReplaceAll("\\.0$", ""))
+}
+
+/** A variadic function which is not a formatting function. */
+class VarargsFunction extends Function {
+  VarargsFunction() { this.isVarargs() and not this instanceof FormattingFunction }
+
+  /** Gets the last argument of `fc`, a call to this function. */
+  Expr trailingArgumentIn(FunctionCall fc) {
+    result = fc.getArgument(fc.getNumberOfArguments() - 1) and
+    fc = this.getACallToThisFunction()
+  }
+
+  /** Gets the normalized value of the last argument of `fc`, a call to this function. */
+  string trailingArgValue(FunctionCall fc) {
+    result = normalisedExprValue(this.trailingArgumentIn(fc))
+  }
+
+  /** Gets the number of calls to this function whose last argument has the value `value`. */
+  private int trailingArgValueCount(string value) {
+    result = strictcount(FunctionCall fc | this.trailingArgValue(fc) = value)
+  }
+
+  /**
+   * Gets the normalized value of the argument at position `index` of `fc`, where
+   * `index` is past the declared parameters but before the last argument.
+   */
+  string nonTrailingVarArgValue(FunctionCall fc, int index) {
+    fc = this.getACallToThisFunction() and
+    this.getNumberOfParameters() <= index and
+    index < fc.getNumberOfArguments() - 1 and
+    result = normalisedExprValue(fc.getArgument(index))
+  }
+
+  /** Gets the number of calls to this function. */
+  private int totalCount() { result = strictcount(this.getACallToThisFunction()) }
+
+  /**
+   * Gets `"0"` or `"-1"` if it is the last-argument value of `cnt` calls, `cnt` is
+   * more than half of all calls, and the value is never a non-trailing variadic argument.
+   */
+  string normalTerminator(int cnt) {
+    result = ["0", "-1"] and
+    cnt = this.trailingArgValueCount(result) and
+    2 * cnt > this.totalCount() and
+    // the terminator value must not be used in a non-terminating position
+    not this.nonTrailingVarArgValue(_, _) = result
+  }
+
+  /** Holds if this function is the global `open`, `fcntl` or `ptrace`. */
+  predicate isWhitelisted() { this.hasGlobalName(["open", "fcntl", "ptrace"]) }
+}
+
+from VarargsFunction callee, FunctionCall call, string terminator, int cnt
+where
+  not callee.isWhitelisted() and
+  terminator = callee.normalTerminator(cnt) and
+  call = callee.getACallToThisFunction() and
+  not callee.trailingArgValue(call) = terminator
+select call,
+  "Calls to $@ should use the value " + terminator + " as a terminator (" + cnt + " calls do).",
+  callee, callee.getQualifiedName()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-129/ImproperArrayIndexValidation.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-129/ImproperArrayIndexValidation.ql
new file mode 100644
index 00000000000..0621def4d98
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-129/ImproperArrayIndexValidation.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Unclear validation of array index
+ * @description Accessing an array without first checking
+ * that the index is within the bounds of the array can
+ * cause undefined behavior and can also be a security risk.
+ * @kind problem
+ * @id cpp/unclear-array-index-validation
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @tags security
+ * external/cwe/cwe-129
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.Guards
+private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+import semmle.code.cpp.security.TaintTracking
+
+/** Holds if the block containing `offsetExpr` is guarded by a bound check on its SSA definition. */
+predicate hasUpperBound(VariableAccess offsetExpr) {
+  exists(SsaDefinition def, StackVariable offsetVar |
+    offsetExpr = def.getAUse(offsetVar) and
+    linearBoundControls(any(BasicBlock bb | bb.contains(offsetExpr)), def, offsetVar)
+  )
+}
+
+/** Holds if a guard controlling `controlled` compares a use of `def` (of `offsetVar`) as `Lesser()`. */
+pragma[noinline]
+predicate linearBoundControls(BasicBlock controlled, SsaDefinition def, StackVariable offsetVar) {
+  exists(GuardCondition guard, boolean branch | guard.controls(controlled, branch) |
+    cmpWithLinearBound(guard, def.getAUse(offsetVar), Lesser(), branch)
+  )
+}
+
+from Expr origin, VariableAccess index
+where
+  index = any(ArrayExpr access).getArrayOffset() and
+  tainted(origin, index) and
+  not hasUpperBound(index)
+select index,
+  "$@ flows to here and is used in an array indexing expression, potentially causing an invalid access.",
+  origin, "User-provided value"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-131/NoSpaceForZeroTerminator.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-131/NoSpaceForZeroTerminator.ql
new file mode 100644
index 00000000000..1780c2a0199
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-131/NoSpaceForZeroTerminator.ql
@@ -0,0 +1,52 @@
+/**
+ * @name No space for zero terminator
+ * @description Allocating a buffer using 'malloc' without ensuring that
+ * there is always space for the entire string and a zero
+ * terminator can cause a buffer overrun.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 9.8
+ * @precision high
+ * @id cpp/no-space-for-terminator
+ * @tags reliability
+ * security
+ * external/cwe/cwe-131
+ * external/cwe/cwe-120
+ * external/cwe/cwe-122
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.models.interfaces.ArrayFunction
+import semmle.code.cpp.models.interfaces.Allocation
+
+/** Holds if `malloc` is sized by `strlen` but used as a null-terminated string; `msg` says so. */
+predicate terminationProblem(AllocationExpr malloc, string msg) {
+  msg = "This allocation does not include space to null-terminate the string." and
+  // the size expression is reached by a call to `strlen`: malloc(strlen(...))
+  DataFlow::localExprFlow(any(StrlenCall strlen), malloc.getSizeExpr()) and
+  // the allocation flows to a call that implies this is a null-terminated string
+  exists(ArrayFunction callee, FunctionCall use, int pos |
+    use.getTarget() = callee and
+    DataFlow::localExprFlow(malloc, use.getArgument(pos))
+  |
+    // null terminated string argument
+    callee.hasArrayWithNullTerminator(pos)
+    or
+    // likely null terminated string argument (such as `strcpy`, `strcat`)
+    callee.hasArrayWithUnknownSize(pos)
+    or
+    // string argument to a formatting function (such as `printf`)
+    exists(int n, FormatLiteral fl |
+      use.getArgument(pos) = use.(FormattingFunctionCall).getConversionArgument(n) and
+      fl = use.(FormattingFunctionCall).getFormat() and
+      fl.getConversionType(n) instanceof PointerType and // `%s`, `%ws` etc
+      not fl.getConversionType(n) instanceof VoidPointerType and // exclude: `%p`
+      not fl.hasPrecision(n) // exclude: `%.*s`
+    )
+  )
+}
+
+from Expr allocation, string explanation
+where terminationProblem(allocation, explanation)
+select allocation, explanation
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql
new file mode 100644
index 00000000000..f24510bba05
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Uncontrolled format string
+ * @description Using externally-controlled format strings in
+ * printf-style functions can lead to buffer overflows
+ * or data representation problems.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision high
+ * @id cpp/tainted-format-string
+ * @tags reliability
+ * security
+ * external/cwe/cwe-134
+ */
+
+import cpp
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.FunctionWithWrappers
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element tainted) {
+    any(PrintfLikeFunction printf).outermostWrapperFunctionCall(tainted, _)
+  }
+}
+
+from
+  PrintfLikeFunction printf, Expr formatArg, PathNode sourceNode, PathNode sinkNode, string callee,
+  Expr origin, string cause
+where
+  isUserInput(origin, cause) and
+  taintedWithPath(origin, formatArg, sourceNode, sinkNode) and
+  printf.outermostWrapperFunctionCall(formatArg, callee)
+select formatArg, sourceNode, sinkNode,
+  "The value of this argument may come from $@ and is being used as a formatting argument to " +
+    callee, origin, cause
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql
new file mode 100644
index 00000000000..d2f5243d4a4
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql
@@ -0,0 +1,40 @@
+/**
+ * @name Uncontrolled format string (through global variable)
+ * @description Using externally-controlled format strings in
+ * printf-style functions can lead to buffer overflows
+ * or data representation problems.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision high
+ * @id cpp/tainted-format-string-through-global
+ * @tags reliability
+ * security
+ * external/cwe/cwe-134
+ */
+
+import cpp
+import semmle.code.cpp.security.FunctionWithWrappers
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+class Configuration extends TaintTrackingConfiguration { // same sink as cpp/tainted-format-string, plus taint through globals
+ override predicate isSink(Element tainted) { // holds if some PrintfLikeFunction relates `tainted` via `outermostWrapperFunctionCall`
+ exists(PrintfLikeFunction printf | printf.outermostWrapperFunctionCall(tainted, _))
+ }
+
+ override predicate taintThroughGlobals() { any() } // always holds, i.e. the option is switched on for this query
+}
+
+from
+ PrintfLikeFunction printf, Expr arg, PathNode sourceNode, PathNode sinkNode,
+ string printfFunction, Expr userValue, string cause
+where
+ printf.outermostWrapperFunctionCall(arg, printfFunction) and // `printfFunction` is the text appended to the alert message below
+ not taintedWithoutGlobals(arg) and // exclude arguments for which `taintedWithoutGlobals` already holds (presumably covered by cpp/tainted-format-string — confirm)
+ taintedWithPath(userValue, arg, sourceNode, sinkNode) and
+ isUserInput(userValue, cause) // `cause` is used as the link text for the `$@` placeholder
+select arg, sourceNode, sinkNode,
+ "The value of this argument may come from $@ and is being used as a formatting argument to " +
+ printfFunction, userValue, cause
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql
new file mode 100644
index 00000000000..31ce1037b27
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql
@@ -0,0 +1,94 @@
+/**
+ * @name User-controlled data may not be null terminated
+ * @description String operations on user-controlled strings can result in
+ * buffer overflow or buffer over-read.
+ * @kind problem
+ * @id cpp/user-controlled-null-termination-tainted
+ * @problem.severity warning
+ * @precision medium
+ * @security-severity 10.0
+ * @tags security
+ * external/cwe/cwe-170
+ */
+
+import cpp
+import semmle.code.cpp.commons.NullTermination
+import semmle.code.cpp.security.TaintTracking
+
+/** A user-controlled expression that may not be null terminated. */
+class TaintSource extends VariableAccess {
+ TaintSource() { // an access to a SemanticStackVariable that SecurityOptions reports as user input with one of the causes below
+ exists(SecurityOptions x, string cause |
+ this.getTarget() instanceof SemanticStackVariable and
+ x.isUserInput(this, cause)
+ |
+ cause = "read" or
+ cause = "fread" or
+ cause = "recv" or
+ cause = "recvfrom" or
+ cause = "recvmsg"
+ )
+ }
+
+ /**
+ * Holds if `sink` is a tainted variable access that must be null
+ * terminated.
+ */
+ private predicate isSink(VariableAccess sink) {
+ tainted(this, sink) and
+ variableMustBeNullTerminated(sink)
+ }
+
+ /**
+ * Holds if this source can reach `va`, possibly using intermediate
+ * reassignments.
+ */
+ private predicate sourceReaches(VariableAccess va) {
+ definitionUsePair(_, this, va) // base case: `va` is paired directly with this source
+ or
+ exists(VariableAccess mid, Expr def | // recursive case: continue from an already-reached access `mid`
+ sourceReaches(mid) and
+ exprDefinition(_, def, mid) and
+ definitionUsePair(_, def, va)
+ )
+ }
+
+ /**
+ * Holds if the sink `sink` is reachable both from this source and
+ * from `va`, possibly using intermediate reassignments.
+ */
+ private predicate reachesSink(VariableAccess va, VariableAccess sink) {
+ isSink(sink) and
+ va = sink // base case: the sink trivially reaches itself
+ or
+ exists(VariableAccess mid, Expr def | // recursive case: step backwards from an access `mid` that reaches `sink`
+ reachesSink(mid, sink) and
+ exprDefinition(_, def, va) and
+ definitionUsePair(_, def, mid)
+ )
+ }
+
+ /**
+ * Holds if `sink` is a tainted variable access that must be null
+ * terminated, and no access which null terminates its contents can
+ * either reach the sink or be reached from the source. (Ideally,
+ * we should instead look for such accesses only on the path from
+ * this source to `sink` found via `tainted(source, sink)`.)
+ */
+ predicate reaches(VariableAccess sink) {
+ isSink(sink) and
+ not exists(VariableAccess va | // `va` ranges over candidate null-terminating accesses
+ va != this and // the source itself is not a candidate
+ va != sink and // nor is the sink
+ mayAddNullTerminator(_, va)
+ |
+ sourceReaches(va)
+ or
+ reachesSink(va, sink)
+ )
+ }
+}
+
+from TaintSource source, VariableAccess sink
+where source.reaches(sink) // see `TaintSource.reaches`: tainted sink with no null-terminating access found
+select sink, "$@ flows to here and may not be null terminated.", source, "User-provided value"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql
new file mode 100644
index 00000000000..e00bd87c86a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql
@@ -0,0 +1,54 @@
+/**
+ * @name User-controlled data in arithmetic expression
+ * @description Arithmetic operations on user-controlled data that is
+ * not validated can cause overflows.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 8.6
+ * @precision low
+ * @id cpp/tainted-arithmetic
+ * @tags security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-191
+ */
+
+import cpp
+import semmle.code.cpp.security.Overflow
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+import Bounded
+
+bindingset[op]
+predicate missingGuard(Operation op, Expr e, string effect) { // relates `op`, an expression `e` and the kind of problem ("underflow"/"overflow")
+ missingGuardAgainstUnderflow(op, e) and effect = "underflow"
+ or
+ missingGuardAgainstOverflow(op, e) and effect = "overflow"
+ or
+ not e instanceof VariableAccess and effect = "overflow" // any `e` that is not a variable access is reported as "overflow", independently of `op`
+}
+
+class Configuration extends TaintTrackingConfiguration {
+ override predicate isSink(Element e) { // an operand of a unary or binary arithmetic operation for which `missingGuard` holds
+ exists(Operation op |
+ missingGuard(op, e, _) and
+ op.getAnOperand() = e
+ |
+ op instanceof UnaryArithmeticOperation or
+ op instanceof BinaryArithmeticOperation
+ )
+ }
+
+ override predicate isBarrier(Expr e) { // adds `bounded` expressions and integral types whose `getSize()` is at most 1 to the inherited barriers
+ super.isBarrier(e) or bounded(e) or e.getUnspecifiedType().(IntegralType).getSize() <= 1
+ }
+}
+
+from Expr origin, Expr e, string effect, PathNode sourceNode, PathNode sinkNode, Operation op
+where
+ taintedWithPath(origin, e, sourceNode, sinkNode) and
+ op.getAnOperand() = e and // `e` must be an operand of `op`
+ missingGuard(op, e, effect) // `effect` is spliced into the alert message
+select e, sourceNode, sinkNode,
+ "$@ flows to here and is used in arithmetic, potentially causing an " + effect + ".", origin,
+ "User-provided value"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticUncontrolled.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticUncontrolled.ql
new file mode 100644
index 00000000000..e1efd2932df
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticUncontrolled.ql
@@ -0,0 +1,136 @@
+/**
+ * @name Uncontrolled data in arithmetic expression
+ * @description Arithmetic operations on uncontrolled data that is not
+ * validated can cause overflows.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 8.6
+ * @precision medium
+ * @id cpp/uncontrolled-arithmetic
+ * @tags security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-191
+ */
+
+import cpp
+import semmle.code.cpp.security.Overflow
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.FlowSources
+import semmle.code.cpp.ir.dataflow.TaintTracking
+import DataFlow::PathGraph
+import Bounded
+
+/**
+ * A function that outputs random data such as `std::rand`.
+ */
+abstract class RandomFunction extends Function {
+ /**
+ * Gets the `FunctionOutput` that describes how this function returns the random data.
+ */
+ FunctionOutput getFunctionOutput() { result.isReturnValue() } // default: the return value; `RandS` overrides this
+}
+
+/**
+ * The standard function `std::rand`.
+ */
+private class StdRand extends RandomFunction {
+ StdRand() {
+ this.hasGlobalOrStdOrBslName("rand") and
+ this.getNumberOfParameters() = 0 // only the parameterless declaration is matched
+ }
+}
+
+/**
+ * The Unix function `rand_r`.
+ */
+private class RandR extends RandomFunction {
+ RandR() {
+ this.hasGlobalName("rand_r") and
+ this.getNumberOfParameters() = 1 // exactly one parameter; the inherited output (the return value) is not overridden
+ }
+}
+
+/**
+ * The Unix function `random`, whose random data is its return value (inherited default output).
+ */
+private class Random extends RandomFunction {
+ Random() {
+ this.hasGlobalName("random") and
+ this.getNumberOfParameters() = [0, 1] // POSIX declares `long random(void)`; 1 is still accepted for backward compatibility
+ }
+}
+
+/**
+ * The Windows `rand_s` function.
+ */
+private class RandS extends RandomFunction {
+ RandS() {
+ this.hasGlobalName("rand_s") and
+ this.getNumberOfParameters() = 1
+ }
+
+ override FunctionOutput getFunctionOutput() { result.isParameterDeref(0) } // the output is the dereference of parameter 0, not the return value
+}
+
+predicate missingGuard(VariableAccess va, string effect) { // holds if `va` is an operand of some operation lacking a guard; `effect` names the problem
+ exists(Operation op | op.getAnOperand() = va |
+ // underflow - random numbers are usually non-negative, so underflow is
+ // only likely if the type is unsigned. Multiplication is also unlikely to
+ // cause underflow of a non-negative number.
+ missingGuardAgainstUnderflow(op, va) and
+ effect = "underflow" and
+ op.getUnspecifiedType().(IntegralType).isUnsigned() and
+ not op instanceof MulExpr
+ or
+ // overflow
+ missingGuardAgainstOverflow(op, va) and effect = "overflow"
+ )
+}
+
+class UncontrolledArithConfiguration extends TaintTracking::Configuration {
+ UncontrolledArithConfiguration() { this = "UncontrolledArithConfiguration" }
+
+ override predicate isSource(DataFlow::Node source) { // a call to a RandomFunction, or the argument through which it outputs its data
+ exists(RandomFunction rand, Call call | call.getTarget() = rand |
+ rand.getFunctionOutput().isReturnValue() and
+ source.asExpr() = call
+ or
+ exists(int n |
+ source.asDefiningArgument() = call.getArgument(n) and
+ rand.getFunctionOutput().isParameterDeref(n)
+ )
+ )
+ }
+
+ override predicate isSink(DataFlow::Node sink) { missingGuard(sink.asExpr(), _) }
+
+ override predicate isSanitizer(DataFlow::Node node) {
+ bounded(node.asExpr())
+ or
+ // If this expression is, or is nested in an operand of, a bitwise 'and', 'or' or complement
+ // operation it's likely that the value is only used as a bit pattern.
+ node.asExpr() =
+ any(Operation op |
+ op instanceof BitwiseOrExpr or
+ op instanceof BitwiseAndExpr or
+ op instanceof ComplementExpr
+ ).getAnOperand*()
+ or
+ // block unintended flow to pointers
+ node.asExpr().getUnspecifiedType() instanceof PointerType
+ }
+}
+
+/** Gets the expression that corresponds to `node`, if any: either `asExpr()` or `asDefiningArgument()`. */
+Expr getExpr(DataFlow::Node node) { result = [node.asExpr(), node.asDefiningArgument()] }
+
+from
+ UncontrolledArithConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink,
+ VariableAccess va, string effect
+where
+ config.hasFlowPath(source, sink) and
+ sink.getNode().asExpr() = va and // the sink must be a variable access
+ missingGuard(va, effect) // `effect` is spliced into the alert message
+select sink.getNode(), source, sink,
+ "$@ flows to here and is used in arithmetic, potentially causing an " + effect + ".",
+ getExpr(source.getNode()), "Uncontrolled value"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql
new file mode 100644
index 00000000000..35668953acc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql
@@ -0,0 +1,79 @@
+/**
+ * @name Use of extreme values in arithmetic expression
+ * @description If a variable is assigned the maximum or minimum value
+ * for that variable's type and is then used in an
+ * arithmetic expression, this may result in an overflow.
+ * @kind problem
+ * @id cpp/arithmetic-with-extreme-values
+ * @problem.severity warning
+ * @security-severity 8.6
+ * @precision low
+ * @tags security
+ * reliability
+ * external/cwe/cwe-190
+ * external/cwe/cwe-191
+ */
+
+import cpp
+import semmle.code.cpp.security.Overflow
+import semmle.code.cpp.security.Security
+import semmle.code.cpp.security.TaintTracking
+
+predicate isMaxValue(Expr mie) { // holds if `mie` is the expression of an invocation of one of the *_MAX macros listed below
+ exists(MacroInvocation mi |
+ mi.getExpr() = mie and
+ (
+ mi.getMacroName() = "CHAR_MAX" or
+ mi.getMacroName() = "LLONG_MAX" or
+ mi.getMacroName() = "INT_MAX" or
+ mi.getMacroName() = "SHRT_MAX" or
+ mi.getMacroName() = "UINT_MAX"
+ )
+ )
+}
+
+predicate isMinValue(Expr mie) { // holds if `mie` is the expression of an invocation of one of the *_MIN macros listed below
+ exists(MacroInvocation mi |
+ mi.getExpr() = mie and
+ (
+ mi.getMacroName() = "CHAR_MIN" or
+ mi.getMacroName() = "LLONG_MIN" or
+ mi.getMacroName() = "INT_MIN" or
+ mi.getMacroName() = "SHRT_MIN"
+ )
+ )
+}
+
+class SecurityOptionsArith extends SecurityOptions { // makes extreme-value macro expressions count as "user input" for this query
+ override predicate isUserInput(Expr expr, string cause) {
+ isMaxValue(expr) and cause = "max value" // cause strings are matched again in `causeEffectCorrespond`
+ or
+ isMinValue(expr) and cause = "min value"
+ }
+}
+
+predicate taintedVarAccess(Expr origin, VariableAccess va, string cause) { // `va` is tainted by `origin`, a user input with the given `cause`
+ isUserInput(origin, cause) and
+ tainted(origin, va)
+}
+
+predicate causeEffectCorrespond(string cause, string effect) { // pairs "max value" with "overflow" and "min value" with "underflow"
+ cause = "max value" and
+ effect = "overflow"
+ or
+ cause = "min value" and
+ effect = "underflow"
+}
+
+from Expr origin, Operation op, VariableAccess va, string cause, string effect
+where
+ taintedVarAccess(origin, va, cause) and
+ op.getAnOperand() = va and
+ (
+ missingGuardAgainstUnderflow(op, va) and effect = "underflow"
+ or
+ missingGuardAgainstOverflow(op, va) and effect = "overflow"
+ ) and
+ causeEffectCorrespond(cause, effect) // only report max values for overflow and min values for underflow
+select va, "$@ flows to here and is used in arithmetic, potentially causing an " + effect + ".",
+ origin, "Extreme value"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/Bounded.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/Bounded.qll
new file mode 100644
index 00000000000..b6b0d608d2a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/Bounded.qll
@@ -0,0 +1,55 @@
+/**
+ * This file provides the `bounded` predicate that is used in both `cpp/uncontrolled-arithmetic`
+ * and `cpp/tainted-arithmetic`.
+ */
+
+private import cpp
+private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+
+/**
+ * Holds if `e` is the operand `operand1` of the bitwise and expression `andExpr` (either a
+ * `BitwiseAndExpr` or an `AssignAndExpr`), and the other operand, `operand2`, is upper bounded by
+ * some number that is less than the maximum integer allowed by the result type of `andExpr`.
+ */
+pragma[inline]
+private predicate boundedBitwiseAnd(Expr e, Expr andExpr, Expr operand1, Expr operand2) {
+ operand1 != operand2 and // callers pass `getAnOperand()` twice; this picks two distinct operands
+ e = operand1 and
+ upperBound(operand2.getFullyConverted()) < exprMaxVal(andExpr.getFullyConverted())
+}
+
+/**
+ * Holds if `e` is an arithmetic expression that cannot overflow, or if `e` is an operand of an
+ * operation that may greatly reduce the range of possible values.
+ */
+predicate bounded(Expr e) {
+ (
+ e instanceof UnaryArithmeticOperation or
+ e instanceof BinaryArithmeticOperation or
+ e instanceof AssignArithmeticOperation
+ ) and
+ not convertedExprMightOverflow(e)
+ or
+ // Optimistically assume that a remainder expression always yields a much smaller value.
+ e = any(RemExpr rem).getLeftOperand()
+ or
+ e = any(AssignRemExpr rem).getLValue()
+ or
+ exists(BitwiseAndExpr andExpr |
+ boundedBitwiseAnd(e, andExpr, andExpr.getAnOperand(), andExpr.getAnOperand())
+ )
+ or
+ exists(AssignAndExpr andExpr |
+ boundedBitwiseAnd(e, andExpr, andExpr.getAnOperand(), andExpr.getAnOperand())
+ )
+ or
+ // Optimistically assume that a division always yields a much smaller value.
+ e = any(DivExpr div).getLeftOperand()
+ or
+ e = any(AssignDivExpr div).getLValue()
+ or
+ e = any(RShiftExpr shift).getLeftOperand() // right shifts are handled like division: the left operand counts as bounded
+ or
+ e = any(AssignRShiftExpr div).getLValue()
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ComparisonWithWiderType.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ComparisonWithWiderType.ql
new file mode 100644
index 00000000000..6636d100746
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/ComparisonWithWiderType.ql
@@ -0,0 +1,73 @@
+/**
+ * @name Comparison of narrow type with wide type in loop condition
+ * @description Comparisons between types of different widths in a loop
+ * condition can cause the loop to behave unexpectedly.
+ * @id cpp/comparison-with-wider-type
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision high
+ * @tags reliability
+ * security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-197
+ * external/cwe/cwe-835
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.Dominance
+import semmle.code.cpp.controlflow.SSA
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+/**
+ * Gets the size used when comparing `e`. C++ references are all pointer width, but the
+ * comparison takes place with the pointed-to value, so the base type's size is used for them.
+ */
+int getComparisonSize(Expr e) {
+ if e.getType() instanceof ReferenceType
+ then result = e.getType().(ReferenceType).getBaseType().getSize()
+ else result = e.getType().getSize()
+}
+
+predicate loopVariant(VariableAccess e, Loop loop) { // holds if an SSA definition used by `e` has a defining value inside `loop`
+ exists(SsaDefinition d | d.getAUse(e.getTarget()) = e |
+ d.getAnUltimateDefiningValue(e.getTarget()) = loop.getCondition().getAChild*() or // defined in the loop condition
+ d.getAnUltimateDefiningValue(e.getTarget()).getEnclosingStmt().getParent*() = loop.getStmt() or // defined in the loop body
+ d.getAnUltimateDefiningValue(e.getTarget()) = loop.(ForStmt).getUpdate().getAChild*() // defined in a `for` update expression
+ )
+}
+
+Element friendlyLoc(Expr e) { // element to link to in the alert: the target of an access or call, otherwise `e` itself
+ result = e.(Access).getTarget()
+ or
+ result = e.(Call).getTarget()
+ or
+ not e instanceof Access and not e instanceof Call and result = e
+}
+
+from Loop l, RelationalOperation rel, VariableAccess small, Expr large
+where
+ small = rel.getLesserOperand() and
+ large = rel.getGreaterOperand() and
+ rel = l.getCondition().getAChild*() and // `rel` is the loop condition or nested inside it
+ forall(Expr conv | conv = large.getConversion*() | // `large` itself and every conversion applied to it
+ upperBound(conv).log2() > getComparisonSize(small) * 8 // assumes sizes are in bytes, hence `* 8` for bits — TODO confirm
+ ) and
+ // Ignore cases where the smaller type is int or larger
+ // These are still bugs, but you should need a very large string or array to
+ // trigger them. We will want to disable this for some applications, but it's
+ // very noisy on codebases that started as 32-bit
+ small.getExplicitlyConverted().getType().getSize() < 4 and
+ // Ignore cases where integer promotion has occurred on /, -, or >> expressions.
+ not getComparisonSize(large.(DivExpr).getLeftOperand().getExplicitlyConverted()) <=
+ getComparisonSize(small) and
+ not getComparisonSize(large.(SubExpr).getLeftOperand().getExplicitlyConverted()) <=
+ getComparisonSize(small) and
+ not getComparisonSize(large.(RShiftExpr).getLeftOperand().getExplicitlyConverted()) <=
+ getComparisonSize(small) and
+ // ignore loop-invariant smaller variables
+ loopVariant(small, l)
+select rel,
+ "Comparison between $@ of type " + small.getType().getName() + " and $@ of wider type " +
+ large.getType().getName() + ".", friendlyLoc(small), small.toString(), friendlyLoc(large),
+ large.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql
new file mode 100644
index 00000000000..bc0dff58244
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql
@@ -0,0 +1,40 @@
+/**
+ * @name Potential integer arithmetic overflow
+ * @description A user-controlled integer arithmetic expression
+ * that is not validated can cause overflows.
+ * @kind problem
+ * @id cpp/integer-overflow-tainted
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision low
+ * @tags security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-197
+ * external/cwe/cwe-681
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.security.TaintTracking
+
+/** Holds if `expr` might overflow; `kind` is "overflow" if it may overflow positively, otherwise "overflow negatively". */
+predicate outOfBoundsExpr(Expr expr, string kind) {
+ if convertedExprMightOverflowPositively(expr)
+ then kind = "overflow"
+ else
+ if convertedExprMightOverflowNegatively(expr)
+ then kind = "overflow negatively"
+ else none()
+}
+
+from Expr use, Expr origin, string kind
+where
+ not use.getUnspecifiedType() instanceof PointerType and // pointer-typed expressions are not reported
+ outOfBoundsExpr(use, kind) and
+ tainted(origin, use) and
+ origin != use and // the source itself is not reported as a use
+ not inSystemMacroExpansion(use) and
+ // Avoid double-counting: don't include all the conversions of `use`.
+ not use instanceof Conversion
+select use, "$@ flows to here and is used in an expression which might " + kind + ".", origin,
+ "User-provided value"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/TaintedAllocationSize.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/TaintedAllocationSize.ql
new file mode 100644
index 00000000000..585875798cc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-190/TaintedAllocationSize.ql
@@ -0,0 +1,69 @@
+/**
+ * @name Overflow in uncontrolled allocation size
+ * @description Allocating memory with a size controlled by an external
+ * user can result in integer overflow.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 8.1
+ * @precision medium
+ * @id cpp/uncontrolled-allocation-size
+ * @tags reliability
+ * security
+ * external/cwe/cwe-190
+ * external/cwe/cwe-789
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+/**
+ * Holds if `alloc` is an allocation, and `tainted` is a child of it that is a
+ * taint sink.
+ */
+predicate allocSink(Expr alloc, Expr tainted) {
+ isAllocationExpr(alloc) and
+ tainted = alloc.getAChild() and // direct children only
+ tainted.getUnspecifiedType() instanceof IntegralType // only integral-typed children qualify as sinks
+}
+
+class TaintedAllocationSizeConfiguration extends TaintTrackingConfiguration {
+ override predicate isSink(Element tainted) { allocSink(_, tainted) } // any integral child of any allocation expression
+
+ override predicate isBarrier(Expr e) { // inherited barriers, plus the two cases below
+ super.isBarrier(e)
+ or
+ // There can be two separate reasons for `convertedExprMightOverflow` not holding:
+ // 1. `e` really cannot overflow.
+ // 2. `e` isn't analyzable.
+ // If we didn't rule out case 2 we would place barriers on anything that isn't analyzable.
+ (
+ e instanceof UnaryArithmeticOperation or
+ e instanceof BinaryArithmeticOperation or
+ e instanceof AssignArithmeticOperation
+ ) and
+ not convertedExprMightOverflow(e)
+ or
+ // Subtracting two pointers is either well-defined (and the result will likely be small), or
+ // terribly undefined and dangerous. Here, we assume that the programmer has ensured that the
+ // result is well-defined (i.e., the two pointers point to the same object), and thus the result
+ // will likely be small.
+ e = any(PointerDiffExpr diff).getAnOperand()
+ }
+}
+
+predicate taintedAllocSize( // holds if user input `source` taints a sink child of the allocation `alloc`
+ Expr source, Expr alloc, PathNode sourceNode, PathNode sinkNode, string taintCause
+) {
+ isUserInput(source, taintCause) and
+ exists(Expr tainted | // the tainted child itself is not exposed; alerts are placed on `alloc`
+ allocSink(alloc, tainted) and
+ taintedWithPath(source, tainted, sourceNode, sinkNode)
+ )
+}
+
+from Expr source, Expr alloc, PathNode sourceNode, PathNode sinkNode, string taintCause
+where taintedAllocSize(source, alloc, sourceNode, sinkNode, taintCause) // the alert is reported on the allocation, not on the tainted child
+select alloc, sourceNode, sinkNode, "This allocation size is derived from $@ and might overflow",
+ source, "user input (" + taintCause + ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-191/UnsignedDifferenceExpressionComparedZero.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-191/UnsignedDifferenceExpressionComparedZero.ql
new file mode 100644
index 00000000000..5be71472c92
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-191/UnsignedDifferenceExpressionComparedZero.ql
@@ -0,0 +1,82 @@
+/**
+ * @name Unsigned difference expression compared to zero
+ * @description A subtraction with an unsigned result can never be negative. Using such an expression in a relational comparison with `0` is likely to be wrong.
+ * @kind problem
+ * @id cpp/unsigned-difference-expression-compared-zero
+ * @problem.severity warning
+ * @security-severity 9.8
+ * @precision medium
+ * @tags security
+ * correctness
+ * external/cwe/cwe-191
+ */
+
+import cpp
+import semmle.code.cpp.commons.Exclusions
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+import semmle.code.cpp.controlflow.Guards
+import semmle.code.cpp.dataflow.DataFlow
+
+/**
+ * Holds if `sub` is guarded by a condition which ensures that
+ * `left >= right`.
+ */
+pragma[noinline]
+predicate isGuarded(SubExpr sub, Expr left, Expr right) {
+ exists(GuardCondition guard, int k |
+ guard.controls(sub.getBasicBlock(), _) and
+ guard.ensuresLt(left, right, k, sub.getBasicBlock(), false) and // presumably "`left < right + k` is false in that block" — confirm against Guards.qll
+ k >= 0 // with a non-negative `k` that would give `left >= right`, as stated above
+ )
+}
+
+/**
+ * Holds if `n` is known or suspected to be less than or equal to
+ * `sub.getLeftOperand()`.
+ */
+predicate exprIsSubLeftOrLess(SubExpr sub, DataFlow::Node n) {
+ n.asExpr() = sub.getLeftOperand() // base case: the left operand itself
+ or
+ exists(DataFlow::Node other |
+ // dataflow
+ exprIsSubLeftOrLess(sub, other) and
+ (
+ DataFlow::localFlowStep(n, other) or // local flow is followed in both directions
+ DataFlow::localFlowStep(other, n)
+ )
+ )
+ or
+ exists(DataFlow::Node other |
+ // guard constraining `sub`
+ exprIsSubLeftOrLess(sub, other) and
+ isGuarded(sub, other.asExpr(), n.asExpr()) // other >= n
+ )
+ or
+ exists(DataFlow::Node other, float p, float q |
+ // linear access of `other`
+ exprIsSubLeftOrLess(sub, other) and
+ linearAccess(n.asExpr(), other.asExpr(), p, q) and // n = p * other + q
+ p <= 1 and
+ q <= 0
+ )
+ or
+ exists(DataFlow::Node other, float p, float q |
+ // linear access of `n`
+ exprIsSubLeftOrLess(sub, other) and
+ linearAccess(other.asExpr(), n.asExpr(), p, q) and // other = p * n + q
+ p >= 1 and
+ q >= 0
+ )
+}
+
+from RelationalOperation ro, SubExpr sub
+where
+ not isFromMacroDefinition(ro) and
+ not isFromMacroDefinition(sub) and
+ ro.getLesserOperand().getValue().toInt() = 0 and // the lesser operand is the constant 0
+ ro.getGreaterOperand() = sub and // and the greater operand is the subtraction
+ sub.getFullyConverted().getUnspecifiedType().(IntegralType).isUnsigned() and
+ exprMightOverflowNegatively(sub.getFullyConverted()) and // generally catches false positives involving constants
+ not exprIsSubLeftOrLess(sub, DataFlow::exprNode(sub.getRightOperand())) // generally catches false positives where there's a relation between the left and right operands
+select ro, "Unsigned subtraction can never be negative."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-253/HResultBooleanConversion.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-253/HResultBooleanConversion.ql
new file mode 100644
index 00000000000..67ba5b0c45b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-253/HResultBooleanConversion.ql
@@ -0,0 +1,76 @@
+/**
+ * @name Cast between HRESULT and a Boolean type
+ * @description Casting an HRESULT to/from a Boolean type and then using it in a test expression will yield an incorrect result because success (S_OK) in HRESULT is indicated by a value of 0.
+ * @kind problem
+ * @id cpp/hresult-boolean-conversion
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision high
+ * @tags security
+ * external/cwe/cwe-253
+ * external/microsoft/C6214
+ * external/microsoft/C6215
+ * external/microsoft/C6216
+ * external/microsoft/C6217
+ * external/microsoft/C6230
+ */
+
+import cpp
+
+predicate isHresultBooleanConverted(Expr e1, Cast e2) { // holds if one of the two types is named HRESULT and the other bool/BOOL/_Bool
+ exists(Type t1, Type t2 |
+ t1 = e1.getType() and
+ t2 = e2.getType() and
+ (
+ (t1.hasName("bool") or t1.hasName("BOOL") or t1.hasName("_Bool")) and // Boolean -> HRESULT
+ t2.hasName("HRESULT")
+ or
+ (t2.hasName("bool") or t2.hasName("BOOL") or t2.hasName("_Bool")) and // HRESULT -> Boolean
+ t1.hasName("HRESULT")
+ )
+ )
+}
+
+predicate isHresultBooleanConverted(Expr e1) { // one-argument form: the conversion applied to `e1` is an HRESULT/Boolean cast
+ exists(Cast e2 |
+ e2 = e1.getConversion() and
+ isHresultBooleanConverted(e1, e2)
+ )
+}
+
+from Expr e1, string msg
+where
+ exists(Cast e2 | e2 = e1.getConversion() | // case 1: a cast between HRESULT and a Boolean type
+ isHresultBooleanConverted(e1, e2) and
+ if e2.isImplicit()
+ then
+ msg = "Implicit conversion from " + e1.getType().toString() + " to " + e2.getType().toString()
+ else
+ msg = "Explicit conversion from " + e1.getType().toString() + " to " + e2.getType().toString()
+ )
+ or
+ exists(ControlStructure ctls | // case 2: an HRESULT used directly as a controlling expression
+ ctls.getControllingExpr() = e1 and
+ e1.getType().(TypedefType).hasName("HRESULT") and
+ not isHresultBooleanConverted(e1) and // such expressions are already reported by case 1
+ not ctls instanceof SwitchStmt and // not controlled by a boolean condition
+ msg = "Direct usage of a type " + e1.getType().toString() + " as a conditional expression"
+ )
+ or
+ ( // case 3: an HRESULT operand of a logical operation
+ exists(BinaryLogicalOperation blop | blop.getAnOperand() = e1 |
+ e1.getType().(TypedefType).hasName("HRESULT") and
+ msg =
+ "Usage of a type " + e1.getType().toString() +
+ " as an argument of a binary logical operation"
+ )
+ or
+ exists(UnaryLogicalOperation ulop | ulop.getAnOperand() = e1 |
+ e1.getType().(TypedefType).hasName("HRESULT") and
+ msg =
+ "Usage of a type " + e1.getType().toString() +
+ " as an argument of a unary logical operation"
+ ) and // NOTE(review): `and` binds tighter than `or`, so the next line only restricts this unary branch, not the binary one — confirm intent
+ not isHresultBooleanConverted(e1)
+ )
+select e1, msg
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql
new file mode 100644
index 00000000000..814c6aff21b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql
@@ -0,0 +1,124 @@
+/**
+ * @name Authentication bypass by spoofing
+ * @description Authentication by checking that the peer's address
+ * matches a known IP or web address is unsafe as it is
+ * vulnerable to spoofing attacks.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 8.1
+ * @precision medium
+ * @id cpp/user-controlled-bypass
+ * @tags security
+ * external/cwe/cwe-290
+ */
+
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+predicate hardCodedAddressOrIP(StringLiteral txt) { // patterns include the surrounding quotes, so `getValueText()` presumably yields the literal as written — confirm
+ exists(string s | s = txt.getValueText() |
+ // Hard-coded ip addresses, such as 127.0.0.1
+ s.regexpMatch("\"[0-9]+[.][0-9]+[.][0-9]+[.][0-9]+\"") or
+ // Hard-coded addresses such as www.mycompany.com
+ s.matches("\"www.%\"") or
+ s.matches("\"http:%\"") or
+ s.matches("\"https:%\"") or
+ s.matches("\"%.com\"") or
+ s.matches("\"%.ru\"") or
+ s.matches("\"%.net\"") or
+ s.matches("\"%.org\"") or
+ s.matches("\"%.de\"") or
+ s.matches("\"%.jp\"") or
+ s.matches("\"%.uk\"") or
+ s.matches("\"%.br\"") or
+ s.matches("\"%.pl\"") or
+ s.matches("\"%.in\"") or
+ s.matches("\"%.it\"") or
+ s.matches("\"%.fr\"") or
+ s.matches("\"%.au\"") or
+ s.matches("\"%.info\"") or
+ s.matches("\"%.nl\"") or
+ s.matches("\"%.cn\"") or
+ s.matches("\"%.ir\"") or
+ s.matches("\"%.es\"") or
+ s.matches("\"%.cz\"") or
+ s.matches("\"%.biz\"") or
+ s.matches("\"%.ca\"") or
+ s.matches("\"%.eu\"") or
+ s.matches("\"%.ua\"") or
+ s.matches("\"%.kr\"") or
+ s.matches("\"%.za\"") or
+ s.matches("\"%.co\"") or
+ s.matches("\"%.gr\"") or
+ s.matches("\"%.ro\"") or
+ s.matches("\"%.se\"") or
+ s.matches("\"%.tw\"") or
+ s.matches("\"%.vn\"") or
+ s.matches("\"%.mx\"") or
+ s.matches("\"%.ch\"") or
+ s.matches("\"%.tr\"") or
+ s.matches("\"%.at\"") or
+ s.matches("\"%.be\"") or
+ s.matches("\"%.hu\"") or
+ s.matches("\"%.tv\"") or
+ s.matches("\"%.dk\"") or
+ s.matches("\"%.me\"") or
+ s.matches("\"%.ar\"") or
+ s.matches("\"%.us\"") or
+ s.matches("\"%.no\"") or
+ s.matches("\"%.sk\"") or
+ s.matches("\"%.fi\"") or
+ s.matches("\"%.id\"") or
+ s.matches("\"%.cl\"") or
+ s.matches("\"%.nz\"") or
+ s.matches("\"%.by\"") or
+ s.matches("\"%.xyz\"") or
+ s.matches("\"%.pt\"") or
+ s.matches("\"%.ie\"") or
+ s.matches("\"%.il\"") or
+ s.matches("\"%.kz\"") or
+ s.matches("\"%.my\"") or
+ s.matches("\"%.hk\"") or
+ s.matches("\"%.lt\"") or
+ s.matches("\"%.cc\"") or
+ s.matches("\"%.sg\"") or
+ s.matches("\"%.io\"") or
+ s.matches("\"%.edu\"") or
+ s.matches("\"%.gov\"")
+ )
+}
+
+predicate useOfHardCodedAddressOrIP(Expr use) { // a hard-coded address literal, or a use reached from one through variable definitions
+ hardCodedAddressOrIP(use)
+ or
+ exists(Expr def, Expr src, Variable v | // recursive case: `src` is already such a use and `def` defines `v` from it
+ useOfHardCodedAddressOrIP(src) and
+ exprDefinition(v, def, src) and
+ definitionUsePair(v, def, use)
+ )
+}
+
+/**
+ * Find `IfStmt`s that have a hard-coded IP or web address in
+ * their condition. If the condition also depends on an
+ * untrusted input then it might be vulnerable to a spoofing
+ * attack.
+ */
+predicate hardCodedAddressInCondition(Expr subexpression, Expr condition) {
+ subexpression = condition.getAChild+() and // any strict descendant of the condition, not the condition itself
+ // One of the sub-expressions of the condition is a hard-coded
+ // IP or web-address.
+ exists(Expr use | use = condition.getAChild+() | useOfHardCodedAddressOrIP(use)) and
+ condition = any(IfStmt ifStmt).getCondition()
+}
+
+class Configuration extends TaintTrackingConfiguration { // only the sink definition is overridden here
+ override predicate isSink(Element sink) { hardCodedAddressInCondition(sink, _) } // any sub-expression of a qualifying `if` condition
+}
+
+from Expr subexpression, Expr source, Expr condition, PathNode sourceNode, PathNode sinkNode
+where
+ hardCodedAddressInCondition(subexpression, condition) and
+ taintedWithPath(source, subexpression, sourceNode, sinkNode) // the tainted sub-expression is the sink; the alert is placed on the whole condition
+select condition, sourceNode, sinkNode,
+ "Untrusted input $@ might be vulnerable to a spoofing attack.", source, source.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextBufferWrite.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextBufferWrite.ql
new file mode 100644
index 00000000000..696c5764fcd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextBufferWrite.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Cleartext storage of sensitive information in buffer
+ * @description Storing sensitive information in cleartext can expose it
+ * to an attacker.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id cpp/cleartext-storage-buffer
+ * @tags security
+ * external/cwe/cwe-312
+ */
+
+import cpp
+import semmle.code.cpp.security.BufferWrite
+import semmle.code.cpp.security.TaintTracking
+import semmle.code.cpp.security.SensitiveExprs
+import TaintedWithPath
+
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element tainted) { tainted = any(BufferWrite w).getASource() } // any source of any buffer write
+}
+
+from
+ BufferWrite w, Expr taintedArg, Expr taintSource, PathNode sourceNode, PathNode sinkNode,
+ string taintCause, SensitiveExpr dest
+where
+ taintedWithPath(taintSource, taintedArg, sourceNode, sinkNode) and
+ isUserInput(taintSource, taintCause) and // `taintCause` is the text shown in the alert's "user input (...)" link
+ w.getASource() = taintedArg and // the tainted value is one of the sources of the write `w`
+ dest = w.getDest() // `dest` is declared as `SensitiveExpr`, so only writes with a sensitive destination match
+select w, sourceNode, sinkNode,
+ "This write into buffer '" + dest.toString() + "' may contain unencrypted data from $@",
+ taintSource, "user input (" + taintCause + ")"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextFileWrite.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextFileWrite.ql
new file mode 100644
index 00000000000..62bd05d95aa
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextFileWrite.ql
@@ -0,0 +1,53 @@
+/**
+ * @name Cleartext storage of sensitive information in file
+ * @description Storing sensitive information in cleartext can expose it
+ * to an attacker.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision high
+ * @id cpp/cleartext-storage-file
+ * @tags security
+ * external/cwe/cwe-313
+ */
+
+import cpp
+import semmle.code.cpp.security.SensitiveExprs
+import semmle.code.cpp.security.FileWrite
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/**
+ * Holds if `op` calls one of the listed file functions and `path` is the argument at the index given for that name.
+ */
+predicate filenameOperation(FunctionCall op, Expr path) {
+  exists(string callee | op.getTarget().getName() = callee |
+    callee = ["fopen_s", "wfopen_s", "rename"] and
+    op.getArgument(1) = path
+    or
+    callee =
+      [
+        "remove", "unlink", "rmdir", "rename", "fopen", "open", "freopen", "_open", "_wopen",
+        "_wfopen", "_fsopen", "_wfsopen", "chmod", "chown", "stat", "lstat", "fstat", "access",
+        "_access", "_waccess", "_access_s", "_waccess_s"
+      ] and
+    op.getArgument(0) = path
+  )
+}
+
+predicate isFileName(GVN gvn) {
+  exists(Expr path |
+    filenameOperation(_, path) and // `path` is the filename argument of some file operation
+    globalValueNumber(path) = gvn
+  )
+}
+
+from FileWrite w, SensitiveExpr source, Expr mid, Expr dest
+where
+ DataFlow::localFlow(DataFlow::exprNode(source), DataFlow::exprNode(mid)) and // local flow from the sensitive expression to `mid`
+ mid = w.getASource() and // `mid` is one of the sources of the file write
+ dest = w.getDest() and
+ not isFileName(globalValueNumber(source)) and // file names are not passwords
+ not exists(string convChar | convChar = w.getSourceConvChar(mid) | not convChar = ["s", "S"]) // ignore things written with other conversion characters
+select w, "This write into file '" + dest.toString() + "' may contain unencrypted data from $@",
+ source, "this source."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql
new file mode 100644
index 00000000000..d7e5343d6dc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql
@@ -0,0 +1,124 @@
+/**
+ * @name Cleartext transmission of sensitive information
+ * @description Transmitting sensitive information across a network in
+ * cleartext can expose it to an attacker.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id cpp/cleartext-transmission
+ * @tags security
+ * external/cwe/cwe-319
+ */
+
+import cpp
+import semmle.code.cpp.security.SensitiveExprs
+import semmle.code.cpp.dataflow.TaintTracking
+import semmle.code.cpp.models.interfaces.FlowSource
+import DataFlow::PathGraph
+
+/**
+ * A function call that sends or receives data over a network; see `NetworkSend` and `NetworkRecv` for the concrete cases.
+ */
+abstract class NetworkSendRecv extends FunctionCall {
+ /**
+ * Gets the expression for the socket or similar object used for sending or
+ * receiving data (if any).
+ */
+ abstract Expr getSocketExpr();
+
+ /**
+ * Gets the expression for the buffer to be sent from / received into.
+ */
+ abstract Expr getDataExpr();
+}
+
+/**
+ * A call to a `RemoteFlowSinkFunction`, that is, a function call that sends data over a network.
+ *
+ * Note: functions such as `write` may be writing to a network source or a file. We could attempt to determine which, and sort results into `cpp/cleartext-transmission` and perhaps `cpp/cleartext-storage-file`. In practice it usually isn't very important which query reports a result as long as it's reported exactly once.
+ */
+class NetworkSend extends NetworkSendRecv {
+  RemoteFlowSinkFunction target;
+
+  NetworkSend() { this.getTarget() = target }
+
+  override Expr getSocketExpr() {
+    exists(FunctionInput socketIn, int pos |
+      socketIn.isParameter(pos) and
+      target.hasSocketInput(socketIn) and
+      this.getArgument(pos) = result
+    )
+  }
+
+  override Expr getDataExpr() {
+    exists(FunctionInput dataIn, int pos |
+      dataIn.isParameterDeref(pos) and
+      target.hasRemoteFlowSink(dataIn, _) and
+      this.getArgument(pos) = result
+    )
+  }
+}
+
+/**
+ * A call to a `RemoteFlowSourceFunction`, that is, a function call that receives data over a network.
+ */
+class NetworkRecv extends NetworkSendRecv {
+  RemoteFlowSourceFunction target;
+
+  NetworkRecv() { this.getTarget() = target }
+
+  override Expr getSocketExpr() {
+    exists(FunctionInput socketIn, int pos |
+      socketIn.isParameter(pos) and
+      target.hasSocketInput(socketIn) and
+      this.getArgument(pos) = result
+    )
+  }
+
+  override Expr getDataExpr() {
+    exists(FunctionOutput dataOut, int pos |
+      dataOut.isParameterDeref(pos) and
+      target.hasRemoteFlowSource(dataOut, _) and
+      this.getArgument(pos) = result
+    )
+  }
+}
+
+/**
+ * Taint flow from a sensitive expression to a network operation with data
+ * tainted by that expression.
+ */
+class SensitiveSendRecvConfiguration extends TaintTracking::Configuration {
+ SensitiveSendRecvConfiguration() { this = "SensitiveSendRecvConfiguration" }
+
+ override predicate isSource(DataFlow::Node source) { source.asExpr() instanceof SensitiveExpr } // sources: any `SensitiveExpr`
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(NetworkSendRecv transmission |
+ sink.asExpr() = transmission.getDataExpr() and // sinks: the data buffer of a send or receive call
+ // a zero socket descriptor is standard input, which is not interesting for this query;
+ not exists(Zero zero | // so drop calls whose socket expression is reached by local flow from a `Zero`
+ DataFlow::localFlow(DataFlow::exprNode(zero),
+ DataFlow::exprNode(transmission.getSocketExpr()))
+ )
+ )
+ }
+}
+
+from
+ SensitiveSendRecvConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink,
+ NetworkSendRecv transmission, string msg
+where
+ config.hasFlowPath(source, sink) and
+ sink.getNode().asExpr() = transmission.getDataExpr() and // recover the call whose data buffer is the sink
+ if transmission instanceof NetworkSend // pick the alert wording by direction
+ then
+ msg =
+ "This operation transmits '" + sink.toString() +
+ "', which may contain unencrypted sensitive data from $@"
+ else // any `NetworkSendRecv` that is not a `NetworkSend`
+ msg =
+ "This operation receives into '" + sink.toString() +
+ "', which may put unencrypted sensitive data into $@"
+select transmission, source, sink, msg, source, source.getNode().asExpr().toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-313/CleartextSqliteDatabase.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-313/CleartextSqliteDatabase.ql
new file mode 100644
index 00000000000..bb9135a92ff
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-313/CleartextSqliteDatabase.ql
@@ -0,0 +1,58 @@
+/**
+ * @name Cleartext storage of sensitive information in an SQLite database
+ * @description Storing sensitive information in a non-encrypted
+ * database can expose it to an attacker.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id cpp/cleartext-storage-database
+ * @tags security
+ * external/cwe/cwe-313
+ */
+
+import cpp
+import semmle.code.cpp.security.SensitiveExprs
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+class UserInputIsSensitiveExpr extends SecurityOptions { // overrides `isUserInput` so that sensitive expressions count as user input
+ override predicate isUserInput(Expr expr, string cause) {
+ expr instanceof SensitiveExpr and cause = "sensitive information"
+ }
+}
+
+class SqliteFunctionCall extends FunctionCall { // a call to any function whose name starts with `sqlite`
+ SqliteFunctionCall() { this.getTarget().getName().matches("sqlite%") }
+
+ Expr getASource() { result = this.getAnArgument() } // every argument of the call is treated as a source
+}
+
+predicate sqlite_encryption_used() {
+  any(FunctionCall call).getTarget().getName().matches("sqlite%\\_key\\_%") or
+  exists(string lowered | lowered = any(StringLiteral lit).getValue().toLowerCase() |
+    lowered.matches("%attach%database%key%") or lowered.regexpMatch("pragma key.*"))
+}
+
+class Configuration extends TaintTrackingConfiguration {
+ override predicate isSource(Expr source) { // narrows the inherited sources to sensitive expressions
+ super.isSource(source) and source instanceof SensitiveExpr
+ }
+
+ override predicate isSink(Element taintedArg) {
+ exists(SqliteFunctionCall sqliteCall |
+ taintedArg = sqliteCall.getASource() and
+ not sqlite_encryption_used() // takes no arguments: if it holds at all, there are no sinks anywhere
+ )
+ }
+}
+
+from
+ SensitiveExpr taintSource, Expr taintedArg, SqliteFunctionCall sqliteCall, PathNode sourceNode,
+ PathNode sinkNode
+where
+ taintedWithPath(taintSource, taintedArg, sourceNode, sinkNode) and
+ taintedArg = sqliteCall.getASource() // recover the SQLite call that receives the tainted argument
+select sqliteCall, sourceNode, sinkNode,
+ "This SQLite call may store $@ in a non-encrypted SQLite database", taintSource,
+ "sensitive information"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-327/BrokenCryptoAlgorithm.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-327/BrokenCryptoAlgorithm.ql
new file mode 100644
index 00000000000..e6c7b186ce2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-327/BrokenCryptoAlgorithm.ql
@@ -0,0 +1,168 @@
+/**
+ * @name Use of a broken or risky cryptographic algorithm
+ * @description Using broken or weak cryptographic algorithms can allow
+ * an attacker to compromise security.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision high
+ * @id cpp/weak-cryptographic-algorithm
+ * @tags security
+ * external/cwe/cwe-327
+ */
+
+import cpp
+import semmle.code.cpp.security.Encryption
+
+/**
+ * Gets a called function whose name, a parameter name, or declaring type name satisfies `isInsecureEncryption`.
+ */
+Function getAnInsecureEncryptionFunction() {
+  exists(result.getACallToThisFunction()) and
+  (
+    isInsecureEncryption(result.getDeclaringType().getName()) or
+    isInsecureEncryption(result.getAParameter().getName()) or
+    isInsecureEncryption(result.getName())
+  )
+}
+
+/**
+ * Gets a called function whose name or a parameter name satisfies `isEncryptionAdditionalEvidence`.
+ */
+Function getAnAdditionalEvidenceFunction() {
+  exists(result.getACallToThisFunction()) and
+  (
+    isEncryptionAdditionalEvidence(result.getAParameter().getName()) or
+    isEncryptionAdditionalEvidence(result.getName())
+  )
+}
+
+/**
+ * Gets a macro with at least one invocation whose name satisfies `isInsecureEncryption`.
+ */
+Macro getAnInsecureEncryptionMacro() {
+  exists(MacroInvocation use | use = result.getAnInvocation()) and
+  isInsecureEncryption(result.getName())
+}
+
+/**
+ * Gets a macro with at least one invocation whose name satisfies `isEncryptionAdditionalEvidence`.
+ */
+Macro getAnAdditionalEvidenceMacro() {
+  exists(MacroInvocation use | use = result.getAnInvocation()) and
+  isEncryptionAdditionalEvidence(result.getName())
+}
+
+/**
+ * Gets an enum constant whose name satisfies `isInsecureEncryption`, so it may relate to an insecure encryption algorithm.
+ */
+EnumConstant getAnInsecureEncryptionEnumConst() { isInsecureEncryption(result.getName()) }
+
+/**
+ * Gets an enum constant whose name satisfies `isEncryptionAdditionalEvidence`, giving additional evidence it is related to encryption.
+ */
+EnumConstant getAdditionalEvidenceEnumConst() { isEncryptionAdditionalEvidence(result.getName()) }
+
+/**
+ * Holds if `fc` is a function call we have a high confidence is related to use of an insecure encryption
+ * algorithm, `blame` is the element that might be the best point to blame, and `description` describes `blame`.
+ */
+predicate getInsecureEncryptionEvidence(FunctionCall fc, Element blame, string description) {
+  // find use of an insecure algorithm name
+  (
+    exists(FunctionCall fc2 |
+      fc.getAChild*() = fc2 and // `fc2` is `fc` itself or a call nested inside it
+      fc2.getTarget() = getAnInsecureEncryptionFunction() and
+      blame = fc2 and
+      description = "call to " + fc2.getTarget().getName() // describe the blamed call, not the enclosing `fc`
+    )
+    or
+    exists(MacroInvocation mi |
+      (
+        mi.getAnExpandedElement() = fc or
+        mi.getAnExpandedElement() = fc.getAnArgument()
+      ) and
+      mi.getMacro() = getAnInsecureEncryptionMacro() and
+      blame = mi and
+      description = "invocation of macro " + mi.getMacro().getName()
+    )
+    or
+    exists(EnumConstantAccess ec |
+      ec = fc.getAnArgument() and
+      ec.getTarget() = getAnInsecureEncryptionEnumConst() and
+      blame = ec and
+      description = "access of enum constant " + ec.getTarget().getName()
+    )
+  ) and
+  // find additional evidence that this function is related to encryption.
+  (
+    exists(FunctionCall fc2 |
+      fc.getAChild*() = fc2 and
+      fc2.getTarget() = getAnAdditionalEvidenceFunction()
+    )
+    or
+    exists(MacroInvocation mi |
+      (
+        mi.getAnExpandedElement() = fc or
+        mi.getAnExpandedElement() = fc.getAnArgument()
+      ) and
+      mi.getMacro() = getAnAdditionalEvidenceMacro()
+    )
+    or
+    exists(EnumConstantAccess ec |
+      ec = fc.getAnArgument() and
+      ec.getTarget() = getAdditionalEvidenceEnumConst()
+    )
+  ) and
+  // exclude calls from templates as this is rarely the right place to flag an
+  // issue
+  not fc.isFromTemplateInstantiation(_) and
+  (
+    // the function should have an input that looks like a non-constant buffer
+    exists(Expr e |
+      fc.getAnArgument() = e and
+      (
+        e.getUnspecifiedType() instanceof PointerType or
+        e.getUnspecifiedType() instanceof ReferenceType or
+        e.getUnspecifiedType() instanceof ArrayType
+      ) and
+      not e.getType().isDeeplyConstBelow() and
+      not e.isConstant()
+    )
+    or
+    // or be a non-const member function of an object
+    fc.getTarget() instanceof MemberFunction and
+    not fc.getTarget() instanceof ConstMemberFunction and
+    not fc.getTarget().isStatic()
+  )
+}
+
+/**
+ * An element that is the `blame` of some `getInsecureEncryptionEvidence` result, together with its description.
+ */
+class BlamedElement extends Element {
+ string description;
+
+ BlamedElement() { getInsecureEncryptionEvidence(_, this, description) }
+
+ /**
+ * Holds if this is the `num`-th `BlamedElement` in `f`, ordered by the `charLoc` start value.
+ */
+ predicate hasFileRank(File f, int num) {
+ exists(int loc |
+ getLocation().charLoc(f, loc, _) and
+ loc =
+ rank[num](BlamedElement other, int loc2 | other.getLocation().charLoc(f, loc2, _) | loc2)
+ )
+ }
+
+ string getDescription() { result = description } // the description bound by the characteristic predicate
+}
+
+from File f, BlamedElement firstResult, BlamedElement thisResult
+where
+ firstResult.hasFileRank(f, 1) and // every alert for a file is anchored at its first blamed element
+ thisResult.hasFileRank(f, _) // one row per blamed element in the same file
+select firstResult,
+ "This file makes use of a broken or weak cryptographic algorithm (specified by $@).", thisResult,
+ thisResult.getDescription()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-327/OpenSslHeartbleed.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-327/OpenSslHeartbleed.ql
new file mode 100644
index 00000000000..38067ae200c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-327/OpenSslHeartbleed.ql
@@ -0,0 +1,64 @@
+/**
+ * @name Use of a version of OpenSSL with Heartbleed
+ * @description Using an old version of OpenSSL can allow remote
+ * attackers to retrieve portions of memory.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision very-high
+ * @id cpp/openssl-heartbleed
+ * @tags security
+ * external/cwe/cwe-327
+ * external/cwe/cwe-788
+ */
+
+import cpp
+
+/**
+ * Holds if accesses to `v` and `w` occur inside the same comparison, or an access to `w` is assigned to `v`.
+ */
+predicate comparedTo(Variable v, Variable w) {
+  exists(ComparisonOperation cmp |
+    cmp = w.getAnAccess().getParent+() and
+    cmp = v.getAnAccess().getParent+()
+  )
+  or
+  w.getAnAccess() = v.getAnAssignedValue()
+}
+
+class DataVariable extends Variable {
+  DataVariable() {
+    this.hasName("data") and
+    exists(Struct record | // the field must belong to a struct named `ssl3_record_st`
+      record.hasName("ssl3_record_st") and
+      record.getAField() = this
+    )
+  }
+}
+
+/**
+ * Holds if expression `e` might evaluate to a pointer
+ * into the memory region pointed to by `v`.
+ */
+predicate pointsInto(Expr e, DataVariable v) {
+ e = v.getAnAccess() or // a direct access to `v`
+ e.(AddressOfExpr).getOperand().(ArrayExpr).getArrayBase() = v.getAnAccess() or // `&v[...]`
+ varPointsInto(e.(VariableAccess).getTarget(), v) // an access to a variable that was assigned such a pointer
+}
+
+pragma[nomagic]
+predicate varPointsInto(Variable tainted, DataVariable src) { // holds if some value assigned to `tainted` points into `src`
+ pointsInto(tainted.getAnAssignedValue(), src) // mutually recursive with `pointsInto`
+}
+
+from FunctionCall fc, Struct ssl3_record_st, Field data, Field length
+where
+ fc.getTarget().getName().matches("%memcpy%") and // any callee whose name contains `memcpy`
+ ssl3_record_st.hasName("ssl3_record_st") and
+ data = ssl3_record_st.getAField() and
+ data.hasName("data") and
+ length = ssl3_record_st.getAField() and
+ length.hasName("length") and
+ pointsInto(fc.getArgument(1), data) and // argument 1 may point into the record's `data`
+ not comparedTo(fc.getArgument(2).(VariableAccess).getTarget(), length) // argument 2 is not a variable related to `length` by `comparedTo`
+select fc, "This call to memcpy is insecure (Heartbleed vulnerability)."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-367/TOCTOUFilesystemRace.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-367/TOCTOUFilesystemRace.ql
new file mode 100644
index 00000000000..49f95f039c1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-367/TOCTOUFilesystemRace.ql
@@ -0,0 +1,133 @@
+/**
+ * @name Time-of-check time-of-use filesystem race condition
+ * @description Separately checking the state of a file before operating
+ * on it may allow an attacker to modify the file between
+ * the two operations.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.7
+ * @precision high
+ * @id cpp/toctou-race-condition
+ * @tags security
+ * external/cwe/cwe-367
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.Guards
+
+/**
+ * An operation on a filename that is likely to modify the corresponding file
+ * and may return an indication of success.
+ *
+ * Note: we're not interested in operations where the file is specified by a
+ * descriptor, rather than a filename, as they are better behaved. We are
+ * interested in functions that take a filename and return a file descriptor,
+ * however.
+ */
+FunctionCall filenameOperation(Expr path) {
+ exists(string name | name = result.getTarget().getName() |
+ name =
+ [
+ "remove", "unlink", "rmdir", "rename", "fopen", "open", "freopen", "_open", "_wopen",
+ "_wfopen", "_fsopen", "_wfsopen"
+ ] and
+ result.getArgument(0) = path // these take the filename as argument 0
+ or
+ name = ["fopen_s", "wfopen_s", "rename"] and
+ result.getArgument(1) = path // these take a filename as argument 1 (`rename` is in both lists)
+ )
+ or
+ result = sensitiveFilenameOperation(path) // `chmod` / `chown` calls are included as well
+}
+
+/**
+ * Gets a call to `chmod` or `chown` whose first argument is `path`. Such a call is likely to modify
+ * the security properties of the corresponding file and may return an indication of success.
+ */
+FunctionCall sensitiveFilenameOperation(Expr path) {
+  path = result.getArgument(0) and
+  exists(Function callee | callee = result.getTarget() |
+    callee.getName() = ["chmod", "chown"]
+  )
+}
+
+/**
+ * Gets a call to one of the `access` family of functions whose first argument is `path`. Such a call
+ * returns information in the return value but does not modify the corresponding file.
+ */
+FunctionCall accessCheck(Expr path) {
+  result.getArgument(0) = path and
+  exists(Function checker | checker = result.getTarget() |
+    checker.getName() = ["access", "_access", "_waccess", "_access_s", "_waccess_s"]
+  )
+}
+
+/**
+ * An operation on a filename that returns information via a pointer argument
+ * and any return value, but does not modify the corresponding file. For
+ * example, `stat`. `path` is argument 0 of the call and `buf` is argument 1.
+ */
+FunctionCall stat(Expr path, Expr buf) {
+ exists(string name | name = result.getTarget().getName() |
+ name = ["stat", "lstat", "fstat"] or // NOTE(review): `fstat` takes a file descriptor as argument 0 - confirm it belongs here
+ name.matches("\\_stat%") or // names starting with a literal `_stat`
+ name.matches("\\_wstat%") // names starting with a literal `_wstat`
+ ) and
+ path = result.getArgument(0) and
+ buf = result.getArgument(1)
+}
+
+/**
+ * Holds if `use` is `source` itself, or `use` is a use of an SSA definition of a
+ * stack variable that has `source` as one of its ultimate defining values.
+ */
+predicate referenceTo(Expr source, Expr use) {
+  exists(SsaDefinition ssa, StackVariable local |
+    use = ssa.getAUse(local) and source = ssa.getAnUltimateDefiningValue(local)
+  )
+  or
+  use = source
+}
+
+from Expr check, Expr checkPath, FunctionCall use, Expr usePath
+where
+ // `check` looks like a check on a filename
+ (
+ (
+ // either:
+ // an access check
+ check = accessCheck(checkPath)
+ or
+ // a stat
+ check = stat(checkPath, _)
+ or
+ // access to a member variable on the stat buf
+ // (morally, this should be a use-use pair, but it seems unlikely
+ // that this variable will get reused in practice)
+ exists(Expr call, Expr e, Variable v |
+ call = stat(checkPath, e) and
+ e.getAChild*().(VariableAccess).getTarget() = v and
+ check.(VariableAccess).getTarget() = v and
+ not e.getAChild*() = check // the call that writes to the pointer is not where the pointer is checked.
+ )
+ ) and
+ // `use` looks like an operation on a filename
+ use = filenameOperation(usePath)
+ or
+ // another filename operation (null pointers can indicate errors)
+ check = filenameOperation(checkPath) and
+ // `use` looks like a sensitive operation on a filename
+ use = sensitiveFilenameOperation(usePath)
+ ) and
+ // `checkPath` and `usePath` are both uses of the same SSA definition of a stack variable
+ exists(SsaDefinition def, StackVariable v |
+ def.getAUse(v) = checkPath and def.getAUse(v) = usePath
+ ) and
+ // the return value of `check` is used (possibly via a variable, see
+ // `referenceTo`) in a guard which controls `use`
+ exists(GuardCondition guard | referenceTo(check, guard.getAChild*()) |
+ guard.controls(use.(ControlFlowNode).getBasicBlock(), _)
+ )
+select use,
+ "The $@ being operated upon was previously $@, but the underlying file may have been changed since then.",
+ usePath, "filename", check, "checked"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-428/UnsafeCreateProcessCall.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-428/UnsafeCreateProcessCall.ql
new file mode 100644
index 00000000000..7c540e9d313
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-428/UnsafeCreateProcessCall.ql
@@ -0,0 +1,116 @@
+/**
+ * @name NULL application name with an unquoted path in call to CreateProcess
+ * @description Calling a function of the CreateProcess* family of functions, where the path contains spaces, introduces a security vulnerability.
+ * @id cpp/unsafe-create-process-call
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 7.8
+ * @precision medium
+ * @msrc.severity important
+ * @tags security
+ * external/cwe/cwe-428
+ * external/microsoft/C6277
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.dataflow.DataFlow2
+
+predicate isCreateProcessFunction(FunctionCall call, int applicationNameIndex, int commandLineIndex) { // maps each CreateProcess* variant to its application-name and command-line argument indexes
+ call.getTarget().hasGlobalName("CreateProcessA") and
+ applicationNameIndex = 0 and
+ commandLineIndex = 1
+ or
+ call.getTarget().hasGlobalName("CreateProcessW") and
+ applicationNameIndex = 0 and
+ commandLineIndex = 1
+ or
+ call.getTarget().hasGlobalName("CreateProcessWithTokenW") and
+ applicationNameIndex = 2 and
+ commandLineIndex = 3
+ or
+ call.getTarget().hasGlobalName("CreateProcessWithLogonW") and
+ applicationNameIndex = 4 and
+ commandLineIndex = 5
+ or
+ call.getTarget().hasGlobalName("CreateProcessAsUserA") and
+ applicationNameIndex = 1 and
+ commandLineIndex = 2
+ or
+ call.getTarget().hasGlobalName("CreateProcessAsUserW") and
+ applicationNameIndex = 1 and
+ commandLineIndex = 2
+}
+
+/**
+ * A function call to one of the functions listed in `isCreateProcessFunction` (wide-char or single byte string versions)
+ */
+class CreateProcessFunctionCall extends FunctionCall {
+ CreateProcessFunctionCall() { isCreateProcessFunction(this, _, _) }
+
+ int getApplicationNameArgumentId() { isCreateProcessFunction(this, result, _) } // index of the application-name argument
+
+ int getCommandLineArgumentId() { isCreateProcessFunction(this, _, result) } // index of the command-line argument
+}
+
+/**
+ * Dataflow that detects a call to CreateProcess with a NULL value for lpApplicationName argument
+ */
+class NullAppNameCreateProcessFunctionConfiguration extends DataFlow::Configuration {
+ NullAppNameCreateProcessFunctionConfiguration() {
+ this = "NullAppNameCreateProcessFunctionConfiguration"
+ }
+
+ override predicate isSource(DataFlow::Node source) { source.asExpr() instanceof NullValue } // sources: any `NullValue` expression
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(CreateProcessFunctionCall call, Expr val | val = sink.asExpr() |
+ val = call.getArgument(call.getApplicationNameArgumentId()) // sinks: the application-name argument
+ )
+ }
+}
+
+/**
+ * Dataflow that detects a call to CreateProcess with an unquoted commandLine argument
+ */
+class QuotedCommandInCreateProcessFunctionConfiguration extends DataFlow2::Configuration {
+ QuotedCommandInCreateProcessFunctionConfiguration() {
+ this = "QuotedCommandInCreateProcessFunctionConfiguration"
+ }
+
+ override predicate isSource(DataFlow2::Node source) {
+ exists(string s |
+ s = source.asExpr().getValue().toString() and // only expressions that have a value can be sources
+ not isQuotedOrNoSpaceApplicationNameOnCmd(s) // keep values that are neither quoted at the start nor free of whitespace
+ )
+ }
+
+ override predicate isSink(DataFlow2::Node sink) {
+ exists(CreateProcessFunctionCall call, Expr val | val = sink.asExpr() |
+ val = call.getArgument(call.getCommandLineArgumentId()) // sinks: the command-line argument
+ )
+ }
+}
+
+bindingset[s]
+predicate isQuotedOrNoSpaceApplicationNameOnCmd(string s) {
+  s.regexpMatch("[^\\s]+") // the whole string is free of whitespace
+  or
+  s.regexpMatch("\"([^\"])*\"[\\s\\S]*") // the string starts with a double-quoted element (the path)
+}
+
+from CreateProcessFunctionCall call, string msg1, string msg2
+where
+ exists(Expr source, Expr appName, NullAppNameCreateProcessFunctionConfiguration nullAppConfig |
+ appName = call.getArgument(call.getApplicationNameArgumentId()) and
+ nullAppConfig.hasFlow(DataFlow2::exprNode(source), DataFlow2::exprNode(appName)) and // a NULL value flows to the application name
+ msg1 = call.toString() + " with lpApplicationName == NULL (" + appName + ")"
+ ) and
+ exists(Expr source, Expr cmd, QuotedCommandInCreateProcessFunctionConfiguration quotedConfig |
+ cmd = call.getArgument(call.getCommandLineArgumentId()) and
+ quotedConfig.hasFlow(DataFlow2::exprNode(source), DataFlow2::exprNode(cmd)) and // an unquoted value with spaces flows to the command line
+ msg2 =
+ " and with an unquoted lpCommandLine (" + cmd +
+ ") introduces a security vulnerability if the path contains spaces."
+ )
+select call, msg1 + " " + msg2 // NOTE(review): `msg2` already starts with a space, so the alert text has two spaces here
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/ConditionallyUninitializedVariable.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/ConditionallyUninitializedVariable.ql
new file mode 100644
index 00000000000..9a08bbd64a6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/ConditionallyUninitializedVariable.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Conditionally uninitialized variable
+ * @description An initialization function is used to initialize a local variable, but the
+ * returned status code is not checked. The variable may be left in an uninitialized
+ * state, and reading the variable may result in undefined behavior.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @id cpp/conditionally-uninitialized-variable
+ * @tags security
+ * external/cwe/cwe-457
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.SSA
+private import UninitializedVariables
+
+from
+ ConditionallyInitializedVariable v, ConditionalInitializationFunction f,
+ ConditionalInitializationCall call, string defined, Evidence e
+where
+ exists(v.getARiskyAccess(f, call, e)) and
+ ( // `defined` is a message fragment chosen from the kind of evidence `e`
+ if e = DefinitionInSnapshot()
+ then defined = ""
+ else
+ if e = SuggestiveSALAnnotation()
+ then defined = "externally defined (SAL) "
+ else defined = "externally defined (CSV) " // any evidence other than the two kinds tested above
+ )
+select call,
+ "The status of this call to " + defined +
+ "$@ is not checked, potentially leaving $@ uninitialized.", f, f.getName(), v, v.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/InitializationFunctions.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/InitializationFunctions.qll
new file mode 100644
index 00000000000..4739c7ad5cf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/InitializationFunctions.qll
@@ -0,0 +1,690 @@
+/**
+ * Provides classes and predicates for identifying functions that initialize their arguments.
+ */
+
+import cpp
+import external.ExternalArtifact
+private import semmle.code.cpp.dispatch.VirtualDispatchPrototype
+import semmle.code.cpp.NestedFields
+import Microsoft.SAL
+import semmle.code.cpp.controlflow.Guards
+
+/** A context under which a function may be called. */
+private newtype TContext =
+ /** No specific call context. */
+ NoContext() or
+ /**
+ * The call context is that the given other parameter is null.
+ *
+ * This context is created for all parameters that are null checked in the body of the function.
+ */
+ ParamNull(Parameter p) { p = any(ParameterNullCheck pnc).getParameter() } or
+ /**
+ * The call context is that the given other parameter is not null.
+ *
+ * This context is created for all parameters that are null checked in the body of the function.
+ */
+ ParamNotNull(Parameter p) { p = any(ParameterNullCheck pnc).getParameter() }
+
+/**
+ * A context under which a function may be called.
+ *
+ * Some functions may conditionally initialize a parameter depending on the value of another
+ * parameter. Consider:
+ * ```
+ * int MyInitFunction(int* paramToBeInitialized, int* paramToCheck) {
+ * if (!paramToCheck) {
+ * // fail!
+ * return -1;
+ * }
+ * *paramToBeInitialized = 0;
+ * }
+ * ```
+ * In this case, whether `paramToBeInitialized` is initialized when this function call completes
+ * depends on whether `paramToCheck` is or is not null. A call-context insensitive analysis will
+ * determine that any call to this function may leave the parameter uninitialized, even if the
+ * argument to `paramToCheck` is guaranteed to be non-null (`&foo`, for example).
+ *
+ * This class models call contexts that can be considered when calculating whether a given parameter
+ * is initialized or not. The supported contexts are:
+ * - `ParamNull(otherParam)` - the given `otherParam` is considered to be null. Applies when
+ * exactly one parameter other than this one is null checked.
+ * - `ParamNotNull(otherParam)` - the given `otherParam` is considered to be not null. Applies when
+ * exactly one parameter other than this one is null checked.
+ * - `NoContext()` - applies in all other circumstances.
+ */
+class Context extends TContext {
+ string toString() {
+ this = NoContext() and result = "NoContext"
+ or
+ this = ParamNull(any(Parameter p | result = "ParamNull(" + p.getName() + ")"))
+ or
+ this = ParamNotNull(any(Parameter p | result = "ParamNotNull(" + p.getName() + ")"))
+ }
+}
+
+/**
+ * A check against a parameter.
+ */
+abstract class ParameterCheck extends Expr {
+ /**
+ * Gets a successor of this check that should be ignored for the given context.
+ */
+ abstract ControlFlowNode getIgnoredSuccessorForContext(Context c);
+}
+
+/** A null-check expression on a parameter. */
+class ParameterNullCheck extends ParameterCheck {
+ Parameter p; // the pointer-typed parameter that is checked
+ ControlFlowNode nullSuccessor; // successor taken when `p` is null
+ ControlFlowNode notNullSuccessor; // successor taken when `p` is not null
+
+ ParameterNullCheck() {
+ this.isCondition() and
+ p.getFunction() instanceof InitializationFunction and
+ p.getType().getUnspecifiedType() instanceof PointerType and
+ exists(VariableAccess va | va = p.getAnAccess() |
+ nullSuccessor = getATrueSuccessor() and // conditions that are true when `p` is null
+ notNullSuccessor = getAFalseSuccessor() and
+ (
+ va = this.(NotExpr).getOperand() or // `!p`
+ va = any(EQExpr eq | eq = this and eq.getAnOperand().getValue() = "0").getAnOperand() or // `p == 0`
+ va = getCheckedFalseCondition(this) or // `a = !!p; ... !a`
+ va =
+ any(NEExpr eq | eq = getCheckedFalseCondition(this) and eq.getAnOperand().getValue() = "0")
+ .getAnOperand() // `a = !!(p != 0); ... !a`
+ )
+ or
+ nullSuccessor = getAFalseSuccessor() and // conditions that are false when `p` is null
+ notNullSuccessor = getATrueSuccessor() and
+ (
+ va = this or // `p` used directly as the condition
+ va = any(NEExpr eq | eq = this and eq.getAnOperand().getValue() = "0").getAnOperand() or // `p != 0`
+ va =
+ any(EQExpr eq | eq = getCheckedFalseCondition(this) and eq.getAnOperand().getValue() = "0")
+ .getAnOperand() // `a = !!(p == 0); ... !a`
+ )
+ )
+ }
+
+ /** Gets the parameter being null-checked. */
+ Parameter getParameter() { result = p }
+
+ override ControlFlowNode getIgnoredSuccessorForContext(Context c) {
+ c = ParamNull(p) and result = notNullSuccessor // `p` assumed null: ignore the not-null branch
+ or
+ c = ParamNotNull(p) and result = nullSuccessor // `p` assumed not null: ignore the null branch
+ }
+
+ /** Gets the successor at which the parameter is confirmed to be null. */
+ ControlFlowNode getNullSuccessor() { result = nullSuccessor }
+
+ /** Gets the successor at which the parameter is confirmed to be not-null. */
+ ControlFlowNode getNotNullSuccessor() { result = notNullSuccessor }
+}
+
+/**
+ * An entry in a CSV file in cond-init that contains externally defined functions that are
+ * conditional initializers. These files are typically produced by running the
+ * ConditionallyInitializedFunction companion query.
+ */
+class ValidatedExternalCondInitFunction extends ExternalData {
+ ValidatedExternalCondInitFunction() { this.getDataPath().matches("%cond-init%.csv") }
+ /** Holds if fields 1 and 2 of this entry match the qualified name and parameter type signature of `f`, and field 3 is the index `param`. */
+ predicate isExternallyVerified(Function f, int param) {
+ functionSignature(f, getField(1), getField(2)) and param = getFieldAsInt(3)
+ }
+}
+
+/**
+ * The type of evidence used to determine whether a function initializes a parameter.
+ */
+newtype Evidence =
+ /**
+ * The function is defined in the snapshot, and the CFG has been analyzed to determine that the
+ * parameter is not initialized on at least one path to the exit.
+ */
+ DefinitionInSnapshot() or
+ /**
+ * The function is externally defined, but the parameter has an `_out` SAL annotation which
+ * suggests that it is initialized in the function.
+ */
+ SuggestiveSALAnnotation() or
+ /**
+ * We have been given a CSV file which indicates this parameter is conditionally initialized.
+ */
+ ExternalEvidence()
+
+/**
+ * A call to a function which initializes one or more of its parameters.
+ */
+class InitializationFunctionCall extends FunctionCall {
+ Expr initializedArgument;
+
+ InitializationFunctionCall() { initializedArgument = getAnInitializedArgument(this) }
+
+ /** Gets a parameter whose access is passed directly as an initialized argument of this call. */
+ Parameter getAnInitParameter() { result.getAnAccess() = initializedArgument }
+}
+
+/**
+ * Holds if `target` is the operand of a pointer dereference that is the left-hand side of an assignment.
+ */
+private predicate isPointerDereferenceAssignmentTarget(VariableAccess target) {
+ target.getParent().(PointerDereferenceExpr) = any(Assignment e).getLValue()
+}
+
+/**
+ * A function which initializes one or more of its parameters.
+ */
+class InitializationFunction extends Function {
+ int i;
+ Evidence evidence;
+
+ InitializationFunction() {
+ evidence = DefinitionInSnapshot() and
+ (
+ // Assignment by pointer dereferencing the parameter
+ isPointerDereferenceAssignmentTarget(this.getParameter(i).getAnAccess()) or
+ // Field wise assignment to the parameter
+ any(Assignment e).getLValue() = getAFieldAccess(this.getParameter(i)) or
+ i =
+ this.(MemberFunction)
+ .getAnOverridingFunction+()
+ .(InitializationFunction)
+ .initializedParameter() or
+ getParameter(i) = any(InitializationFunctionCall c).getAnInitParameter()
+ )
+ or
+ // If we have no definition, we look at SAL annotations
+ not this.hasDefinition() and
+ this.getParameter(i).(SALParameter).isOut() and
+ evidence = SuggestiveSALAnnotation()
+ or
+ // We have some external information that this function conditionally initializes
+ not this.hasDefinition() and
+ any(ValidatedExternalCondInitFunction vc).isExternallyVerified(this, i) and
+ evidence = ExternalEvidence()
+ }
+
+ /** Gets a parameter index which is initialized by this function. */
+ int initializedParameter() { result = i }
+
+ /** Gets a `ControlFlowNode` which assigns a new value to the parameter with the given index. */
+ ControlFlowNode paramReassignment(int index) {
+ index = i and
+ (
+ result = this.getParameter(i).getAnAccess() and
+ (
+ result = any(Assignment a).getLValue().(PointerDereferenceExpr).getOperand()
+ or
+ // Field wise assignment to the parameter
+ result = any(Assignment a).getLValue().(FieldAccess).getQualifier()
+ or
+ // Assignment to a nested field of the parameter
+ result = any(Assignment a).getLValue().(NestedFieldAccess).getUltimateQualifier()
+ or
+ result = getAnInitializedArgument(any(Call c))
+ or
+ exists(IfStmt check | result = check.getCondition().getAChild*() |
+ paramReassignmentCondition(check)
+ )
+ )
+ or
+ result =
+ any(AssumeExpr e |
+ e.getEnclosingFunction() = this and e.getAChild().(Literal).getValue() = "0"
+ )
+ )
+ }
+
+ /**
+ * Helper predicate: holds if the `if` statement `check` contains a
+ * reassignment to the `i`th parameter within its `then` statement.
+ */
+ pragma[noinline]
+ private predicate paramReassignmentCondition(IfStmt check) {
+ this.paramReassignment(i).getEnclosingStmt().getParentStmt*() = check.getThen()
+ }
+
+ /** Holds if `n` can be reached without the parameter at `index` being reassigned. */
+ predicate paramNotReassignedAt(ControlFlowNode n, int index, Context c) {
+ c = getAContext(index) and
+ (
+ not exists(this.getEntryPoint()) and index = i and n = this
+ or
+ n = this.getEntryPoint() and index = i
+ or
+ exists(ControlFlowNode mid | paramNotReassignedAt(mid, index, c) |
+ n = mid.getASuccessor() and
+ not n = paramReassignment(index) and
+ /*
+ * Ignore successor edges where the parameter is null, because it is then confirmed to be
+ * initialized.
+ */
+
+ not exists(ParameterNullCheck nullCheck |
+ nullCheck = mid and
+ nullCheck = getANullCheck(index) and
+ n = nullCheck.getNullSuccessor()
+ ) and
+ /*
+ * Ignore successor edges which are excluded by the given context
+ */
+
+ not exists(ParameterCheck paramCheck | paramCheck = mid |
+ n = paramCheck.getIgnoredSuccessorForContext(c)
+ )
+ )
+ )
+ }
+
+ /** Gets a null-check on the parameter at `index`. */
+ private ParameterNullCheck getANullCheck(int index) {
+ getParameter(index) = result.getParameter()
+ }
+
+ /** Gets a parameter which is not at the given index. */
+ private Parameter getOtherParameter(int index) {
+ index = i and
+ result = getAParameter() and
+ not result.getIndex() = index
+ }
+
+ /**
+ * Gets a call `Context` that is applicable when considering whether parameter at the `index` can
+ * be conditionally initialized.
+ */
+ Context getAContext(int index) {
+ index = i and
+ /*
+ * If there is one and only one other parameter which is null checked in the body of the method,
+ * then we have two contexts to consider - that the other param is null, or that the other param
+ * is not null.
+ */
+
+ if
+ strictcount(Parameter p |
+ exists(Context c | c = ParamNull(p) or c = ParamNotNull(p)) and
+ p = getOtherParameter(index)
+ ) = 1
+ then
+ exists(Parameter p | p = getOtherParameter(index) |
+ result = ParamNull(p) or result = ParamNotNull(p)
+ )
+ else
+ // Otherwise, only consider NoContext.
+ result = NoContext()
+ }
+
+ /**
+ * Holds if this function should be whitelisted - that is, not considered as conditionally
+ * initializing its parameters.
+ */
+ predicate whitelisted() {
+ exists(string name | this.hasName(name) |
+ // Return value is not a success code but the output functions never fail.
+ name.matches("_Interlocked%")
+ or
+ name =
+ [
+ // Functions that never fail, according to MSDN.
+ "QueryPerformanceCounter", "QueryPerformanceFrequency",
+ // Functions that never fail post-Vista, according to MSDN.
+ "InitializeCriticalSectionAndSpinCount",
+ // `rand_s` writes 0 to a non-null argument if it fails, according to MSDN.
+ "rand_s",
+ // IntersectRect initializes the argument regardless of whether the input intersects
+ "IntersectRect", "SetRect", "UnionRect",
+ // These functions appear to have an incorrect CFG, which leads to false positives
+ "PhysicalToLogicalDPIPoint", "LogicalToPhysicalDPIPoint",
+ // Sets NtProductType to default on error
+ "RtlGetNtProductType",
+ // Our CFG is not sophisticated enough to detect that the argument is always initialized
+ "StringCchLengthA",
+ // All paths init the argument, and always returns SUCCESS.
+ "RtlUnicodeToMultiByteSize",
+ // All paths init the argument, and always returns SUCCESS.
+ "RtlMultiByteToUnicodeSize",
+ // All paths init the argument, and always returns SUCCESS.
+ "RtlUnicodeToMultiByteN",
+ // Always initializes argument
+ "RtlGetFirstRange",
+ // Destination range is zeroed out on failure, assuming first two parameters are valid
+ "memcpy_s",
+ // This zeroes the memory unconditionally
+ "SeCreateAccessState",
+ // Argument initialization is optional, but always succeeds
+ "KeGetCurrentProcessorNumberEx"
+ ]
+ )
+ }
+}
+
+/**
+ * A function which initializes one or more of its parameters, but not on all paths.
+ */
+class ConditionalInitializationFunction extends InitializationFunction {
+ Context c;
+
+ ConditionalInitializationFunction() {
+ c = this.getAContext(i) and
+ not this.whitelisted() and
+ exists(Type status | status = this.getType().getUnspecifiedType() |
+ status instanceof IntegralType or
+ status instanceof Enum
+ ) and
+ not this.getType().getName().toLowerCase() = "size_t" and
+ (
+ /*
+ * If there is no definition, consider this to be conditionally initializing (based on either
+ * SAL or external data).
+ */
+
+ not evidence = DefinitionInSnapshot()
+ or
+ /*
+ * If this function is defined in this snapshot, then it conditionally initializes if there
+ * is at least one path through the function which doesn't initialize the parameter.
+ *
+ * Explicitly ignore pure virtual functions.
+ */
+
+ this.hasDefinition() and
+ this.paramNotReassignedAt(this, i, c) and
+ not this instanceof PureVirtualFunction
+ )
+ }
+
+ /** Gets the evidence associated with the given parameter. */
+ Evidence getEvidence(int param) {
+ /*
+ * Note: due to the way the predicate dispatch interacts with fields, this needs to be
+ * implemented on this class, not `InitializationFunction`. If implemented on the latter it
+ * can return evidence that does not result in conditional initialization.
+ */
+
+ param = i and evidence = result
+ }
+
+ /** Gets the index of a parameter which is conditionally initialized. */
+ int conditionallyInitializedParameter(Context context) { result = i and context = c }
+}
+
+/**
+ * A call whose target is a function that may leave one of its parameters uninitialized.
+ *
+ * More elaborate tracking (status checked only after the variable has been read, or status never
+ * checked but the variable never read) is provided by `ConditionallyInitializedVariable.getARiskyAccess`.
+ */
+class ConditionalInitializationCall extends FunctionCall {
+  ConditionalInitializationFunction target;
+
+  ConditionalInitializationCall() { target = getTarget(this) }
+
+  /** Gets the argument passed for the given parameter to this call. */
+  Expr getArgumentForParameter(Parameter p) {
+    p = getTarget().getAParameter() and
+    result = getArgument(p.getIndex())
+  }
+
+  /**
+   * Gets an argument conditionally initialized by this call to `condTarget` under evidence `e`, in a context not contradicted by the other argument.
+   */
+  Expr getAConditionallyInitializedArgument(ConditionalInitializationFunction condTarget, Evidence e) {
+    condTarget = target and
+    exists(Context context |
+      result = getAConditionallyInitializedArgument(this, condTarget, context, e)
+    |
+      context = NoContext()
+      or
+      exists(Parameter otherP, Expr otherArg |
+        context = ParamNotNull(otherP) or
+        context = ParamNull(otherP)
+      |
+        otherArg = getArgumentForParameter(otherP) and
+        (otherArg instanceof AddressOfExpr implies context = ParamNotNull(otherP)) and
+        (otherArg.getType() instanceof ArrayType implies context = ParamNotNull(otherP)) and
+        (otherArg.getValue() = "0" implies context = ParamNull(otherP))
+      )
+    )
+  }
+  /** Gets an access, passed by address for a conditionally initialized parameter, to a variable not assigned (directly or field-wise) before it. */
+  VariableAccess getAConditionallyInitializedVariable() {
+    not result.getTarget().getAnAssignedValue().getASuccessor+() = result and
+    // Should not be assigned field-wise prior to the call.
+    not exists(Assignment a, FieldAccess fa |
+      fa.getQualifier() = result.getTarget().getAnAccess() and
+      a.getLValue() = fa and
+      fa.getASuccessor+() = result
+    ) and
+    result =
+      this.getArgument(getTarget(this)
+            .(ConditionalInitializationFunction)
+            .conditionallyInitializedParameter(_)).(AddressOfExpr).getOperand()
+  }
+  /** Gets a variable that is assigned, or initialized with, the result of this call. */
+  Variable getStatusVariable() {
+    exists(AssignExpr a | a.getLValue() = result.getAnAccess() | a.getRValue() = this)
+    or
+    result.getInitializer().getExpr() = this
+  }
+  /** Gets an expression treated as a success check: this call used as a condition, or a parent `!call`, `call == 0` or `call >= 0`. */
+  Expr getSuccessCheck() {
+    exists(this.getAFalseSuccessor()) and result = this
+    or
+    result = this.getParent() and
+    (
+      result instanceof NotExpr or
+      result.(EQExpr).getAnOperand().getValue() = "0" or
+      result.(GEExpr).getLesserOperand().getValue() = "0"
+    )
+  }
+  /** Gets an expression treated as a failure check: a parent of the form `!call`, `call != 0` or `call < 0`. */
+  Expr getFailureCheck() {
+    result = this.getParent() and
+    (
+      result instanceof NotExpr or
+      result.(NEExpr).getAnOperand().getValue() = "0" or
+      result.(LTExpr).getGreaterOperand().getValue() = "0" // in `call < 0` the literal is the greater operand
+    )
+  }
+  /** Holds if this call is an argument of a call to an operator function or to `VerifyOkCatastrophic`. */
+  private predicate inCheckedContext() {
+    exists(Call parent | this = parent.getAnArgument() |
+      parent.getTarget() instanceof Operator or
+      parent.getTarget().hasName("VerifyOkCatastrophic")
+    )
+  }
+  /** Gets a node reachable from the failure (or unchecked) exit of this call without first passing an access to `var` or to the status variable. */
+  ControlFlowNode uncheckedReaches(LocalVariable var) {
+    (
+      not exists(var.getInitializer()) and
+      var = this.getAConditionallyInitializedVariable().getTarget() and
+      if exists(this.getFailureCheck())
+      then result = this.getFailureCheck().getATrueSuccessor()
+      else
+        if exists(this.getSuccessCheck())
+        then result = this.getSuccessCheck().getAFalseSuccessor()
+        else (
+          result = this.getASuccessor() and not this.inCheckedContext()
+        )
+    )
+    or
+    exists(ControlFlowNode mid | mid = uncheckedReaches(var) |
+      not mid = getStatusVariable().getAnAccess() and
+      not mid = var.getAnAccess() and
+      not exists(VariableAccess write | result = write and write = var.getAnAccess() |
+        write = any(AssignExpr a).getLValue() or
+        write = any(AddressOfExpr a).getOperand()
+      ) and
+      result = mid.getASuccessor()
+    )
+  }
+  /** Gets an access reached by `uncheckedReaches` for its own variable that is not controlled by this call as a guard; `f` is this call's target. */
+  VariableAccess getARiskyRead(Function f) {
+    f = this.getTarget() and
+    exists(this.getFile().getRelativePath()) and
+    result = this.uncheckedReaches(result.getTarget()) and
+    not this.(GuardCondition).controls(result.getBasicBlock(), _)
+  }
+}
+
+/**
+ * Gets the position of an argument to the call which is initialized by the call.
+ */
+pragma[nomagic]
+int initializedArgument(Call call) {
+ exists(InitializationFunction target |
+ target = getTarget(call) and
+ result = target.initializedParameter()
+ )
+}
+
+/**
+ * Gets an argument which is initialized by the call.
+ */
+Expr getAnInitializedArgument(Call call) { result = call.getArgument(initializedArgument(call)) }
+
+/**
+ * Gets the position of an argument to the call to the target which is conditionally initialized by
+ * the call, under the given context and evidence.
+ */
+pragma[nomagic]
+private int conditionallyInitializedArgument(
+ Call call, ConditionalInitializationFunction target, Context c, Evidence e
+) {
+ target = getTarget(call) and
+ c = target.getAContext(result) and
+ e = target.getEvidence(result) and
+ result = target.conditionallyInitializedParameter(c)
+}
+
+/**
+ * Gets an argument which is conditionally initialized by the call to the given target under the given context and evidence.
+ */
+Expr getAConditionallyInitializedArgument(
+ Call call, ConditionalInitializationFunction target, Context c, Evidence e
+) {
+ result = call.getArgument(conditionallyInitializedArgument(call, target, c, e))
+}
+
+/**
+ * Gets the comma-separated unspecified types of the parameters of `f`, in parameter order.
+ */
+private string typeSig(Function f) {
+ result =
+ concat(int i, Type pt |
+ pt = f.getParameter(i).getType()
+ |
+ pt.getUnspecifiedType().toString(), "," order by i
+ )
+}
+
+/**
+ * Holds where qualifiedName and typeSig make up the signature for the function.
+ */
+private predicate functionSignature(Function f, string qualifiedName, string typeSig) {
+ qualifiedName = f.getQualifiedName() and
+ typeSig = typeSig(f)
+}
+
+/**
+ * Gets a possible definition for the undefined function by matching its qualified name and
+ * parameter type signature (see `functionSignature`) with those of a defined function.
+ *
+ * This is useful for identifying call to target dependencies across libraries, where the libraries
+ * are never statically linked together.
+ */
+private Function getAPossibleDefinition(Function undefinedFunction) {
+ not undefinedFunction.hasDefinition() and
+ exists(string qn, string typeSig |
+ functionSignature(undefinedFunction, qn, typeSig) and functionSignature(result, qn, typeSig)
+ ) and
+ result.hasDefinition()
+}
+
+/**
+ * Helper predicate for `getTarget`, that computes possible targets of a `Call`.
+ *
+ * If there is at least one defined target after performing some simple virtual dispatch
+ * resolution, then the result is all the defined targets.
+ */
+private Function getTarget1(Call c) {
+ result = VirtualDispatch::getAViableTarget(c) and
+ result.hasDefinition()
+}
+
+/**
+ * Helper predicate for `getTarget`, that computes possible targets of a `Call`.
+ *
+ * If we can use the heuristic matching of functions to find definitions for some of the viable
+ * targets, return those.
+ */
+private Function getTarget2(Call c) {
+ not exists(getTarget1(c)) and
+ result = getAPossibleDefinition(VirtualDispatch::getAViableTarget(c))
+}
+
+/**
+ * Helper predicate for `getTarget`, that computes possible targets of a `Call`.
+ *
+ * Otherwise, the result is the undefined `Function` instances.
+ */
+private Function getTarget3(Call c) {
+ not exists(getTarget1(c)) and
+ not exists(getTarget2(c)) and
+ result = VirtualDispatch::getAViableTarget(c)
+}
+
+/**
+ * Gets a possible target of call `c` after simple virtual dispatch resolution. Defined targets
+ * are preferred; failing that, definitions matched heuristically by name and parameter types;
+ * failing that, the undefined targets themselves.
+ */
+Function getTarget(Call c) {
+  // `getTarget2` and `getTarget3` only produce results when the earlier tiers are empty,
+  // so the union of all three tiers is the final answer.
+  result = [getTarget1(c), getTarget2(c), getTarget3(c)]
+}
+
+/**
+ * Gets a (direct or nested) field access whose qualifier is an access to `v` or the address of one.
+ */
+FieldAccess getAFieldAccess(Variable v) {
+ exists(VariableAccess va, Expr qualifierExpr |
+ // Find an access of the variable, or an AddressOfExpr that has the access
+ va = v.getAnAccess() and
+ (
+ qualifierExpr = va or
+ qualifierExpr.(AddressOfExpr).getOperand() = va
+ )
+ |
+ // Direct field access
+ qualifierExpr = result.getQualifier()
+ or
+ // Nested field access
+ qualifierExpr = result.(NestedFieldAccess).getUltimateQualifier()
+ )
+}
+
+/**
+ * Gets a condition which is checked to be false by the given `ne` expression, according to this pattern:
+ * ```
+ * int a = !!result;
+ * if (!a) { // <- ne
+ * ....
+ * }
+ * ```
+ */
+private Expr getCheckedFalseCondition(NotExpr ne) {
+ exists(LocalVariable v |
+ result = v.getInitializer().getExpr().(NotExpr).getOperand().(NotExpr).getOperand() and
+ ne.getOperand() = v.getAnAccess() and
+ nonAssignedVariable(v)
+ // and not passed by val?
+ )
+}
+
+pragma[noinline]
+private predicate nonAssignedVariable(Variable v) { not exists(v.getAnAssignment()) }
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/UninitializedVariables.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/UninitializedVariables.qll
new file mode 100644
index 00000000000..4289f66e21d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-457/UninitializedVariables.qll
@@ -0,0 +1,190 @@
+/**
+ * A module for identifying conditionally initialized variables.
+ */
+
+import cpp
+import InitializationFunctions
+
+// Optimised reachability predicates
+private predicate reaches(ControlFlowNode a, ControlFlowNode b) = fastTC(successor/2)(a, b)
+
+private predicate successor(ControlFlowNode a, ControlFlowNode b) { b = a.getASuccessor() }
+
+class WhitelistedCallsConfig extends string {
+ WhitelistedCallsConfig() { this = "config" }
+
+ abstract predicate isWhitelisted(Call c);
+}
+
+abstract class WhitelistedCall extends Call {
+ override Function getTarget() { none() }
+}
+
+private predicate hasConditionalInitialization(
+ ConditionalInitializationFunction f, ConditionalInitializationCall call, LocalVariable v,
+ VariableAccess initAccess, Evidence e
+) {
+ // Ignore whitelisted calls
+ not call instanceof WhitelistedCall and
+ f = getTarget(call) and
+ initAccess = v.getAnAccess() and
+ initAccess = call.getAConditionallyInitializedArgument(f, e).(AddressOfExpr).getOperand()
+}
+
+/**
+ * A variable that can be conditionally initialized by a call.
+ */
+class ConditionallyInitializedVariable extends LocalVariable {
+ ConditionalInitializationCall call;
+ ConditionalInitializationFunction f;
+ VariableAccess initAccess;
+ Evidence e;
+
+ ConditionallyInitializedVariable() {
+ // Find a call that conditionally initializes this variable
+ hasConditionalInitialization(f, call, this, initAccess, e) and
+ // Ignore cases where the variable is assigned prior to the call
+ not reaches(getAnAssignedValue(), initAccess) and
+ // Ignore cases where the variable is assigned field-wise prior to the call.
+ not exists(FieldAccess fa |
+ exists(Assignment a |
+ fa = getAFieldAccess(this) and
+ a.getLValue() = fa
+ )
+ |
+ reaches(fa, initAccess)
+ ) and
+ // Ignore cases where the variable is assigned by a prior call to an initialization function
+ not exists(Call c |
+ getAnAccess() = getAnInitializedArgument(c).(AddressOfExpr).getOperand() and
+ reaches(c, initAccess)
+ ) and
+ /*
+ * Static local variables with constant initializers do not have the initializer expr as part of
+ * the CFG, but should always be considered as initialized, so exclude them.
+ */
+
+ not exists(getInitializer().getExpr())
+ }
+
+ /**
+ * Gets an access of this variable which is not the left-hand side of an `AssignExpr`, is not
+ * passed by address to an initialization function, and is not a bare expression statement.
+ */
+ private VariableAccess getAReadAccess() {
+ result = this.getAnAccess() and
+ // Not used as an lvalue
+ not result = any(AssignExpr a).getLValue() and
+ // Not passed to another initialization function
+ not exists(Call c, int j | j = c.getTarget().(InitializationFunction).initializedParameter() |
+ result = c.getArgument(j).(AddressOfExpr).getOperand()
+ ) and
+ // Not a pointless read (an expression statement consisting only of the access)
+ not result = any(ExprStmt es).getExpr()
+ }
+
+ /**
+ * Gets a read access of this variable that occurs after the `initializingCall`.
+ */
+ private VariableAccess getAReadAccessAfterCall(ConditionalInitializationCall initializingCall) {
+ // Variable associated with this particular call
+ call = initializingCall and
+ // Access is a meaningful read access
+ result = getAReadAccess() and
+ // Which is reachable from the call in the control-flow graph
+ reaches(call, result) and
+ /*
+ * Ignore accesses which are arguments to calls that are also passed another local variable whose
+ * address was an argument of the original call. This is an attempt to eliminate results where the
+ * "status" can be checked through another variable that is assigned as part of the original call.
+ */
+
+ not exists(Call c |
+ c.getAnArgument() = result or
+ c.getAnArgument().(AddressOfExpr).getOperand() = result
+ |
+ exists(LocalVariable lv |
+ call.getAnArgument().(AddressOfExpr).getOperand() = lv.getAnAccess() and
+ not lv = this
+ |
+ c.getAnArgument() = lv.getAnAccess()
+ )
+ )
+ }
+
+ /**
+ * Gets an access to the variable that is risky because the variable may not be initialized after
+ * the `call`, and the status of the call is never checked.
+ */
+ VariableAccess getARiskyAccessWithNoStatusCheck(
+ ConditionalInitializationFunction initializingFunction,
+ ConditionalInitializationCall initializingCall, Evidence evidence
+ ) {
+ // Variable associated with this particular call
+ call = initializingCall and
+ initializingFunction = f and
+ e = evidence and
+ result = getAReadAccessAfterCall(initializingCall) and
+ (
+ // Access is risky because the returned status is discarded (the call is in a void context)
+ call instanceof ExprInVoidContext
+ or
+ // Access is risky because the status is stored in a local variable that is never accessed
+ exists(LocalVariable status | call = status.getAnAssignedValue() |
+ not exists(status.getAnAccess())
+ )
+ )
+ }
+
+ /**
+ * Gets an access to the variable that is risky because the variable may not be initialized after
+ * the `call`, and the status of the call is only checked after the risky access.
+ */
+ VariableAccess getARiskyAccessBeforeStatusCheck(
+ ConditionalInitializationFunction initializingFunction,
+ ConditionalInitializationCall initializingCall, Evidence evidence
+ ) {
+ // Variable associated with this particular call
+ call = initializingCall and
+ initializingFunction = f and
+ e = evidence and
+ result = getAReadAccessAfterCall(initializingCall) and
+ exists(LocalVariable status, Assignment a |
+ a.getRValue() = call and
+ call = status.getAnAssignedValue() and
+ // There exists a check of the status code
+ definitionUsePair(status, a, _) and
+ // And the check of the status code does not occur before the risky access
+ not exists(VariableAccess statusAccess |
+ definitionUsePair(status, a, statusAccess) and
+ reaches(statusAccess, result)
+ ) and
+ // Ignore cases where the assignment to the status code is used directly
+ a instanceof ExprInVoidContext and
+ /*
+ * Ignore risky accesses which are arguments to calls which also include the status code.
+ * If both the risky value and status code are passed to a different function, that
+ * function is responsible for checking the status code.
+ */
+
+ not exists(Call c |
+ c.getAnArgument() = result or
+ c.getAnArgument().(AddressOfExpr).getOperand() = result
+ |
+ definitionUsePair(status, a, c.getAnArgument())
+ )
+ )
+ }
+
+ /**
+ * Gets an access to the variable that is risky because the variable may not be initialized after
+ * the `call`.
+ */
+ VariableAccess getARiskyAccess(
+ ConditionalInitializationFunction initializingFunction,
+ ConditionalInitializationCall initializingCall, Evidence evidence
+ ) {
+ result = getARiskyAccessBeforeStatusCheck(initializingFunction, initializingCall, evidence) or
+ result = getARiskyAccessWithNoStatusCheck(initializingFunction, initializingCall, evidence)
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScaling.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScaling.ql
new file mode 100644
index 00000000000..ce99ce1ebce
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScaling.ql
@@ -0,0 +1,48 @@
+/**
+ * @name Suspicious pointer scaling
+ * @description Implicit scaling of pointer arithmetic expressions
+ * can cause buffer overflow conditions.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @precision medium
+ * @id cpp/suspicious-pointer-scaling
+ * @tags security
+ * external/cwe/cwe-468
+ */
+
+import IncorrectPointerScalingCommon
+
+from Expr dest, Type destType, Type sourceType, Type sourceBase, Type destBase, Location sourceLoc
+where
+ exists(pointerArithmeticParent(dest)) and
+ exprSourceType(dest, sourceType, sourceLoc) and
+ sourceBase = baseType(sourceType) and
+ destType = dest.getFullyConverted().getType() and
+ destBase = baseType(destType) and
+ destBase.getSize() != sourceBase.getSize() and
+ not dest.isInMacroExpansion() and
+ // If the source type is a `char*` or `void*` then don't
+ // produce a result, because it is likely to be a false
+ // positive.
+ not sourceBase instanceof CharType and
+ not sourceBase instanceof VoidType and
+ // Low-level pointer tricks often involve casting a struct pointer to a
+ // char pointer, then accessing it at byte offsets. For example, this can
+ // be necessary in order to resume an interrupted `write(2)`.
+ not destBase instanceof CharType and
+ // Similarly, gcc and compilers emulating it will allow void pointer
+ // arithmetic as if void were a 1-byte type
+ not destBase instanceof VoidType and
+ // Don't produce an alert if the root expression computes
+ // an offset, rather than a pointer. For example:
+ // ```
+ // (p + 1) - q
+ // ```
+ forall(Expr parent | parent = pointerArithmeticParent+(dest) |
+ parent.getFullyConverted().getUnspecifiedType() instanceof PointerType
+ )
+select dest,
+ "This pointer might have type $@ (size " + sourceBase.getSize() +
+ "), but the pointer arithmetic here is done with type " + destType + " (size " +
+ destBase.getSize() + ").", sourceLoc, sourceBase.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingChar.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingChar.ql
new file mode 100644
index 00000000000..5b7e3379929
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingChar.ql
@@ -0,0 +1,55 @@
+/**
+ * @name Suspicious pointer scaling to char
+ * @description Implicit scaling of pointer arithmetic expressions
+ * can cause buffer overflow conditions.
+ * @kind problem
+ * @id cpp/incorrect-pointer-scaling-char
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @precision low
+ * @tags security
+ * external/cwe/cwe-468
+ */
+
+import IncorrectPointerScalingCommon
+
+from Expr dest, Type destType, Type sourceType, Type sourceBase, Type destBase, Location sourceLoc
+where
+ exists(pointerArithmeticParent(dest)) and
+ exprSourceType(dest, sourceType, sourceLoc) and
+ sourceBase = baseType(sourceType) and
+ destType = dest.getFullyConverted().getType() and
+ destBase = baseType(destType) and
+ destBase.getSize() != sourceBase.getSize() and
+ not dest.isInMacroExpansion() and
+ // If the source type is a `char*` or `void*` then don't
+ // produce a result, because it is likely to be a false
+ // positive.
+ not sourceBase instanceof CharType and
+ not sourceBase instanceof VoidType and
+ // Don't produce an alert if the dest type is `char *` but the
+ // expression contains a `sizeof`, which is probably correct. For
+ // example:
+ // ```
+ // int x[3] = {1,2,3};
+ // char* p = (char*)x;
+ // return *(int*)(p + (2 * sizeof(int)))
+ // ```
+ not (
+ destBase instanceof CharType and
+ dest.getParent().(Expr).getAChild*() instanceof SizeofOperator
+ ) and
+ // Don't produce an alert if the root expression computes
+ // an offset, rather than a pointer. For example:
+ // ```
+ // (p + 1) - q
+ // ```
+ forall(Expr parent | parent = pointerArithmeticParent+(dest) |
+ parent.getFullyConverted().getUnspecifiedType() instanceof PointerType
+ ) and
+ // Only produce alerts that are not produced by `IncorrectPointerScaling.ql`.
+ destBase instanceof CharType
+select dest,
+ "This pointer might have type $@ (size " + sourceBase.getSize() +
+ "), but the pointer arithmetic here is done with type " + destType + " (size " +
+ destBase.getSize() + ").", sourceLoc, sourceBase.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingCommon.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingCommon.qll
new file mode 100644
index 00000000000..9978d9ece0b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingCommon.qll
@@ -0,0 +1,163 @@
+/**
+ * Shared utilities for the CWE-468 queries.
+ */
+
+import cpp
+
+/**
+ * Gets the type parameter of `sizeof` expression `e`.
+ */
+private Type sizeofParam(Expr e) {
+ result = e.(SizeofExprOperator).getExprOperand().getFullyConverted().getType()
+ or
+ result = e.(SizeofTypeOperator).getTypeOperand()
+}
+
+/**
+ * Holds if `e` is `sizeof` expression `sizeofExpr`, possibly multiplied
+ * by another expression, and `sizeofParam` is `sizeofExpr`'s type
+ * parameter.
+ *
+ * For example, if `e` is `4 * sizeof(T)` then `sizeofExpr` is
+ * `sizeof(T)` and `sizeofParam` is `T`.
+ */
+private predicate multiplyWithSizeof(Expr e, Expr sizeofExpr, Type sizeofParam) {
+ e = sizeofExpr and sizeofParam = sizeofParam(e).getUnspecifiedType()
+ or
+ multiplyWithSizeof(e.(MulExpr).getAnOperand(), sizeofExpr, sizeofParam)
+}
+
+/**
+ * Holds if the pointer `e` is added to the `sizeof` expression
+ * `sizeofExpr` (which may first be multiplied by another expression),
+ * and `sizeofParam` is `sizeofExpr`'s type parameter.
+ *
+ * For example, if the program contains the expression
+ * `p - (i * sizeof(T))` then `e` would be `p`, `sizeofExpr` would be
+ * `sizeof(T)`, and `sizeofParam` would be `T`.
+ */
+predicate addWithSizeof(Expr e, Expr sizeofExpr, Type sizeofParam) {
+ exists(PointerAddExpr addExpr |
+ e = addExpr.getLeftOperand() and
+ multiplyWithSizeof(addExpr.getRightOperand(), sizeofExpr, sizeofParam)
+ )
+ or
+ exists(PointerSubExpr subExpr |
+ e = subExpr.getLeftOperand() and
+ multiplyWithSizeof(subExpr.getRightOperand(), sizeofExpr, sizeofParam)
+ )
+}
+
+/**
+ * Holds if `t` is a pointer or array type.
+ */
+predicate isPointerType(Type t) {
+ t instanceof PointerType or
+ t instanceof ArrayType
+}
+
+/**
+ * Gets the base type of a pointer or array type. In the case of an array of
+ * arrays, the inner base type is returned.
+ */
+Type baseType(Type t) {
+ (
+ exists(PointerType dt |
+ dt = t.getUnspecifiedType() and
+ result = dt.getBaseType().getUnspecifiedType()
+ )
+ or
+ exists(ArrayType at |
+ at = t.getUnspecifiedType() and
+ not at.getBaseType().getUnspecifiedType() instanceof ArrayType and
+ result = at.getBaseType().getUnspecifiedType()
+ )
+ or
+ exists(ArrayType at, ArrayType at2 |
+ at = t.getUnspecifiedType() and
+ at2 = at.getBaseType().getUnspecifiedType() and
+ result = baseType(at2)
+ )
+ ) and
+ // Make sure that the type has a size and that it isn't ambiguous.
+ strictcount(result.getSize()) = 1
+}
+
+/**
+ * Holds if there is a pointer expression with type `sourceType` at
+ * location `sourceLoc` which might be the source expression for `use`.
+ *
+ * For example, with
+ * ```
+ * int intArray[5] = { 1, 2, 3, 4, 5 };
+ * char *charPointer = (char *)intArray;
+ * return *(charPointer + i);
+ * ```
+ * the array initializer on the first line is a source expression
+ * for the use of `charPointer` on the third line.
+ *
+ * The source will either be an `Expr` or a `Parameter`.
+ */
+predicate exprSourceType(Expr use, Type sourceType, Location sourceLoc) {
+ // Reaching definitions.
+ if exists(SsaDefinition def, StackVariable v | use = def.getAUse(v))
+ then
+ exists(SsaDefinition def, StackVariable v | use = def.getAUse(v) |
+ defSourceType(def, v, sourceType, sourceLoc)
+ )
+ else
+ // Pointer arithmetic
+ if use instanceof PointerAddExpr
+ then exprSourceType(use.(PointerAddExpr).getLeftOperand(), sourceType, sourceLoc)
+ else
+ if use instanceof PointerSubExpr
+ then exprSourceType(use.(PointerSubExpr).getLeftOperand(), sourceType, sourceLoc)
+ else
+ if use instanceof AddExpr
+ then exprSourceType(use.(AddExpr).getAnOperand(), sourceType, sourceLoc)
+ else
+ if use instanceof SubExpr
+ then exprSourceType(use.(SubExpr).getAnOperand(), sourceType, sourceLoc)
+ else
+ if use instanceof CrementOperation
+ then exprSourceType(use.(CrementOperation).getOperand(), sourceType, sourceLoc)
+ else (
+ // Conversions are not in the AST, so ignore them.
+ not use instanceof Conversion and
+ // Source expressions
+ sourceType = use.getUnspecifiedType() and
+ isPointerType(sourceType) and
+ sourceLoc = use.getLocation()
+ )
+}
+
+/**
+ * Holds if there is a pointer expression with type `sourceType` at
+ * location `sourceLoc` which might define the value of `v` at `def`.
+ */
+predicate defSourceType(SsaDefinition def, StackVariable v, Type sourceType, Location sourceLoc) {
+ exprSourceType(def.getDefiningValue(v), sourceType, sourceLoc)
+ or
+ defSourceType(def.getAPhiInput(v), v, sourceType, sourceLoc)
+ or
+ exists(Parameter p |
+ p = v and
+ def.definedByParameter(p) and
+ sourceType = p.getUnspecifiedType() and
+ strictcount(p.getType()) = 1 and
+ isPointerType(sourceType) and
+ sourceLoc = p.getLocation()
+ )
+}
+
+/**
+ * Gets the pointer arithmetic expression that `e` is (directly) used
+ * in, if any.
+ *
+ * For example, in `(char*)(p + 1)`, for `p`, the result is `p + 1`.
+ */
+Expr pointerArithmeticParent(Expr e) {
+ e = result.(PointerAddExpr).getLeftOperand() or
+ e = result.(PointerSubExpr).getLeftOperand() or
+ e = result.(PointerDiffExpr).getAnOperand()
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingVoid.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingVoid.ql
new file mode 100644
index 00000000000..460c98bf1e3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/IncorrectPointerScalingVoid.ql
@@ -0,0 +1,29 @@
+/**
+ * @name Suspicious pointer scaling to void
+ * @description Implicit scaling of pointer arithmetic expressions
+ * can cause buffer overflow conditions.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @precision medium
+ * @id cpp/suspicious-pointer-scaling-void
+ * @tags security
+ * external/cwe/cwe-468
+ */
+
+import IncorrectPointerScalingCommon
+
+from Expr dest, Type destType, Type sourceType, Type sourceBase, Type destBase, Location sourceLoc
+where
+ exists(pointerArithmeticParent(dest)) and
+ exprSourceType(dest, sourceType, sourceLoc) and
+ sourceBase = baseType(sourceType) and
+ destType = dest.getFullyConverted().getType() and
+ destBase = baseType(destType) and
+ destBase.getSize() != sourceBase.getSize() and
+ not dest.isInMacroExpansion() and
+ // Only produce alerts that are not produced by `IncorrectPointerScaling.ql`.
+ destBase instanceof VoidType
+select dest,
+ "This pointer might have type $@ (size " + sourceBase.getSize() +
+ "), but the pointer arithmetic here is done with type void", sourceLoc, sourceBase.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/SuspiciousAddWithSizeof.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/SuspiciousAddWithSizeof.ql
new file mode 100644
index 00000000000..4ac00fc42c6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-468/SuspiciousAddWithSizeof.ql
@@ -0,0 +1,33 @@
+/**
+ * @name Suspicious add with sizeof
+ * @description Explicitly scaled pointer arithmetic expressions
+ * can cause buffer overflow conditions if the offset is also
+ * implicitly scaled.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 8.8
+ * @precision high
+ * @id cpp/suspicious-add-sizeof
+ * @tags security
+ * external/cwe/cwe-468
+ */
+
+import cpp
+import IncorrectPointerScalingCommon
+
+private predicate isCharSzPtrExpr(Expr e) {
+ exists(PointerType pt | pt = e.getFullyConverted().getUnspecifiedType() |
+ pt.getBaseType() instanceof CharType or
+ pt.getBaseType() instanceof VoidType
+ )
+}
+
+from Expr sizeofExpr, Expr e
+where
+ // If we see an addWithSizeof then we expect the type of
+ // the pointer expression to be `char*` or `void*`. Otherwise it
+ // is probably a mistake.
+ addWithSizeof(e, sizeofExpr, _) and not isCharSzPtrExpr(e)
+select sizeofExpr,
+ "Suspicious sizeof offset in a pointer arithmetic expression. The type of the pointer is $@.",
+ e.getFullyConverted().getType() as t, t.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-497/ExposedSystemData.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-497/ExposedSystemData.ql
new file mode 100644
index 00000000000..bbe3b0805e1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-497/ExposedSystemData.ql
@@ -0,0 +1,435 @@
+/**
+ * @name Exposure of system data to an unauthorized control sphere
+ * @description Exposing system data or debugging information helps
+ * an adversary learn about the system and form an
+ * attack plan.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 6.5
+ * @precision medium
+ * @id cpp/system-data-exposure
+ * @tags security
+ * external/cwe/cwe-497
+ */
+
+import cpp
+import semmle.code.cpp.commons.Environment
+import semmle.code.cpp.security.OutputWrite
+
+/**
+ * An element that should not be exposed to an adversary.
+ */
+abstract class SystemData extends Element {
+ /**
+ * Gets an expression that is part of this `SystemData`.
+ */
+ abstract Expr getAnExpr();
+
+ /**
+ * Gets an expression whose value originates from, or is used by,
+ * this `SystemData`.
+ */
+ Expr getAnExprIndirect() {
+ // direct SystemData
+ result = getAnExpr() or
+ // flow via global or member variable (conservative approximation)
+ result = getAnAffectedVar().getAnAccess() or
+ // flow via stack variable
+ definitionUsePair(_, getAnExprIndirect(), result) or
+ useUsePair(_, getAnExprIndirect(), result) or
+ useUsePair(_, result, getAnExprIndirect()) or
+ // flow from assigned value to assignment expression
+ result.(AssignExpr).getRValue() = getAnExprIndirect()
+ }
+
+ /**
+ * Gets a global or member variable that may be affected by this system
+ * data (conservative approximation).
+ */
+ private Variable getAnAffectedVar() {
+ (
+ result.getAnAssignedValue() = this.getAnExprIndirect() or
+ result.getAnAccess() = this.getAnExprIndirect()
+ ) and
+ not result instanceof LocalScopeVariable
+ }
+}
+
+/**
+ * Data originating from the environment.
+ */
+class EnvData extends SystemData {
+ EnvData() { this instanceof EnvironmentRead }
+
+ override Expr getAnExpr() { result = this }
+}
+
+/**
+ * Data originating from a call to `mysql_get_client_info()`.
+ */
+class SQLClientInfo extends SystemData {
+ SQLClientInfo() { this.(FunctionCall).getTarget().hasName("mysql_get_client_info") }
+
+ override Expr getAnExpr() { result = this }
+}
+
+private predicate sqlConnectInfo(FunctionCall source, VariableAccess use) {
+ (
+ source.getTarget().hasName("mysql_connect") or
+ source.getTarget().hasName("mysql_real_connect")
+ ) and
+ use = source.getAnArgument()
+}
+
+/**
+ * Data passed into an SQL connect function.
+ */
+class SQLConnectInfo extends SystemData {
+ SQLConnectInfo() { sqlConnectInfo(this, _) }
+
+ override Expr getAnExpr() { sqlConnectInfo(this, result) }
+}
+
+/** Holds if `use` is the call result or output argument that receives the data of POSIX call `source`. */
+private predicate posixSystemInfo(FunctionCall source, Element use) {
+  // long sysconf(int name)
+  //  - various OS / system values and limits
+  source.getTarget().hasName("sysconf") and
+  use = source
+  or
+  // size_t confstr(int name, char *buf, size_t len)
+  //  - various OS / system strings, such as the libc version
+  // int statvfs(const char *__path, struct statvfs *__buf)
+  // int fstatvfs(int __fd, struct statvfs *__buf)
+  //  - various filesystem parameters
+  source.getTarget().hasName(["confstr", "statvfs", "fstatvfs"]) and
+  use = source.getArgument(1)
+  or
+  // int uname(struct utsname *buf)
+  //  - OS name and version
+  // `uname` takes the output buffer as its only argument, so it is index 0, not 1.
+  source.getTarget().hasName("uname") and
+  use = source.getArgument(0)
+}
+
+/**
+ * Data obtained from a POSIX system information call.
+ */
+class PosixSystemInfo extends SystemData {
+ PosixSystemInfo() { posixSystemInfo(this, _) }
+
+ override Expr getAnExpr() { posixSystemInfo(this, result) }
+}
+
+private predicate posixPWInfo(FunctionCall source, Element use) {
+ // struct passwd *getpwnam(const char *name);
+ // struct passwd *getpwuid(uid_t uid);
+ // struct passwd *getpwent(void);
+ // struct group *getgrnam(const char *name);
+ // struct group *getgrgid(gid_t);
+ // struct group *getgrent(void);
+ (
+ source.getTarget().hasName("getpwnam") or
+ source.getTarget().hasName("getpwuid") or
+ source.getTarget().hasName("getpwent") or
+ source.getTarget().hasName("getgrnam") or
+ source.getTarget().hasName("getgrgid") or
+ source.getTarget().hasName("getgrent")
+ ) and
+ use = source
+ or
+ // int getpwnam_r(const char *name, struct passwd *pwd,
+ // char *buf, size_t buflen, struct passwd **result);
+ // int getpwuid_r(uid_t uid, struct passwd *pwd,
+ // char *buf, size_t buflen, struct passwd **result);
+ // int getgrgid_r(gid_t gid, struct group *grp,
+ // char *buf, size_t buflen, struct group **result);
+ // int getgrnam_r(const char *name, struct group *grp,
+ // char *buf, size_t buflen, struct group **result);
+ (
+ source.getTarget().hasName("getpwnam_r") or
+ source.getTarget().hasName("getpwuid_r") or
+ source.getTarget().hasName("getgrgid_r") or
+ source.getTarget().hasName("getgrnam_r")
+ ) and
+ (
+ use = source.getArgument(1) or
+ use = source.getArgument(2) or
+ use = source.getArgument(4)
+ )
+ or
+ // int getpwent_r(struct passwd *pwd, char *buffer, size_t bufsize,
+ // struct passwd **result);
+ // int getgrent_r(struct group *gbuf, char *buf,
+ // size_t buflen, struct group **gbufp);
+ (
+ source.getTarget().hasName("getpwent_r") or
+ source.getTarget().hasName("getgrent_r")
+ ) and
+ (
+ use = source.getArgument(0) or
+ use = source.getArgument(1) or
+ use = source.getArgument(3)
+ )
+}
+
+/**
+ * Data obtained from a POSIX user/password/group database information call.
+ */
+class PosixPWInfo extends SystemData {
+ PosixPWInfo() { posixPWInfo(this, _) }
+
+ override Expr getAnExpr() { posixPWInfo(this, result) }
+}
+
+private predicate windowsSystemInfo(FunctionCall source, Element use) {
+ // DWORD WINAPI GetVersion(void);
+ source.getTarget().hasGlobalName("GetVersion") and
+ use = source
+ or
+ // BOOL WINAPI GetVersionEx(_Inout_ LPOSVERSIONINFO lpVersionInfo);
+ // void WINAPI GetSystemInfo(_Out_ LPSYSTEM_INFO lpSystemInfo);
+ // void WINAPI GetNativeSystemInfo(_Out_ LPSYSTEM_INFO lpSystemInfo);
+ (
+ source.getTarget().hasGlobalName("GetVersionEx") or
+ source.getTarget().hasGlobalName("GetVersionExA") or
+ source.getTarget().hasGlobalName("GetVersionExW") or
+ source.getTarget().hasGlobalName("GetSystemInfo") or
+ source.getTarget().hasGlobalName("GetNativeSystemInfo")
+ ) and
+ use = source.getArgument(0)
+}
+
+/**
+ * Data obtained from a Windows system information call.
+ */
+class WindowsSystemInfo extends SystemData {
+ WindowsSystemInfo() { windowsSystemInfo(this, _) }
+
+ override Expr getAnExpr() { windowsSystemInfo(this, result) }
+}
+
+private predicate windowsFolderPath(FunctionCall source, Element use) {
+ // BOOL SHGetSpecialFolderPath(
+ // HWND hwndOwner,
+ // _Out_ LPTSTR lpszPath,
+ // _In_ int csidl,
+ // _In_ BOOL fCreate
+ // );
+ (
+ source.getTarget().hasGlobalName("SHGetSpecialFolderPath") or
+ source.getTarget().hasGlobalName("SHGetSpecialFolderPathA") or
+ source.getTarget().hasGlobalName("SHGetSpecialFolderPathW")
+ ) and
+ use = source.getArgument(1)
+ or
+ // HRESULT SHGetKnownFolderPath(
+ // _In_ REFKNOWNFOLDERID rfid,
+ // _In_ DWORD dwFlags,
+ // _In_opt_ HANDLE hToken,
+ // _Out_ PWSTR *ppszPath
+ // );
+ source.getTarget().hasGlobalName("SHGetKnownFolderPath") and
+ use = source.getArgument(3)
+ or
+ // HRESULT SHGetFolderPath(
+ // _In_ HWND hwndOwner,
+ // _In_ int nFolder,
+ // _In_ HANDLE hToken,
+ // _In_ DWORD dwFlags,
+ // _Out_ LPTSTR pszPath
+ // );
+ (
+ source.getTarget().hasGlobalName("SHGetFolderPath") or
+ source.getTarget().hasGlobalName("SHGetFolderPathA") or
+ source.getTarget().hasGlobalName("SHGetFolderPathW")
+ ) and
+ use = source.getArgument(4)
+ or
+ // HRESULT SHGetFolderPathAndSubDir(
+ // _In_ HWND hwnd,
+ // _In_ int csidl,
+ // _In_ HANDLE hToken,
+ // _In_ DWORD dwFlags,
+ // _In_ LPCTSTR pszSubDir,
+ // _Out_ LPTSTR pszPath
+ // );
+ (
+ source.getTarget().hasGlobalName("SHGetFolderPathAndSubDir") or
+ source.getTarget().hasGlobalName("SHGetFolderPathAndSubDirA") or
+ source.getTarget().hasGlobalName("SHGetFolderPathAndSubDirW")
+ ) and
+ use = source.getArgument(5)
+}
+
+/**
+ * Data obtained about Windows special paths (for example, the
+ * location of `System32`).
+ */
+class WindowsFolderPath extends SystemData {
+ WindowsFolderPath() { windowsFolderPath(this, _) }
+
+ override Expr getAnExpr() { windowsFolderPath(this, result) }
+}
+
+private predicate logonUser(FunctionCall source, VariableAccess use) {
+ (
+ source.getTarget().hasGlobalName("LogonUser") or
+ source.getTarget().hasGlobalName("LogonUserW") or
+ source.getTarget().hasGlobalName("LogonUserA")
+ ) and
+ use = source.getAnArgument()
+}
+
+/**
+ * Data passed into a `LogonUser` (Windows) function.
+ */
+class LogonUser extends SystemData {
+ LogonUser() { logonUser(this, _) }
+
+ override Expr getAnExpr() { logonUser(this, result) }
+}
+
+private predicate regQuery(FunctionCall source, VariableAccess use) {
+ // LONG WINAPI RegQueryValue(
+ // _In_ HKEY hKey,
+ // _In_opt_ LPCTSTR lpSubKey,
+ // _Out_opt_ LPTSTR lpValue,
+ // _Inout_opt_ PLONG lpcbValue
+ // );
+ (
+ source.getTarget().hasGlobalName("RegQueryValue") or
+ source.getTarget().hasGlobalName("RegQueryValueA") or
+ source.getTarget().hasGlobalName("RegQueryValueW")
+ ) and
+ use = source.getArgument(2)
+ or
+ // LONG WINAPI RegQueryMultipleValues(
+ // _In_ HKEY hKey,
+ // _Out_ PVALENT val_list,
+ // _In_ DWORD num_vals,
+ // _Out_opt_ LPTSTR lpValueBuf,
+ // _Inout_opt_ LPDWORD ldwTotsize
+ // );
+ (
+ source.getTarget().hasGlobalName("RegQueryMultipleValues") or
+ source.getTarget().hasGlobalName("RegQueryMultipleValuesA") or
+ source.getTarget().hasGlobalName("RegQueryMultipleValuesW")
+ ) and
+ use = source.getArgument(3)
+ or
+ // LONG WINAPI RegQueryValueEx(
+ // _In_ HKEY hKey,
+ // _In_opt_ LPCTSTR lpValueName,
+ // _Reserved_ LPDWORD lpReserved,
+ // _Out_opt_ LPDWORD lpType,
+ // _Out_opt_ LPBYTE lpData,
+ // _Inout_opt_ LPDWORD lpcbData
+ // );
+ (
+ source.getTarget().hasGlobalName("RegQueryValueEx") or
+ source.getTarget().hasGlobalName("RegQueryValueExA") or
+ source.getTarget().hasGlobalName("RegQueryValueExW")
+ ) and
+ use = source.getArgument(4)
+ or
+ // LONG WINAPI RegGetValue(
+ // _In_ HKEY hkey,
+ // _In_opt_ LPCTSTR lpSubKey,
+ // _In_opt_ LPCTSTR lpValue,
+ // _In_opt_ DWORD dwFlags,
+ // _Out_opt_ LPDWORD pdwType,
+ // _Out_opt_ PVOID pvData,
+ // _Inout_opt_ LPDWORD pcbData
+ // );
+ (
+ source.getTarget().hasGlobalName("RegGetValue") or
+ source.getTarget().hasGlobalName("RegGetValueA") or
+ source.getTarget().hasGlobalName("RegGetValueW")
+ ) and
+ use = source.getArgument(5)
+}
+
+/**
+ * Data read from the Windows registry.
+ */
+class RegQuery extends SystemData {
+ RegQuery() { regQuery(this, _) }
+
+ override Expr getAnExpr() { regQuery(this, result) }
+}
+
+/**
+ * Somewhere data is output.
+ */
+abstract class DataOutput extends Element {
+ /**
+ * Gets an expression containing data that is output.
+ */
+ abstract Expr getASource();
+}
+
+/**
+ * Data that is output via standard output or standard error.
+ */
+class StandardOutput extends DataOutput {
+ StandardOutput() { this instanceof OutputWrite }
+
+ override Expr getASource() { result = this.(OutputWrite).getASource() }
+}
+
+private predicate socketCallOrIndirect(FunctionCall call) {
+ // direct socket call
+ // int socket(int domain, int type, int protocol);
+ call.getTarget().getName() = "socket"
+ or
+ exists(ReturnStmt rtn |
+ // indirect socket call
+ call.getTarget() = rtn.getEnclosingFunction() and
+ (
+ socketCallOrIndirect(rtn.getExpr()) or
+ socketCallOrIndirect(rtn.getExpr().(VariableAccess).getTarget().getAnAssignedValue())
+ )
+ )
+}
+
+private predicate socketFileDescriptor(Expr e) {
+ exists(Variable var, FunctionCall socket |
+ socketCallOrIndirect(socket) and
+ var.getAnAssignedValue() = socket and
+ e = var.getAnAccess()
+ )
+}
+
+private predicate socketOutput(FunctionCall call, Expr data) {
+ (
+ // ssize_t send(int sockfd, const void *buf, size_t len, int flags);
+ // ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
+ // const struct sockaddr *dest_addr, socklen_t addrlen);
+ // ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
+ // int write(int handle, void *buffer, int nbyte);
+ (
+ call.getTarget().hasGlobalName("send") or
+ call.getTarget().hasGlobalName("sendto") or
+ call.getTarget().hasGlobalName("sendmsg") or
+ call.getTarget().hasGlobalName("write")
+ ) and
+ data = call.getArgument(1) and
+ socketFileDescriptor(call.getArgument(0))
+ )
+}
+
+/**
+ * Data that is output via a socket.
+ */
+class SocketOutput extends DataOutput {
+ SocketOutput() { socketOutput(this, _) }
+
+ override Expr getASource() { socketOutput(this, result) }
+}
+
+from SystemData sd, DataOutput ow
+where
+ sd.getAnExprIndirect() = ow.getASource() or
+ sd.getAnExprIndirect() = ow.getASource().(Expr).getAChild*()
+select ow, "This operation exposes system data from $@.", sd, sd.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-570/IncorrectAllocationErrorHandling.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-570/IncorrectAllocationErrorHandling.ql
new file mode 100644
index 00000000000..357e6375570
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-570/IncorrectAllocationErrorHandling.ql
@@ -0,0 +1,254 @@
+/**
+ * @name Incorrect allocation-error handling
+ * @description Mixing up the failure conditions of 'operator new' and 'operator new(std::nothrow)' can result in unexpected behavior.
+ * @kind problem
+ * @id cpp/incorrect-allocation-error-handling
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-570
+ * external/cwe/cwe-252
+ * external/cwe/cwe-755
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.controlflow.Guards
+
+/**
+ * A C++ `delete` or `delete[]` expression.
+ */
+class DeleteOrDeleteArrayExpr extends Expr {
+ DeleteOrDeleteArrayExpr() { this instanceof DeleteExpr or this instanceof DeleteArrayExpr }
+
+ DeallocationFunction getDeallocator() {
+ result = [this.(DeleteExpr).getDeallocator(), this.(DeleteArrayExpr).getDeallocator()]
+ }
+
+ Destructor getDestructor() {
+ result = [this.(DeleteExpr).getDestructor(), this.(DeleteArrayExpr).getDestructor()]
+ }
+}
+
+/** Gets the `Constructor` invoked when `newExpr` allocates memory. */
+Constructor getConstructorForAllocation(NewOrNewArrayExpr newExpr) {
+ result.getACallToThisFunction() = newExpr.getInitializer()
+}
+
+/** Gets the `Destructor` invoked when `deleteExpr` deallocates memory. */
+Destructor getDestructorForDeallocation(DeleteOrDeleteArrayExpr deleteExpr) {
+ result = deleteExpr.getDestructor()
+}
+
+/** Holds if the evaluation of `newExpr` may throw an exception. */
+predicate newMayThrow(NewOrNewArrayExpr newExpr) {
+ functionMayThrow(newExpr.getAllocator()) or
+ functionMayThrow(getConstructorForAllocation(newExpr))
+}
+
+/** Holds if the evaluation of `deleteExpr` may throw an exception. */
+predicate deleteMayThrow(DeleteOrDeleteArrayExpr deleteExpr) {
+ functionMayThrow(deleteExpr.getDeallocator()) or
+ functionMayThrow(getDestructorForDeallocation(deleteExpr))
+}
+
+/**
+ * Holds if the function may throw an exception when called. That is, if the body of the function looks
+ * like it might throw an exception, and the function does not have a `noexcept` or `throw()` specifier.
+ */
+predicate functionMayThrow(Function f) {
+ (not exists(f.getBlock()) or stmtMayThrow(f.getBlock())) and
+ not f.isNoExcept() and
+ not f.isNoThrow()
+}
+
+/** Holds if the evaluation of `stmt` may throw an exception. */
+predicate stmtMayThrow(Stmt stmt) {
+ stmtMayThrow(stmt.(BlockStmt).getAStmt())
+ or
+ convertedExprMayThrow(stmt.(ExprStmt).getExpr())
+ or
+ convertedExprMayThrow(stmt.(DeclStmt).getADeclaration().(Variable).getInitializer().getExpr())
+ or
+ exists(IfStmt ifStmt | ifStmt = stmt |
+ convertedExprMayThrow(ifStmt.getCondition()) or
+ stmtMayThrow([ifStmt.getThen(), ifStmt.getElse()])
+ )
+ or
+ exists(ConstexprIfStmt constIfStmt | constIfStmt = stmt |
+ stmtMayThrow([constIfStmt.getThen(), constIfStmt.getElse()])
+ )
+ or
+ exists(Loop loop | loop = stmt |
+ convertedExprMayThrow(loop.getCondition()) or
+ stmtMayThrow(loop.getStmt())
+ )
+ or
+ // The case for `Loop` already checked the condition and the statement.
+ convertedExprMayThrow(stmt.(RangeBasedForStmt).getUpdate())
+ or
+ // The case for `Loop` already checked the condition and the statement.
+ exists(ForStmt forStmt | forStmt = stmt |
+ stmtMayThrow(forStmt.getInitialization())
+ or
+ convertedExprMayThrow(forStmt.getUpdate())
+ )
+ or
+ exists(SwitchStmt switchStmt | switchStmt = stmt |
+ convertedExprMayThrow(switchStmt.getExpr()) or
+ stmtMayThrow(switchStmt.getStmt())
+ )
+ or
+ // NOTE: We don't include `TryStmt` as those exceptions are not "observable" outside the function.
+ stmtMayThrow(stmt.(Handler).getBlock())
+ or
+ convertedExprMayThrow(stmt.(CoReturnStmt).getExpr())
+ or
+ convertedExprMayThrow(stmt.(ReturnStmt).getExpr())
+}
+
+/** Holds if the evaluation of `e` (including conversions) may throw an exception. */
+predicate convertedExprMayThrow(Expr e) {
+ exprMayThrow(e)
+ or
+ convertedExprMayThrow(e.getConversion())
+}
+
+/** Holds if the evaluation of `e` may throw an exception. */
+predicate exprMayThrow(Expr e) {
+ e instanceof DynamicCast
+ or
+ e instanceof TypeidOperator
+ or
+ e instanceof ThrowExpr
+ or
+ newMayThrow(e)
+ or
+ deleteMayThrow(e)
+ or
+ convertedExprMayThrow(e.(UnaryOperation).getOperand())
+ or
+ exists(BinaryOperation binOp | binOp = e |
+ convertedExprMayThrow([binOp.getLeftOperand(), binOp.getRightOperand()])
+ )
+ or
+ exists(Assignment assign | assign = e |
+ convertedExprMayThrow([assign.getLValue(), assign.getRValue()])
+ )
+ or
+ exists(CommaExpr comma | comma = e |
+ convertedExprMayThrow([comma.getLeftOperand(), comma.getRightOperand()])
+ )
+ or
+ exists(StmtExpr stmtExpr | stmtExpr = e |
+ convertedExprMayThrow(stmtExpr.getResultExpr()) or
+ stmtMayThrow(stmtExpr.getStmt())
+ )
+ or
+ convertedExprMayThrow(e.(Conversion).getExpr())
+ or
+ exists(FunctionCall fc | fc = e |
+ not exists(fc.getTarget()) or
+ functionMayThrow(fc.getTarget()) or
+ convertedExprMayThrow(fc.getAnArgument())
+ )
+}
+
+/** The `std::nothrow_t` class and its `bsl` variant. */
+class NoThrowType extends Struct {
+ NoThrowType() { this.hasGlobalOrStdOrBslName("nothrow_t") }
+}
+
+/** An allocator that might throw an exception. */
+class ThrowingAllocator extends Function {
+ ThrowingAllocator() {
+ exists(NewOrNewArrayExpr newExpr |
+ newExpr.getAllocator() = this and
+ // Exclude custom overloads of `operator new`.
+ // What we really want here is to only include the functions that satisfy `functionMayThrow`, but
+ // there seem to be examples where `throw()` isn't extracted (which causes false positives).
+ //
+ // As noted in the QLDoc for `Function.getAllocatorCall`:
+ //
+ // "As a rule of thumb, there will be an allocator call precisely when the type
+ // being allocated has a custom `operator new`, or when an argument list appears
+ // after the `new` keyword and before the name of the type being allocated.
+ //
+ // In particular note that uses of placement-new and nothrow-new will have an
+ // allocator call."
+ //
+ // So we say an allocator might throw if:
+ // 1. It doesn't have a body
+ // 2. there isn't a parameter with type `nothrow_t`
+ // 3. the allocator isn't marked with `throw()` or `noexcept`.
+ not exists(this.getBlock()) and
+ not exists(Parameter p | p = this.getAParameter() |
+ p.getUnspecifiedType().stripType() instanceof NoThrowType
+ ) and
+ not this.isNoExcept() and
+ not this.isNoThrow()
+ )
+ }
+}
+
+/** The `std::bad_alloc` exception and its `bsl` variant. */
+class BadAllocType extends Class {
+ BadAllocType() { this.hasGlobalOrStdOrBslName("bad_alloc") }
+}
+
+/**
+ * A catch block that catches a `std::bad_alloc` (or any of its superclasses), or a catch
+ * block that catches every exception (i.e., `catch(...)`).
+ */
+class BadAllocCatchBlock extends CatchBlock {
+ BadAllocCatchBlock() {
+ this.getParameter().getUnspecifiedType().stripType() =
+ any(BadAllocType badAlloc).getABaseClass*()
+ or
+ not exists(this.getParameter())
+ }
+}
+
+/**
+ * Holds if `newExpr` is embedded in a `try` statement with a catch block `catchBlock` that
+ * catches a `std::bad_alloc` exception, but nothing in the `try` block (including the `newExpr`)
+ * will throw that exception.
+ */
+predicate noThrowInTryBlock(NewOrNewArrayExpr newExpr, BadAllocCatchBlock catchBlock) {
+ exists(TryStmt try |
+ not stmtMayThrow(try.getStmt()) and
+ try.getACatchClause() = catchBlock and
+ newExpr.getEnclosingBlock().getEnclosingBlock*() = try.getStmt()
+ )
+}
+
+/**
+ * Holds if `newExpr` handles allocation failures by throwing an exception, yet
+ * the guard condition `guard` compares the result of `newExpr` to a null value.
+ */
+predicate nullCheckInThrowingNew(NewOrNewArrayExpr newExpr, GuardCondition guard) {
+ newExpr.getAllocator() instanceof ThrowingAllocator and
+ (
+ // Handles null comparisons.
+ guard.ensuresEq(globalValueNumber(newExpr).getAnExpr(), any(NullValue null), _, _, _)
+ or
+ // Handles `if(ptr)` and `if(!ptr)` cases.
+ guard = globalValueNumber(newExpr).getAnExpr()
+ )
+}
+
+from NewOrNewArrayExpr newExpr, Element element, string msg, string elementString
+where
+ not newExpr.isFromUninstantiatedTemplate(_) and
+ (
+ noThrowInTryBlock(newExpr, element) and
+ msg = "This allocation cannot throw. $@ is unnecessary." and
+ elementString = "This catch block"
+ or
+ nullCheckInThrowingNew(newExpr, element) and
+ msg = "This allocation cannot return null. $@ is unnecessary." and
+ elementString = "This check"
+ )
+select newExpr, msg, element, elementString
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/DangerousFunctionOverflow.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/DangerousFunctionOverflow.ql
new file mode 100644
index 00000000000..35955665a9e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/DangerousFunctionOverflow.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Use of dangerous function
+ * @description Use of a standard library function that does not guard against buffer overflow.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 10.0
+ * @precision very-high
+ * @id cpp/dangerous-function-overflow
+ * @tags reliability
+ * security
+ * external/cwe/cwe-242
+ * external/cwe/cwe-676
+ */
+
+import cpp
+
+from FunctionCall call, Function target
+where
+ target.hasGlobalOrStdName("gets") and
+ target = call.getTarget()
+select call, "gets does not guard against buffer overflow"
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/DangerousUseOfCin.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/DangerousUseOfCin.ql
new file mode 100644
index 00000000000..07a7ef1de9b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/DangerousUseOfCin.ql
@@ -0,0 +1,237 @@
+/**
+ * @name Dangerous use of 'cin'
+ * @description Using `cin` without specifying the length of the input
+ * may be dangerous.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 10.0
+ * @precision high
+ * @id cpp/dangerous-cin
+ * @tags reliability
+ * security
+ * external/cwe/cwe-676
+ */
+
+import cpp
+
+/**
+ * A C/C++ pointer type whose underlying base type is a `CharType` or `Wchar_t`.
+ */
+class AnyCharPointerType extends PointerType {
+ AnyCharPointerType() {
+ exists(Type pointee | pointee = this.getBaseType().getUnderlyingType() |
+ pointee instanceof CharType or pointee instanceof Wchar_t)
+ }
+}
+
+/**
+ * A C/C++ array type whose underlying element type is a `CharType` or `Wchar_t`.
+ */
+class AnyCharArrayType extends ArrayType {
+ AnyCharArrayType() {
+ exists(Type element | element = this.getBaseType().getUnderlyingType() |
+ element instanceof CharType or element instanceof Wchar_t)
+ }
+}
+
+/**
+ * A type declared in a namespace named `std` whose name starts with
+ * `basic_string<` (the underlying type of `std::string` and `std::wstring`).
+ */
+class AnyStdStringType extends Type {
+ AnyStdStringType() {
+ this.getName().matches("basic\\_string<%") and
+ exists(Namespace stdNamespace |
+ stdNamespace.getADeclaration() = this and
+ stdNamespace.getName() = "std"
+ )
+ }
+}
+
+/**
+ * A type declared in a namespace named `std` whose name starts with
+ * `basic_ifstream<` (the underlying type of `std::ifstream` and `std::wifstream`).
+ */
+class IFStream extends Type {
+ IFStream() {
+ this.getName().matches("basic\\_ifstream<%") and
+ exists(Namespace stdNamespace |
+ stdNamespace.getADeclaration() = this and
+ stdNamespace.getName() = "std"
+ )
+ }
+}
+
+/**
+ * A namespace variable named `cin` or `wcin` in a namespace named `std`.
+ */
+class CinVariable extends NamespaceVariable {
+ CinVariable() {
+ this.getNamespace().getName() = "std" and
+ this.getName() = ["cin", "wcin"]
+ }
+}
+
+/** A call to a function named `operator>>` in a namespace named `std`. */
+class OperatorRShiftCall extends FunctionCall {
+ OperatorRShiftCall() {
+ exists(Function op | op = this.getTarget() |
+ op.hasName("operator>>") and op.getNamespace().getName() = "std")
+ }
+
+ /*
+ * `operator>>` is either a member function (stream = qualifier, destination = argument 0)
+ * or an independent function (stream = argument 0, destination = argument 1).
+ */
+
+ /** Gets the stream operand of this call. */
+ Expr getSource() {
+ this.getTarget() instanceof MemberFunction and result = this.getQualifier()
+ or
+ not this.getTarget() instanceof MemberFunction and result = this.getArgument(0)
+ }
+
+ /** Gets the destination operand of this call. */
+ Expr getDest() {
+ this.getTarget() instanceof MemberFunction and result = this.getArgument(0)
+ or
+ not this.getTarget() instanceof MemberFunction and result = this.getArgument(1)
+ }
+}
+
+/**
+ * An expression denoting a potentially dangerous `std::istream` or
+ * `std::wistream`, for example an access to `std::cin`.
+ */
+abstract class PotentiallyDangerousInput extends Expr {
+ /**
+ * Gets the variable that this input stream originates from, if
+ * it can be determined.
+ */
+ abstract Variable getStreamVariable();
+
+ /**
+ * Gets the preceding access to the same input stream, if any.
+ */
+ abstract PotentiallyDangerousInput getPreviousAccess();
+
+ /**
+ * Gets the width restriction in effect on the input stream when this
+ * expression is reached, that is, the one left by the previous access, if any.
+ */
+ Expr getWidth() { result = this.getPreviousAccess().getWidthAfter() }
+
+ /** Gets the width argument set by this expression itself, if any. */
+ private Expr getWidthSetHere() {
+ exists(FunctionCall widthCall |
+ // a call `stream.width(n)` with this expression as the qualifier
+ this = widthCall.getQualifier() and
+ widthCall.getTarget().getName() = "width" and
+ result = widthCall.getArgument(0)
+ )
+ or
+ exists(FunctionCall setwCall, Function setw | setw = setwCall.getTarget() |
+ // `stream >> std::setw(n)`
+ setwCall = this.(OperatorRShiftCall).getDest() and
+ setw.hasName("setw") and
+ setw.getNamespace().getName() = "std" and
+ result = setwCall.getArgument(0)
+ )
+ }
+
+ /** Holds if this is a `>>` into a character pointer, character array or `std::basic_string`. */
+ private predicate isWidthConsumedHere() {
+ exists(Type destType | destType = this.(OperatorRShiftCall).getDest().getUnderlyingType() |
+ destType instanceof AnyStdStringType or
+ destType instanceof AnyCharArrayType or
+ destType instanceof AnyCharPointerType
+ )
+ }
+
+ /**
+ * Gets the width restriction that applies to the input stream
+ * after this expression, if any.
+ */
+ Expr getWidthAfter() {
+ result = this.getWidthSetHere()
+ or
+ // otherwise the incoming width carries over, unless this expression consumes it
+ not exists(this.getWidthSetHere()) and
+ not this.isWidthConsumedHere() and
+ result = this.getWidth()
+ }
+}
+
+predicate nextPotentiallyDangerousInput(
+ ControlFlowNode cfn, PotentiallyDangerousInput next, Variable streamVariable
+) {
+ // base case: `cfn` is itself an input expression whose stream variable is `streamVariable`
+ next = cfn and
+ next.getStreamVariable() = streamVariable
+ or
+ // recursive case: `cfn` is not such an input expression, so keep looking along its control-flow successors
+ not cfn.(PotentiallyDangerousInput).getStreamVariable() = streamVariable and
+ nextPotentiallyDangerousInput(cfn.getASuccessor(), next, streamVariable)
+}
+
+/**
+ * An access to a `CinVariable`, that is, to `std::cin` or `std::wcin`.
+ */
+class CinAccess extends PotentiallyDangerousInput {
+ CinAccess() { exists(CinVariable cin | cin = this.(VariableAccess).getTarget()) }
+
+ override Variable getStreamVariable() { this.(VariableAccess).getTarget() = result }
+
+ override PotentiallyDangerousInput getPreviousAccess() {
+ nextPotentiallyDangerousInput(result.getASuccessor(), this, result.getStreamVariable())
+ }
+}
+
+/**
+ * An access to a variable whose underlying type is an `IFStream`.
+ */
+class IFStreamAccess extends PotentiallyDangerousInput {
+ IFStreamAccess() { exists(IFStream s | s = this.(VariableAccess).getTarget().getUnderlyingType()) }
+
+ override Variable getStreamVariable() { this.(VariableAccess).getTarget() = result }
+
+ override PotentiallyDangerousInput getPreviousAccess() {
+ nextPotentiallyDangerousInput(result.getASuccessor(), this, result.getStreamVariable())
+ }
+}
+
+/**
+ * A call to `std::operator>>` whose stream operand is itself a
+ * potentially dangerous input, as in the outer call of `cin >> a >> b`.
+ */
+class ChainedInput extends PotentiallyDangerousInput {
+ ChainedInput() { this.(OperatorRShiftCall).getSource() instanceof PotentiallyDangerousInput }
+
+ override Variable getStreamVariable() {
+ exists(PotentiallyDangerousInput inner | inner = this.(OperatorRShiftCall).getSource() |
+ result = inner.getStreamVariable())
+ }
+
+ override PotentiallyDangerousInput getPreviousAccess() { result = this.(OperatorRShiftCall).getSource() }
+}
+
+from PotentiallyDangerousInput input, OperatorRShiftCall rshift, Expr dest
+where
+ // a call to operator>> on a potentially dangerous input
+ input = rshift.getSource() and
+ dest = rshift.getDest() and
+ (
+ // destination is char* or wchar_t*
+ dest.getUnderlyingType() instanceof AnyCharPointerType and
+ // assume any width setting makes this safe
+ not exists(input.getWidthAfter())
+ or
+ exists(int arraySize |
+ // destination is char[] or wchar_t[]
+ arraySize = dest.getUnderlyingType().(AnyCharArrayType).getArraySize() and
+ // assume any width setting makes this safe, unless we know
+ // it to be larger than the array.
+ forall(Expr w | w = input.getWidthAfter() | w.getValue().toInt() > arraySize)
+ )
+ )
+select rshift, "Use of 'cin' without specifying the length of the input may be dangerous."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/PotentiallyDangerousFunction.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/PotentiallyDangerousFunction.ql
new file mode 100644
index 00000000000..a77c3ce7a1e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-676/PotentiallyDangerousFunction.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Use of potentially dangerous function
+ * @description Use of a standard library function that is not thread-safe.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 10.0
+ * @precision medium
+ * @id cpp/potentially-dangerous-function
+ * @tags reliability
+ * security
+ * external/cwe/cwe-676
+ */
+
+import cpp
+
+predicate potentiallyDangerousFunction(Function f, string message) {
+ exists(string name | name = ["gmtime", "localtime", "ctime", "asctime"] |
+ f.hasGlobalName(name) and
+ message = "Call to " + name + " is potentially dangerous"
+ )
+}
+
+from FunctionCall call, Function target, string message
+where
+ potentiallyDangerousFunction(target, message) and
+ target = call.getTarget()
+select call, message
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-704/WcharCharConversion.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-704/WcharCharConversion.ql
new file mode 100644
index 00000000000..65551a1f138
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-704/WcharCharConversion.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Cast from char* to wchar_t*
+ * @description Casting a byte string to a wide-character string is likely
+ * to yield a string that is incorrectly terminated or aligned.
+ * This can lead to undefined behavior, including buffer overruns.
+ * @kind problem
+ * @id cpp/incorrect-string-type-conversion
+ * @problem.severity error
+ * @security-severity 8.8
+ * @precision high
+ * @tags security
+ * external/cwe/cwe-704
+ * external/microsoft/c/c6276
+ */
+
+import cpp
+
+class WideCharPointerType extends PointerType {
+ WideCharPointerType() { exists(WideCharType wide | wide = this.getBaseType()) }
+}
+
+from Expr e1, Cast e2
+where
+ // `e2` is a cast applied to `e1` ...
+ e1.getConversion() = e2 and
+ // ... taking a `CharPointerType` to a wide-character pointer type
+ e1.getUnspecifiedType() instanceof CharPointerType and
+ e2.getUnspecifiedType() instanceof WideCharPointerType
+select e1,
+ "Conversion from " + e1.getType().toString() + " to " + e2.getType().toString() +
+ ". Use of invalid string can lead to undefined behavior."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/DoNotCreateWorldWritable.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/DoNotCreateWorldWritable.ql
new file mode 100644
index 00000000000..1fd55a02d01
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/DoNotCreateWorldWritable.ql
@@ -0,0 +1,39 @@
+/**
+ * @name File created without restricting permissions
+ * @description Creating a file that is world-writable can allow an attacker to write to the file.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision medium
+ * @id cpp/world-writable-file-creation
+ * @tags security
+ * external/cwe/cwe-732
+ */
+
+import cpp
+import FilePermissions
+import semmle.code.cpp.commons.unix.Constants
+
+predicate worldWritableCreation(FileCreationExpr fc, int mode) {
+ exists(Umask effective | effective = localUmask(fc) | mode = effective.mask(fc.getMode())) and
+ sets(mode, s_iwoth())
+}
+
+predicate setWorldWritable(FunctionCall fc, int mode) {
+ exists(string callee | callee = fc.getTarget().getName() |
+ callee = ["chmod", "fchmod", "_chmod", "_wchmod"]) and
+ fc.getArgument(1).getValue().toInt() = mode and sets(mode, s_iwoth())
+}
+
+from Expr fc, int mode, string message
+where
+ setWorldWritable(fc, mode) and
+ message =
+ "This sets a file's permissions to " + octalFileMode(mode) +
+ ", which would make it world-writable."
+ or
+ worldWritableCreation(fc, mode) and
+ message =
+ "A file may be created here with mode " + octalFileMode(mode) +
+ ", which would make it world-writable."
+select fc, message
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/FilePermissions.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/FilePermissions.qll
new file mode 100644
index 00000000000..ad5ed29098c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/FilePermissions.qll
@@ -0,0 +1,158 @@
+import cpp
+import semmle.code.cpp.commons.unix.Constants
+
+bindingset[n, digit]
+private string octalDigit(int n, int digit) {
+ exists(int shift | shift = digit * 3 | result = n.bitShiftRight(shift).bitAnd(7).toString())
+}
+
+bindingset[n, digit]
+private string octalDigitOpt(int n, int digit) {
+ exists(string d | d = octalDigit(n, digit) | d = "0" and result = "" or d != "0" and result = d)
+}
+
+bindingset[mode]
+string octalFileMode(int mode) {
+ if mode in [0 .. 4095] // 4095 is octal 07777
+ then
+ result =
+ "0" + octalDigitOpt(mode, 3) + octalDigit(mode, 2) + octalDigit(mode, 1) + octalDigit(mode, 0)
+ else result = "[non-standard mode: decimal " + mode + "]"
+}
+
+/**
+ * Holds if `mask` and `fields` have at least one bit in common.
+ */
+bindingset[mask, fields]
+predicate sets(int mask, int fields) { not mask.bitAnd(fields) = 0 }
+
+/**
+ * Gets the constant integer value of the first argument of `fc`, provided
+ * `fc` calls a function named `umask`, `_umask` or `_umask_s`.
+ */
+private int umask(FunctionCall fc) {
+ result = fc.getArgument(0).getValue().toInt() and
+ fc.getTarget().getName() = ["umask", "_umask", "_umask_s"]
+}
+
+class Umask extends int {
+ Umask() { this = umask(_) or this = 0 }
+
+ bindingset[mode, this]
+ int mask(int mode) { exists(int kept | kept = this.bitNot() | result = mode.bitAnd(kept)) }
+}
+
+Umask defaultUmask() { result = 0 } // the umask `localUmask` falls back to when no umask call is found
+
+/**
+ * Gets the value set by the umask call with the highest node index in `block`.
+ */
+private Umask lastUmask(BasicBlock block) {
+ exists(int last |
+ last = max(int i | exists(umask(block.getNode(i)))) and
+ result = umask(block.getNode(last))
+ )
+}
+
+private Umask umaskStrictlyReaches(BasicBlock block) {
+ exists(BasicBlock pred | pred = block.getAPredecessor() |
+ exists(umask(pred.getNode(_))) and result = lastUmask(pred)
+ or
+ not exists(umask(pred.getNode(_))) and result = umaskStrictlyReaches(pred)
+ )
+}
+
+private Umask localDefinedUmask(FileCreationExpr e) {
+ exists(BasicBlock b, int i | e = b.getNode(i) |
+ not exists(umask(b.getNode(_))) and result = umaskStrictlyReaches(b) // no umask call in `e`'s block: take one reaching from a predecessor block
+ or
+ exists(Expr um, int j | um = b.getNode(j) and j <= i | // a umask call at index `j`, not after `e`, in the same block
+ result = umask(um) and
+ not exists(int k | k in [j + 1 .. i] | exists(umask(b.getNode(k)))) // and no other umask call between it and `e`
+ )
+ )
+}
+
+Umask localUmask(FileCreationExpr e) {
+ result = localDefinedUmask(e) or not exists(localDefinedUmask(e)) and result = defaultUmask()
+}
+
+abstract class FileCreationExpr extends FunctionCall {
+ abstract Expr getPath(); // the argument holding the path of the file (per the subclasses in this file)
+
+ abstract int getMode(); // the permission bits requested, before any umask is applied
+}
+
+class OpenCreationExpr extends FileCreationExpr {
+ OpenCreationExpr() {
+ exists(int flags | flags = this.getArgument(1).getValue().toInt() | sets(flags, o_creat())) and
+ this.getTarget().getName() = ["open", "_open", "_wopen"]
+ }
+
+ override Expr getPath() { this.getArgument(0) = result }
+
+ override int getMode() {
+ // explicit mode argument: use its constant value
+ result = this.getArgument(2).getValue().toInt()
+ or
+ // no mode argument: assume anything is permitted
+ not exists(this.getArgument(2)) and result = 0.bitNot()
+ }
+}
+
+class CreatCreationExpr extends FileCreationExpr {
+ CreatCreationExpr() { exists(Function callee | callee = this.getTarget() | callee.getName() = "creat") }
+
+ override Expr getPath() { this.getArgument(0) = result }
+
+ override int getMode() { exists(Expr m | m = this.getArgument(1) | result = m.getValue().toInt()) }
+}
+
+class OpenatCreationExpr extends FileCreationExpr {
+ OpenatCreationExpr() {
+ this.getNumberOfArguments() = 4 and
+ this.getTarget().getName() = "openat"
+ }
+
+ override Expr getPath() { this.getArgument(1) = result }
+
+ override int getMode() { exists(Expr m | m = this.getArgument(3) | result = m.getValue().toInt()) }
+}
+
+private int fopenMode() {
+ exists(int readBits | readBits = s_irusr().bitOr(s_irgrp()).bitOr(s_iroth()) |
+ result = readBits.bitOr(s_iwusr()).bitOr(s_iwgrp()).bitOr(s_iwoth()))
+}
+
+/** A call to `fopen` or one of its variants whose mode string (argument 1) starts with `w` or `a`. */
+class FopenCreationExpr extends FileCreationExpr {
+ FopenCreationExpr() {
+ // `_fsopen` is the documented name of the MSVC CRT function; `fsopen` is kept for compatibility
+ this.getTarget().getName() = ["fopen", "_wfopen", "fsopen", "_fsopen", "_wfsopen"] and
+ this.getArgument(1).getValue().matches(["w", "a"] + "%")
+ }
+
+ override Expr getPath() { result = this.getArgument(0) }
+
+ /** Gets `fopenMode()`, since these calls take no explicit permission argument. */
+ override int getMode() { result = fopenMode() }
+}
+
+/** A call to `fopen_s` or `_wfopen_s` whose mode string (argument 2) starts with `w` or `a`. */
+class FopensCreationExpr extends FileCreationExpr {
+ FopensCreationExpr() {
+ this.getTarget().getName() = ["fopen_s", "_wfopen_s"] and
+ this.getArgument(2).getValue().matches(["w", "a"] + "%")
+ }
+
+ override Expr getPath() { this.getArgument(1) = result }
+
+ override int getMode() {
+ // fopen_s has restrictive permissions unless you have "u" in the mode
+ exists(string modeString | modeString = this.getArgument(2).getValue() |
+ modeString.charAt(_) = "u" and result = fopenMode()
+ or
+ not modeString.charAt(_) = "u" and result = s_irusr().bitOr(s_iwusr())
+ )
+ }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/UnsafeDaclSecurityDescriptor.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/UnsafeDaclSecurityDescriptor.ql
new file mode 100644
index 00000000000..bf673826347
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-732/UnsafeDaclSecurityDescriptor.ql
@@ -0,0 +1,101 @@
+/**
+ * @name Setting a DACL to NULL in a SECURITY_DESCRIPTOR
+ * @description Setting a DACL to NULL in a SECURITY_DESCRIPTOR will result in an unprotected object.
+ * If the DACL that belongs to the security descriptor of an object is set to NULL, a null DACL is created.
+ * A null DACL grants full access to any user who requests it;
+ * normal security checking is not performed with respect to the object.
+ * @id cpp/unsafe-dacl-security-descriptor
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 7.8
+ * @precision high
+ * @tags security
+ * external/cwe/cwe-732
+ * external/microsoft/C6248
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.dataflow.DataFlow2
+
+/**
+ * A call to the global function SetSecurityDescriptorDacl whose 2nd argument (bDaclPresent) is a non-zero constant
+ */
+class SetSecurityDescriptorDaclFunctionCall extends FunctionCall {
+ SetSecurityDescriptorDaclFunctionCall() {
+ exists(int daclPresent | daclPresent = this.getArgument(1).getValue().toInt() | daclPresent != 0) and
+ this.getTarget().hasGlobalName("SetSecurityDescriptorDacl")
+ }
+}
+
+/**
+ * Dataflow from a null value to a variable access used as the pDacl argument (argument 2) of SetSecurityDescriptorDacl
+ */
+class NullDaclConfig extends DataFlow::Configuration {
+ NullDaclConfig() { this = "NullDaclConfig" }
+
+ override predicate isSource(DataFlow::Node source) {
+ source.asExpr() instanceof NullValue
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(SetSecurityDescriptorDaclFunctionCall call |
+ sink.asExpr() = call.getArgument(2).(VariableAccess)
+ )
+ }
+}
+
+/**
+ * Dataflow that tracks `ACL` pointers which are _not_ likely to be NULL to
+ * the pDacl argument (argument 2) of a call to SetSecurityDescriptorDacl.
+ */
+class NonNullDaclConfig extends DataFlow2::Configuration {
+ NonNullDaclConfig() { this = "NonNullDaclConfig" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(Type acl | acl.getName() = "ACL" |
+ source.getType().getUnspecifiedType().(PointerType).getBaseType() = acl.getUnspecifiedType()
+ ) and
+ (
+ // A value returned by a call whose target has no visible body is
+ // assumed not to be null.
+ exists(Call opaqueCall | opaqueCall = source.asExpr() |
+ not exists(opaqueCall.getTarget().getBlock())
+ )
+ or
+ // A value assigned by reference is assumed not to be null. The data
+ // flow library cannot currently follow flow from the body of a function
+ // to an assignment by reference, so this rule applies whether or not
+ // the body is visible.
+ exists(Call outCall | source.asDefiningArgument() = outCall.getAnArgument())
+ )
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ sink.asExpr() = any(SetSecurityDescriptorDaclFunctionCall call).getArgument(2)
+ }
+}
+
+from SetSecurityDescriptorDaclFunctionCall call, string message
+where
+ exists(NullValue nullExpr |
+ message =
+ "Setting a DACL to NULL in a SECURITY_DESCRIPTOR will result in an unprotected object."
+ |
+ call.getArgument(1).getValue().toInt() != 0 and // already required by the class of `call`
+ call.getArgument(2) = nullExpr // a null value is passed directly as argument 2
+ )
+ or
+ exists(
+ Expr constassign, VariableAccess var, NullDaclConfig nullDaclConfig,
+ NonNullDaclConfig nonNullDaclConfig
+ |
+ message =
+ "Setting a DACL to NULL in a SECURITY_DESCRIPTOR using variable " + var +
+ " that is set to NULL will result in an unprotected object."
+ |
+ var = call.getArgument(2) and // argument 2 is a variable access ...
+ nullDaclConfig.hasFlow(DataFlow::exprNode(constassign), DataFlow::exprNode(var)) and // ... that a null value flows to ...
+ not nonNullDaclConfig.hasFlow(_, DataFlow::exprNode(var)) // ... and that no source assumed to be non-null flows to
+ )
+select call, message
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/LockFlow.qll b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/LockFlow.qll
new file mode 100644
index 00000000000..8d104b57fa2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/LockFlow.qll
@@ -0,0 +1,150 @@
+/**
+ * Provides classes and predicates for analyzing mutexes and the control
+ * flow regions where they might be locked.
+ */
+
+import cpp
+import semmle.code.cpp.commons.Synchronization
+
+/**
+ * Holds if `cond` tests whether the lock call on `access` succeeded and
+ * `failNode` is the control flow node reached when it did not. Recognized
+ * shapes are the lock call used directly as the condition, as in
+ * `if(tryLock(m))`, and a condition containing it, as in `if(lock(m) != 0)`.
+ */
+cached
+predicate tryLockCondition(VariableAccess access, ControlFlowNode cond, ControlFlowNode failNode) {
+ exists(FunctionCall lockingCall | lockCall(access, lockingCall) |
+ // the call is itself `cond`: failure is its false successor
+ lockingCall = cond and failNode = lockingCall.getAFalseSuccessor()
+ or
+ // the call is nested in a condition with a successor block that returns, as in
+ //
+ // if (pthread_mutex_lock(mtx) != 0) return -1;
+ cond.isCondition() and
+ cond = lockingCall.getParent*() and
+ failNode instanceof BasicBlockWithReturn and
+ failNode = cond.getASuccessor()
+ )
+}
+
+/**
+ * A basic block that contains a return statement.
+ */
+class BasicBlockWithReturn extends BasicBlock {
+ BasicBlockWithReturn() { exists(ReturnStmt ret | this.contains(ret)) }
+}
+
+/**
+ * Holds if mutex variable `v` might be locked or unlocked during
+ * function call `call`.
+ */
+private predicate lockedOrUnlockedInCall(Variable v, FunctionCall call) {
+ lockCall(v.getAnAccess(), call)
+ or
+ unlockCall(v.getAnAccess(), call)
+ or
+ // Interprocedural analysis: look for mutexes which are locked or
+ // unlocked in the body of the callee.
+ exists(Function fcn, Variable x |
+ fcn = call.getTarget() and
+ lockedOrUnlockedInFunction(x, fcn)
+ |
+ // If `x` is one of the function's parameters, then map it to the
+ // corresponding argument.
+ if x = fcn.getAParameter()
+ then exists(int i | x = fcn.getParameter(i) | v.getAnAccess() = call.getArgument(i))
+ else v = x
+ )
+}
+
+/**
+ * Holds if some function call inside `fcn` might lock or unlock mutex
+ * variable `v`, whether directly or indirectly (through the functions
+ * that call invokes in turn).
+ */
+private predicate lockedOrUnlockedInFunction(Variable v, Function fcn) {
+ exists(FunctionCall inner |
+ fcn = inner.getEnclosingFunction() and
+ lockedOrUnlockedInCall(v, inner)
+ )
+}
+
+/**
+ * Holds if the mutex locked at `access` might still be locked after
+ * control flow node `node` has executed. That is, the lock which was
+ * obtained at `access` has not been canceled by a matching unlock or
+ * superseded by a more recent call to the lock method.
+ */
+predicate lockedOnExit(VariableAccess access, ControlFlowNode node) {
+ lockCall(access, node)
+ or
+ lockedOnEntry(access, node) and
+ // Remove mutexes which are either unlocked by this statement or
+ // superseded by another call to the lock method.
+ not lockedOrUnlockedInCall(access.getTarget(), node)
+ or
+ // Interprocedural analysis: if the node is a function call and a mutex
+ // is still locked at the end of the function body, then it is also
+ // locked after the function returns. Note that the Function object is
+ // used to represent the exit node in the control flow graph.
+ exists(Function fcn, Variable x, VariableAccess xAccess |
+ fcn = node.(FunctionCall).getTarget() and
+ lockedOnEntry(xAccess, fcn) and
+ x = xAccess.getTarget()
+ |
+ // If `x` is one of the function's parameters, then map it to the
+ // corresponding argument.
+ if x = fcn.getAParameter()
+ then exists(int i | x = fcn.getParameter(i) | access = node.(FunctionCall).getArgument(i))
+ else access = xAccess
+ )
+}
+
+/**
+ * Holds if the mutex locked at `access` might still be locked when control
+ * reaches `node`, that is, if it might be locked after some predecessor of
+ * `node` has executed.
+ */
+predicate lockedOnEntry(VariableAccess access, ControlFlowNode node) {
+ exists(ControlFlowNode pred |
+ pred.getASuccessor() = node and
+ // On the failure edge of a lock test (see `tryLockCondition`) the
+ // mutex is not locked, so that edge does not carry the lock forward.
+ not tryLockCondition(access, pred, node) and
+ lockedOnExit(access, pred)
+ )
+}
+
+/**
+ * Holds if function call `call` locks mutex `access`, either directly or
+ * indirectly through its callee. This is a generalization of `lockCall`.
+ */
+predicate lockedInCall(VariableAccess access, FunctionCall call) {
+ lockCall(access, call)
+ or
+ // Interprocedural analysis: a mutex for which there is a path to a
+ // lock starting from the entry point of the callee.
+ exists(Function callee, Variable lockedVar, VariableAccess calleeAccess |
+ callee = call.getTarget() and
+ lockedVar = calleeAccess.getTarget() and
+ pathToLock(calleeAccess, callee.getEntryPoint())
+ |
+ // A parameter of the callee is mapped to the corresponding argument
+ // of `call`; any other variable is reported through its own access.
+ lockedVar = callee.getAParameter() and exists(int i | lockedVar = callee.getParameter(i) | access = call.getArgument(i))
+ or
+ not lockedVar = callee.getAParameter() and access = calleeAccess
+ )
+}
+
+/**
+ * Holds if mutex `access` might be locked at `node` or at a node reachable from it.
+ */
+predicate pathToLock(VariableAccess access, ControlFlowNode node) {
+ lockedInCall(access, node)
+ or
+ // `node` itself neither locks nor unlocks the mutex, so look further along
+ not lockedOrUnlockedInCall(access.getTarget(), node) and
+ pathToLock(access, node.getASuccessor())
+}
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/LockOrderCycle.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/LockOrderCycle.ql
new file mode 100644
index 00000000000..cd5d8771807
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/LockOrderCycle.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Cyclic lock order dependency
+ * @description Locking mutexes in different orders in different
+ * threads can cause deadlock.
+ * @kind problem
+ * @id cpp/lock-order-cycle
+ * @problem.severity error
+ * @security-severity 5.0
+ * @tags security
+ * external/cwe/cwe-764
+ * external/cwe/cwe-833
+ */
+
+import cpp
+import semmle.code.cpp.commons.Synchronization
+import LockFlow
+
+/**
+ * Gets a variable that might be locked while a lock on `v` is held.
+ *
+ * For example, with
+ * ```
+ * x.lock()
+ * y.lock()
+ * x.unlock()
+ * y.unlock()
+ * ```
+ * `x` is already locked when `y.lock()` is called, so `y` is a result
+ * of `lockSuccessor(x)`. If you consider this an edge from `x` to `y`
+ * in a directed graph, then a cycle in the graph indicates a potential
+ * source of deadlock. The dining philosophers are the classic example.
+ */
+Variable lockSuccessor(Variable v) {
+ exists(FunctionCall call |
+ lockedOnEntry(v.getAnAccess(), call) and
+ lockedInCall(result.getAnAccess(), call)
+ )
+}
+
+from Variable v1, Variable v2
+where v2 = lockSuccessor+(v1) and v1 = lockSuccessor+(v2) and v1 != v2
+select v1, "Mutex " + v1 + " has a cyclic lock order dependency with $@.", v2, "mutex " + v2
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/TwiceLocked.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/TwiceLocked.ql
new file mode 100644
index 00000000000..051ad2eeeea
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/TwiceLocked.ql
@@ -0,0 +1,55 @@
+/**
+ * @name Mutex locked twice
+ * @description Calling the lock method of a mutex twice in succession
+ * might cause a deadlock.
+ * @kind problem
+ * @id cpp/twice-locked
+ * @problem.severity error
+ * @security-severity 5.0
+ * @precision low
+ * @tags security
+ * external/cwe/cwe-764
+ * external/cwe/cwe-833
+ */
+
+import cpp
+import semmle.code.cpp.commons.Synchronization
+import LockFlow
+
+/**
+ * Holds if `call` locks a mutex via the access `a`, while `v` might
+ * already be locked when `call` is reached. The access `a` might be in a
+ * function which is called indirectly from `call`.
+ */
+cached
+private predicate twiceLocked(FunctionCall call, Variable v, VariableAccess a) {
+ lockedInCall(a, call) and
+ exists(VariableAccess earlier | earlier = v.getAnAccess() | lockedOnEntry(earlier, call))
+}
+
+// One instance of the problem is often reachable from several call sites,
+// which is why `call` is not part of the result. It can still be helpful
+// to include `call.getLocation()` in the result, because it can help to
+// find the control flow path which might be responsible.
+//
+from FunctionCall call, Variable v, VariableAccess access2
+where
+ v = access2.getTarget() and
+ twiceLocked(call, v, access2) and
+ // If the second lock is a `try_lock` then it won't cause a deadlock.
+ // We want to be extra sure that the second lock is not a `try_lock`
+ // to make sure that we don't generate too many false positives, so
+ // the result is dropped when any of three heuristics applies:
+ //
+ // 1. The call is to a function named "try_lock".
+ not trylockCall(access2, _) and
+ //
+ // 2. The result of the call is used in a condition. For example:
+ // if (pthread_mutex_lock(mtx) != 0) return -1;
+ not tryLockCondition(access2, _, _) and
+ //
+ // 3. The call is a condition. Because the analysis is interprocedural,
+ // `call` might be an indirect call to `lock`, so this heuristic
+ // catches some cases which the second heuristic does not.
+ not call.isCondition()
+select access2, "Mutex " + v + " might be locked already, which could cause a deadlock."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/UnreleasedLock.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/UnreleasedLock.ql
new file mode 100644
index 00000000000..dd224352b12
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-764/UnreleasedLock.ql
@@ -0,0 +1,85 @@
+/**
+ * @name Lock may not be released
+ * @description A lock that is acquired one or more times without a
+ * matching number of unlocks may cause a deadlock.
+ * @kind problem
+ * @id cpp/unreleased-lock
+ * @problem.severity error
+ * @security-severity 5.0
+ * @precision low
+ * @tags security
+ * external/cwe/cwe-764
+ * external/cwe/cwe-833
+ */
+
+import cpp
+import semmle.code.cpp.commons.Synchronization
+
+predicate lockBlock(MutexType t, BasicBlock b, int locks) {
+ strictcount(int idx | t.getLockAccess() = b.getNode(idx)) = locks
+}
+
+predicate unlockBlock(MutexType t, BasicBlock b, int unlocks) {
+ strictcount(int idx | t.getUnlockAccess() = b.getNode(idx)) = unlocks
+}
+
+/**
+ * Holds if `lockblock` ends with a lock access of `t` and `failblock` is
+ * a false successor of that access, that is, the successor if it fails.
+ */
+predicate failedLock(MutexType t, BasicBlock lockblock, BasicBlock failblock) {
+ exists(ControlFlowNode lockAccess | lockAccess = t.getLockAccess() |
+ // the lock access must be the last node of `lockblock`
+ lockblock.getEnd() = lockAccess and
+ failblock = lockAccess.getAFalseSuccessor()
+ )
+}
+
+/**
+ * Holds if the lock accesses of `t` in `b` minus the unlock accesses of
+ * `t` in `b` come to `netlocks`. For example, if `b` locks `t` twice and
+ * unlocks `t` four times, then `netlocks` will be `-2`.
+ */
+predicate lockUnlockBlock(MutexType t, BasicBlock b, int netlocks) {
+ // only locks in `b`
+ not unlockBlock(t, b, _) and lockBlock(t, b, netlocks)
+ or
+ // only unlocks in `b`
+ exists(int released | unlockBlock(t, b, released) and not lockBlock(t, b, _) | netlocks = -released)
+ or
+ // both locks and unlocks in `b`
+ exists(int acquired, int released | lockBlock(t, b, acquired) and unlockBlock(t, b, released) |
+ netlocks = acquired - released)
+}
+
+/**
+ * Holds if there is a control flow path from `src` to `b` such that
+ * on that path the net number of locks is `locks`, and `locks` is
+ * positive.
+ */
+predicate blockIsLocked(MutexType t, BasicBlock src, BasicBlock b, int locks) {
+ lockUnlockBlock(t, b, locks) and src = b and locks > 0 // base case: the path is the single block `src`
+ or
+ exists(BasicBlock pred, int predlocks, int curlocks, int failedlock | pred = b.getAPredecessor() |
+ blockIsLocked(t, src, pred, predlocks) and
+ (if failedLock(t, pred, b) then failedlock = 1 else failedlock = 0) and // count a failed lock as an unlock so the net is zero
+ (
+ not lockUnlockBlock(t, b, _) and curlocks = 0 // `b` has no lock or unlock accesses of `t`
+ or
+ lockUnlockBlock(t, b, curlocks)
+ ) and
+ locks = predlocks + curlocks - failedlock and
+ locks > 0 and
+ locks < 10 // arbitrary bound to fail gracefully in case of locking in a loop
+ )
+}
+
+from Function c, MutexType t, BasicBlock src, BasicBlock exit, FunctionCall lock
+where
+ // `lock` is a lock access of `t` located in block `src` ...
+ lock = t.getLockAccess() and src.getANode() = lock and
+ // ... from which `t` is still locked on a path to a block that ends at `c`
+ blockIsLocked(t, src, exit, _) and c = exit.getEnd() and
+ // restrict results to those methods that actually attempt to unlock
+ c = t.getUnlockAccess().getEnclosingFunction()
+select lock, "This lock might not be unlocked or might be locked more times than it is unlocked."
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql
new file mode 100644
index 00000000000..64505ee8283
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql
@@ -0,0 +1,41 @@
+/**
+ * @name Untrusted input for a condition
+ * @description Using untrusted inputs in a statement that makes a
+ * security decision makes code vulnerable to
+ * attack.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id cpp/tainted-permissions-check
+ * @tags security
+ * external/cwe/cwe-807
+ */
+
+import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+
+predicate sensitiveCondition(Expr condition, Expr raise) {
+  // `raise` occurs within an `if` statement whose condition is `condition`
+  exists(IfStmt guard |
+    guard = raise.getEnclosingStmt().getParentStmt*() and condition = guard.getCondition()
+  ) and
+  raisesPrivilege(raise)
+}
+
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element tainted) { exists(Expr raise | sensitiveCondition(tainted, raise)) }
+}
+
+/*
+ * Produce an alert if there is an 'if' statement whose condition `condition`
+ * is influenced by tainted data `source`, and the body contains
+ * `raise` which escalates privilege.
+ */
+
+from Expr source, Expr condition, Expr raise, PathNode sourceNode, PathNode sinkNode
+where
+  sensitiveCondition(condition, raise) and
+  taintedWithPath(source, condition, sourceNode, sinkNode)
+select condition, sourceNode, sinkNode, "Reliance on untrusted input $@ to raise privilege at $@",
+  source, source.toString(), raise, raise.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-835/InfiniteLoopWithUnsatisfiableExitCondition.ql b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-835/InfiniteLoopWithUnsatisfiableExitCondition.ql
new file mode 100644
index 00000000000..3db5e15874b
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Security/CWE/CWE-835/InfiniteLoopWithUnsatisfiableExitCondition.ql
@@ -0,0 +1,61 @@
+/**
+ * @name Infinite loop with unsatisfiable exit condition
+ * @description A loop with an unsatisfiable exit condition could
+ * prevent the program from terminating, making it
+ * vulnerable to a denial of service attack.
+ * @kind problem
+ * @id cpp/infinite-loop-with-unsatisfiable-exit-condition
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags security
+ * external/cwe/cwe-835
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.BasicBlocks
+private import semmle.code.cpp.rangeanalysis.PointlessComparison
+import semmle.code.cpp.controlflow.internal.ConstantExprs
+
+/**
+ * Holds if the control flow graph has an edge from `src` to `dst` that can
+ * never be taken, because `cmp`, the last node of `src`, always has value `value`.
+ */
+predicate impossibleEdge(ComparisonOperation cmp, boolean value, BasicBlock src, BasicBlock dst) {
+  reachablePointlessComparison(cmp, _, _, value, _) and
+  src.getEnd() = cmp and
+  if value = false then dst = src.getATrueSuccessor() else dst = src.getAFalseSuccessor()
+}
+
+BasicBlock enhancedSucc(BasicBlock bb) {
+  not impossibleEdge(_, _, bb, result) and result = bb.getASuccessor() // successors over edges not ruled out by `impossibleEdge`
+}
+
+/**
+ * Holds if `cmp` always evaluates to `value` and, as a consequence, the
+ * function cannot terminate.
+ *
+ * This requires the function exit to be reachable through the ordinary
+ * `getASuccessor` relation while being unreachable through `enhancedSucc`.
+ * Comparisons that are trivially true or false are therefore not reported,
+ * for example
+ * ```
+ * while (1) { ... }
+ * ```
+ * Such a loop is obviously infinite, so we assume that it was written
+ * intentionally.
+ */
+predicate impossibleEdgeCausesNonTermination(ComparisonOperation cmp, boolean value) {
+  exists(BasicBlock bb |
+    impossibleEdge(cmp, value, bb, _) and
+    // Only consider blocks that are reachable from a function entry,
+    // which reduces false positives.
+    bb = enhancedSucc+(any(EntryBasicBlock entry)) and
+    bb.getASuccessor+() instanceof ExitBasicBlock and
+    not enhancedSucc+(bb) instanceof ExitBasicBlock
+  )
+}
+
+from ComparisonOperation cmp, boolean value
+where impossibleEdgeCausesNonTermination(cmp, value) // `cmp` always has value `value`, which makes the function exit unreachable
+select cmp,
+ "Function exit is unreachable because this condition is always " + value.toString() + "."
diff --git a/repo-tests/codeql/cpp/ql/src/Summary/LinesOfCode.ql b/repo-tests/codeql/cpp/ql/src/Summary/LinesOfCode.ql
new file mode 100644
index 00000000000..3b2aa2ac4c9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Summary/LinesOfCode.ql
@@ -0,0 +1,11 @@
+/**
+ * @id cpp/summary/lines-of-code
+ * @name Total lines of C/C++ code in the database
+ * @description The total number of lines of C/C++ code across all files, including system headers, libraries, and auto-generated files. This is a useful metric of the size of a database. For all files that were seen during the build, this query counts the lines of code, excluding whitespace or comments.
+ * @kind metric
+ * @tags summary
+ */
+
+import cpp
+
+select sum(File f | f.fromSource() | f.getMetrics().getNumberOfLinesOfCode()) // one total over every file for which `fromSource()` holds
diff --git a/repo-tests/codeql/cpp/ql/src/Summary/LinesOfUserCode.ql b/repo-tests/codeql/cpp/ql/src/Summary/LinesOfUserCode.ql
new file mode 100644
index 00000000000..67d3aa6a8e0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/Summary/LinesOfUserCode.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Total lines of user written C/C++ code in the database
+ * @description The total number of lines of C/C++ code from the source code directory, excluding auto-generated files. This query counts the lines of code, excluding whitespace or comments. Note: If external libraries are included in the codebase either in a checked-in virtual environment or as vendored code, that will currently be counted as user written code.
+ * @kind metric
+ * @tags summary
+ * lines-of-code
+ * @id cpp/summary/lines-of-user-code
+ */
+
+import cpp
+import semmle.code.cpp.AutogeneratedFile
+
+select sum(File f |
+    not f instanceof AutogeneratedFile and exists(f.getRelativePath()) and f.fromSource()
+  |
+    f.getMetrics().getNumberOfLinesOfCode()
+  )
diff --git a/repo-tests/codeql/cpp/ql/src/default.qll b/repo-tests/codeql/cpp/ql/src/default.qll
new file mode 100644
index 00000000000..6bc0f1b009d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/default.qll
@@ -0,0 +1,7 @@
+/**
+ * DEPRECATED: use `import cpp` instead of `import default`.
+ *
+ * Provides classes and predicates for working with C/C++ code.
+ */
+
+import cpp
diff --git a/repo-tests/codeql/cpp/ql/src/definitions.ql b/repo-tests/codeql/cpp/ql/src/definitions.ql
new file mode 100644
index 00000000000..84ef77fdc9d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/definitions.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Jump-to-definition links
+ * @description Generates use-definition pairs that provide the data
+ * for jump-to-definition in the code viewer of LGTM.
+ * @kind definitions
+ * @id cpp/jump-to-definition
+ */
+
+import definitions
+
+from Top e, Top def, string kind
+where
+  // Elements inside template instantiations are excluded: they often lead to
+  // multiple links to definitions from the same source location, and LGTM
+  // does not support this behaviour.
+  not e.isFromTemplateInstantiation(_) and
+  def = definitionOf(e, kind)
+select e, def, kind
diff --git a/repo-tests/codeql/cpp/ql/src/definitions.qll b/repo-tests/codeql/cpp/ql/src/definitions.qll
new file mode 100644
index 00000000000..cb229d66ef1
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/definitions.qll
@@ -0,0 +1,210 @@
+/**
+ * Provides classes and predicates related to jump-to-definition links
+ * in the code viewer.
+ */
+
+import cpp
+import IDEContextual
+
+/**
+ * Any element that might be the source or target of a jump-to-definition
+ * link.
+ *
+ * In some cases it is preferable to modify locations (the
+ * `hasLocationInfo()` predicate) so that they are short, and
+ * non-overlapping with other locations that might be highlighted in
+ * the LGTM interface.
+ *
+ * We need to give locations that may not be in the database, so
+ * we use `hasLocationInfo()` rather than `getLocation()`.
+ */
+class Top extends Element {
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ pragma[noopt]
+ final predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ interestingElement(this) and // default case: use the location reported by `getLocation()`
+ not this instanceof MacroAccess and
+ not this instanceof Include and
+ exists(Location l |
+ l = this.getLocation() and
+ l.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ )
+ or
+ // A macro access gets a location that covers only the name of the
+ // accessed macro, not its arguments (which are included by
+ // `MacroAccess`'s `getLocation()`).
+ exists(Location l, MacroAccess ma |
+ ma instanceof MacroAccess and
+ ma = this and
+ l = ma.getLocation() and
+ l.hasLocationInfo(filepath, startline, startcolumn, _, _) and
+ endline = startline and // the reported span ends on the line where it starts
+ exists(string macroName, int nameLength, int nameLengthMinusOne |
+ macroName = ma.getMacroName() and
+ nameLength = macroName.length() and
+ nameLengthMinusOne = nameLength - 1 and
+ endcolumn = startcolumn + nameLengthMinusOne // column of the last character of the name
+ )
+ )
+ or
+ hasLocationInfo_Include(this, filepath, startline, startcolumn, endline, endcolumn) // include directives: see `hasLocationInfo_Include`
+ }
+}
+
+/**
+ * Holds if the include text of `i` spans columns `sc` to `ec` of line `sl`
+ * (which equals `el`) in the file with absolute path `path`.
+ *
+ * The span ends where the location of `i` ends and is as long as the include
+ * text, so it leaves out the `#include` text and the whitespace before the name.
+ */
+predicate hasLocationInfo_Include(Include i, string path, int sl, int sc, int el, int ec) {
+  exists(Location loc, int lastLine, int lastColumn | loc = i.getLocation() |
+    lastLine = loc.getEndLine() and
+    lastColumn = loc.getEndColumn() and
+    path = loc.getFile().getAbsolutePath() and
+    sl = lastLine and el = lastLine and
+    ec = lastColumn and
+    sc = lastColumn + 1 - i.getIncludeText().length()
+  )
+}
+
+/** Holds if `e` takes part in a jump-to-definition link, either as its source or as its target. */
+predicate interestingElement(Element e) {
+  e = definitionOf(_, _)
+  or
+  exists(definitionOf(e, _))
+}
+
+/**
+ * Holds if a location of `cc` starts at line `line`, column `column`
+ * of file `f`.
+ */
+private predicate constructorCallStartLoc(ConstructorCall cc, File f, int line, int column) {
+  exists(Location start |
+    start = cc.getLocation() and
+    column = start.getStartColumn() and
+    line = start.getStartLine() and
+    f = start.getFile()
+  )
+}
+
+/**
+ * Holds if `tm` mentions the type `t` and a location of `tm` starts at
+ * line `line`, column `column` of file `f`. Mentions of class template
+ * instantiations are filtered out.
+ */
+private predicate typeMentionStartLoc(TypeMention tm, Type t, File f, int line, int column) {
+  t = tm.getMentionedType() and
+  not t instanceof ClassTemplateInstantiation and
+  exists(Location start |
+    start = tm.getLocation() and
+    column = start.getStartColumn() and
+    line = start.getStartLine() and
+    f = start.getFile()
+  )
+}
+
+/**
+ * Holds if the locations of `cc` and `tm` start at the same character of the same file.
+ */
+cached
+private predicate constructorCallTypeMention(ConstructorCall cc, TypeMention tm) {
+  exists(File file, int startLine, int startColumn |
+    typeMentionStartLoc(tm, _, file, startLine, startColumn) and
+    constructorCallStartLoc(cc, file, startLine, startColumn)
+  )
+}
+
+/**
+ * Gets an element, of kind `kind`, that element `e` uses, if any.
+ * Attention: This predicate yields multiple definitions for a single location.
+ *
+ * The `kind` is a string representing what kind of use it is:
+ * - `"M"` for function and method calls
+ * - `"T"` for uses of types
+ * - `"V"` for variable accesses
+ * - `"X"` for macro accesses
+ * - `"I"` for import / include directives
+ */
+cached
+Top definitionOf(Top e, string kind) {
+ (
+ // call -> function called
+ kind = "M" and
+ result = e.(Call).getTarget() and
+ not e.(Expr).isCompilerGenerated() and
+ not e instanceof ConstructorCall // handled elsewhere
+ or
+ // access -> function, variable or enum constant accessed
+ kind = "V" and
+ result = e.(Access).getTarget() and
+ not e.(Expr).isCompilerGenerated()
+ or
+ // macro access -> macro accessed
+ kind = "X" and
+ result = e.(MacroAccess).getMacro()
+ or
+ // type mention -> type
+ kind = "T" and
+ e.(TypeMention).getMentionedType() = result and
+ not constructorCallTypeMention(_, e) and // handled elsewhere
+ // Multiple type mentions can be generated when a typedef is used, and
+ // in such cases we want to exclude all but the originating typedef.
+ not exists(Type secondary |
+ exists(TypeMention tm, File f, int startline, int startcol |
+ typeMentionStartLoc(e, result, f, startline, startcol) and
+ typeMentionStartLoc(tm, secondary, f, startline, startcol) and
+ (
+ result = secondary.(TypedefType).getBaseType() or
+ result = secondary.(TypedefType).getBaseType().(SpecifiedType).getBaseType()
+ )
+ )
+ )
+ or
+ // constructor call -> function called
+ // - but only if there is a corresponding type mention, since
+ // we don't want links for implicit conversions.
+ // - using the location of the type mention, since it's
+ // tighter than the location of the function call.
+ kind = "M" and
+ exists(ConstructorCall cc |
+ constructorCallTypeMention(cc, e) and
+ result = cc.getTarget()
+ )
+ or
+ // include -> included file
+ kind = "I" and
+ result = e.(Include).getIncludedFile() and
+ // exclude `#include` directives containing macros
+ not exists(MacroInvocation mi, Location l1, Location l2 |
+ l1 = e.(Include).getLocation() and
+ l2 = mi.getLocation() and
+ l1.getContainer() = l2.getContainer() and
+ l1.getStartLine() = l2.getStartLine()
+ // (an #include directive must always be on its own line)
+ )
+ ) and
+ (
+ // exclude things inside macro invocations, as they will overlap
+ // with the macro invocation.
+ not e.(Element).isInMacroExpansion() and
+ // exclude nested macro invocations, as they will overlap with
+ // the top macro invocation.
+ not exists(e.(MacroAccess).getParentInvocation())
+ ) and
+ // Some entities have many locations. This can arise for an external
+ // function that is frequently declared but not defined, or perhaps
+ // for a struct type that is declared in many places. Rather than
+ // letting the result set explode, we just exclude results that are
+ // "too ambiguous" (10 or more locations) -- we could also arbitrarily
+ // pick one location later on.
+ strictcount(result.getLocation()) < 10
+}
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Likely Bugs/RedundantNullCheckParam.ql b/repo-tests/codeql/cpp/ql/src/experimental/Likely Bugs/RedundantNullCheckParam.ql
new file mode 100644
index 00000000000..f1a3663bb96
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Likely Bugs/RedundantNullCheckParam.ql
@@ -0,0 +1,56 @@
+/**
+ * @name Redundant null check or missing null check of parameter
+ * @description Checking a parameter for nullness in one path,
+ * and not in another is likely to be a sign that either
+ * the check can be removed, or added in the other case.
+ * @kind problem
+ * @id cpp/redundant-null-check-param
+ * @problem.severity recommendation
+ * @tags reliability
+ * security
+ * external/cwe/cwe-476
+ */
+
+import cpp
+
+predicate blockDominates(BlockStmt check, BlockStmt access) {
+  access.getLocation().getEndLine() <= check.getLocation().getEndLine() and // compares source line ranges only:
+  access.getLocation().getStartLine() >= check.getLocation().getStartLine() // the lines of `check` enclose those of `access`
+}
+
+predicate isCheckedInstruction(VariableAccess unchecked, VariableAccess checked) {
+  checked.getTarget() = unchecked.getTarget() and
+  blockDominates(checked.getEnclosingBlock(), unchecked.getEnclosingBlock()) and
+  // simple test: the access `checked` must not itself be dereferenced
+  not dereferenced(checked)
+}
+
+predicate candidateResultUnchecked(VariableAccess unchecked) {
+  not exists(VariableAccess checked | isCheckedInstruction(unchecked, checked))
+}
+
+predicate candidateResultChecked(VariableAccess check, EqualityOperation eqop) {
+  // `check` is an operand of `eqop`, and some operand of `eqop` is a null value
+  check = eqop.getAnOperand() and
+  eqop.getAnOperand() instanceof NullValue and
+  // accesses in macro expansions (such as assert macros) are not taken into account
+  not check.isInMacroExpansion() and
+  // the pointer itself is compared, not the value it points to
+  not dereferenced(check)
+}
+
+from VariableAccess unchecked, VariableAccess check, EqualityOperation eqop, Parameter param
+where
+  // both accesses refer to the same function parameter
+  param = check.getTarget() and
+  param = unchecked.getTarget() and
+  // this function parameter is not overwritten
+  not exists(param.getAnAssignment()) and
+  // one access is checked against null
+  candidateResultChecked(check, eqop) and
+  // the other access is a dereference
+  dereferenced(unchecked) and
+  // which has not been checked before in this code path
+  candidateResultUnchecked(unchecked)
+select check, "This null check is redundant or there is a missing null check before $@ ", unchecked,
+  "where dereferencing happens"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-020/LateCheckOfFunctionArgument.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-020/LateCheckOfFunctionArgument.ql
new file mode 100644
index 00000000000..3ef5bf3405e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-020/LateCheckOfFunctionArgument.ql
@@ -0,0 +1,66 @@
+/**
+ * @name Late Check Of Function Argument
+ * @description --Checking the function argument after calling the function itself.
+ * --This situation looks suspicious and requires the attention of the developer.
+ * --It may be necessary to add validation before calling the function.
+ * @kind problem
+ * @id cpp/late-check-of-function-argument
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-20
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/** Holds if `f` is a known library function whose argument at zero-based index `apos` is a size, length or offset. */
+predicate numberArgument(Function f, int apos) {
+  f.hasGlobalOrStdName("write") and apos = 2
+  or
+  f.hasGlobalOrStdName("read") and apos = 2
+  or
+  f.hasGlobalOrStdName("lseek") and apos = 1
+  or
+  f.hasGlobalOrStdName("memmove") and apos = 2
+  or
+  f.hasGlobalOrStdName("memset") and apos = 2
+  or
+  f.hasGlobalOrStdName("memcpy") and apos = 2
+  or
+  f.hasGlobalOrStdName("memcmp") and apos = 2
+  or
+  f.hasGlobalOrStdName("strncat") and apos = 2
+  or
+  f.hasGlobalOrStdName("strncpy") and apos = 2
+  or
+  f.hasGlobalOrStdName("strncmp") and apos = 2
+  or
+  f.hasGlobalOrStdName("snprintf") and apos = 1
+  or
+  f.hasGlobalOrStdName("strndup") and apos = 1 // strndup(s, n) takes two arguments; the length bound `n` is at index 1
+}
+
+class IfCompareWithZero extends IfStmt {
+  IfCompareWithZero() { this.getCondition().(RelationalOperation).getAChild().getValue() = "0" }
+  Expr noZerroOperand() {
+    exists(RelationalOperation rel | rel = this.getCondition() |
+      if rel.getGreaterOperand().getValue() = "0"
+      then result = rel.getLesserOperand() else result = rel.getGreaterOperand()
+    )
+  }
+}
+
+from FunctionCall fc, IfCompareWithZero ifc, int na
+where
+  numberArgument(fc.getTarget(), na) and
+  dominates(fc, ifc) and
+  globalValueNumber(ifc.noZerroOperand()) = globalValueNumber(fc.getArgument(na)) and
+  not exists(IfStmt earlier |
+    globalValueNumber(earlier.getCondition().getAChild*()) = globalValueNumber(fc.getArgument(na)) and
+    dominates(earlier, fc)
+  )
+select fc,
+  "The value of argument '$@' appears to be checked after the call, rather than before it.",
+  fc.getArgument(na), fc.getArgument(na).toString()
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-1041/FindWrapperFunctions.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-1041/FindWrapperFunctions.ql
new file mode 100644
index 00000000000..4b147fa3612
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-1041/FindWrapperFunctions.ql
@@ -0,0 +1,141 @@
+/**
+ * @name Missed opportunity to call wrapper function
+ * @description If a wrapper function is defined for a given function, any call to the given function should be via the wrapper function.
+ * @kind problem
+ * @id cpp/call-to-function-without-wrapper
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * maintainability
+ * security
+ * external/cwe/cwe-1041
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.commons.Assertions
+
+/**
+ * A function call that takes part in error handling (logging, throwing an exception, abnormal termination).
+ */
+class CallUsedToHandleErrors extends FunctionCall {
+  CallUsedToHandleErrors() {
+    // the call has no control flow successor, so it is known to not return
+    not exists(this.(ControlFlowNode).getASuccessor())
+    or
+    // a control flow successor of the call is a `throw` expression
+    this.(ControlFlowNode).getASuccessor() instanceof ThrowExpr
+    or
+    // a control flow successor of the call is a formatting function (logging a message, possibly an error)
+    this.(ControlFlowNode).getASuccessor() instanceof FormattingFunction
+    or
+    // recursive case: the called function itself contains such a call
+    this.getTarget() = any(CallUsedToHandleErrors inner).getEnclosingFunction()
+  }
+}
+
+/** Holds if `fcp` satisfies the conditions required of a call made outside the wrapper function. */
+predicate conditionsOutsideWrapper(FunctionCall fcp) {
+  fcp.getNumberOfArguments() > 0 and
+  not fcp.getEnclosingStmt().getParentStmt*() instanceof ConditionalStmt and
+  not fcp.getEnclosingStmt().getParentStmt*() instanceof Loop and
+  not fcp.getEnclosingStmt().getParentStmt*() instanceof ReturnStmt and
+  not fcp = any(FunctionCall outer).getAnArgument().getAChild*() and
+  not fcp = any(ExprCall outer).getAnArgument().getAChild*() and
+  not fcp = any(Assignment assign).getRValue().getAChild*() and
+  not fcp = any(Initializer init).getExpr().getAChild*() and
+  not fcp = any(Assertion assertion).getAsserted().getAChild*() and
+  not fcp = any(Operation op).getAChild*() and
+  not fcp = any(ArrayExpr arr).getAChild*()
+}
+
+/** Holds if the conditions for calling `fcp` inside the `fnp` wrapper function are met. */
+pragma[inline]
+predicate conditionsInsideWrapper(FunctionCall fcp, Function fnp) {
+ not exists(FunctionCall fctmp2 | // `fcp` is not part of an argument of another call in `fnp`
+ fctmp2.getEnclosingFunction() = fnp and fcp = fctmp2.getAnArgument().getAChild*()
+ ) and
+ not fcp instanceof CallUsedToHandleErrors and
+ not fcp.getAnArgument().isConstant() and // none of the arguments of `fcp` is constant
+ fcp.getEnclosingFunction() = fnp and
+ fnp.getNumberOfParameters() > 0 and
+ // every argument of `fcp` must have the same value number as an access to some parameter of `fnp`
+ forall(int i | i in [0 .. fcp.getNumberOfArguments() - 1] |
+ globalValueNumber(fcp.getArgument(i)) = globalValueNumber(fnp.getAParameter().getAnAccess())
+ ) and
+ // `fnp` must not contain another call to the same target (in the same file as `fcp`)
+ not exists(FunctionCall fctmp |
+ fctmp.getTarget() = fcp.getTarget() and
+ fctmp.getFile() = fcp.getFile() and
+ fctmp != fcp and
+ fctmp.getEnclosingFunction() = fnp
+ ) and
+ // `fnp` must not contain another call that precedes `fcp`, or that has no control flow path to or from `fcp`
+ not exists(FunctionCall fctmp |
+ fctmp.getEnclosingFunction() = fnp and
+ fctmp.getFile() = fcp.getFile() and
+ fctmp != fcp and
+ (
+ fctmp = fcp.getAPredecessor+() // `fctmp` comes before `fcp`
+ or
+ not exists(FunctionCall fctmp1 | // `fctmp` and `fcp` are not connected in either direction
+ fctmp1 = fcp and
+ (
+ fctmp.getASuccessor+() = fctmp1 or
+ fctmp.getAPredecessor+() = fctmp1
+ )
+ )
+ )
+ )
+}
+
+/** Holds if the conditions for the wrapper function `fnp`, which wraps the call `fcp`, are met. */
+pragma[inline]
+predicate conditionsForWrapper(FunctionCall fcp, Function fnp) {
+ not exists(ExprCall ectmp | fnp = ectmp.getEnclosingFunction()) and // `fnp` contains no `ExprCall`
+ not exists(Loop lp | lp.getEnclosingFunction() = fnp) and // `fnp` contains no loop
+ not exists(SwitchStmt sw | sw.getEnclosingFunction() = fnp) and // `fnp` contains no `switch` statement
+ not fnp instanceof Operator and
+ // inside the wrapper function there should be checks of arguments or the result,
+ // perhaps by means of passing the latter as an argument to some function
+ (
+ exists(IfStmt ifs |
+ ifs.getEnclosingFunction() = fnp and
+ (
+ globalValueNumber(ifs.getCondition().getAChild*()) = globalValueNumber(fcp.getAnArgument()) and // an argument is tested in the condition ...
+ ifs.getASuccessor*() = fcp // ... and `fcp` is reachable from the `if` statement
+ or
+ ifs.getCondition().getAChild() = fcp // the call is a direct child of the condition
+ )
+ )
+ or
+ exists(FunctionCall fctmp |
+ fctmp.getEnclosingFunction() = fnp and
+ globalValueNumber(fctmp.getAnArgument().getAChild*()) = globalValueNumber(fcp) // the result of `fcp` is passed to another call
+ )
+ ) and
+ // inside the wrapper function there must be a function call to handle the error; every parameter of
+ // `fnp` must be an argument of `fcp` or of that call, or have a class (or reference-to-class) type
+ exists(CallUsedToHandleErrors fctmp |
+ fctmp.getEnclosingFunction() = fnp and
+ forall(int i | i in [0 .. fnp.getNumberOfParameters() - 1] |
+ fnp.getParameter(i).getAnAccess().getTarget() =
+ fcp.getAnArgument().(VariableAccess).getTarget() or
+ fnp.getParameter(i).getUnspecifiedType() instanceof Class or
+ fnp.getParameter(i).getUnspecifiedType().(ReferenceType).getBaseType() instanceof Class or
+ fnp.getParameter(i).getAnAccess().getTarget() =
+ fctmp.getAnArgument().(VariableAccess).getTarget()
+ )
+ )
+}
+
+from FunctionCall fc, Function fn
+where
+  conditionsOutsideWrapper(fc) and
+  fn != fc.getEnclosingFunction() and
+  exists(FunctionCall wrapped |
+    wrapped.getTarget() = fc.getTarget() and
+    conditionsForWrapper(wrapped, fn) and
+    conditionsInsideWrapper(wrapped, fn)
+  ) and
+  fn.getMetrics().getNumberOfCalls() < fc.getEnclosingFunction().getMetrics().getNumberOfCalls()
+select fc, "Consider changing the call to $@", fn, fn.getName()
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-1126/DeclarationOfVariableWithUnnecessarilyWideScope.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-1126/DeclarationOfVariableWithUnnecessarilyWideScope.ql
new file mode 100644
index 00000000000..e73f36145c6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-1126/DeclarationOfVariableWithUnnecessarilyWideScope.ql
@@ -0,0 +1,60 @@
+/**
+ * @name Errors When Using Variable Declaration Inside Loop
+ * @description Using variables with the same name is dangerous.
+ * However, such a situation inside the while loop can create an infinite loop exhausting resources.
+ * Requires the attention of developers.
+ * @kind problem
+ * @id cpp/errors-when-using-variable-declaration-inside-loop
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-1126
+ */
+
+import cpp
+
+/**
+ * A `while` loop whose condition accesses a variable that shares its name with a declaration inside the loop.
+ */
+class DangerousWhileLoop extends WhileStmt {
+  Expr exp;
+  Declaration dl;
+
+  DangerousWhileLoop() {
+    exp = this.getCondition().getAChild*() and
+    not exp instanceof ValueFieldAccess and
+    not exp instanceof PointerFieldAccess and
+    not exp.getParent*() instanceof FunctionCall and
+    dl.getName() = exp.(VariableAccess).getTarget().getName() and
+    dl.getParentScope().(BlockStmt).getParent*() = this
+  }
+
+  Declaration getDeclaration() { result = dl }
+
+  /** Holds if a variable accessed in the loop condition is assigned or incremented/decremented inside the loop. */
+  predicate isUseThisVariable() {
+    exists(Variable v |
+      v = this.getCondition().getAChild*().(VariableAccess).getTarget() and
+      (
+        exists(CrementOperation step |
+          step.getOperand().(VariableAccess).getTarget() = v and
+          this = step.getEnclosingStmt().getParentStmt*()
+        )
+        or
+        exists(Assignment assign |
+          this = assign.getEnclosingStmt().getParentStmt*() and
+          (
+            assign.getLValue().(VariableAccess).getTarget() = v
+            or
+            assign.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
+          )
+        )
+      )
+    )
+  }
+}
+
+from DangerousWhileLoop lp
+where not lp.isUseThisVariable() // no variable accessed in the loop condition is assigned or incremented/decremented inside the loop
+select lp.getDeclaration(), "A variable with this name is used in the $@ condition.", lp, "loop"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-120/MemoryUnsafeFunctionScan.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-120/MemoryUnsafeFunctionScan.ql
new file mode 100644
index 00000000000..dd5c389fdaf
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-120/MemoryUnsafeFunctionScan.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Scanf function without a specified length
+ * @description Use of one of the scanf functions without a specified length.
+ * @kind problem
+ * @problem.severity warning
+ * @id cpp/memory-unsafe-function-scan
+ * @tags reliability
+ * security
+ * external/cwe/cwe-120
+ */
+
+import cpp
+import semmle.code.cpp.commons.Scanf
+
+from FunctionCall call, ScanfFunction sff
+where
+  call.getTarget() = sff and
+  call.getArgument(sff.getFormatParameterIndex()).getValue().regexpMatch("(?s).*%l?s.*") // `(?s)`: `.` must also match newlines in the format string
+select call, "Dangerous use of one of the scanf functions"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-190/AllocMultiplicationOverflow.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-190/AllocMultiplicationOverflow.ql
new file mode 100644
index 00000000000..3a253854679
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-190/AllocMultiplicationOverflow.ql
@@ -0,0 +1,40 @@
+/**
+ * @name Multiplication result may overflow and be used in allocation
+ * @description Using a multiplication result that may overflow in the size of an allocation may lead to buffer overflows when the allocated memory is used.
+ * @kind path-problem
+ * @problem.severity warning
+ * @precision low
+ * @tags security
+ * correctness
+ * external/cwe/cwe-190
+ * external/cwe/cwe-128
+ * @id cpp/multiplication-overflow-in-alloc
+ */
+
+import cpp
+import semmle.code.cpp.models.interfaces.Allocation
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+class MultToAllocConfig extends DataFlow::Configuration {
+  MultToAllocConfig() { this = "MultToAllocConfig" }
+
+  override predicate isSource(DataFlow::Node node) {
+    // a multiplication none of whose operands has a constant value
+    exists(MulExpr product |
+      product = node.asExpr() and
+      not exists(product.getAnOperand().getValue())
+    )
+  }
+
+  override predicate isSink(DataFlow::Node node) {
+    // any expression within the size expression of an allocation
+    exists(AllocationExpr alloc | node.asExpr() = alloc.getSizeExpr().getAChild*())
+  }
+}
+
+from MultToAllocConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink) // flow from a `MultToAllocConfig` source to one of its sinks
+select sink, source, sink,
+ "Potentially overflowing value from $@ is used in the size of this allocation.", source,
+ "multiplication"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-273/PrivilegeDroppingOutoforder.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-273/PrivilegeDroppingOutoforder.ql
new file mode 100644
index 00000000000..7798203205a
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-273/PrivilegeDroppingOutoforder.ql
@@ -0,0 +1,101 @@
+/**
+ * @name LinuxPrivilegeDroppingOutoforder
+ * @description A syscall commonly associated with privilege dropping is being called out of order.
+ * Normally a process drops group ID and sets supplemental groups for the target user
+ * before setting the target user ID. This can have security impact if the return code
+ * from these methods is not checked.
+ * @kind problem
+ * @problem.severity recommendation
+ * @id cpp/drop-linux-privileges-outoforder
+ * @tags security
+ * external/cwe/cwe-273
+ * @precision medium
+ */
+
+import cpp
+
+predicate argumentMayBeRoot(Expr e) {
+  e.(VariableAccess).getTarget().getName().toLowerCase().matches("%root%") or
+  e.getValue() = "0"
+}
+
+class SetuidLikeFunctionCall extends FunctionCall {
+  SetuidLikeFunctionCall() {
+    // setuid/setresuid with the root user are false positives.
+    not argumentMayBeRoot(this.getArgument(0)) and
+    (this.getTarget().hasGlobalName("setuid") or this.getTarget().hasGlobalName("setresuid"))
+  }
+}
+
+class SetuidLikeWrapperCall extends FunctionCall {
+  SetuidLikeFunctionCall baseCall;
+
+  SetuidLikeWrapperCall() {
+    baseCall = this
+    or
+    // a call to a function that (transitively) contains a wrapper call for `baseCall`
+    exists(SetuidLikeWrapperCall inner | inner.getEnclosingFunction() = this.getTarget() |
+      inner.getBaseCall() = baseCall
+    )
+  }
+
+  SetuidLikeFunctionCall getBaseCall() { result = baseCall }
+}
+
+class CallBeforeSetuidFunctionCall extends FunctionCall {
+  CallBeforeSetuidFunctionCall() {
+    // setgid/setresgid/etc with the root group are false positives.
+    not argumentMayBeRoot(this.getArgument(0)) and
+    exists(Function callee | callee = this.getTarget() |
+      callee.hasGlobalName("setgid") or
+      callee.hasGlobalName("setresgid") or
+      // Compatibility may require skipping initgroups and setgroups return checks.
+      // A stricter best practice is to check the result and errno for EPERM.
+      callee.hasGlobalName("initgroups") or
+      callee.hasGlobalName("setgroups")
+    )
+  }
+}
+
+class CallBeforeSetuidWrapperCall extends FunctionCall {
+  CallBeforeSetuidFunctionCall baseCall;
+
+  CallBeforeSetuidWrapperCall() {
+    baseCall = this
+    or
+    // a call to a function that (transitively) contains a wrapper call for `baseCall`
+    exists(CallBeforeSetuidWrapperCall inner | inner.getEnclosingFunction() = this.getTarget() |
+      inner.getBaseCall() = baseCall
+    )
+  }
+
+  CallBeforeSetuidFunctionCall getBaseCall() { result = baseCall }
+}
+
+predicate setuidBeforeSetgid(
+  SetuidLikeWrapperCall setuidWrapper, CallBeforeSetuidWrapperCall setgidWrapper
+) {
+  setuidWrapper.getASuccessor+() = setgidWrapper
+}
+
+predicate isAccessed(FunctionCall fc) {
+  fc = any(Variable v).getAnAssignedValue()
+  or
+  exists(Operation op | op.isCondition() and op.getAChild() = fc)
+  or
+  // ignore pattern where result is intentionally ignored by a cast to void.
+  fc.hasExplicitConversion()
+}
+
+from CallBeforeSetuidFunctionCall groupCall, SetuidLikeFunctionCall uidCall, Function enclosing
+where
+  enclosing = groupCall.getEnclosingFunction() and
+  setuidBeforeSetgid(uidCall, groupCall) and
+  // Require the call return code to be used in a condition or assigned.
+  // This introduces false negatives where the return is checked but then
+  // errno == EPERM allows execution to continue.
+  not isAccessed(groupCall)
+select groupCall,
+  "This function is called within " + enclosing + ", and potentially after " +
+    "$@, and may not succeed. Be sure to check the return code and errno, otherwise permissions " +
+    "may not be dropped.", uidCall, uidCall.getTarget().getName()
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-359/PrivateCleartextWrite.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-359/PrivateCleartextWrite.ql
new file mode 100644
index 00000000000..205f17c06c9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-359/PrivateCleartextWrite.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Exposure of private information
+ * @description If private information is written to an external location, it may be accessible by
+ * unauthorized persons.
+ * @kind path-problem
+ * @problem.severity error
+ * @id cpp/private-cleartext-write
+ * @tags security
+ * external/cwe/cwe-359
+ */
+
+import cpp
+import experimental.semmle.code.cpp.security.PrivateCleartextWrite
+import experimental.semmle.code.cpp.security.PrivateCleartextWrite::PrivateCleartextWrite
+import DataFlow::PathGraph
+
+from WriteConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, DataFlow::Node target
+where cfg.hasFlowPath(source, sink) and target = sink.getNode()
+select target, source, sink,
+  "This write into the external location '" + target +
+    "' may contain unencrypted data from $@", source, "this source."
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-401/MemoryLeakOnFailedCallToRealloc.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-401/MemoryLeakOnFailedCallToRealloc.ql
new file mode 100644
index 00000000000..cf0afc64013
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-401/MemoryLeakOnFailedCallToRealloc.ql
@@ -0,0 +1,61 @@
+/**
+ * @name Memory leak on failed call to realloc
+ * @description The expression mem = realloc (mem, size) is potentially dangerous, if the call fails, we will lose the pointer to the memory block.
+ * We recommend storing the result in a temporary variable and eliminating memory leak.
+ * @kind problem
+ * @id cpp/memory-leak-on-failed-call-to-realloc
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-401
+ */
+
+import cpp
+import semmle.code.cpp.controlflow.Guards
+import semmle.code.cpp.valuenumbering.HashCons
+
+/**
+ * A call to a function that may fail to return to its caller (for example `exit`).
+ */
+class CallMayNotReturn extends FunctionCall {
+  CallMayNotReturn() {
+    // the call has no successor in the control-flow graph
+    not exists(this.(ControlFlowNode).getASuccessor())
+    or
+    // the callee itself contains a call that may not return
+    this.getTarget() = any(CallMayNotReturn inner).getEnclosingFunction()
+  }
+}
+
+/**
+ * A `realloc` call assigned back to its own first argument, as in `v = realloc(v, size)`.
+ */
+class ReallocCallLeak extends FunctionCall {
+  Variable v;
+
+  ReallocCallLeak() {
+    this.getTarget().hasGlobalOrStdName("realloc") and
+    this.getArgument(0) = v.getAnAccess() and
+    // the assignment target hash-conses to the same value as the first argument
+    exists(AssignExpr assign | assign.getRValue() = this |
+      hashCons(assign.getLValue()) = hashCons(this.getArgument(0))
+    )
+  }
+
+  /**
+   * Holds if a guard condition reachable from this call mentions `v` and controls a
+   * block containing a call that may not return, for example a call to `exit()`.
+   */
+  predicate mayHandleByTermination() {
+    exists(GuardCondition check, CallMayNotReturn terminator |
+      check = this.(ControlFlowNode).getASuccessor*() and
+      check.getAChild*() = v.getAnAccess() and
+      check.controls(terminator.getBasicBlock(), _)
+    )
+  }
+}
+
+from ReallocCallLeak leak
+where not leak.mayHandleByTermination()
+select leak, "possible loss of original pointer on unsuccessful call realloc"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-415/DoubleFree.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-415/DoubleFree.ql
new file mode 100644
index 00000000000..0544c2aefd5
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-415/DoubleFree.ql
@@ -0,0 +1,43 @@
+/**
+ * @name Errors When Double Free
+ * @description Freeing a previously allocated resource twice can lead to various vulnerabilities in the program.
+ * @kind problem
+ * @id cpp/double-free
+ * @problem.severity warning
+ * @precision medium
+ * @tags security
+ * external/cwe/cwe-415
+ */
+
+import cpp
+
+from FunctionCall fc, FunctionCall fc2, LocalScopeVariable v
+where
+  fc != fc2 and
+  fc2 = fc.getASuccessor*() and
+  freeCall(fc, v.getAnAccess()) and
+  freeCall(fc2, v.getAnAccess()) and
+  // `v` is neither assigned nor has its address taken on a path between the two calls
+  not exists(Expr exptmp |
+    (exptmp = v.getAnAssignedValue() or exptmp.(AddressOfExpr).getOperand() = v.getAnAccess()) and
+    exptmp = fc.getASuccessor*() and
+    exptmp = fc2.getAPredecessor*()
+  ) and
+  // no call other than a deallocation receives `v` as an argument between the two calls
+  not exists(FunctionCall fctmp |
+    not fctmp instanceof DeallocationExpr and
+    fctmp = fc.getASuccessor*() and
+    fctmp = fc2.getAPredecessor*() and
+    fctmp.getAnArgument().(VariableAccess).getTarget() = v
+  ) and
+  // a `realloc` first call must be outside any `if` and unassigned to an `if`-tested variable
+  (
+    not fc.getTarget().hasGlobalOrStdName("realloc")
+    or
+    not fc.getParent*() instanceof IfStmt and
+    not exists(IfStmt iftmp |
+      iftmp.getCondition().getAChild*().(VariableAccess).getTarget().getAnAssignedValue() = fc
+    )
+  )
+select fc2.getArgument(0),
+  "This pointer may have already been cleared in the line " + fc.getLocation().getStartLine() + "."
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-561/FindIncorrectlyUsedSwitch.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-561/FindIncorrectlyUsedSwitch.ql
new file mode 100644
index 00000000000..7fc26e54ae9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-561/FindIncorrectlyUsedSwitch.ql
@@ -0,0 +1,153 @@
+/**
+ * @name Incorrect switch statement
+ * @description --Finds dangerous uses of a switch statement.
+ * --For example, when the range of values for a condition does not cover all of the selection values.
+ * @kind problem
+ * @id cpp/operator-find-incorrectly-used-switch
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-561
+ * external/cwe/cwe-691
+ * external/cwe/cwe-478
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+/** Holds if neither bound computed for `exp` equals one of the limit-like values listed below. */
+predicate isRealRange(Expr exp) {
+ upperBound(exp).toString() != "18446744073709551616" and // 2^64
+ upperBound(exp).toString() != "9223372036854775807" and // 2^63 - 1
+ upperBound(exp).toString() != "4294967295" and // 2^32 - 1
+ upperBound(exp).toString() != "Infinity" and
+ upperBound(exp).toString() != "NaN" and
+ lowerBound(exp).toString() != "-9223372036854775808" and // -2^63
+ lowerBound(exp).toString() != "-4294967296" and // -2^32
+ lowerBound(exp).toString() != "-Infinity" and
+ lowerBound(exp).toString() != "NaN" and
+ upperBound(exp) != 2147483647 and // 2^31 - 1; every upper value below is also 2^k - 1
+ upperBound(exp) != 268435455 and
+ upperBound(exp) != 33554431 and
+ upperBound(exp) != 8388607 and
+ upperBound(exp) != 65535 and
+ upperBound(exp) != 32767 and
+ upperBound(exp) != 255 and
+ upperBound(exp) != 127 and
+ upperBound(exp) != 63 and
+ upperBound(exp) != 31 and
+ upperBound(exp) != 15 and
+ upperBound(exp) != 7 and
+ lowerBound(exp) != -2147483648 and // -2^31; every lower value below is also -2^k
+ lowerBound(exp) != -268435456 and
+ lowerBound(exp) != -33554432 and
+ lowerBound(exp) != -8388608 and
+ lowerBound(exp) != -65536 and
+ lowerBound(exp) != -32768 and
+ lowerBound(exp) != -128
+}
+
+/** Holds if `swtmp` has a non-constant expression and some non-zero case bound lies strictly outside that expression's range. */
+predicate isNotAllSelected(SwitchStmt swtmp) {
+ not swtmp.getExpr().isConstant() and
+ exists(int i |
+ i != 0 and
+ (
+ i = lowerBound(swtmp.getASwitchCase().getExpr()) and
+ upperBound(swtmp.getExpr()) < i // a case value above everything the condition can reach
+ or
+ (
+ i = upperBound(swtmp.getASwitchCase().getExpr()) or
+ i = upperBound(swtmp.getASwitchCase().getEndExpr())
+ ) and
+ lowerBound(swtmp.getExpr()) > i // a case value below everything the condition can reach
+ )
+ )
+}
+
+/** Holds if `swtmp` has no `default` case and the bounds of its expression are not both equal to bounds of its cases. */
+predicate isConditionBig(SwitchStmt swtmp) {
+ not swtmp.hasDefaultCase() and
+ not exists(int iu, int il |
+ (
+ iu = upperBound(swtmp.getASwitchCase().getExpr()) or
+ iu = upperBound(swtmp.getASwitchCase().getEndExpr())
+ ) and
+ upperBound(swtmp.getExpr()) = iu and // the condition's upper bound coincides with a case's upper bound
+ (
+ il = lowerBound(swtmp.getASwitchCase().getExpr()) or
+ il = lowerBound(swtmp.getASwitchCase().getEndExpr())
+ ) and
+ lowerBound(swtmp.getExpr()) = il // the condition's lower bound coincides with a case's lower bound
+ )
+}
+
+/** Holds if `swtmp` has no `default` case and its body contains a label, targeted by no `goto`, whose first three characters resemble `default` or `case`. */
+predicate isWrongLableName(SwitchStmt swtmp) {
+ not swtmp.hasDefaultCase() and
+ exists(LabelStmt lb |
+ (
+ (
+ lb.getName().charAt(0) = "d" or
+ lb.getName().charAt(0) = "c"
+ ) and
+ (
+ lb.getName().charAt(1) = "e" or
+ lb.getName().charAt(1) = "a"
+ ) and
+ (
+ lb.getName().charAt(2) = "f" or
+ lb.getName().charAt(2) = "s"
+ )
+ ) and
+ lb.getEnclosingStmt().getParentStmt*() = swtmp.getStmt() and // the label is nested in the switch body
+ not exists(GotoStmt gs | gs.getName() = lb.getName()) // no `goto` anywhere uses this label name
+ )
+}
+
+/** Holds if the body of `swtmp` contains an expression that is outside every loop nested in that body and belongs to no switch case. */
+predicate isCodeBeforeCase(SwitchStmt swtmp) {
+ exists(Expr exp |
+ exp.getEnclosingStmt().getParentStmt*() = swtmp.getStmt() and
+ not exists(Loop lp |
+ exp.getEnclosingStmt().getParentStmt*() = lp and
+ lp.getEnclosingStmt().getParentStmt*() = swtmp.getStmt()
+ ) and
+ not exists(Stmt sttmp, SwitchCase sctmp |
+ sttmp = swtmp.getASwitchCase().getAStmt() and
+ sctmp = swtmp.getASwitchCase() and
+ (
+ exp.getEnclosingStmt().getParentStmt*() = sttmp or // nested in a statement of some case
+ exp.getEnclosingStmt() = sctmp // or enclosed directly by a case statement itself
+ )
+ )
+ )
+}
+
+from SwitchStmt sw, string msg
+where
+ isRealRange(sw.getExpr()) and
+ lowerBound(sw.getExpr()) != upperBound(sw.getExpr()) and
+ lowerBound(sw.getExpr()) != 0 and
+ not exists(Expr cexp |
+ cexp = sw.getASwitchCase().getExpr() and not isRealRange(cexp)
+ or
+ cexp = sw.getASwitchCase().getEndExpr() and not isRealRange(cexp)
+ ) and
+ not exists(Expr exptmp |
+ exptmp = sw.getExpr().getAChild*() and
+ not exptmp.isConstant() and
+ not isRealRange(exptmp)
+ ) and
+ (sw.getASwitchCase().terminatesInBreakStmt() or sw.getASwitchCase().terminatesInReturnStmt()) and
+ (
+ isNotAllSelected(sw) and msg = "The range of condition values is less than the selection."
+ or
+ isConditionBig(sw) and msg = "The range of condition values is wider than the choices."
+ )
+ or // `and` binds tighter than `or`: the checks below do not depend on the range conditions above
+ isWrongLableName(sw) and msg = "Possibly erroneous label name."
+ or
+ isCodeBeforeCase(sw) and msg = "Code before case will not be executed."
+select sw, msg
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-675/DoubleRelease.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-675/DoubleRelease.ql
new file mode 100644
index 00000000000..474f00acc55
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-675/DoubleRelease.ql
@@ -0,0 +1,142 @@
+/**
+ * @name Errors When Double Release
+ * @description Double release of the descriptor can lead to a crash of the program.
+ * @kind problem
+ * @id cpp/double-release
+ * @problem.severity warning
+ * @precision medium
+ * @tags security
+ * external/cwe/cwe-675
+ * external/cwe/cwe-666
+ */
+
+import cpp
+import semmle.code.cpp.commons.File
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.valuenumbering.HashCons
+
+/**
+ * A call to a function that may fail to return to its caller (for example `exit`).
+ */
+class CallMayNotReturn extends FunctionCall {
+  CallMayNotReturn() {
+    // the call has no successor in the control-flow graph
+    not exists(this.(ControlFlowNode).getASuccessor())
+    or
+    this.getTarget() = any(CallMayNotReturn inner).getEnclosingFunction()
+    or
+    // a `throw` expression is an immediate control-flow successor of the call
+    this.(ControlFlowNode).getASuccessor() instanceof ThrowExpr
+  }
+}
+
+/** Holds if the variable in argument 0 of `fc0` (and, for a field access not through `this`, its qualifier variable) is neither assigned nor address-taken between `fc1` and `fc2`. */
+pragma[inline]
+predicate checkChangeVariable(FunctionCall fc0, ControlFlowNode fc1, ControlFlowNode fc2) {
+ not exists(Expr exptmp |
+ (
+ exptmp = fc0.getArgument(0).(VariableAccess).getTarget().getAnAssignedValue() or
+ exptmp.(AddressOfExpr).getOperand() =
+ fc0.getArgument(0).(VariableAccess).getTarget().getAnAccess()
+ ) and
+ exptmp = fc1.getASuccessor*() and // reachable from `fc1` ...
+ exptmp = fc2.getAPredecessor*() // ... and able to reach `fc2`
+ ) and
+ (
+ (
+ not fc0.getArgument(0) instanceof PointerFieldAccess and
+ not fc0.getArgument(0) instanceof ValueFieldAccess
+ or
+ fc0.getArgument(0).(VariableAccess).getQualifier() instanceof ThisExpr
+ )
+ or
+ not exists(Expr exptmp |
+ (
+ exptmp =
+ fc0.getArgument(0)
+ .(VariableAccess)
+ .getQualifier()
+ .(VariableAccess)
+ .getTarget()
+ .getAnAssignedValue() or
+ exptmp.(AddressOfExpr).getOperand() =
+ fc0.getArgument(0)
+ .(VariableAccess)
+ .getQualifier()
+ .(VariableAccess)
+ .getTarget()
+ .getAnAccess()
+ ) and
+ exptmp = fc1.getASuccessor*() and
+ exptmp = fc2.getAPredecessor*()
+ )
+ )
+}
+
+/** Holds if `fc` is a close call and `checkChangeVariable` holds from `fc` to the node of its enclosing function (that is, after the call). */
+predicate closeReturn(FunctionCall fc) {
+  checkChangeVariable(fc, fc, fc.getEnclosingFunction()) and
+  fcloseCall(fc, _)
+}
+
+/** Holds if `fc` is a close call and `checkChangeVariable` holds from the entry point of its enclosing function to `fc` (that is, before the call). */
+predicate closeWithoutChangeBefore(FunctionCall fc) {
+  checkChangeVariable(fc, fc.getEnclosingFunction().getEntryPoint(), fc) and
+  fcloseCall(fc, _)
+}
+
+/** Holds if `fc` and `fc1` lie in different functions, and a call to the second function is reachable from a call to the first with the argument of `fc` unchanged in between. */
+predicate callInOtherFunctions(FunctionCall fc, FunctionCall fc1) {
+  fc.getEnclosingFunction() != fc1.getEnclosingFunction() and
+  exists(FunctionCall outer, FunctionCall outer1 |
+    outer = fc.getEnclosingFunction().getACallToThisFunction() and
+    outer1 = fc1.getEnclosingFunction().getACallToThisFunction() and
+    outer1 = outer.getASuccessor*() and
+    checkChangeVariable(fc, outer, outer1)
+  )
+}
+
+/** Holds if `fc` and `fc1` are distinct close calls, `fc1` is reachable from `fc`, and the argument of `fc` is unchanged in between. */
+predicate interDoubleCloseFunctions(FunctionCall fc, FunctionCall fc1) {
+  fc1 != fc and
+  fc1 = fc.getASuccessor*() and
+  fcloseCall(fc, _) and
+  fcloseCall(fc1, _) and
+  checkChangeVariable(fc, fc, fc1)
+}
+
+/** Holds if argument 0 of `fc` and argument 0 of `fc1` have the same global value number, or access the same variable under one of the field-access conditions below. */
+predicate similarArguments(FunctionCall fc, FunctionCall fc1) {
+ globalValueNumber(fc.getArgument(0)) = globalValueNumber(fc1.getArgument(0))
+ or
+ fc.getArgument(0).(VariableAccess).getTarget() = fc1.getArgument(0).(VariableAccess).getTarget() and
+ (
+ not fc.getArgument(0) instanceof PointerFieldAccess and // not a field access at all ...
+ not fc.getArgument(0) instanceof ValueFieldAccess
+ or
+ fc.getArgument(0).(VariableAccess).getQualifier() instanceof ThisExpr // ... or qualified by `this`
+ )
+ or
+ fc.getArgument(0).(VariableAccess).getTarget() = fc1.getArgument(0).(VariableAccess).getTarget() and
+ (
+ fc.getArgument(0) instanceof PointerFieldAccess or
+ fc.getArgument(0) instanceof ValueFieldAccess
+ ) and
+ hashCons(fc.getArgument(0)) = hashCons(fc1.getArgument(0)) // field accesses must also hash-cons equal
+}
+
+from FunctionCall fc, FunctionCall fc1
+where
+ not exists(CallMayNotReturn fctmp | fctmp = fc.getASuccessor*()) and
+ not exists(IfStmt ifs | ifs.getCondition().getAChild*() = fc) and
+ (
+ // detecting a repeated call when the two calls are in different functions
+ closeReturn(fc) and
+ closeWithoutChangeBefore(fc1) and
+ callInOtherFunctions(fc, fc1)
+ or
+ // detecting a repeated call when the second call is a control-flow successor of the first
+ interDoubleCloseFunctions(fc, fc1)
+ ) and
+ similarArguments(fc, fc1)
+select fc, "Second call to the $@ function is possible.", fc1, fc1.getTarget().getName()
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-691/InsufficientControlFlowManagementAfterRefactoringTheCode.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-691/InsufficientControlFlowManagementAfterRefactoringTheCode.ql
new file mode 100644
index 00000000000..163305dd039
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-691/InsufficientControlFlowManagementAfterRefactoringTheCode.ql
@@ -0,0 +1,119 @@
+/**
+ * @name Errors After Refactoring
+ * @description --In some situations, after code refactoring, parts of the old constructs may remain.
+ * --They are correctly accepted by the compiler, but can critically affect program execution.
+ * --For example, if you switch from `do {...} while ();` to `while () {...}` with errors, you run the risk of running out of resources.
+ * --These code snippets look suspicious and require the developer's attention.
+ * @kind problem
+ * @id cpp/errors-after-refactoring
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-691
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.HashCons
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/**
+ * A `while` loop with an empty body, reached two control-flow steps after a `break` inside a `while` or `for` loop whose condition hash-conses equal to its own.
+ */
+class UsingWhileAfterWhile extends WhileStmt {
+ /**
+ * Using a loop call after another loop has finished running can result in an eternal loop.
+ * For example, perhaps as a result of refactoring, the `do ... while ()` loop was incorrectly corrected.
+ * Even in the case of deliberate use of such an expression, it is better to correct it.
+ */
+ UsingWhileAfterWhile() {
+ exists(WhileStmt wh1 |
+ wh1.getStmt().getAChild*().(BreakStmt).(ControlFlowNode).getASuccessor().getASuccessor() =
+ this and
+ hashCons(wh1.getCondition()) = hashCons(this.getCondition()) and
+ this.getStmt() instanceof EmptyStmt
+ )
+ or
+ exists(ForStmt fr1 |
+ fr1.getStmt().getAChild*().(BreakStmt).(ControlFlowNode).getASuccessor().getASuccessor() =
+ this and
+ hashCons(fr1.getCondition()) = hashCons(this.getCondition()) and
+ this.getStmt() instanceof EmptyStmt
+ )
+ }
+}
+
+/**
+ * A binary arithmetic operation nested in an `if` statement, outside any comparison, call, assignment, array or remainder expression.
+ */
+class UsingArithmeticInComparison extends BinaryArithmeticOperation {
+ /**
+ * Using arithmetic operations in a comparison operation can be dangerous.
+ * For example, part of the comparison may have been lost as a result of refactoring.
+ * Even if you deliberately use such an expression, it is better to add an explicit comparison.
+ */
+ UsingArithmeticInComparison() {
+ this.getParent*() instanceof IfStmt and
+ not this.getAChild*().isConstant() and
+ not this.getParent*() instanceof Call and
+ not this.getParent*() instanceof AssignExpr and
+ not this.getParent*() instanceof ArrayExpr and
+ not this.getParent*() instanceof RemExpr and
+ not this.getParent*() instanceof AssignBitwiseOperation and
+ not this.getParent*() instanceof AssignArithmeticOperation and
+ not this.getParent*() instanceof EqualityOperation and
+ not this.getParent*() instanceof RelationalOperation
+ }
+
+ /** Holds when this expression, or one of its ancestors, is nested in the body of some loop. */
+ predicate insideTheLoop() { exists(Loop lp | lp.getStmt().getAChild*() = this.getParent*()) }
+
+ /** Holds when this expression is, or is nested in, a `BinaryBitwiseOperation` or a `NotExpr`. */
+ predicate workingWithValue() {
+ this.getParent*() instanceof BinaryBitwiseOperation or
+ this.getParent*() instanceof NotExpr
+ }
+
+ /** Holds when this expression or one of its sub-expressions has a fully converted type that is a `DerivedType`. */
+ predicate workingWithPointer() {
+ this.getAChild*().getFullyConverted().getType() instanceof DerivedType
+ }
+
+ /** Holds when some comparison with a constant `0` operand is, or contains, an expression equal to this one by global value number or hash-cons. */
+ predicate compareWithZero() {
+ exists(Expr exp |
+ exp instanceof ComparisonOperation and
+ (
+ globalValueNumber(exp.getAChild*()) = globalValueNumber(this) or
+ hashCons(exp.getAChild*()) = hashCons(this)
+ ) and
+ (
+ exp.(ComparisonOperation).getLeftOperand().getValue() = "0" or
+ exp.(ComparisonOperation).getRightOperand().getValue() = "0"
+ )
+ )
+ }
+
+ /** Holds when some comparison (with any operands) is, or contains, an expression equal to this one by global value number or hash-cons. */
+ predicate compareWithOutZero() {
+ exists(Expr exp |
+ exp instanceof ComparisonOperation and
+ (
+ globalValueNumber(exp.getAChild*()) = globalValueNumber(this) or
+ hashCons(exp.getAChild*()) = hashCons(this)
+ )
+ )
+ }
+}
+
+from Expr exp
+where
+  exists(UsingArithmeticInComparison arith | arith = exp |
+    arith.compareWithOutZero() and not arith.compareWithZero() and
+    not arith.insideTheLoop() and
+    not arith.workingWithPointer() and
+    not arith.workingWithValue()
+  )
+  or
+  exp = any(UsingWhileAfterWhile loop).getCondition()
+select exp, "this expression needs your attention"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-691/InsufficientControlFlowManagementWhenUsingBitOperations.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-691/InsufficientControlFlowManagementWhenUsingBitOperations.ql
new file mode 100644
index 00000000000..72d7625b517
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-691/InsufficientControlFlowManagementWhenUsingBitOperations.ql
@@ -0,0 +1,78 @@
+/**
+ * @name Errors When Using Bit Operations
+ * @description Unlike the binary operations `||` and `&&`, there is no sequence point after evaluating an
+ * operand of a bitwise operation like `|` or `&`. If left-to-right evaluation is expected this may be confusing.
+ * @kind problem
+ * @id cpp/errors-when-using-bit-operations
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-691
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/**
+ * A bitwise binary operation whose right operand is a function call, used outside the contexts excluded below.
+ * For example: `if(intA>0 & intA<10 & charBuf&myFunc(charBuf[intA]))`.
+ * In this case, the function will be called in any case, and even the sequence of the call is not guaranteed.
+ */
+class DangerousBitOperations extends BinaryBitwiseOperation {
+ FunctionCall bfc;
+
+ /**
+ * The assignment indicates the conscious use of the bit operator.
+ * Use in comparison, conversion, or return value indicates conscious use of the bit operator.
+ * The use of shifts and bitwise operations on any element of an expression indicates a conscious use of the bitwise operator.
+ */
+ DangerousBitOperations() {
+ bfc = this.getRightOperand() and
+ not this.getParent*() instanceof Assignment and
+ not this.getParent*() instanceof Initializer and
+ not this.getParent*() instanceof ReturnStmt and
+ not this.getParent*() instanceof EqualityOperation and
+ not this.getParent*() instanceof UnaryLogicalOperation and
+ not this.getParent*() instanceof BinaryLogicalOperation and
+ not this.getAChild*() instanceof BitwiseXorExpr and
+ not this.getAChild*() instanceof LShiftExpr and
+ not this.getAChild*() instanceof RShiftExpr
+ }
+
+ /** Holds when some call to the same function as `bfc` occurs under a logical operation and its parent is neither a comparison nor a bitwise operation. */
+ predicate useInLogicalOperations() {
+ exists(BinaryLogicalOperation blop, Expr exp |
+ blop.getAChild*() = exp and
+ exp.(FunctionCall).getTarget() = bfc.getTarget() and
+ not exp.getParent() instanceof ComparisonOperation and
+ not exp.getParent() instanceof BinaryBitwiseOperation
+ )
+ }
+
+ /** Holds when `bfc` has a qualifier or targets an operator or built-in function, or when this expression occurs inside an argument of some call. */
+ predicate useInOtherCalls() {
+ bfc.hasQualifier() or
+ bfc.getTarget() instanceof Operator or
+ exists(FunctionCall fc | fc.getAnArgument().getAChild*() = this) or
+ bfc.getTarget() instanceof BuiltInFunction
+ }
+
+ /** Holds when the left operand is not a call and it, or one of its sub-expressions, has the same global value number as an argument of `bfc`. */
+ predicate dangerousArgumentChecking() {
+ not this.getLeftOperand() instanceof Call and
+ globalValueNumber(this.getLeftOperand().getAChild*()) = globalValueNumber(bfc.getAnArgument())
+ }
+
+ /** Holds when the left operand is, or contains, a function call. */
+ predicate functionCallsInBitsExpression() {
+ this.getLeftOperand().getAChild*() instanceof FunctionCall
+ }
+}
+
+from DangerousBitOperations bitOp
+where
+  bitOp.useInLogicalOperations() and
+  not bitOp.useInOtherCalls() and
+  (bitOp.dangerousArgumentChecking() or not bitOp.functionCallsInBitsExpression())
+select bitOp, "This bitwise operation appears in a context where a Boolean operation is expected."
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-703/FindIncorrectlyUsedExceptions.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-703/FindIncorrectlyUsedExceptions.ql
new file mode 100644
index 00000000000..34e055534e6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-703/FindIncorrectlyUsedExceptions.ql
@@ -0,0 +1,50 @@
+/**
+ * @name Operator Find Incorrectly Used Exceptions
+ * @description --Finding places for the dangerous use of exceptions.
+ * @kind problem
+ * @id cpp/operator-find-incorrectly-used-exceptions
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-703
+ * external/cwe/cwe-248
+ * external/cwe/cwe-390
+ */
+
+import cpp
+
+from FunctionCall fc, string msg
+where
+ exists(ThrowExpr texp | // first pattern: a `throw` located in the function that `fc` calls
+ texp.getEnclosingFunction() = fc.getTarget() and
+ (
+ fc.getTarget().hasGlobalOrStdName("DllMain") and
+ not exists(TryStmt ts |
+ texp.getEnclosingStmt().getParentStmt*() = ts.getStmt() and
+ not ts.getACatchClause().isEmpty()
+ ) and
+ msg = "DllMain contains an exeption not wrapped in a try..catch block."
+ or
+ texp.getExpr().isParenthesised() and // the thrown operand is a parenthesised comma expression of two constants
+ texp.getExpr().(CommaExpr).getLeftOperand().isConstant() and
+ texp.getExpr().(CommaExpr).getRightOperand().isConstant() and
+ msg = "There is an exception in the function that requires your attention."
+ )
+ )
+ or // second pattern: a constructor call of an exception-derived class, outside the contexts excluded below
+ fc.getTarget() instanceof Constructor and
+ (
+ fc.getTargetType().(Class).getABaseClass+().hasGlobalOrStdName("exception") or
+ fc.getTargetType().(Class).getABaseClass+().hasGlobalOrStdName("CException")
+ ) and
+ not fc.isInMacroExpansion() and
+ not exists(ThrowExpr texp | fc.getEnclosingStmt() = texp.getEnclosingStmt()) and
+ not exists(FunctionCall fctmp | fctmp.getAnArgument() = fc) and
+ not fc instanceof ConstructorDirectInit and
+ not fc.getEnclosingStmt() instanceof DeclStmt and
+ not fc instanceof ConstructorDelegationInit and
+ not fc.getParent() instanceof Initializer and
+ not fc.getParent() instanceof AllocationExpr and
+ msg = "This object does not generate an exception."
+select fc, msg
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-758/UndefinedOrImplementationDefinedBehavior.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-758/UndefinedOrImplementationDefinedBehavior.ql
new file mode 100644
index 00000000000..bafe3d13b84
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-758/UndefinedOrImplementationDefinedBehavior.ql
@@ -0,0 +1,167 @@
+/**
+ * @name Errors Of Undefined Program Behavior
+ * @description --In some situations, the code constructs used may be executed in the wrong order in which the developer designed them.
+ * --For example, if you call multiple functions as part of a single expression, and the functions have the ability to modify a shared resource, then the sequence in which the resource is changed can be unpredictable.
+ * --These code snippets look suspicious and require the developer's attention.
+ * @kind problem
+ * @id cpp/errors-of-undefined-program-behavior
+ * @problem.severity warning
+ * @precision medium
+ * @tags security
+ * external/cwe/cwe-758
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.HashCons
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+/**
+ * An expression paired with a distinct sibling expression `exp2` that has the same parent.
+ */
+class ExpressionsOfTheSameLevel extends Expr {
+ Expr exp2;
+
+ ExpressionsOfTheSameLevel() {
+ this != exp2 and
+ this.getParent() = exp2.getParent()
+ }
+
+ /** Holds if this is an unqualified function call, `exp2` is or contains a function call, and the parent is not an `Operator`. */
+ predicate expressionCall() {
+ this instanceof FunctionCall and
+ exp2.getAChild*() instanceof FunctionCall and
+ not this.getParent() instanceof Operator and
+ not this.(FunctionCall).hasQualifier()
+ }
+
+ /** Holds if this is a call to `close`, `free` or `fclose` that shares an argument (by value number or hash-cons) with a call in `exp2`. */
+ predicate existsCloseOrFreeCall() {
+ (
+ globalValueNumber(this.(FunctionCall).getAnArgument()) =
+ globalValueNumber(exp2.getAChild*().(FunctionCall).getAnArgument()) or
+ hashCons(this.(FunctionCall).getAnArgument()) =
+ hashCons(exp2.getAChild*().(FunctionCall).getAnArgument())
+ ) and
+ (
+ this.(FunctionCall).getTarget().hasGlobalOrStdName("close") or
+ this.(FunctionCall).getTarget().hasGlobalOrStdName("free") or
+ this.(FunctionCall).getTarget().hasGlobalOrStdName("fclose")
+ )
+ }
+
+ /** Holds if this call and a call in `exp2` share a non-constant argument whose parameter is assigned to, or is a `memcpy` destination, in both callees. */
+ predicate generalArgumentDerivedType() {
+ exists(Parameter prt1, Parameter prt2, AssignExpr aet1, AssignExpr aet2, int i, int j |
+ not this.(FunctionCall).getArgument(i).isConstant() and
+ hashCons(this.(FunctionCall).getArgument(i)) =
+ hashCons(exp2.getAChild*().(FunctionCall).getArgument(j)) and
+ prt1 = this.(FunctionCall).getTarget().getParameter(i) and
+ prt2 = exp2.getAChild*().(FunctionCall).getTarget().getParameter(j) and
+ prt1.getType() instanceof DerivedType and
+ (
+ aet1 = this.(FunctionCall).getTarget().getEntryPoint().getASuccessor*() and
+ (
+ aet1.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() =
+ prt1.getAnAccess().getTarget() or
+ aet1.getLValue().(VariableAccess).getTarget() = prt1.getAnAccess().getTarget()
+ )
+ or
+ exists(FunctionCall fc1 |
+ fc1.getTarget().hasGlobalName("memcpy") and
+ fc1.getArgument(0).(VariableAccess).getTarget() = prt1.getAnAccess().getTarget() and
+ fc1 = this.(FunctionCall).getTarget().getEntryPoint().getASuccessor*()
+ )
+ ) and
+ (
+ aet2 = exp2.getAChild*().(FunctionCall).getTarget().getEntryPoint().getASuccessor*() and
+ (
+ aet2.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() =
+ prt2.getAnAccess().getTarget() or
+ aet2.getLValue().(VariableAccess).getTarget() = prt2.getAnAccess().getTarget()
+ )
+ or
+ exists(FunctionCall fc1 |
+ fc1.getTarget().hasGlobalName("memcpy") and
+ fc1.getArgument(0).(VariableAccess).getTarget() = prt2.getAnAccess().getTarget() and
+ fc1 = exp2.(FunctionCall).getTarget().getEntryPoint().getASuccessor*() // NOTE(review): sibling lines use `exp2.getAChild*().(FunctionCall)`; confirm `exp2.(FunctionCall)` is intended
+ )
+ )
+ )
+ }
+
+ /** Holds if the callees of this call and of `exp2` both write the same global variable, by assignment or as a `memcpy` destination. */
+ predicate generalGlobalArgument() {
+ exists(Declaration dl, AssignExpr aet1, AssignExpr aet2 |
+ dl instanceof GlobalVariable and
+ (
+ (
+ aet1.getLValue().(Access).getTarget() = dl or
+ aet1.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = dl
+ ) and
+ aet1 = this.(FunctionCall).getTarget().getEntryPoint().getASuccessor*() and
+ not aet1.getRValue().isConstant()
+ or
+ exists(FunctionCall fc1 |
+ fc1.getTarget().hasGlobalName("memcpy") and
+ fc1.getArgument(0).(VariableAccess).getTarget() = dl and
+ fc1 = this.(FunctionCall).getTarget().getEntryPoint().getASuccessor*()
+ )
+ ) and
+ (
+ (
+ aet2.getLValue().(Access).getTarget() = dl or
+ aet2.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = dl
+ ) and
+ aet2 = exp2.(FunctionCall).getTarget().getEntryPoint().getASuccessor*()
+ or
+ exists(FunctionCall fc1 |
+ fc1.getTarget().hasGlobalName("memcpy") and
+ fc1.getArgument(0).(VariableAccess).getTarget() = dl and
+ fc1 = exp2.(FunctionCall).getTarget().getEntryPoint().getASuccessor*()
+ )
+ )
+ )
+ }
+
+ /** Holds if the parent is not a binary logical operation, conditional expression, loop or comma expression. */
+ predicate orderOfActionExpressions() {
+ not this.getParent() instanceof BinaryLogicalOperation and
+ not this.getParent() instanceof ConditionalExpr and
+ not this.getParent() instanceof Loop and
+ not this.getParent() instanceof CommaExpr
+ }
+
+ /** Holds if this and `exp2` involve the same operand (by hash-cons) through an increment/decrement or through assignments to the same lvalue. */
+ predicate dangerousCrementChanges() {
+ hashCons(this.(CrementOperation).getOperand()) = hashCons(exp2.(CrementOperation).getOperand())
+ or
+ hashCons(this.(CrementOperation).getOperand()) = hashCons(exp2)
+ or
+ hashCons(this.(CrementOperation).getOperand()) = hashCons(exp2.(ArrayExpr).getArrayOffset())
+ or
+ hashCons(this.(Assignment).getLValue()) = hashCons(exp2.(Assignment).getLValue())
+ or
+ not this.getAChild*() instanceof Call and
+ (
+ hashCons(this.getAChild*().(CrementOperation).getOperand()) = hashCons(exp2) or
+ hashCons(this.getAChild*().(CrementOperation).getOperand()) =
+ hashCons(exp2.(Assignment).getLValue())
+ )
+ }
+}
+
+from ExpressionsOfTheSameLevel eots
+where
+  eots.orderOfActionExpressions() and
+  (
+    eots.dangerousCrementChanges()
+    or
+    eots.expressionCall() and
+    (
+      eots.existsCloseOrFreeCall() or
+      eots.generalGlobalArgument() or
+      eots.generalArgumentDerivedType()
+    )
+  )
+select eots,
+  "This expression may have undefined behavior, because the order of evaluation is not specified."
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-783/OperatorPrecedenceLogicErrorWhenUseBitwiseOrLogicalOperations.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-783/OperatorPrecedenceLogicErrorWhenUseBitwiseOrLogicalOperations.ql
new file mode 100644
index 00000000000..eae74f76749
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-783/OperatorPrecedenceLogicErrorWhenUseBitwiseOrLogicalOperations.ql
@@ -0,0 +1,195 @@
+/**
+ * @name Operator Precedence Logic Error When Use Bitwise Or Logical Operations
+ * @description --Finding places to use bit and logical operations, without explicit priority allocation.
+ * --For example, `a || b ^ c` and `(a || b) ^ c` give different results when `b` is zero.
+ * @kind problem
+ * @id cpp/operator-precedence-logic-error-when-use-bitwise-logical-operations
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags maintainability
+ * readability
+ * external/cwe/cwe-783
+ * external/cwe/cwe-480
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+/** Holds if `exptmp` is `a || b && c` with an unparenthesised `&&` on the right and a left operand that is not a binary operation. */
+predicate isLogicalOrAndExpr(LogicalOrExpr exptmp) {
+  // The right operand must be a logical AND written without its own parentheses.
+  exists(LogicalAndExpr rhs | rhs = exptmp.getRightOperand() | not rhs.isParenthesised()) and
+  not exptmp.getLeftOperand() instanceof BinaryOperation
+}
+
+/** Holds if `exptmp` is a `||` or `&&` whose unparenthesised right operand is a bitwise operation with a boolean left operand. */
+predicate isLogicalOrandBitwise(Expr exptmp) {
+ not exptmp.(LogicalOrExpr).getLeftOperand() instanceof BinaryOperation and
+ not exptmp.(LogicalOrExpr).getRightOperand().isParenthesised() and
+ (
+ exptmp.(LogicalOrExpr).getRightOperand().(BinaryBitwiseOperation).getLeftOperand().getType()
+ instanceof BoolType and
+ // A boolean left operand of the bitwise operation suggests that the author expected
+ // the logical operator to bind first, i.e. that the precedence is not what was meant.
+ // Bitwise operations whose right operand has the constant value 0 or 1 are excluded,
+ // because combining a boolean with 0 or 1 may well be intentional.
+ not exptmp
+ .(LogicalOrExpr)
+ .getRightOperand()
+ .(BinaryBitwiseOperation)
+ .getRightOperand()
+ .getValue() = "0" and
+ not exptmp
+ .(LogicalOrExpr)
+ .getRightOperand()
+ .(BinaryBitwiseOperation)
+ .getRightOperand()
+ .getValue() = "1"
+ )
+ or
+ not exptmp.(LogicalAndExpr).getLeftOperand() instanceof BinaryOperation and
+ not exptmp.(LogicalAndExpr).getRightOperand().isParenthesised() and
+ (
+ exptmp.(LogicalAndExpr).getRightOperand().(BinaryBitwiseOperation).getLeftOperand().getType()
+ instanceof BoolType and
+ // Same check for `&&`: only report when the right operand of the bitwise operation
+ // does not have the constant value 0 or 1. With any other value the logical operation
+ // is applied to a bitwise result that is not boolean, which is hard for a developer
+ // to reason about and is therefore treated as a likely precedence mistake.
+ not exptmp
+ .(LogicalAndExpr)
+ .getRightOperand()
+ .(BinaryBitwiseOperation)
+ .getRightOperand()
+ .getValue() = "0" and
+ not exptmp
+ .(LogicalAndExpr)
+ .getRightOperand()
+ .(BinaryBitwiseOperation)
+ .getRightOperand()
+ .getValue() = "1"
+ )
+}
+
+/** Holds if `exptmp` is `a | b & c`, `a | b ^ c` or `a ^ b & c` with an unparenthesised right operand and a non-binary left operand. */
+predicate isBitwiseandBitwise(Expr exptmp) {
+  exists(BitwiseOrExpr outer, Expr rhs | outer = exptmp and rhs = outer.getRightOperand() |
+    (rhs instanceof BitwiseAndExpr or rhs instanceof BitwiseXorExpr) and
+    not rhs.isParenthesised() and
+    not outer.getLeftOperand() instanceof BinaryOperation
+  )
+  or
+  exists(BitwiseXorExpr outer, Expr rhs | outer = exptmp and rhs = outer.getRightOperand() |
+    rhs instanceof BitwiseAndExpr and
+    not rhs.isParenthesised() and not outer.getLeftOperand() instanceof BinaryOperation
+  )
+}
+
+/** Holds if no bound of `exp` equals one of the limit values listed below, or if its range is exactly [0, 1]. */
+predicate isRealRange(Expr exp) {
+  lowerBound(exp) != -128 and
+  lowerBound(exp) != -32768 and
+  lowerBound(exp) != -65536 and
+  lowerBound(exp) != -8388608 and
+  lowerBound(exp) != -33554432 and
+  lowerBound(exp) != -268435456 and
+  lowerBound(exp) != -2147483648 and
+  lowerBound(exp).toString() != "-4294967296" and
+  lowerBound(exp).toString() != "-9223372036854775808" and
+  lowerBound(exp).toString() != "-Infinity" and
+  lowerBound(exp).toString() != "NaN" and
+  upperBound(exp) != 127 and
+  upperBound(exp) != 255 and
+  upperBound(exp) != 32767 and
+  upperBound(exp) != 65535 and
+  upperBound(exp) != 8388607 and
+  upperBound(exp) != 33554431 and
+  upperBound(exp) != 268435455 and
+  upperBound(exp) != 2147483647 and
+  upperBound(exp).toString() != "4294967295" and
+  upperBound(exp).toString() != "9223372036854775807" and
+  upperBound(exp).toString() != "18446744073709551616" and
+  upperBound(exp).toString() != "Infinity" and
+  upperBound(exp).toString() != "NaN"
+  or
+  upperBound(exp) = 1 and
+  lowerBound(exp) = 0
+}
+
+/**
+ * Holds if `exp1` and `exp2` have the same type size while `exp3` has a different one, or
+ * if all three satisfy `isRealRange`, the bounds of `exp1` and `exp2` are within 16 of each
+ * other, and the upper or lower bound of `exp3` is more than 256 away from that of `exp1`.
+ */
+pragma[inline]
+predicate isDifferentSize(Expr exp1, Expr exp2, Expr exp3) {
+  exp1.getType().getSize() = exp2.getType().getSize() and
+  exp1.getType().getSize() != exp3.getType().getSize()
+  or
+  isRealRange(exp1) and isRealRange(exp2) and isRealRange(exp3) and
+  upperBound(exp1).maximum(upperBound(exp2)) - upperBound(exp1).minimum(upperBound(exp2)) < 16 and
+  lowerBound(exp1).maximum(lowerBound(exp2)) - lowerBound(exp1).minimum(lowerBound(exp2)) < 16 and
+  (
+    upperBound(exp1).maximum(upperBound(exp3)) - upperBound(exp1).minimum(upperBound(exp3)) > 256 or
+    lowerBound(exp1).maximum(lowerBound(exp3)) - lowerBound(exp1).minimum(lowerBound(exp3)) > 256
+  )
+}
+
+/** Holds if, for the `|`/`&`, `|`/`^` or `^`/`&` pair `op1`/`op2`, some integers in the ranges of `exp1`, `exp2`, `exp3` make the two possible groupings differ. */
+pragma[inline]
+predicate isDifferentResults(
+  Expr exp1, Expr exp2, Expr exp3, BinaryBitwiseOperation op1, BinaryBitwiseOperation op2
+) {
+  isRealRange(exp1) and
+  isRealRange(exp2) and
+  isRealRange(exp3) and
+  exists(int a, int b, int c |
+    a in [lowerBound(exp1).floor() .. upperBound(exp1).floor()] and
+    b in [lowerBound(exp2).floor() .. upperBound(exp2).floor()] and
+    c in [lowerBound(exp3).floor() .. upperBound(exp3).floor()]
+  |
+    // `a | b & c`
+    op1 instanceof BitwiseOrExpr and
+    op2 instanceof BitwiseAndExpr and
+    b.bitAnd(c).bitOr(a) != a.bitOr(b).bitAnd(c)
+    or
+    // `a | b ^ c`
+    op1 instanceof BitwiseOrExpr and
+    op2 instanceof BitwiseXorExpr and
+    b.bitXor(c).bitOr(a) != a.bitOr(b).bitXor(c)
+    or
+    // `a ^ b & c`
+    op1 instanceof BitwiseXorExpr and
+    op2 instanceof BitwiseAndExpr and
+    b.bitAnd(c).bitXor(a) != a.bitXor(b).bitAnd(c)
+  )
+}
+
+from Expr exp, string msg
+where
+  // Unparenthesised `&&` on the right of `||`.
+  isLogicalOrAndExpr(exp) and
+  msg = "Logical AND has a higher priority."
+  or
+  // Bitwise operation with a boolean left operand on the right of `||` or `&&`.
+  isLogicalOrandBitwise(exp) and
+  msg = "Binary operations have higher priority."
+  or
+  // Two bitwise operators of different precedence written without parentheses.
+  isBitwiseandBitwise(exp) and
+  exists(BinaryBitwiseOperation outer, BinaryBitwiseOperation inner |
+    outer = exp and inner = outer.getRightOperand()
+  |
+    // The first two operands having the same size (or close ranges) hints that the
+    // developer meant to combine them first; without parentheses the right-hand
+    // operation is evaluated first instead.
+    isDifferentSize(outer.getLeftOperand(), inner.getLeftOperand(), inner.getRightOperand()) and
+    msg = "Expression ranges do not match operation precedence."
+    or
+    // Report expressions where the two possible groupings can produce different
+    // values for operands taken from their computed ranges.
+    isDifferentResults(outer.getLeftOperand(), inner.getLeftOperand(), inner.getRightOperand(),
+      outer, inner) and
+    msg = "specify the priority with parentheses."
+  )
+select exp, msg
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-783/OperatorPrecedenceLogicErrorWhenUseBoolType.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-783/OperatorPrecedenceLogicErrorWhenUseBoolType.ql
new file mode 100644
index 00000000000..4f30f112eb0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-783/OperatorPrecedenceLogicErrorWhenUseBoolType.ql
@@ -0,0 +1,54 @@
+/**
+ * @name Operator Precedence Logic Error When Use Bool Type
+ * @description --Finding places of confusing use of boolean type.
+ * --For example, a unary minus does not work before a boolean type and an increment always gives true.
+ * @kind problem
+ * @id cpp/operator-precedence-logic-error-when-use-bool-type
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-783
+ * external/cwe/cwe-480
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.HashCons
+
+/** Holds if the operand of the increment `exp` has boolean type. */
+predicate incrementBoolType(IncrementOperation exp) {
+  exists(BoolType boolTy | boolTy = exp.getOperand().getType())
+}
+
+/** Holds if the unary minus `exp` has a boolean operand and its fully converted form is boolean as well. */
+predicate revertSignBoolType(UnaryMinusExpr exp) {
+  exp.getFullyConverted().getType() instanceof BoolType and
+  exp.getAnOperand().getType() instanceof BoolType
+}
+
+/** Holds if `exp` is a condition `x = f() <cmp> y` where the unparenthesised comparison is what gets assigned to a non-boolean `x`. */
+predicate assignBoolType(Expr exp) {
+  exists(AssignExpr assign, ComparisonOperation co | assign = exp and co = assign.getRValue() |
+    assign.isCondition() and
+    not co.isParenthesised() and
+    co.getLeftOperand() instanceof FunctionCall and
+    not co.getRightOperand().getType() instanceof BoolType and
+    not co.getRightOperand().getValue() = "0" and
+    not co.getRightOperand().getValue() = "1" and
+    not assign.getLValue().getType() instanceof BoolType and
+    // Skip targets that are also assigned a boolean value outside of a condition.
+    not exists(AssignExpr other |
+      hashCons(other.getLValue()) = hashCons(assign.getLValue()) and
+      not other.isCondition() and
+      other.getRValue().getType() instanceof BoolType and
+      other.getLValue().getType() = assign.getLValue().getType()
+    )
+  )
+}
+
+from Expr exp
+where
+  assignBoolType(exp) or
+  revertSignBoolType(exp) or
+  incrementBoolType(exp)
+select exp, "this expression needs attention"
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-787/UnsignedToSignedPointerArith.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-787/UnsignedToSignedPointerArith.ql
new file mode 100644
index 00000000000..1fe82c9cc51
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-787/UnsignedToSignedPointerArith.ql
@@ -0,0 +1,30 @@
+/**
+ * @name unsigned to signed used in pointer arithmetic
+ * @description finds unsigned to signed conversions used in pointer arithmetic, potentially causing an out-of-bound access
+ * @id cpp/sign-conversion-pointer-arithmetic
+ * @kind problem
+ * @problem.severity warning
+ * @tags reliability
+ * security
+ * external/cwe/cwe-787
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.security.Overflow
+
+from FunctionCall call, Function f, Parameter p, DataFlow::Node sink, PointerArithmeticOperation pao
+where
+  f = call.getTarget() and
+  p = f.getAParameter() and
+  p.getUnspecifiedType().(IntegralType).isSigned() and
+  p.getUnspecifiedType().getSize() < 8 and
+  call.getArgument(p.getIndex()).getUnspecifiedType().(IntegralType).isUnsigned() and
+  not call.getArgument(p.getIndex()).isConstant() and
+  not guardedGreater(_, call.getArgument(p.getIndex())) and
+  DataFlow::localFlow(DataFlow::parameterNode(p), sink) and
+  sink.asExpr() = pao.getAnOperand() and
+  not guardedLesser(_, sink.asExpr())
+select call,
+  "This call: $@ passes an unsigned int to a function that requires a signed int: $@. And then used in pointer arithmetic: $@",
+  call, call.toString(), f, f.toString(), sink, sink.toString()
diff --git a/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-788/AccessOfMemoryLocationAfterEndOfBufferUsingStrlen.ql b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-788/AccessOfMemoryLocationAfterEndOfBufferUsingStrlen.ql
new file mode 100644
index 00000000000..e4577968730
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/experimental/Security/CWE/CWE-788/AccessOfMemoryLocationAfterEndOfBufferUsingStrlen.ql
@@ -0,0 +1,34 @@
+/**
+ * @name Access Of Memory Location After End Of Buffer
+ * @description The expression `buffer [strlen (buffer)] = 0` is potentially dangerous, if the variable `buffer` does not have a terminal zero, then access beyond the bounds of the allocated memory is possible, which will lead to undefined behavior.
+ * If terminal zero is present, then the specified expression is meaningless.
+ * @kind problem
+ * @id cpp/access-memory-location-after-end-buffer-strlen
+ * @problem.severity warning
+ * @precision medium
+ * @tags correctness
+ * security
+ * external/cwe/cwe-788
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.dataflow.DataFlow
+
+from StrlenCall fc, AssignExpr expr, ArrayExpr exprarr
+where
+  expr.getLValue() = exprarr and
+  expr.getRValue().getValue().toInt() = 0 and
+  globalValueNumber(exprarr.getArrayBase()) = globalValueNumber(fc.getArgument(0)) and
+  globalValueNumber(exprarr.getArrayOffset()) = globalValueNumber(fc) and
+  not exists(Expr between |
+    dominates(between, expr) and
+    postDominates(between, fc) and
+    not between.getEnclosingStmt() = fc.getEnclosingStmt() and
+    not between.getEnclosingStmt() = expr.getEnclosingStmt() and
+    (
+      DataFlow::localExprFlow(fc, between) or
+      between.getAChild*() = fc.getArgument(0).(VariableAccess).getTarget().getAnAccess()
+    )
+  )
+select expr, "potential unsafe or redundant assignment."
diff --git a/repo-tests/codeql/cpp/ql/src/external/CodeDuplication.qll b/repo-tests/codeql/cpp/ql/src/external/CodeDuplication.qll
new file mode 100644
index 00000000000..1550ca697a3
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/CodeDuplication.qll
@@ -0,0 +1,334 @@
+/** Provides classes for detecting duplicate or similar code. */
+
+import cpp
+
+private string relativePath(File file) { file.getRelativePath().replaceAll("\\", "/") = result }
+
+cached
+private predicate tokenLocation(string path, int sl, int sc, int el, int ec, Copy copy, int index) {
+  tokens(copy, index, sl, sc, el, ec) and
+  path = copy.sourceFile().getAbsolutePath()
+}
+
+/** A block of tokens reported by duplicate or similar code detection. */
+class Copy extends @duplication_or_similarity {
+  /** Gets the largest token index recorded for this block. */
+  private int lastToken() { result = max(int idx | tokens(this, idx, _, _, _, _)) }
+
+  /** Gets the index of the token of this block that starts where `loc` starts, if any. */
+  int tokenStartingAt(Location loc) {
+    exists(string file, int line, int col | loc.hasLocationInfo(file, line, col, _, _) |
+      tokenLocation(file, line, col, _, _, this, result)
+    )
+  }
+
+  /** Gets the index of the token of this block that ends where `loc` ends, if any. */
+  int tokenEndingAt(Location loc) {
+    exists(string file, int line, int col | loc.hasLocationInfo(file, _, _, line, col) |
+      tokenLocation(file, _, _, line, col, this, result)
+    )
+  }
+
+  /** Gets the start line of the token with index 0 in this block. */
+  int sourceStartLine() { tokens(this, 0, result, _, _, _) }
+
+  /** Gets the start column of the token with index 0 in this block. */
+  int sourceStartColumn() { tokens(this, 0, _, result, _, _) }
+
+  /** Gets the end line of the last token in this block. */
+  int sourceEndLine() { tokens(this, this.lastToken(), _, _, result, _) }
+
+  /** Gets the end column of the last token in this block. */
+  int sourceEndColumn() { tokens(this, this.lastToken(), _, _, _, result) }
+
+  /** Gets the number of lines from the start line to the end line of this block, inclusive. */
+  int sourceLines() { result = 1 + this.sourceEndLine() - this.sourceStartLine() }
+
+  /** Gets the opaque equivalence-class identifier recorded for this block. */
+  int getEquivalenceClass() { similarCode(this, _, result) or duplicateCode(this, _, result) }
+
+  /** Gets the file whose relative path matches the path recorded for this block. */
+  File sourceFile() {
+    exists(string name | relativePath(result) = name.replaceAll("\\", "/") |
+      duplicateCode(this, name, _) or similarCode(this, name, _)
+    )
+  }
+
+  /**
+   * Holds if this block is located in file `filepath`, starting at column
+   * `startcolumn` of line `startline` and ending at column `endcolumn` of
+   * line `endline`.
+   * For more information, see
+   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+   */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    filepath = this.sourceFile().getAbsolutePath() and
+    startline = this.sourceStartLine() and
+    startcolumn = this.sourceStartColumn() and
+    endline = this.sourceEndLine() and
+    endcolumn = this.sourceEndColumn()
+  }
+
+  /** Gets a textual representation of this block; has no result here and is overridden by subclasses. */
+  string toString() { none() }
+}
+
+/** A `Copy` that was reported as duplicated code. */
+class DuplicateBlock extends Copy, @duplication {
+  override string toString() { result = "Duplicate code: " + this.sourceLines() + " duplicated lines." }
+}
+
+/** A `Copy` that was reported as similar code. */
+class SimilarBlock extends Copy, @similarity {
+  override string toString() {
+    result = "Similar code: " + this.sourceLines() + " almost duplicated lines."
+  }
+}
+
+/** Gets a function definition entry that has a location and whose function has a `numlines` record. */
+FunctionDeclarationEntry sourceMethod() {
+  numlines(unresolveElement(result.getFunction()), _, _, _) and
+  exists(result.getLocation()) and
+  result.isDefinition()
+}
+
+/** Gets the number of `sourceMethod()` entries whose function is declared in `c`. */
+int numberOfSourceMethods(Class c) {
+  result =
+    count(FunctionDeclarationEntry entry |
+      entry.getFunction().getDeclaringType() = c and
+      entry = sourceMethod()
+    )
+}
+
+private predicate blockCoversStatement(int equivClass, int first, int last, Stmt stmt) {
+  // `first`/`last` are the token indices at which `stmt` starts/ends inside a duplicate block.
+  exists(DuplicateBlock blk, Location loc | loc = stmt.getLocation() |
+    equivClass = blk.getEquivalenceClass() and
+    first = blk.tokenStartingAt(loc) and
+    last = blk.tokenEndingAt(loc)
+  )
+}
+
+private Stmt statementInMethod(FunctionDeclarationEntry m) {
+  not result instanceof BlockStmt and
+  not result.getLocation() instanceof UnknownStmtLocation and
+  m.getBlock() = result.getParent+()
+}
+
+private predicate duplicateStatement(
+  FunctionDeclarationEntry m1, FunctionDeclarationEntry m2, Stmt s1, Stmt s2
+) {
+  m1 != m2 and
+  s1 != s2 and
+  s1 = statementInMethod(m1) and
+  s2 = statementInMethod(m2) and
+  exists(int equivClass, int first, int last |
+    blockCoversStatement(equivClass, first, last, s1) and
+    blockCoversStatement(equivClass, first, last, s2)
+  )
+}
+
+/**
+ * Holds if `m1` contains `total` statements (as counted by `statementInMethod`),
+ * `duplicate` of which are duplicated by a statement of the function `m2`.
+ */
+predicate duplicateStatements(
+  FunctionDeclarationEntry m1, FunctionDeclarationEntry m2, int duplicate, int total
+) {
+  total = strictcount(statementInMethod(m1)) and
+  duplicate = strictcount(Stmt dup | duplicateStatement(m1, m2, dup, _))
+}
+
+/** Holds if every counted statement of `m` is duplicated by a statement of `other`. */
+predicate duplicateMethod(FunctionDeclarationEntry m, FunctionDeclarationEntry other) {
+  exists(int n | duplicateStatements(m, other, n, n))
+}
+
+/**
+ * INTERNAL: do not use.
+ *
+ * Holds if `line` of `f` lies within the line span of some similar-code block in `f`.
+ */
+predicate similarLines(File f, int line) {
+  exists(SimilarBlock blk | f = blk.sourceFile() | line in [blk.sourceStartLine() .. blk.sourceEndLine()])
+}
+
+private predicate similarLinesPerEquivalenceClass(int equivClass, int lines, File f) {
+  lines =
+    strictsum(SimilarBlock blk, int n |
+      n = blk.sourceLines() and
+      (blk.getEquivalenceClass() = equivClass and blk.sourceFile() = f)
+    |
+      n
+    )
+}
+
+private predicate similarLinesCoveredFiles(File f, File otherFile) {
+  exists(int numLines, int coveredApprox |
+    numLines = f.getMetrics().getNumberOfLines() and
+    coveredApprox =
+      strictsum(int num |
+        exists(int equivClass |
+          f != otherFile and
+          similarLinesPerEquivalenceClass(equivClass, num, otherFile) and
+          similarLinesPerEquivalenceClass(equivClass, num, f)
+        )
+      )
+  |
+    (coveredApprox * 100) / numLines > 75
+  )
+}
+
+/** Holds if `f` passes the similarity pre-filter against `otherFile` and `coveredLines` lines of `f` lie in similar-code blocks. */
+predicate similarLinesCovered(File f, int coveredLines, File otherFile) {
+  similarLinesCoveredFiles(f, otherFile) and
+  exists(int numLines, int notCovered |
+    numLines = f.getMetrics().getNumberOfLines() and
+    notCovered =
+      count(int j |
+        j in [1 .. numLines] and
+        not similarLines(f, j)
+      )
+  |
+    coveredLines = numLines - notCovered
+  )
+}
+
+/**
+ * INTERNAL: do not use.
+ *
+ * Holds if `line` of `f` lies within the line span of some duplicate-code block in `f`.
+ */
+predicate duplicateLines(File f, int line) {
+  exists(DuplicateBlock blk | f = blk.sourceFile() |
+    line in [blk.sourceStartLine() .. blk.sourceEndLine()]
+  )
+}
+
+private predicate duplicateLinesPerEquivalenceClass(int equivClass, int lines, File f) {
+  lines =
+    strictsum(DuplicateBlock blk, int n |
+      n = blk.sourceLines() and
+      (blk.getEquivalenceClass() = equivClass and blk.sourceFile() = f)
+    |
+      n
+    )
+}
+
+/** Holds if `f` passes the duplication pre-filter against `otherFile` and `coveredLines` lines of `f` lie in duplicate-code blocks. */
+predicate duplicateLinesCovered(File f, int coveredLines, File otherFile) {
+  exists(int numLines, int coveredApprox, int notCovered |
+    numLines = f.getMetrics().getNumberOfLines()
+  |
+    // Pre-filter: per-class line totals that `f` shares with `otherFile` must exceed 75% of `f`.
+    coveredApprox =
+      strictsum(int num |
+        exists(int equivClass |
+          f != otherFile and
+          duplicateLinesPerEquivalenceClass(equivClass, num, otherFile) and
+          duplicateLinesPerEquivalenceClass(equivClass, num, f)
+        )
+      ) and
+    (coveredApprox * 100) / numLines > 75 and
+    // Lines of `f` that are not inside any duplicate block.
+    notCovered =
+      count(int j |
+        j in [1 .. numLines] and
+        not duplicateLines(f, j)
+      ) and
+    coveredLines = numLines - notCovered
+  )
+}
+
+/** Holds if more than 80% (`percent`) of `f` is covered by code similar to `other`, and the pair is not reported by `duplicateFiles`. */
+predicate similarFiles(File f, File other, int percent) {
+  not duplicateFiles(f, other, _) and
+  percent > 80 and
+  exists(int total, int covered |
+    total = f.getMetrics().getNumberOfLines() and
+    similarLinesCovered(f, covered, other) and
+    percent = covered * 100 / total
+  )
+}
+
+/** Holds if more than 70% (`percent`) of `f` is covered by code duplicated in `other`. */
+predicate duplicateFiles(File f, File other, int percent) {
+  percent > 70 and
+  exists(int total, int covered |
+    total = f.getMetrics().getNumberOfLines() and
+    duplicateLinesCovered(f, covered, other) and
+    percent = covered * 100 / total
+  )
+}
+
+/**
+ * Holds if more than 80% of the source member functions of `c` (`numDup` out of
+ * `total`) are duplicated by some member function of the distinct class `other`.
+ */
+predicate mostlyDuplicateClassBase(Class c, Class other, int numDup, int total) {
+  total = numberOfSourceMethods(c) and
+  numDup =
+    strictcount(FunctionDeclarationEntry m1 |
+      m1 = sourceMethod() and
+      m1.getFunction().getDeclaringType() = c and
+      exists(FunctionDeclarationEntry m2 |
+        m2.getFunction().getDeclaringType() = other and
+        duplicateMethod(m1, m2)
+      ) and
+      c != other
+    ) and
+  (numDup * 100) / total > 80
+}
+
+/**
+ * Holds if `mostlyDuplicateClassBase` holds for `c` and `other`, and `message`
+ * is the alert text: one wording when every counted member function is
+ * duplicated, another (with the ratio) when only some are.
+ */
+predicate mostlyDuplicateClass(Class c, Class other, string message) {
+  exists(int numDup, int total |
+    mostlyDuplicateClassBase(c, other, numDup, total) and
+    (
+      total = numDup and
+      exists(string prefix, string suffix, string name |
+        prefix = "All methods in " and suffix = " are identical in $@." and name = c.getName()
+      |
+        message = prefix + name + suffix
+      )
+      or
+      total != numDup and
+      exists(string sep, string mid, string tail, string name |
+        sep = " out of " and
+        mid = " methods in " and
+        tail = " are duplicated in $@." and
+        name = c.getName()
+      |
+        message = numDup + sep + total + mid + name + tail
+      )
+    )
+  )
+}
+
+/** Holds if `duplicateFiles` or `similarFiles` holds for `f` and `other` with any percentage. */
+predicate fileLevelDuplication(File f, File other) {
+  duplicateFiles(f, other, _) or similarFiles(f, other, _)
+}
+
+/**
+ * Holds if `mostlyDuplicateClass` holds for `c` and `other` with some
+ * message.
+ */
+predicate classLevelDuplication(Class c, Class other) { exists(string msg | mostlyDuplicateClass(c, other, msg)) }
+
+/**
+ * Holds if `line` in `f` is allowed to be duplicated, which is the case when
+ * an `#include` directive in `f` starts on that line.
+ */
+predicate whitelistedLineForDuplication(File f, int line) {
+  exists(Include inc | inc.getFile() = f | line = inc.getLocation().getStartLine())
+}
diff --git a/repo-tests/codeql/cpp/ql/src/external/DefectFilter.qll b/repo-tests/codeql/cpp/ql/src/external/DefectFilter.qll
new file mode 100644
index 00000000000..b932ffd0470
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/DefectFilter.qll
@@ -0,0 +1,55 @@
+/** Provides a class for working with defect query results stored in dashboard databases. */
+
+import cpp
+
+/**
+ * Holds if `id` is the opaque identifier of a result reported by query `queryPath`,
+ * such that `message` is the associated message and the location of the result spans
+ * column `startcol` of line `startline` to column `endcol` of line `endline`
+ * in file `file`.
+ *
+ * For more information, see [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+external predicate defectResults(
+ int id, string queryPath, string file, int startline, int startcol, int endline, int endcol,
+ string message
+);
+
+/**
+ * A result of a defect query, as stored in a dashboard database.
+ */
+class DefectResult extends int {
+  DefectResult() { defectResults(this, _, _, _, _, _, _, _) }
+
+  /** Gets the path of the query by which this result was reported. */
+  string getQueryPath() { defectResults(this, result, _, _, _, _, _, _) }
+
+  /** Gets the file whose absolute path is the one recorded for this result. */
+  File getFile() { defectResults(this, _, result.getAbsolutePath(), _, _, _, _, _) }
+
+  /** Gets the line on which this result's location begins. */
+  int getStartLine() { defectResults(this, _, _, result, _, _, _, _) }
+
+  /** Gets the column on which this result's location begins. */
+  int getStartColumn() { defectResults(this, _, _, _, result, _, _, _) }
+
+  /** Gets the line on which this result's location ends. */
+  int getEndLine() { defectResults(this, _, _, _, _, result, _, _) }
+
+  /** Gets the column on which this result's location ends. */
+  int getEndColumn() { defectResults(this, _, _, _, _, _, result, _) }
+
+  /** Gets the message that accompanies this result. */
+  string getMessage() { defectResults(this, _, _, _, _, _, _, result) }
+
+  /** Gets a `file://` URL built from the file path and location of this result. */
+  string getURL() {
+    exists(string path, int sl, int sc, int el, int ec |
+      path = this.getFile().getAbsolutePath() and
+      sl = this.getStartLine() and sc = this.getStartColumn() and
+      el = this.getEndLine() and ec = this.getEndColumn()
+    |
+      result = "file://" + path + ":" + sl + ":" + sc + ":" + el + ":" + ec
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/external/DuplicateBlock.ql b/repo-tests/codeql/cpp/ql/src/external/DuplicateBlock.ql
new file mode 100644
index 00000000000..3fdef9b510d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/DuplicateBlock.ql
@@ -0,0 +1,27 @@
+/**
+ * @deprecated
+ * @name Duplicate code
+ * @description This block of code is duplicated elsewhere. If possible, the shared code should be refactored so there is only one occurrence left. It may not always be possible to address these issues; other duplicate code checks (such as duplicate function, duplicate class) give subsets of the results with higher confidence.
+ * @kind problem
+ * @id cpp/duplicate-block
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags testability
+ * maintainability
+ * duplicate-code
+ * non-attributable
+ */
+
+import CodeDuplication
+
+from DuplicateBlock d, DuplicateBlock other, int lines, File otherFile, int otherLine
+where
+  // `other` is a distinct block belonging to the same equivalence class as `d`
+  d != other and
+  d.getEquivalenceClass() = other.getEquivalenceClass() and
+  d.sourceLines() = lines and lines > 10 and
+  other.sourceFile() = otherFile and
+  other.sourceStartLine() = otherLine
+select d,
+  "Duplicate code: " + lines + " lines are duplicated at " + otherFile.getBaseName() + ":" +
+    otherLine
diff --git a/repo-tests/codeql/cpp/ql/src/external/DuplicateFunction.ql b/repo-tests/codeql/cpp/ql/src/external/DuplicateFunction.ql
new file mode 100644
index 00000000000..1a861867fcb
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/DuplicateFunction.ql
@@ -0,0 +1,39 @@
+/**
+ * @deprecated
+ * @name Duplicate function
+ * @description There is another identical implementation of this function. Extract the code to a common file or superclass or delegate to improve sharing.
+ * @kind problem
+ * @id cpp/duplicate-function
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags testability
+ * maintainability
+ * duplicate-code
+ * non-attributable
+ */
+
+import cpp
+import CodeDuplication
+
+predicate relevant(FunctionDeclarationEntry m) {
+  exists(Location body | body = m.getBlock().getLocation() |
+    // The end line lies more than 10 lines past the start line: always relevant.
+    body.getStartLine() + 10 < body.getEndLine()
+    or
+    // More than 5 lines past the start line: relevant unless the name starts with `get`.
+    not m.getName().matches("get%") and
+    body.getStartLine() + 5 < body.getEndLine()
+  )
+}
+
+from FunctionDeclarationEntry m, FunctionDeclarationEntry other
+where
+  relevant(m) and
+  duplicateMethod(m, other) and
+  not classLevelDuplication(m.getFunction().getDeclaringType(),
+    other.getFunction().getDeclaringType()) and
+  not fileLevelDuplication(m.getFile(), other.getFile()) and
+  not other.getFunction().isConstructedFrom(_) and
+  not m.getFunction().isConstructedFrom(_)
+select m, "Function " + m.getName() + " is duplicated at $@.", other,
+  other.getFile().getBaseName() + ":" + other.getLocation().getStartLine().toString()
diff --git a/repo-tests/codeql/cpp/ql/src/external/MetricFilter.qll b/repo-tests/codeql/cpp/ql/src/external/MetricFilter.qll
new file mode 100644
index 00000000000..58e8bf154e9
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/MetricFilter.qll
@@ -0,0 +1,73 @@
+/** Provides a class for working with metric query results stored in dashboard databases. */
+
+import cpp
+
+/**
+ * Holds if `id` is the opaque identifier of a result reported by query `queryPath`,
+ * such that `value` is the reported metric value and the location of the result spans
+ * column `startcol` of line `startline` to column `endcol` of line `endline`
+ * in file `file`.
+ *
+ * For more information, see [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+external predicate metricResults(
+ int id, string queryPath, string file, int startline, int startcol, int endline, int endcol,
+ float value
+);
+
+/**
+ * A result of a metric query, as stored in a dashboard database.
+ */
+class MetricResult extends int {
+  MetricResult() { metricResults(this, _, _, _, _, _, _, _) }
+
+  /** Gets the path of the query by which this result was reported. */
+  string getQueryPath() { metricResults(this, result, _, _, _, _, _, _) }
+
+  /** Gets the file whose absolute path is the one recorded for this result. */
+  File getFile() { metricResults(this, _, result.getAbsolutePath(), _, _, _, _, _) }
+
+  /** Gets the line on which this result's location begins. */
+  int getStartLine() { metricResults(this, _, _, result, _, _, _, _) }
+
+  /** Gets the column on which this result's location begins. */
+  int getStartColumn() { metricResults(this, _, _, _, result, _, _, _) }
+
+  /** Gets the line on which this result's location ends. */
+  int getEndLine() { metricResults(this, _, _, _, _, result, _, _) }
+
+  /** Gets the column on which this result's location ends. */
+  int getEndColumn() { metricResults(this, _, _, _, _, _, result, _) }
+
+  /**
+   * Holds if some `Location` entity has exactly the file, lines and columns
+   * recorded for this query result.
+   */
+  predicate hasMatchingLocation() { exists(Location loc | loc = this.getMatchingLocation()) }
+
+  /**
+   * Gets a `Location` entity whose file, start/end line and start/end column
+   * all coincide with those recorded for this query result.
+   */
+  Location getMatchingLocation() {
+    this.getFile() = result.getFile() and
+    this.getStartLine() = result.getStartLine() and
+    this.getStartColumn() = result.getStartColumn() and
+    this.getEndLine() = result.getEndLine() and
+    this.getEndColumn() = result.getEndColumn()
+  }
+
+  /** Gets the metric value reported for this result. */
+  float getValue() { metricResults(this, _, _, _, _, _, _, result) }
+
+  /** Gets a `file://` URL built from the file path and location of this result. */
+  string getURL() {
+    exists(string path, int sl, int sc, int el, int ec |
+      path = this.getFile().getAbsolutePath() and
+      sl = this.getStartLine() and sc = this.getStartColumn() and
+      el = this.getEndLine() and ec = this.getEndColumn()
+    |
+      result = "file://" + path + ":" + sl + ":" + sc + ":" + el + ":" + ec
+    )
+  }
+}
diff --git a/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateClass.ql b/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateClass.ql
new file mode 100644
index 00000000000..20b9f39214e
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateClass.ql
@@ -0,0 +1,24 @@
+/**
+ * @deprecated
+ * @name Mostly duplicate class
+ * @description More than 80% of the methods in this class are duplicated in another class. Create a common supertype to improve code sharing.
+ * @kind problem
+ * @id cpp/duplicate-class
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags testability
+ * maintainability
+ * duplicate-code
+ * non-attributable
+ */
+
+import cpp
+import CodeDuplication
+
+from Class c, Class other, string message
+where
+  not fileLevelDuplication(c.getFile(), _) and
+  not other.isConstructedFrom(_) and
+  not c.isConstructedFrom(_) and
+  mostlyDuplicateClass(c, other, message)
+select c, message, other, other.getQualifiedName()
diff --git a/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateFile.ql b/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateFile.ql
new file mode 100644
index 00000000000..8cb23a432d2
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateFile.ql
@@ -0,0 +1,21 @@
+/**
+ * @deprecated
+ * @name Mostly duplicate file
+ * @description There is another file that shares a lot of the code with this file. Merge the two files to improve maintainability.
+ * @kind problem
+ * @id cpp/duplicate-file
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags testability
+ * maintainability
+ * duplicate-code
+ * non-attributable
+ */
+
+import cpp
+import CodeDuplication
+
+from File f, File other, int percent, string message
+where duplicateFiles(f, other, percent) and
+  message = percent + "% of the lines in " + f.getBaseName() + " are copies of lines in $@."
+select f, message, other, other.getBaseName()
diff --git a/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateFunction.ql b/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateFunction.ql
new file mode 100644
index 00000000000..8a7454e4c97
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/MostlyDuplicateFunction.ql
@@ -0,0 +1,31 @@
+/**
+ * @deprecated
+ * @name Mostly duplicate function
+ * @description There is another function that shares a lot of the code with this one. Extract the code to a common file/superclass or delegate to improve sharing.
+ * @kind problem
+ * @id cpp/mostly-duplicate-function
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags testability
+ * duplicate-code
+ * non-attributable
+ */
+
+import cpp
+import CodeDuplication
+
+from FunctionDeclarationEntry m, int covered, int total, FunctionDeclarationEntry other, int percent
+where
+  duplicateStatements(m, other, covered, total) and
+  total > 5 and
+  total != covered and
+  percent = covered * 100 / total and
+  percent > 80 and
+  not duplicateMethod(m, other) and
+  not fileLevelDuplication(m.getFile(), other.getFile()) and
+  not classLevelDuplication(m.getFunction().getDeclaringType(),
+    other.getFunction().getDeclaringType()) and
+  not other.getFunction().isConstructedFrom(_) and
+  not m.getFunction().isConstructedFrom(_)
+select m, percent + "% of the statements in " + m.getName() + " are duplicated in $@.", other,
+  other.getFunction().getQualifiedName()
diff --git a/repo-tests/codeql/cpp/ql/src/external/MostlySimilarFile.ql b/repo-tests/codeql/cpp/ql/src/external/MostlySimilarFile.ql
new file mode 100644
index 00000000000..81a6ed02d6c
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/MostlySimilarFile.ql
@@ -0,0 +1,21 @@
+/**
+ * @deprecated
+ * @name Mostly similar file
+ * @description There is another file that shares a lot of the code with this file. Notice that names of variables and types may have been changed. Merge the two files to improve maintainability.
+ * @kind problem
+ * @id cpp/similar-file
+ * @problem.severity recommendation
+ * @precision medium
+ * @tags testability
+ * maintainability
+ * duplicate-code
+ * non-attributable
+ */
+
+import cpp
+import CodeDuplication
+
+from File f, File other, int percent, string message
+where similarFiles(f, other, percent) and
+  message = percent + "% of the lines in " + f.getBaseName() + " are similar to lines in $@."
+select f, message, other, other.getBaseName()
diff --git a/repo-tests/codeql/cpp/ql/src/external/examples/filters/BumpMetricBy10.ql b/repo-tests/codeql/cpp/ql/src/external/examples/filters/BumpMetricBy10.ql
new file mode 100644
index 00000000000..05faf374a8d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/examples/filters/BumpMetricBy10.ql
@@ -0,0 +1,13 @@
+/**
+ * @name Metric filter: increase the value of a metric by 10
+ * @description This filter demonstrates how to change the value
+ * computed by the metric that it is filtering. In this
+ * example the value is increased by 10.
+ * @tags filter
+ */
+
+import cpp
+import external.MetricFilter
+
+from MetricResult res // every metric result in the database
+select res, res.getValue() + 10 // the reported value, increased by 10
diff --git a/repo-tests/codeql/cpp/ql/src/external/examples/filters/EditDefectMessage.ql b/repo-tests/codeql/cpp/ql/src/external/examples/filters/EditDefectMessage.ql
new file mode 100644
index 00000000000..366a76f6bad
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/examples/filters/EditDefectMessage.ql
@@ -0,0 +1,14 @@
+/**
+ * @name Filter: alter the message generated by a query
+ * @description This filter demonstrates how to edit the message
+ * generated by the query that it is filtering. In this
+ * example the string `Filtered query result: ` is
+ * prepended to the message.
+ * @tags filter
+ */
+
+import cpp
+import external.DefectFilter
+
+from DefectResult res, string message where message = "Filtered query result: " + res.getMessage()
+select res, message
diff --git a/repo-tests/codeql/cpp/ql/src/external/examples/filters/ExcludeGeneratedCode.ql b/repo-tests/codeql/cpp/ql/src/external/examples/filters/ExcludeGeneratedCode.ql
new file mode 100644
index 00000000000..44ef72735fc
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/external/examples/filters/ExcludeGeneratedCode.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Filter: exclude results from generated code
+ * @description This filter demonstrates how to return results only if
+ * they meet certain criteria. In this example, results are
+ * only returned if they do not come from a file which
+ * contains 'generated' anywhere in its path.
+ * @tags filter
+ */
+
+import cpp
+import external.DefectFilter
+
+predicate generatedFile(File f) { any(string p | p = f.getAbsolutePath()).matches("%generated%") }
+
+from DefectResult res
+where not exists(File f | f = res.getFile() and generatedFile(f))
+select res, res.getMessage()
diff --git a/repo-tests/codeql/cpp/ql/src/filters/ClassifyFiles.ql b/repo-tests/codeql/cpp/ql/src/filters/ClassifyFiles.ql
new file mode 100644
index 00000000000..f51a7d03178
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/filters/ClassifyFiles.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Classify files
+ * @description This query produces a list of all files in a snapshot
+ * that are classified as generated code or test code.
+ * @kind file-classifier
+ * @id cpp/file-classifier
+ */
+
+import cpp
+import semmle.code.cpp.AutogeneratedFile
+import semmle.code.cpp.TestFile
+
+predicate classify(File f, string tag) {
+  tag = "test" and
+  f instanceof TestFile
+  or
+  tag = "generated" and
+  f instanceof AutogeneratedFile
+}
+
+from File f, string tag
+where classify(f, tag)
+select f, tag
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 1.ql b/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 1.ql
new file mode 100644
index 00000000000..1ae7425a4d0
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 1.ql
@@ -0,0 +1,19 @@
+/**
+ * @name AV Rule 1
+ * @description Any one function (or method) will contain no more than 200 logical source lines of code.
+ * @kind problem
+ * @id cpp/jsf/av-rule-1
+ * @problem.severity warning
+ * @tags maintainability
+ * external/jsf
+ */
+
+import cpp
+
+from Function f, int n
+where
+  n > 200 and
+  f.getMetrics().getNumberOfLinesOfCode() = n
+select f,
+  "AV Rule 1: any one function (or method) will contain no more than 200 logical source lines of code. Function '"
+    + f.toString() + "' contains " + n.toString() + " lines of code."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 2.ql b/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 2.ql
new file mode 100644
index 00000000000..0420b9c3984
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 2.ql
@@ -0,0 +1,32 @@
+/**
+ * @name AV Rule 2
+ * @description There shall not be any self-modifying code.
+ * @kind problem
+ * @id cpp/jsf/av-rule-2
+ * @problem.severity error
+ * @tags maintainability
+ * readability
+ * testability
+ * external/jsf
+ */
+
+import cpp
+
+// Flags conversions, in either direction, between a function pointer type and a pointer
+// type that is neither a function pointer nor a void pointer. Self-modification done
+// through inline assembly, or through OS mechanisms for writing into process memory
+// (such as WriteProcessMemory under Windows), is not detected by this check.
+predicate maybeSMCConversion(Type t1, Type t2) {
+  // symmetric case: the same pair of types with the operands swapped
+  maybeSMCConversion(t2, t1)
+  or
+  t1 instanceof FunctionPointerType and
+  t2 instanceof PointerType and
+  not (t2 instanceof FunctionPointerType or t2 instanceof VoidPointerType)
+}
+
+from Expr e
+where
+  maybeSMCConversion(e.getUnderlyingType(), e.getActualType()) and
+  e.fromSource()
+select e, "AV Rule 2: There shall not be any self-modifying code."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 3.ql b/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 3.ql
new file mode 100644
index 00000000000..a1b374eaba7
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/3.02 Code Size and Complexity/AV Rule 3.ql
@@ -0,0 +1,17 @@
+/**
+ * @name AV Rule 3
+ * @description All functions shall have a cyclomatic complexity number of 20 or less.
+ * @kind problem
+ * @id cpp/jsf/av-rule-3
+ * @problem.severity recommendation
+ * @tags maintainability
+ * external/jsf
+ */
+
+import cpp
+
+from Function f, int c
+where
+  c > 20 and
+  f.getMetrics().getCyclomaticComplexity() = c
+select f, "AV Rule 3: All functions shall have a cyclomatic complexity number of 20 or less."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 11.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 11.ql
new file mode 100644
index 00000000000..838af649255
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 11.ql
@@ -0,0 +1,17 @@
+/**
+ * @name AV Rule 11
+ * @description Trigraphs will not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-11
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * external/jsf
+ */
+
+import cpp
+import external.ExternalArtifact
+
+from DefectExternalData d, string message
+where d.getQueryPath() = "jsf/4.04 Environment/AV Rule 11.ql" and message = d.getMessage()
+select d, message
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 12.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 12.ql
new file mode 100644
index 00000000000..c632de42748
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 12.ql
@@ -0,0 +1,17 @@
+/**
+ * @name AV Rule 12
+ * @description Digraphs will not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-12
+ * @problem.severity warning
+ * @tags maintainability
+ * readability
+ * external/jsf
+ */
+
+import cpp
+import external.ExternalArtifact
+
+from DefectExternalData d, string message
+where d.getQueryPath() = "jsf/4.04 Environment/AV Rule 12.ql" and message = d.getMessage()
+select d, message
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 13.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 13.ql
new file mode 100644
index 00000000000..7c997ba4c17
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 13.ql
@@ -0,0 +1,18 @@
+/**
+ * @name AV Rule 13
+ * @description Multi-byte characters and wide string literals will not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-13
+ * @problem.severity error
+ * @tags maintainability
+ * portability
+ * external/jsf
+ */
+
+import cpp
+
+from Literal l
+where
+  exists(Type t | t = l.getType() |
+    t instanceof Wchar_t or t.(ArrayType).getBaseType().getUnspecifiedType() instanceof Wchar_t)
+select l, "AV Rule 13: Multi-byte characters and wide string literals will not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 14.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 14.ql
new file mode 100644
index 00000000000..119588b5ebd
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 14.ql
@@ -0,0 +1,18 @@
+/**
+ * @name AV Rule 14
+ * @description Literal suffixes shall use uppercase rather than lowercase letters.
+ * @kind problem
+ * @id cpp/jsf/av-rule-14
+ * @problem.severity error
+ * @tags maintainability
+ * readability
+ * external/jsf
+ */
+
+import cpp
+
+from Literal l
+where
+  l.getValueText().regexpMatch(".*[ul][uUlL]*\\s*") and
+  l.fromSource()
+select l, "AV Rule 14: Literal suffixes shall use uppercase rather than lowercase letters."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 9.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 9.ql
new file mode 100644
index 00000000000..9df1b72a736
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.04 Environment/AV Rule 9.ql
@@ -0,0 +1,17 @@
+/**
+ * @name AV Rule 9
+ * @description Only those characters specified in the C++ basic source character set will be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-9
+ * @problem.severity warning
+ * @tags maintainability
+ * portability
+ * external/jsf
+ */
+
+import cpp
+import external.ExternalArtifact
+
+from DefectExternalData d, string message
+where d.getQueryPath() = "jsf/4.04 Environment/AV Rule 9.ql" and message = d.getMessage()
+select d, message
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 17.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 17.ql
new file mode 100644
index 00000000000..07f78f0b027
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 17.ql
@@ -0,0 +1,21 @@
+/**
+ * @name AV Rule 17
+ * @description The error indicator errno shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-17
+ * @problem.severity error
+ * @tags maintainability
+ * external/jsf
+ */
+
+import cpp
+
+from Locatable errno, Locatable use
+where
+  errno.getFile().getAbsolutePath().matches("%errno.h") and
+  (
+    use = errno.(Variable).getAnAccess() and errno.(Variable).hasName("errno")
+    or
+    use = errno.(Macro).getAnInvocation() and errno.(Macro).getHead() = "errno"
+  )
+select use, "AV Rule 17: The error indicator errno shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 18.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 18.ql
new file mode 100644
index 00000000000..09f3dfd6b17
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 18.ql
@@ -0,0 +1,18 @@
+/**
+ * @name AV Rule 18
+ * @description The macro offsetof, in library <stddef.h>, shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-18
+ * @problem.severity error
+ * @tags maintainability
+ * external/jsf
+ */
+
+import cpp
+
+from Macro offsetof
+where
+  offsetof.getHead().matches("offsetof(%,%)") and // a two-parameter `offsetof` macro ...
+  offsetof.getFile().getAbsolutePath().matches("%stddef.h") // ... in a file ending in stddef.h
+select offsetof.getAnInvocation(),
+  "AV Rule 18: The macro offsetof, in library <stddef.h>, shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 19.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 19.ql
new file mode 100644
index 00000000000..f22ef860126
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 19.ql
@@ -0,0 +1,15 @@
+/**
+ * @name AV Rule 19
+ * @description <locale.h> and the setlocale function shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-19
+ * @problem.severity error
+ * @tags maintainability
+ * external/jsf
+ */
+
+import cpp
+
+from Include incl
+where incl.getIncludedFile().getAbsolutePath().matches("%locale.h") // includes of <locale.h>
+select incl, "AV Rule 19: <locale.h> and the setlocale function shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 20.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 20.ql
new file mode 100644
index 00000000000..3fcfee2f55d
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 20.ql
@@ -0,0 +1,34 @@
+/**
+ * @name AV Rule 20
+ * @description The setjmp macro and the longjmp function shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-20
+ * @problem.severity warning
+ * @tags correctness
+ * portability
+ * readability
+ * external/jsf
+ */
+
+import cpp
+
+class Setjmp extends Macro {
+  Setjmp() {
+    this.getFile().getAbsolutePath().matches("%setjmp.h") and
+    this.getHead().matches("setjmp(%)") // function-like macro named `setjmp`
+  }
+}
+
+class Longjmp extends Function {
+  Longjmp() {
+    this.getFile().getAbsolutePath().matches("%setjmp.h") and
+    this.getNumberOfParameters() = 2 and
+    this.hasName("longjmp")
+  }
+}
+
+from Locatable use // a use is reported even if only one of setjmp/longjmp is in the database
+where
+  use = any(Setjmp setjmp).getAnInvocation() or
+  use = any(Longjmp longjmp).getACallToThisFunction()
+select use, "AV Rule 20: The setjmp macro and the longjmp function shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 21.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 21.ql
new file mode 100644
index 00000000000..3640ddcb4b6
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 21.ql
@@ -0,0 +1,16 @@
+/**
+ * @name AV Rule 21
+ * @description The signal handling facilities of <signal.h> shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-21
+ * @problem.severity error
+ * @tags correctness
+ * portability
+ * external/jsf
+ */
+
+import cpp
+
+from Include incl
+where incl.getIncludedFile().getAbsolutePath().matches("%signal.h") // includes of <signal.h>
+select incl, "AV Rule 21: The signal handling facilities of <signal.h> shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 22.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 22.ql
new file mode 100644
index 00000000000..e13f767f581
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 22.ql
@@ -0,0 +1,16 @@
+/**
+ * @name AV Rule 22
+ * @description The input/output library <stdio.h> shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-22
+ * @problem.severity error
+ * @tags maintainability
+ * portability
+ * external/jsf
+ */
+
+import cpp
+
+from Include incl
+where incl.getIncludedFile().getAbsolutePath().matches("%stdio.h") // includes of <stdio.h>
+select incl, "AV Rule 22: The input/output library <stdio.h> shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql
new file mode 100644
index 00000000000..9bb63cb6b29
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql
@@ -0,0 +1,19 @@
+/**
+ * @name AV Rule 23
+ * @description The library functions atof, atoi and atol from library <stdlib.h> shall not be used.
+ * @kind problem
+ * @id cpp/jsf/av-rule-23
+ * @problem.severity error
+ * @tags correctness
+ * portability
+ * external/jsf
+ */
+
+import cpp
+
+from Function f
+where
+  f.getName().regexpMatch("atof|atoi|atol") and // one of the three banned conversion functions
+  f.getFile().getAbsolutePath().matches("%stdlib.h") // located in a file ending in stdlib.h
+select f.getACallToThisFunction(),
+  "AV Rule 23: The library functions atof, atoi and atol from library <stdlib.h> shall not be used."
diff --git a/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql
new file mode 100644
index 00000000000..9e0fd1d5de8
--- /dev/null
+++ b/repo-tests/codeql/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Dangerous system functions
+ * @description The library functions abort, exit, getenv and system from library