diff --git a/config/opcode-qldoc.py b/config/opcode-qldoc.py
new file mode 100644
index 00000000000..e379e6a3ea9
--- /dev/null
+++ b/config/opcode-qldoc.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+
+"""Attach a short QLDoc comment to each `Opcode` class declared in `Opcode.qll`.
+
+Reads `Instruction.qll` to find the `Instruction` classes that have a QLDoc comment, then rewrites
+`Opcode.qll` in place so that every `Opcode` with a matching `Instruction` class gets a generated
+comment that points at the documentation of that `Instruction`.
+"""
+
+import os
+import re
+path = os.path
+
+needs_an_re = re.compile(r'^(?!Unary)[AEIOU]')  # Name requiring "an" instead of "a".
+start_qldoc_re = re.compile(r'^\s*/\*\*')  # Start of a QLDoc comment
+end_qldoc_re = re.compile(r'\*/\s*$')  # End of a QLDoc comment
+blank_qldoc_line_re = re.compile(r'^\s*\*\s*$')  # A line in a QLDoc comment with only the '*'
+instruction_class_re = re.compile(r'^class (?P<name>[A-Za-z0-9]+)Instruction\s')  # Declaration of an `Instruction` class
+opcode_base_class_re = re.compile(r'^abstract class (?P<name>[A-Za-z0-9]+)Opcode\s')  # Declaration of an `Opcode` base class
+# Classes nested in the `Opcode` module are indented by two spaces, which the pattern must match.
+opcode_class_re = re.compile(r'^  class (?P<name>[A-Za-z0-9]+)\s')  # Declaration of an `Opcode` class
+
+script_dir = path.realpath(path.dirname(__file__))
+instruction_path = path.realpath(path.join(script_dir, '../cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll'))
+opcode_path = path.realpath(path.join(script_dir, '../cpp/ql/src/semmle/code/cpp/ir/implementation/Opcode.qll'))
+
+# Scan `Instruction.qll`, keeping track of the QLDoc comment attached to each declaration of a class
+# whose name ends with `Instruction`.
+instruction_comments = {}
+in_qldoc = False
+saw_blank_line_in_qldoc = False
+qldoc_lines = []
+with open(instruction_path, 'r', encoding='utf-8') as instr:
+    for line in instr:
+        if in_qldoc:
+            if end_qldoc_re.search(line):
+                qldoc_lines.append(line)
+                in_qldoc = False
+            elif blank_qldoc_line_re.search(line):
+                # We're going to skip any lines after the first blank line, to avoid duplicating all
+                # of the verbose description.
+                saw_blank_line_in_qldoc = True
+            elif not saw_blank_line_in_qldoc:
+                qldoc_lines.append(line)
+        else:
+            if start_qldoc_re.search(line):
+                # Starting a new QLDoc comment.
+                saw_blank_line_in_qldoc = False
+                qldoc_lines.append(line)
+                if not end_qldoc_re.search(line):
+                    in_qldoc = True
+            else:
+                instruction_match = instruction_class_re.search(line)
+                if instruction_match:
+                    # Found the declaration of an `Instruction` class. Record the QLDoc comments.
+                    instruction_comments[instruction_match.group('name')] = qldoc_lines
+                qldoc_lines = []
+
+# Scan `Opcode.qll`. Whenever we see the declaration of an `Opcode` class for which we have a
+# corresponding `Instruction` class, we'll attach a QLDoc comment that refers to that `Instruction`.
+in_qldoc = False
+qldoc_lines = []
+output_lines = []
+with open(opcode_path, 'r', encoding='utf-8') as opcode:
+    for line in opcode:
+        if in_qldoc:
+            qldoc_lines.append(line)
+            if end_qldoc_re.search(line):
+                in_qldoc = False
+        else:
+            if start_qldoc_re.search(line):
+                qldoc_lines.append(line)
+                if not end_qldoc_re.search(line):
+                    in_qldoc = True
+            else:
+                name_without_suffix = None
+                indent = ''
+                opcode_base_match = opcode_base_class_re.search(line)
+                if opcode_base_match:
+                    name_without_suffix = opcode_base_match.group('name')
+                else:
+                    opcode_match = opcode_class_re.search(line)
+                    if opcode_match:
+                        name_without_suffix = opcode_match.group('name')
+                        # Indent by two additional spaces, since opcodes are declared in the
+                        # `Opcode` module.
+                        indent = '  '
+
+                if name_without_suffix:
+                    # Found an `Opcode` declaration. If it matches a known `Instruction`, replace
+                    # its QLDoc with a short comment that refers to the `Instruction`'s QLDoc.
+                    if instruction_comments.get(name_without_suffix):
+                        article = 'an' if needs_an_re.search(name_without_suffix) else 'a'
+                        qldoc_lines = [
+                            indent + '/**\n',
+                            indent + ' * The `Opcode` for ' + article + ' `' + name_without_suffix + 'Instruction`.\n',
+                            indent + ' *\n',
+                            indent + ' * See the `' + name_without_suffix + 'Instruction` documentation for more details.\n',
+                            indent + ' */\n'
+                        ]
+                output_lines.extend(qldoc_lines)
+                qldoc_lines = []
+                output_lines.append(line)
+
+# Write out the updated `Opcode.qll`
+with open(opcode_path, 'w', encoding='utf-8') as opcode:
+    opcode.writelines(output_lines)
diff --git a/cpp/ql/src/semmle/code/cpp/ir/IR.qll b/cpp/ql/src/semmle/code/cpp/ir/IR.qll
index f019f20b6a8..381adad5e41 100644
--- a/cpp/ql/src/semmle/code/cpp/ir/IR.qll
+++ b/cpp/ql/src/semmle/code/cpp/ir/IR.qll
@@ -1,3 +1,47 @@
+/**
+ * Provides classes that describe the Intermediate Representation (IR) of the program.
+ *
+ * The IR is a representation of the semantics of the program, with very little dependence on the
+ * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`,
+ * and `++i` all have the same semantic effect, but appear in the AST as three different types of
+ * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental
+ * operations similar to:
+ *
+ * ```
+ * r1(int*) = VariableAddress[i]  // Compute the address of variable `i`
+ * r2(int)  = Load &:r1, m0       // Load the value of `i`
+ * r3(int)  = Constant[1]         // An integer constant with the value `1`
+ * r4(int)  = Add r2, r3          // Add `1` to the value of `i`
+ * r5(int)  = Store &:r1, r4      // Store the new value back into the variable `i`
+ * ```
+ *
+ * This allows IR-based analysis to focus on the fundamental operations, rather than having to be
+ * concerned with the various ways of expressing those operations in source code.
+ *
+ * The key classes in the IR are:
+ *
+ * - `IRFunction` - Contains the IR for an entire function definition, including all of that
+ *   function's `Instruction`s, `IRBlock`s, and `IRVariable`s.
+ * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be
+ *   performed, the operands that produce the inputs to that operation, and the type of the result
+ *   of the operation. Control flows from an `Instruction` to one of a set of successor
+ *   `Instruction`s.
+ * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly
+ *   represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has
+ *   a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction`
+ *   that produces its value (its "definition").
+ * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is
+ *   created for each variable directly accessed by the function. In addition, `IRVariable`s are
+ *   created to represent certain temporary storage locations that do not have explicitly declared
+ *   variables in the source code, such as the return value of the function.
+ * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a
+ *   sequence of instructions such that control flow can only enter the block at the first
+ *   instruction, and can only leave the block from the last instruction.
+ * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType`
+ *   is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all
+ *   be represented as the `IRType` `uint4`, a four-byte unsigned integer.
+ */
+
 // Most queries should operate on the aliased SSA IR, so that's what we expose
-// publically as the "IR".
+// publicly as the "IR".
import implementation.aliased_ssa.IR diff --git a/cpp/ql/src/semmle/code/cpp/ir/IRConfiguration.qll b/cpp/ql/src/semmle/code/cpp/ir/IRConfiguration.qll index b5b7d7de7c2..b8abef8a547 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/IRConfiguration.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/IRConfiguration.qll @@ -1 +1,5 @@ +/** + * Module used to configure the IR generation process. + */ + import implementation.IRConfiguration diff --git a/cpp/ql/src/semmle/code/cpp/ir/PrintIR.qll b/cpp/ql/src/semmle/code/cpp/ir/PrintIR.qll index 3ff80237635..c4ebf2f1eba 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/PrintIR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/PrintIR.qll @@ -1 +1,11 @@ +/** + * Outputs a representation of the IR as a control flow graph. + * + * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small + * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most + * uses, however, it is better to write a query that imports `PrintIR.qll`, extends + * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to + * dump. + */ + import implementation.aliased_ssa.PrintIR diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/EdgeKind.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/EdgeKind.qll index 54059fb5b82..32e36bb6787 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/EdgeKind.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/EdgeKind.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that specify the conditions under which control flows along a given edge. + */ + private import internal.EdgeKindInternal private newtype TEdgeKind = @@ -77,9 +81,15 @@ class CaseEdge extends EdgeKind, TCaseEdge { else result = "Case[" + minValue + ".." + maxValue + "]" } - string getMinValue() { result = minValue } + /** + * Gets the smallest value of the switch expression for which control will flow along this edge. 
+ */ + final string getMinValue() { result = minValue } - string getMaxValue() { result = maxValue } + /** + * Gets the largest value of the switch expression for which control will flow along this edge. + */ + final string getMaxValue() { result = maxValue } } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/IRConfiguration.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/IRConfiguration.qll index 71bc8ec2b0f..37ac2fccdd9 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/IRConfiguration.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/IRConfiguration.qll @@ -10,6 +10,7 @@ private newtype TIRConfiguration = MkIRConfiguration() * The query can extend this class to control which functions have IR generated for them. */ class IRConfiguration extends TIRConfiguration { + /** Gets a textual representation of this element. */ string toString() { result = "IRConfiguration" } /** @@ -17,6 +18,13 @@ class IRConfiguration extends TIRConfiguration { */ predicate shouldCreateIRForFunction(Language::Function func) { any() } + /** + * Holds if the strings used as part of an IR dump should be generated for function `func`. + * + * This predicate is overridden in `PrintIR.qll` to avoid the expense of generating a large number + * of debug strings for IR that will not be dumped. We still generate the actual IR for these + * functions, however, to preserve the results of any interprocedural analysis. + */ predicate shouldEvaluateDebugStringsForFunction(Language::Function func) { any() } } @@ -26,6 +34,7 @@ private newtype TIREscapeAnalysisConfiguration = MkIREscapeAnalysisConfiguration * The query can extend this class to control what escape analysis is used when generating SSA. */ class IREscapeAnalysisConfiguration extends TIREscapeAnalysisConfiguration { + /** Gets a textual representation of this element. 
*/ string toString() { result = "IREscapeAnalysisConfiguration" } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/IRType.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/IRType.qll index dec78b413b3..41c9ac06d82 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/IRType.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/IRType.qll @@ -32,6 +32,7 @@ private newtype TIRType = * all pointer types map to the same instance of `IRAddressType`. */ class IRType extends TIRType { + /** Gets a textual representation of this type. */ string toString() { none() } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll index 6852a965401..5e11a310e2f 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/MemoryAccessKind.qll @@ -1,3 +1,9 @@ +/** + * Provides classes that describe how a particular `Instruction` or its operands access memory. + */ + +private import IRConfiguration + private newtype TMemoryAccessKind = TIndirectMemoryAccess() or TBufferMemoryAccess() or @@ -14,6 +20,7 @@ private newtype TMemoryAccessKind = * memory result. */ class MemoryAccessKind extends TMemoryAccessKind { + /** Gets a textual representation of this access kind. */ string toString() { none() } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/Opcode.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/Opcode.qll index c0b8adbe56b..c4134d240ab 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/Opcode.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/Opcode.qll @@ -1,3 +1,8 @@ +/** + * Provides `Opcode`s that specify the operation performed by an `Instruction`, as well as metadata + * about those opcodes, such as operand kinds and memory accesses. 
+ */ + private import internal.OpcodeImports as Imports private import internal.OperandTag import Imports::MemoryAccessKind @@ -45,7 +50,7 @@ private newtype TOpcode = TConvertToDerived() or TCheckedConvertOrNull() or TCheckedConvertOrThrow() or - TDynamicCastToVoid() or + TCompleteObjectAddress() or TVariableAddress() or TFieldAddress() or TFunctionAddress() or @@ -86,7 +91,11 @@ private newtype TOpcode = TUnreached() or TNewObj() +/** + * An opcode that specifies the operation performed by an `Instruction`. + */ class Opcode extends TOpcode { + /** Gets a textual representation of this element. */ string toString() { result = "UnknownOpcode" } /** @@ -139,10 +148,20 @@ class Opcode extends TOpcode { predicate hasOperandInternal(OperandTag tag) { none() } } +/** + * The `Opcode` for a `UnaryInstruction`. + * + * See the `UnaryInstruction` documentation for more details. + */ abstract class UnaryOpcode extends Opcode { final override predicate hasOperandInternal(OperandTag tag) { tag instanceof UnaryOperandTag } } +/** + * The `Opcode` for a `BinaryInstruction`. + * + * See the `BinaryInstruction` documentation for more details. + */ abstract class BinaryOpcode extends Opcode { final override predicate hasOperandInternal(OperandTag tag) { tag instanceof LeftOperandTag or @@ -150,44 +169,127 @@ abstract class BinaryOpcode extends Opcode { } } +/** + * The `Opcode` for a `PointerArithmeticInstruction`. + * + * See the `PointerArithmeticInstruction` documentation for more details. + */ abstract class PointerArithmeticOpcode extends BinaryOpcode { } +/** + * The `Opcode` for a `PointerOffsetInstruction`. + * + * See the `PointerOffsetInstruction` documentation for more details. + */ abstract class PointerOffsetOpcode extends PointerArithmeticOpcode { } +/** + * The `Opcode` for an `ArithmeticInstruction`. + * + * See the `ArithmeticInstruction` documentation for more details. 
+ */ abstract class ArithmeticOpcode extends Opcode { } +/** + * The `Opcode` for a `BinaryArithmeticInstruction`. + * + * See the `BinaryArithmeticInstruction` documentation for more details. + */ abstract class BinaryArithmeticOpcode extends BinaryOpcode, ArithmeticOpcode { } +/** + * The `Opcode` for a `UnaryArithmeticInstruction`. + * + * See the `UnaryArithmeticInstruction` documentation for more details. + */ abstract class UnaryArithmeticOpcode extends UnaryOpcode, ArithmeticOpcode { } +/** + * The `Opcode` for a `BitwiseInstruction`. + * + * See the `BitwiseInstruction` documentation for more details. + */ abstract class BitwiseOpcode extends Opcode { } +/** + * The `Opcode` for a `BinaryBitwiseInstruction`. + * + * See the `BinaryBitwiseInstruction` documentation for more details. + */ abstract class BinaryBitwiseOpcode extends BinaryOpcode, BitwiseOpcode { } +/** + * The `Opcode` for a `UnaryBitwiseInstruction`. + * + * See the `UnaryBitwiseInstruction` documentation for more details. + */ abstract class UnaryBitwiseOpcode extends UnaryOpcode, BitwiseOpcode { } +/** + * The `Opcode` for a `CompareInstruction`. + * + * See the `CompareInstruction` documentation for more details. + */ abstract class CompareOpcode extends BinaryOpcode { } +/** + * The `Opcode` for a `RelationalInstruction`. + * + * See the `RelationalInstruction` documentation for more details. + */ abstract class RelationalOpcode extends CompareOpcode { } +/** + * The `Opcode` for a `CopyInstruction`. + * + * See the `CopyInstruction` documentation for more details. + */ abstract class CopyOpcode extends Opcode { } +/** + * The `Opcode` for a `ConvertToBaseInstruction`. + * + * See the `ConvertToBaseInstruction` documentation for more details. + */ abstract class ConvertToBaseOpcode extends UnaryOpcode { } -abstract class MemoryAccessOpcode extends Opcode { } - +/** + * The `Opcode` for a `ReturnInstruction`. + * + * See the `ReturnInstruction` documentation for more details. 
+ */ abstract class ReturnOpcode extends Opcode { } +/** + * The `Opcode` for a `ThrowInstruction`. + * + * See the `ThrowInstruction` documentation for more details. + */ abstract class ThrowOpcode extends Opcode { } +/** + * The `Opcode` for a `CatchInstruction`. + * + * See the `CatchInstruction` documentation for more details. + */ abstract class CatchOpcode extends Opcode { } -abstract class OpcodeWithCondition extends Opcode { +abstract private class OpcodeWithCondition extends Opcode { final override predicate hasOperandInternal(OperandTag tag) { tag instanceof ConditionOperandTag } } +/** + * The `Opcode` for a `BuiltInOperationInstruction`. + * + * See the `BuiltInOperationInstruction` documentation for more details. + */ abstract class BuiltInOperationOpcode extends Opcode { } +/** + * The `Opcode` for a `SideEffectInstruction`. + * + * See the `SideEffectInstruction` documentation for more details. + */ abstract class SideEffectOpcode extends Opcode { } /** @@ -323,7 +425,9 @@ abstract class OpcodeWithLoad extends IndirectReadOpcode { } /** - * An opcode that reads from a set of memory locations as a side effect. + * The `Opcode` for a `ReadSideEffectInstruction`. + * + * See the `ReadSideEffectInstruction` documentation for more details. */ abstract class ReadSideEffectOpcode extends SideEffectOpcode { final override predicate hasOperandInternal(OperandTag tag) { @@ -332,51 +436,111 @@ abstract class ReadSideEffectOpcode extends SideEffectOpcode { } /** - * An opcode that writes to a set of memory locations as a side effect. + * The `Opcode` for a `WriteSideEffectInstruction`. + * + * See the `WriteSideEffectInstruction` documentation for more details. */ abstract class WriteSideEffectOpcode extends SideEffectOpcode { } +/** + * Provides `Opcode`s that specify the operation performed by an `Instruction`. + */ module Opcode { + /** + * The `Opcode` for a `NoOpInstruction`. + * + * See the `NoOpInstruction` documentation for more details. 
+ */ class NoOp extends Opcode, TNoOp { final override string toString() { result = "NoOp" } } + /** + * The `Opcode` for an `UninitializedInstruction`. + * + * See the `UninitializedInstruction` documentation for more details. + */ class Uninitialized extends IndirectWriteOpcode, TUninitialized { final override string toString() { result = "Uninitialized" } } + /** + * The `Opcode` for an `ErrorInstruction`. + * + * See the `ErrorInstruction` documentation for more details. + */ class Error extends Opcode, TError { final override string toString() { result = "Error" } } + /** + * The `Opcode` for an `InitializeParameterInstruction`. + * + * See the `InitializeParameterInstruction` documentation for more details. + */ class InitializeParameter extends IndirectWriteOpcode, TInitializeParameter { final override string toString() { result = "InitializeParameter" } } + /** + * The `Opcode` for an `InitializeIndirectionInstruction`. + * + * See the `InitializeIndirectionInstruction` documentation for more details. + */ class InitializeIndirection extends EntireAllocationWriteOpcode, TInitializeIndirection { final override string toString() { result = "InitializeIndirection" } } + /** + * The `Opcode` for an `InitializeThisInstruction`. + * + * See the `InitializeThisInstruction` documentation for more details. + */ class InitializeThis extends Opcode, TInitializeThis { final override string toString() { result = "InitializeThis" } } + /** + * The `Opcode` for an `EnterFunctionInstruction`. + * + * See the `EnterFunctionInstruction` documentation for more details. + */ class EnterFunction extends Opcode, TEnterFunction { final override string toString() { result = "EnterFunction" } } + /** + * The `Opcode` for an `ExitFunctionInstruction`. + * + * See the `ExitFunctionInstruction` documentation for more details. 
+ */ class ExitFunction extends Opcode, TExitFunction { final override string toString() { result = "ExitFunction" } } + /** + * The `Opcode` for a `ReturnValueInstruction`. + * + * See the `ReturnValueInstruction` documentation for more details. + */ class ReturnValue extends ReturnOpcode, OpcodeWithLoad, TReturnValue { final override string toString() { result = "ReturnValue" } } + /** + * The `Opcode` for a `ReturnVoidInstruction`. + * + * See the `ReturnVoidInstruction` documentation for more details. + */ class ReturnVoid extends ReturnOpcode, TReturnVoid { final override string toString() { result = "ReturnVoid" } } + /** + * The `Opcode` for a `ReturnIndirectionInstruction`. + * + * See the `ReturnIndirectionInstruction` documentation for more details. + */ class ReturnIndirection extends EntireAllocationReadOpcode, TReturnIndirection { final override string toString() { result = "ReturnIndirection" } @@ -385,14 +549,29 @@ module Opcode { } } + /** + * The `Opcode` for a `CopyValueInstruction`. + * + * See the `CopyValueInstruction` documentation for more details. + */ class CopyValue extends UnaryOpcode, CopyOpcode, TCopyValue { final override string toString() { result = "CopyValue" } } + /** + * The `Opcode` for a `LoadInstruction`. + * + * See the `LoadInstruction` documentation for more details. + */ class Load extends CopyOpcode, OpcodeWithLoad, TLoad { final override string toString() { result = "Load" } } + /** + * The `Opcode` for a `StoreInstruction`. + * + * See the `StoreInstruction` documentation for more details. + */ class Store extends CopyOpcode, IndirectWriteOpcode, TStore { final override string toString() { result = "Store" } @@ -401,154 +580,344 @@ module Opcode { } } + /** + * The `Opcode` for an `AddInstruction`. + * + * See the `AddInstruction` documentation for more details. + */ class Add extends BinaryArithmeticOpcode, TAdd { final override string toString() { result = "Add" } } + /** + * The `Opcode` for a `SubInstruction`. 
+ * + * See the `SubInstruction` documentation for more details. + */ class Sub extends BinaryArithmeticOpcode, TSub { final override string toString() { result = "Sub" } } + /** + * The `Opcode` for a `MulInstruction`. + * + * See the `MulInstruction` documentation for more details. + */ class Mul extends BinaryArithmeticOpcode, TMul { final override string toString() { result = "Mul" } } + /** + * The `Opcode` for a `DivInstruction`. + * + * See the `DivInstruction` documentation for more details. + */ class Div extends BinaryArithmeticOpcode, TDiv { final override string toString() { result = "Div" } } + /** + * The `Opcode` for a `RemInstruction`. + * + * See the `RemInstruction` documentation for more details. + */ class Rem extends BinaryArithmeticOpcode, TRem { final override string toString() { result = "Rem" } } + /** + * The `Opcode` for a `NegateInstruction`. + * + * See the `NegateInstruction` documentation for more details. + */ class Negate extends UnaryArithmeticOpcode, TNegate { final override string toString() { result = "Negate" } } + /** + * The `Opcode` for a `ShiftLeftInstruction`. + * + * See the `ShiftLeftInstruction` documentation for more details. + */ class ShiftLeft extends BinaryBitwiseOpcode, TShiftLeft { final override string toString() { result = "ShiftLeft" } } + /** + * The `Opcode` for a `ShiftRightInstruction`. + * + * See the `ShiftRightInstruction` documentation for more details. + */ class ShiftRight extends BinaryBitwiseOpcode, TShiftRight { final override string toString() { result = "ShiftRight" } } + /** + * The `Opcode` for a `BitAndInstruction`. + * + * See the `BitAndInstruction` documentation for more details. + */ class BitAnd extends BinaryBitwiseOpcode, TBitAnd { final override string toString() { result = "BitAnd" } } + /** + * The `Opcode` for a `BitOrInstruction`. + * + * See the `BitOrInstruction` documentation for more details. 
+ */ class BitOr extends BinaryBitwiseOpcode, TBitOr { final override string toString() { result = "BitOr" } } + /** + * The `Opcode` for a `BitXorInstruction`. + * + * See the `BitXorInstruction` documentation for more details. + */ class BitXor extends BinaryBitwiseOpcode, TBitXor { final override string toString() { result = "BitXor" } } + /** + * The `Opcode` for a `BitComplementInstruction`. + * + * See the `BitComplementInstruction` documentation for more details. + */ class BitComplement extends UnaryBitwiseOpcode, TBitComplement { final override string toString() { result = "BitComplement" } } + /** + * The `Opcode` for a `LogicalNotInstruction`. + * + * See the `LogicalNotInstruction` documentation for more details. + */ class LogicalNot extends UnaryOpcode, TLogicalNot { final override string toString() { result = "LogicalNot" } } + /** + * The `Opcode` for a `CompareEQInstruction`. + * + * See the `CompareEQInstruction` documentation for more details. + */ class CompareEQ extends CompareOpcode, TCompareEQ { final override string toString() { result = "CompareEQ" } } + /** + * The `Opcode` for a `CompareNEInstruction`. + * + * See the `CompareNEInstruction` documentation for more details. + */ class CompareNE extends CompareOpcode, TCompareNE { final override string toString() { result = "CompareNE" } } + /** + * The `Opcode` for a `CompareLTInstruction`. + * + * See the `CompareLTInstruction` documentation for more details. + */ class CompareLT extends RelationalOpcode, TCompareLT { final override string toString() { result = "CompareLT" } } + /** + * The `Opcode` for a `CompareGTInstruction`. + * + * See the `CompareGTInstruction` documentation for more details. + */ class CompareGT extends RelationalOpcode, TCompareGT { final override string toString() { result = "CompareGT" } } + /** + * The `Opcode` for a `CompareLEInstruction`. + * + * See the `CompareLEInstruction` documentation for more details. 
+ */ class CompareLE extends RelationalOpcode, TCompareLE { final override string toString() { result = "CompareLE" } } + /** + * The `Opcode` for a `CompareGEInstruction`. + * + * See the `CompareGEInstruction` documentation for more details. + */ class CompareGE extends RelationalOpcode, TCompareGE { final override string toString() { result = "CompareGE" } } + /** + * The `Opcode` for a `PointerAddInstruction`. + * + * See the `PointerAddInstruction` documentation for more details. + */ class PointerAdd extends PointerOffsetOpcode, TPointerAdd { final override string toString() { result = "PointerAdd" } } + /** + * The `Opcode` for a `PointerSubInstruction`. + * + * See the `PointerSubInstruction` documentation for more details. + */ class PointerSub extends PointerOffsetOpcode, TPointerSub { final override string toString() { result = "PointerSub" } } + /** + * The `Opcode` for a `PointerDiffInstruction`. + * + * See the `PointerDiffInstruction` documentation for more details. + */ class PointerDiff extends PointerArithmeticOpcode, TPointerDiff { final override string toString() { result = "PointerDiff" } } + /** + * The `Opcode` for a `ConvertInstruction`. + * + * See the `ConvertInstruction` documentation for more details. + */ class Convert extends UnaryOpcode, TConvert { final override string toString() { result = "Convert" } } + /** + * The `Opcode` for a `ConvertToNonVirtualBaseInstruction`. + * + * See the `ConvertToNonVirtualBaseInstruction` documentation for more details. + */ class ConvertToNonVirtualBase extends ConvertToBaseOpcode, TConvertToNonVirtualBase { final override string toString() { result = "ConvertToNonVirtualBase" } } + /** + * The `Opcode` for a `ConvertToVirtualBaseInstruction`. + * + * See the `ConvertToVirtualBaseInstruction` documentation for more details. 
+ */ class ConvertToVirtualBase extends ConvertToBaseOpcode, TConvertToVirtualBase { final override string toString() { result = "ConvertToVirtualBase" } } + /** + * The `Opcode` for a `ConvertToDerivedInstruction`. + * + * See the `ConvertToDerivedInstruction` documentation for more details. + */ class ConvertToDerived extends UnaryOpcode, TConvertToDerived { final override string toString() { result = "ConvertToDerived" } } + /** + * The `Opcode` for a `CheckedConvertOrNullInstruction`. + * + * See the `CheckedConvertOrNullInstruction` documentation for more details. + */ class CheckedConvertOrNull extends UnaryOpcode, TCheckedConvertOrNull { final override string toString() { result = "CheckedConvertOrNull" } } + /** + * The `Opcode` for a `CheckedConvertOrThrowInstruction`. + * + * See the `CheckedConvertOrThrowInstruction` documentation for more details. + */ class CheckedConvertOrThrow extends UnaryOpcode, TCheckedConvertOrThrow { final override string toString() { result = "CheckedConvertOrThrow" } } - class DynamicCastToVoid extends UnaryOpcode, TDynamicCastToVoid { - final override string toString() { result = "DynamicCastToVoid" } + /** + * The `Opcode` for a `CompleteObjectAddressInstruction`. + * + * See the `CompleteObjectAddressInstruction` documentation for more details. + */ + class CompleteObjectAddress extends UnaryOpcode, TCompleteObjectAddress { + final override string toString() { result = "CompleteObjectAddress" } } + /** + * The `Opcode` for a `VariableAddressInstruction`. + * + * See the `VariableAddressInstruction` documentation for more details. + */ class VariableAddress extends Opcode, TVariableAddress { final override string toString() { result = "VariableAddress" } } + /** + * The `Opcode` for a `FieldAddressInstruction`. + * + * See the `FieldAddressInstruction` documentation for more details. 
+ */ class FieldAddress extends UnaryOpcode, TFieldAddress { final override string toString() { result = "FieldAddress" } } + /** + * The `Opcode` for an `ElementsAddressInstruction`. + * + * See the `ElementsAddressInstruction` documentation for more details. + */ class ElementsAddress extends UnaryOpcode, TElementsAddress { final override string toString() { result = "ElementsAddress" } } + /** + * The `Opcode` for a `FunctionAddressInstruction`. + * + * See the `FunctionAddressInstruction` documentation for more details. + */ class FunctionAddress extends Opcode, TFunctionAddress { final override string toString() { result = "FunctionAddress" } } + /** + * The `Opcode` for a `ConstantInstruction`. + * + * See the `ConstantInstruction` documentation for more details. + */ class Constant extends Opcode, TConstant { final override string toString() { result = "Constant" } } + /** + * The `Opcode` for a `StringConstantInstruction`. + * + * See the `StringConstantInstruction` documentation for more details. + */ class StringConstant extends Opcode, TStringConstant { final override string toString() { result = "StringConstant" } } + /** + * The `Opcode` for a `ConditionalBranchInstruction`. + * + * See the `ConditionalBranchInstruction` documentation for more details. + */ class ConditionalBranch extends OpcodeWithCondition, TConditionalBranch { final override string toString() { result = "ConditionalBranch" } } + /** + * The `Opcode` for a `SwitchInstruction`. + * + * See the `SwitchInstruction` documentation for more details. + */ class Switch extends OpcodeWithCondition, TSwitch { final override string toString() { result = "Switch" } } + /** + * The `Opcode` for a `CallInstruction`. + * + * See the `CallInstruction` documentation for more details. + */ class Call extends Opcode, TCall { final override string toString() { result = "Call" } @@ -557,32 +926,67 @@ module Opcode { } } + /** + * The `Opcode` for a `CatchByTypeInstruction`. 
+ * + * See the `CatchByTypeInstruction` documentation for more details. + */ class CatchByType extends CatchOpcode, TCatchByType { final override string toString() { result = "CatchByType" } } + /** + * The `Opcode` for a `CatchAnyInstruction`. + * + * See the `CatchAnyInstruction` documentation for more details. + */ class CatchAny extends CatchOpcode, TCatchAny { final override string toString() { result = "CatchAny" } } + /** + * The `Opcode` for a `ThrowValueInstruction`. + * + * See the `ThrowValueInstruction` documentation for more details. + */ class ThrowValue extends ThrowOpcode, OpcodeWithLoad, TThrowValue { final override string toString() { result = "ThrowValue" } } + /** + * The `Opcode` for a `ReThrowInstruction`. + * + * See the `ReThrowInstruction` documentation for more details. + */ class ReThrow extends ThrowOpcode, TReThrow { final override string toString() { result = "ReThrow" } } + /** + * The `Opcode` for an `UnwindInstruction`. + * + * See the `UnwindInstruction` documentation for more details. + */ class Unwind extends Opcode, TUnwind { final override string toString() { result = "Unwind" } } + /** + * The `Opcode` for an `AliasedDefinitionInstruction`. + * + * See the `AliasedDefinitionInstruction` documentation for more details. + */ class AliasedDefinition extends Opcode, TAliasedDefinition { final override string toString() { result = "AliasedDefinition" } final override MemoryAccessKind getWriteMemoryAccess() { result instanceof EscapedMemoryAccess } } + /** + * The `Opcode` for an `InitializeNonLocalInstruction`. + * + * See the `InitializeNonLocalInstruction` documentation for more details. + */ class InitializeNonLocal extends Opcode, TInitializeNonLocal { final override string toString() { result = "InitializeNonLocal" } @@ -591,6 +995,11 @@ module Opcode { } } + /** + * The `Opcode` for an `AliasedUseInstruction`. + * + * See the `AliasedUseInstruction` documentation for more details. 
+ */ class AliasedUse extends Opcode, TAliasedUse { final override string toString() { result = "AliasedUse" } @@ -601,92 +1010,187 @@ module Opcode { } } + /** + * The `Opcode` for a `PhiInstruction`. + * + * See the `PhiInstruction` documentation for more details. + */ class Phi extends Opcode, TPhi { final override string toString() { result = "Phi" } final override MemoryAccessKind getWriteMemoryAccess() { result instanceof PhiMemoryAccess } } + /** + * The `Opcode` for a `BuiltInInstruction`. + * + * See the `BuiltInInstruction` documentation for more details. + */ class BuiltIn extends BuiltInOperationOpcode, TBuiltIn { final override string toString() { result = "BuiltIn" } } + /** + * The `Opcode` for a `VarArgsStartInstruction`. + * + * See the `VarArgsStartInstruction` documentation for more details. + */ class VarArgsStart extends UnaryOpcode, TVarArgsStart { final override string toString() { result = "VarArgsStart" } } + /** + * The `Opcode` for a `VarArgsEndInstruction`. + * + * See the `VarArgsEndInstruction` documentation for more details. + */ class VarArgsEnd extends UnaryOpcode, TVarArgsEnd { final override string toString() { result = "VarArgsEnd" } } + /** + * The `Opcode` for a `VarArgInstruction`. + * + * See the `VarArgInstruction` documentation for more details. + */ class VarArg extends UnaryOpcode, TVarArg { final override string toString() { result = "VarArg" } } + /** + * The `Opcode` for a `NextVarArgInstruction`. + * + * See the `NextVarArgInstruction` documentation for more details. + */ class NextVarArg extends UnaryOpcode, TNextVarArg { final override string toString() { result = "NextVarArg" } } + /** + * The `Opcode` for a `CallSideEffectInstruction`. + * + * See the `CallSideEffectInstruction` documentation for more details. 
+ */ class CallSideEffect extends WriteSideEffectOpcode, EscapedWriteOpcode, MayWriteOpcode, ReadSideEffectOpcode, EscapedReadOpcode, MayReadOpcode, TCallSideEffect { final override string toString() { result = "CallSideEffect" } } + /** + * The `Opcode` for a `CallReadSideEffectInstruction`. + * + * See the `CallReadSideEffectInstruction` documentation for more details. + */ class CallReadSideEffect extends ReadSideEffectOpcode, EscapedReadOpcode, MayReadOpcode, TCallReadSideEffect { final override string toString() { result = "CallReadSideEffect" } } + /** + * The `Opcode` for an `IndirectReadSideEffectInstruction`. + * + * See the `IndirectReadSideEffectInstruction` documentation for more details. + */ class IndirectReadSideEffect extends ReadSideEffectOpcode, IndirectReadOpcode, TIndirectReadSideEffect { final override string toString() { result = "IndirectReadSideEffect" } } + /** + * The `Opcode` for an `IndirectMustWriteSideEffectInstruction`. + * + * See the `IndirectMustWriteSideEffectInstruction` documentation for more details. + */ class IndirectMustWriteSideEffect extends WriteSideEffectOpcode, IndirectWriteOpcode, TIndirectMustWriteSideEffect { final override string toString() { result = "IndirectMustWriteSideEffect" } } + /** + * The `Opcode` for an `IndirectMayWriteSideEffectInstruction`. + * + * See the `IndirectMayWriteSideEffectInstruction` documentation for more details. + */ class IndirectMayWriteSideEffect extends WriteSideEffectOpcode, IndirectWriteOpcode, MayWriteOpcode, TIndirectMayWriteSideEffect { final override string toString() { result = "IndirectMayWriteSideEffect" } } + /** + * The `Opcode` for a `BufferReadSideEffectInstruction`. + * + * See the `BufferReadSideEffectInstruction` documentation for more details. 
+ */ class BufferReadSideEffect extends ReadSideEffectOpcode, UnsizedBufferReadOpcode, TBufferReadSideEffect { final override string toString() { result = "BufferReadSideEffect" } } + /** + * The `Opcode` for a `BufferMustWriteSideEffectInstruction`. + * + * See the `BufferMustWriteSideEffectInstruction` documentation for more details. + */ class BufferMustWriteSideEffect extends WriteSideEffectOpcode, UnsizedBufferWriteOpcode, TBufferMustWriteSideEffect { final override string toString() { result = "BufferMustWriteSideEffect" } } + /** + * The `Opcode` for a `BufferMayWriteSideEffectInstruction`. + * + * See the `BufferMayWriteSideEffectInstruction` documentation for more details. + */ class BufferMayWriteSideEffect extends WriteSideEffectOpcode, UnsizedBufferWriteOpcode, MayWriteOpcode, TBufferMayWriteSideEffect { final override string toString() { result = "BufferMayWriteSideEffect" } } + /** + * The `Opcode` for a `SizedBufferReadSideEffectInstruction`. + * + * See the `SizedBufferReadSideEffectInstruction` documentation for more details. + */ class SizedBufferReadSideEffect extends ReadSideEffectOpcode, SizedBufferReadOpcode, TSizedBufferReadSideEffect { final override string toString() { result = "SizedBufferReadSideEffect" } } + /** + * The `Opcode` for a `SizedBufferMustWriteSideEffectInstruction`. + * + * See the `SizedBufferMustWriteSideEffectInstruction` documentation for more details. + */ class SizedBufferMustWriteSideEffect extends WriteSideEffectOpcode, SizedBufferWriteOpcode, TSizedBufferMustWriteSideEffect { final override string toString() { result = "SizedBufferMustWriteSideEffect" } } + /** + * The `Opcode` for a `SizedBufferMayWriteSideEffectInstruction`. + * + * See the `SizedBufferMayWriteSideEffectInstruction` documentation for more details. 
+ */ class SizedBufferMayWriteSideEffect extends WriteSideEffectOpcode, SizedBufferWriteOpcode, MayWriteOpcode, TSizedBufferMayWriteSideEffect { final override string toString() { result = "SizedBufferMayWriteSideEffect" } } + /** + * The `Opcode` for an `InitializeDynamicAllocationInstruction`. + * + * See the `InitializeDynamicAllocationInstruction` documentation for more details. + */ class InitializeDynamicAllocation extends SideEffectOpcode, EntireAllocationWriteOpcode, TInitializeDynamicAllocation { final override string toString() { result = "InitializeDynamicAllocation" } } + /** + * The `Opcode` for a `ChiInstruction`. + * + * See the `ChiInstruction` documentation for more details. + */ class Chi extends Opcode, TChi { final override string toString() { result = "Chi" } @@ -701,6 +1205,11 @@ module Opcode { } } + /** + * The `Opcode` for an `InlineAsmInstruction`. + * + * See the `InlineAsmInstruction` documentation for more details. + */ class InlineAsm extends Opcode, EscapedWriteOpcode, MayWriteOpcode, EscapedReadOpcode, MayReadOpcode, TInlineAsm { final override string toString() { result = "InlineAsm" } @@ -710,10 +1219,20 @@ module Opcode { } } + /** + * The `Opcode` for an `UnreachedInstruction`. + * + * See the `UnreachedInstruction` documentation for more details. + */ class Unreached extends Opcode, TUnreached { final override string toString() { result = "Unreached" } } + /** + * The `Opcode` for a `NewObjInstruction`. + * + * See the `NewObjInstruction` documentation for more details. 
+ */ class NewObj extends Opcode, TNewObj { final override string toString() { result = "NewObj" } } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/TempVariableTag.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/TempVariableTag.qll index a0c0ca67530..5f230de560d 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/TempVariableTag.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/TempVariableTag.qll @@ -12,5 +12,6 @@ private import Imports::TempVariableTag * computed on each branch. The set of possible `TempVariableTag`s is language-dependent. */ class TempVariableTag extends TTempVariableTag { + /** Gets a textual representation of this tag. */ string toString() { result = getTempVariableTagId(this) } } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll index badd48552a5..3fa0f1b78be 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll @@ -1,3 +1,47 @@ +/** + * Provides classes that describe the Intermediate Representation (IR) of the program. + * + * The IR is a representation of the semantics of the program, with very little dependence on the + * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`, + * and `++i` all have the same semantic effect, but appear in the AST as three different types of + * `Expr` node. 
In the IR, all three statements are broken down into a sequence of fundamental + * operations similar to: + * + * ``` + * r1(int*) = VariableAddress[i] // Compute the address of variable `i` + * r2(int) = Load &:r1, m0 // Load the value of `i` + * r3(int) = Constant[1] // An integer constant with the value `1` + * r4(int) = Add r2, r3 // Add `1` to the value of `i` + * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i` + * ``` + * + * This allows IR-based analysis to focus on the fundamental operations, rather than having to be + * concerned with the various ways of expressing those operations in source code. + * + * The key classes in the IR are: + * + * - `IRFunction` - Contains the IR for an entire function definition, including all of that + * function's `Instruction`s, `IRBlock`s, and `IRVariable`s. + * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be + * performed, the operands that produce the inputs to that operation, and the type of the result + * of the operation. Control flows from an `Instruction` to one of a set of successor + * `Instruction`s. + * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly + * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has + * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction` + * that produces its value (its "definition"). + * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is + * created for each variable directly accessed by the function. In addition, `IRVariable`s are + * created to represent certain temporary storage locations that do not have explicitly declared + * variables in the source code, such as the return value of the function. + * - `IRBlock` - A "basic block" in the control flow graph of a function.
An `IRBlock` contains a + * sequence of instructions such that control flow can only enter the block at the first + * instruction, and can only leave the block from the last instruction. + * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType` + * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all + * be represented as the `IRType` `uint4`, a four-byte unsigned integer. + */ + import IRFunction import Instruction import IRBlock @@ -11,11 +55,12 @@ import Imports::MemoryAccessKind private newtype TIRPropertyProvider = MkIRPropertyProvider() /** - * Class that provides additional properties to be dumped for IR instructions and blocks when using + * A class that provides additional properties to be dumped for IR instructions and blocks when using * the PrintIR module. Libraries that compute additional facts about IR elements can extend the * single instance of this class to specify the additional properties computed by the library. */ class IRPropertyProvider extends TIRPropertyProvider { + /** Gets a textual representation of this element. */ string toString() { result = "IRPropertyProvider" } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll index 94ef73b2769..f0ec0683bd6 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll @@ -1,3 +1,7 @@ +/** + * Provides classes describing basic blocks in the IR of a function. + */ + private import internal.IRInternal import Instruction private import internal.IRBlockImports as Imports @@ -16,15 +20,23 @@ private import Cached * Most consumers should use the class `IRBlock`. */ class IRBlockBase extends TIRBlock { + /** Gets a textual representation of this block. 
*/ final string toString() { result = getFirstInstruction(this).toString() } + /** Gets the source location of the first non-`Phi` instruction in this block. */ final Language::Location getLocation() { result = getFirstInstruction().getLocation() } + /** + * Gets a string that uniquely identifies this block within its enclosing function. + * + * This predicate is used by debugging and printing code only. + */ final string getUniqueId() { result = getFirstInstruction(this).getUniqueId() } /** - * Gets the zero-based index of the block within its function. This is used - * by debugging and printing code only. + * Gets the zero-based index of the block within its function. + * + * This predicate is used by debugging and printing code only. */ int getDisplayIndex() { exists(IRConfiguration::IRConfiguration config | @@ -42,27 +54,51 @@ class IRBlockBase extends TIRBlock { ) } + /** + * Gets the `index`th non-`Phi` instruction in this block. + */ final Instruction getInstruction(int index) { result = getInstruction(this, index) } + /** + * Gets the `Phi` instructions that appear at the start of this block. + */ final PhiInstruction getAPhiInstruction() { Construction::getPhiInstructionBlockStart(result) = getFirstInstruction() } + /** + * Gets the instructions in this block, including `Phi` instructions. + */ final Instruction getAnInstruction() { result = getInstruction(_) or result = getAPhiInstruction() } + /** + * Gets the first non-`Phi` instruction in this block. + */ final Instruction getFirstInstruction() { result = getFirstInstruction(this) } + /** + * Gets the last instruction in this block. + */ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) } + /** + * Gets the number of non-`Phi` instructions in this block. + */ final int getInstructionCount() { result = getInstructionCount(this) } + /** + * Gets the `IRFunction` that contains this block.
+ */ final IRFunction getEnclosingIRFunction() { result = getFirstInstruction(this).getEnclosingIRFunction() } + /** + * Gets the `Function` that contains this block. + */ final Language::Function getEnclosingFunction() { result = getFirstInstruction(this).getEnclosingFunction() } @@ -74,20 +110,57 @@ class IRBlockBase extends TIRBlock { * instruction of another block. */ class IRBlock extends IRBlockBase { + /** + * Gets the blocks to which control flows directly from this block. + */ final IRBlock getASuccessor() { blockSuccessor(this, result) } + /** + * Gets the blocks from which control flows directly to this block. + */ final IRBlock getAPredecessor() { blockSuccessor(result, this) } + /** + * Gets the block to which control flows directly from this block along an edge of kind `kind`. + */ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) } + /** + * Gets the block to which control flows directly from this block along a back edge of kind + * `kind`. + */ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) } + /** + * Holds if this block immediately dominates `block`. + * + * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and there + * is no other block that `A` strictly dominates and that itself strictly dominates block `B`. + */ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) } + /** + * Holds if this block strictly dominates `block`. + * + * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B` + * are not the same block. + */ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) } + /** + * Holds if this block dominates `block`. + * + * Block `A` dominates block `B` if any control flow path from the entry block of the function to + * block `B` must pass through block `A`. A block always dominates itself.
+ */ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block } + /** + * Gets the set of blocks on the dominance frontier of this block. + * + * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not + * dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`. + */ pragma[noinline] final IRBlock dominanceFrontier() { dominates(result.getAPredecessor()) and @@ -95,7 +168,7 @@ class IRBlock extends IRBlockBase { } /** - * Holds if this block is reachable from the entry point of its function + * Holds if this block is reachable from the entry block of its function. */ final predicate isReachableFromFunctionEntry() { this = getEnclosingIRFunction().getEntryBlock() or @@ -210,4 +283,4 @@ private module Cached { idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block) } -Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } +private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll index 6b2d32af48c..5968e58f90b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRFunction.qll @@ -1,3 +1,8 @@ +/** + * Provides the class `IRFunction`, which represents the Intermediate Representation for the + * definition of a function. 
+ */ + private import internal.IRInternal private import internal.IRFunctionImports as Imports import Imports::IRFunctionBase diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll index a01bd2dc79a..d317421c242 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent variables accessed by the IR. + */ + private import internal.IRInternal import IRFunction private import internal.IRVariableImports as Imports @@ -7,15 +11,11 @@ private import Imports::TTempVariableTag private import Imports::TIRVariable private import Imports::IRType -IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var) { - result.getVariable() = var and - result.getEnclosingFunction() = func -} - /** - * A variable referenced by the IR for a function. The variable may be a user-declared variable - * (`IRUserVariable`) or a temporary variable generated by the AST-to-IR translation - * (`IRTempVariable`). + * A variable referenced by the IR for a function. + * + * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated + * by the AST-to-IR translation (`IRTempVariable`). */ class IRVariable extends TIRVariable { Language::Function func; @@ -27,6 +27,7 @@ class IRVariable extends TIRVariable { this = TIRDynamicInitializationFlag(func, _, _) } + /** Gets a textual representation of this element. */ string toString() { none() } /** @@ -162,20 +163,26 @@ class IRGeneratedVariable extends IRVariable { override string getUniqueId() { none() } + /** + * Gets a string containing the source code location of the AST that generated this variable. + * + * This is used by debugging and printing code only. 
+ */ final string getLocationString() { result = ast.getLocation().getStartLine().toString() + ":" + ast.getLocation().getStartColumn().toString() } + /** + * Gets the string that is combined with the location of the variable to generate the string + * representation of this variable. + * + * This is used by debugging and printing code only. + */ string getBaseString() { none() } } -IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) { - result.getAST() = ast and - result.getTag() = tag -} - /** * A temporary variable introduced by IR construction. The most common examples are the variable * generated to hold the return value of a function, or the variable generated to hold the result of @@ -190,6 +197,10 @@ class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVa result = "Temp: " + Construction::getTempVariableUniqueId(this) } + /** + * Gets the "tag" object that differentiates this temporary variable from other temporary + * variables generated for the same AST. + */ final TempVariableTag getTag() { result = tag } override string getBaseString() { result = "#temp" } @@ -253,6 +264,9 @@ class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral { final override string getBaseString() { result = "#string" } + /** + * Gets the AST of the string literal represented by this `IRStringLiteral`. + */ final Language::StringLiteral getLiteral() { result = literal } } @@ -270,6 +284,9 @@ class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitial final override string toString() { result = var.toString() + "#init" } + /** + * Gets the variable whose initialization is guarded by this flag.
+ */ final Language::Variable getVariable() { result = var } final override string getUniqueId() { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll index 79516f6780d..0d2ad2d3bea 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll @@ -215,6 +215,15 @@ class Instruction extends Construction::TStageInstruction { result = Raw::getInstructionUnconvertedResultExpression(this) } + /** + * Gets the language-specific type of the result produced by this instruction. + * + * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a + * less complex, language-neutral type system in which all semantically equivalent types share the + * same `IRType` instance. For example, in C++, four different `Instruction`s might have three + * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`, + * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`. + */ final Language::LanguageType getResultLanguageType() { result = Construction::getInstructionResultType(this) } @@ -537,6 +546,18 @@ class VariableAddressInstruction extends VariableInstruction { VariableAddressInstruction() { getOpcode() instanceof Opcode::VariableAddress } } +/** + * An instruction that returns the address of a function. + * + * This instruction returns the address of a function, including non-member functions, static member + * functions, and non-static member functions. + * + * The result has an `IRFunctionAddress` type. 
+ */ +class FunctionAddressInstruction extends FunctionInstruction { + FunctionAddressInstruction() { getOpcode() instanceof Opcode::FunctionAddress } +} + /** * An instruction that initializes a parameter of the enclosing function with the value of the * corresponding argument passed by the caller. @@ -553,6 +574,16 @@ class InitializeParameterInstruction extends VariableInstruction { final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() } } +/** + * An instruction that initializes all memory that existed before this function was called. + * + * This instruction provides a definition for memory that, because it was actually allocated and + * initialized elsewhere, would not otherwise have a definition in this function. + */ +class InitializeNonLocalInstruction extends Instruction { + InitializeNonLocalInstruction() { getOpcode() instanceof Opcode::InitializeNonLocal } +} + /** * An instruction that initializes the memory pointed to by a parameter of the enclosing function * with the value of that memory on entry to the function. @@ -590,6 +621,25 @@ class FieldAddressInstruction extends FieldInstruction { final Instruction getObjectAddress() { result = getObjectAddressOperand().getDef() } } +/** + * An instruction that computes the address of the first element of a managed array. + * + * This instruction is used for element access to C# arrays. + */ +class ElementsAddressInstruction extends UnaryInstruction { + ElementsAddressInstruction() { getOpcode() instanceof Opcode::ElementsAddress } + + /** + * Gets the operand that provides the address of the array object. + */ + final UnaryOperand getArrayObjectAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the array object. 
+ */ + final Instruction getArrayObjectAddress() { result = getArrayObjectAddressOperand().getDef() } +} + /** * An instruction that produces a well-defined but unknown result and has * unknown side effects, including side effects that are not conservatively @@ -1137,8 +1187,14 @@ class PointerDiffInstruction extends PointerArithmeticInstruction { class UnaryInstruction extends Instruction { UnaryInstruction() { getOpcode() instanceof UnaryOpcode } + /** + * Gets the sole operand of this instruction. + */ final UnaryOperand getUnaryOperand() { result = getAnOperand() } + /** + * Gets the instruction whose result provides the sole operand of this instruction. + */ final Instruction getUnary() { result = getUnaryOperand().getDef() } } @@ -1177,6 +1233,19 @@ class CheckedConvertOrThrowInstruction extends UnaryInstruction { CheckedConvertOrThrowInstruction() { getOpcode() instanceof Opcode::CheckedConvertOrThrow } } +/** + * An instruction that returns the address of the complete object that contains the subobject + * pointed to by its operand. + * + * If the operand holds a null address, the result is a null address. + * + * This instruction is used to represent `dynamic_cast<void*>` in C++, which returns the pointer to + * the most-derived object. + */ +class CompleteObjectAddressInstruction extends UnaryInstruction { + CompleteObjectAddressInstruction() { getOpcode() instanceof Opcode::CompleteObjectAddress } +} + /** * An instruction that converts the address of an object to the address of a different subobject of * the same object, without any type checking at runtime. @@ -1453,7 +1522,7 @@ class CallInstruction extends Instruction { * Gets the `Function` that the call targets, if this is statically known.
*/ final Language::Function getStaticCallTarget() { - result = getCallTarget().(FunctionInstruction).getFunctionSymbol() + result = getCallTarget().(FunctionAddressInstruction).getFunctionSymbol() } /** @@ -1516,9 +1585,10 @@ class CallSideEffectInstruction extends SideEffectInstruction { /** * An instruction representing the side effect of a function call on any memory - * that might be read by that call. This instruction is emitted instead of - * `CallSideEffectInstruction` when it's certain that the call target cannot - * write to escaped memory. + * that might be read by that call. + * + * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the + * call target cannot write to escaped memory. */ class CallReadSideEffectInstruction extends SideEffectInstruction { CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect } @@ -1566,7 +1636,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { getOpcode() instanceof Opcode::SizedBufferReadSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes read from the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes read from the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** @@ -1576,7 +1654,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction { WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode } - Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() } + /** + * Gets the operand that holds the address of the memory to be written.
+ */ + final AddressOperand getDestinationAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the memory to be written. + */ + Instruction getDestinationAddress() { result = getDestinationAddressOperand().getDef() } } /** @@ -1607,11 +1693,20 @@ class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstructi getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** * An instruction representing the potential write of an indirect parameter within a function call. + * * Unlike `IndirectWriteSideEffectInstruction`, the location might not be completely overwritten. * written. */ @@ -1623,6 +1718,7 @@ class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. */ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1631,6 +1727,7 @@ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. 
*/ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1638,11 +1735,19 @@ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstructio getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** - * An instruction representing the initial value of newly allocated memory, e.g. the result of a + * An instruction representing the initial value of newly allocated memory, such as the result of a * call to `malloc`. */ class InitializeDynamicAllocationInstruction extends SideEffectInstruction { @@ -1860,17 +1965,20 @@ class ChiInstruction extends Instruction { } /** - * An instruction representing unreachable code. Inserted in place of the original target - * instruction of a `ConditionalBranch` or `Switch` instruction where that particular edge is - * infeasible. + * An instruction representing unreachable code. + * + * This instruction is inserted in place of the original target instruction of a `ConditionalBranch` + * or `Switch` instruction where that particular edge is infeasible. */ class UnreachedInstruction extends Instruction { UnreachedInstruction() { getOpcode() instanceof Opcode::Unreached } } /** - * An instruction representing a built-in operation. This is used to represent - * operations such as access to variable argument lists. + * An instruction representing a built-in operation. + * + * This is used to represent a variety of intrinsic operations provided by the compiler + * implementation, such as vector arithmetic. 
*/ class BuiltInOperationInstruction extends Instruction { Language::BuiltInOperation operation; @@ -1880,6 +1988,10 @@ class BuiltInOperationInstruction extends Instruction { operation = Raw::getInstructionBuiltInOperation(this) } + /** + * Gets the language-specific `BuiltInOperation` object that specifies the operation that is + * performed by this instruction. + */ final Language::BuiltInOperation getBuiltInOperation() { result = operation } } @@ -1892,3 +2004,59 @@ class BuiltInInstruction extends BuiltInOperationInstruction { final override string getImmediateString() { result = getBuiltInOperation().toString() } } + +/** + * An instruction that returns a `va_list` to access the arguments passed to the `...` parameter. + * + * The operand specifies the address of the `IREllipsisVariable` used to represent the `...` + * parameter. The result is a `va_list` that initially refers to the first argument that was passed + * to the `...` parameter. + */ +class VarArgsStartInstruction extends UnaryInstruction { + VarArgsStartInstruction() { getOpcode() instanceof Opcode::VarArgsStart } +} + +/** + * An instruction that cleans up a `va_list` after it is no longer in use. + * + * The operand specifies the address of the `va_list` to clean up. This instruction does not return + * a result. + */ +class VarArgsEndInstruction extends UnaryInstruction { + VarArgsEndInstruction() { getOpcode() instanceof Opcode::VarArgsEnd } +} + +/** + * An instruction that returns the address of the argument currently pointed to by a `va_list`. + * + * The operand is the `va_list` that points to the argument. The result is the address of the + * argument. + */ +class VarArgInstruction extends UnaryInstruction { + VarArgInstruction() { getOpcode() instanceof Opcode::VarArg } +} + +/** + * An instruction that modifies a `va_list` to point to the next argument that was passed to the + * `...` parameter. + * + * The operand is the current `va_list`.
The result is an updated `va_list` that points to the next + * argument of the `...` parameter. + */ +class NextVarArgInstruction extends UnaryInstruction { + NextVarArgInstruction() { getOpcode() instanceof Opcode::NextVarArg } +} + +/** + * An instruction that allocates a new object on the managed heap. + * + * This instruction is used to represent the allocation of a new object in C# using the `new` + * expression. This instruction does not invoke a constructor for the object. Instead, there will be + * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the + * result of the `NewObj` as the `this` argument. + * + * The result is the address of the newly allocated object. + */ +class NewObjInstruction extends Instruction { + NewObjInstruction() { getOpcode() instanceof Opcode::NewObj } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll index f82704094c8..468687b0aca 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent the input values of IR instructions. + */ + private import internal.IRInternal private import Instruction private import IRBlock @@ -78,10 +82,17 @@ private PhiOperandBase phiOperand( * A source operand of an `Instruction`. The operand represents a value consumed by the instruction. */ class Operand extends TOperand { + /** Gets a textual representation of this element. */ string toString() { result = "Operand" } + /** + * Gets the location of the source code for this operand. + */ final Language::Location getLocation() { result = getUse().getLocation() } + /** + * Gets the function that contains this operand.
+ */ final IRFunction getEnclosingIRFunction() { result = getUse().getEnclosingIRFunction() } /** @@ -270,6 +281,9 @@ class NonPhiOperand extends Operand { final override int getDumpSortOrder() { result = tag.getSortOrder() } + /** + * Gets the `OperandTag` that specifies how this operand is used by its `Instruction`. + */ final OperandTag getOperandTag() { result = tag } } @@ -292,6 +306,9 @@ class RegisterOperand extends NonPhiOperand, RegisterOperandBase { } } +/** + * A memory operand other than the operand of a `Phi` instruction. + */ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOperandBase { override MemoryOperandTag tag; @@ -313,6 +330,9 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper } } +/** + * A memory operand whose type may be different from the type of the result of its definition. + */ class TypedOperand extends NonPhiMemoryOperand { override TypedOperandTag tag; @@ -416,6 +436,9 @@ class PositionalArgumentOperand extends ArgumentOperand { final int getIndex() { result = tag.getArgIndex() } } +/** + * An operand representing memory read as a side effect of evaluating another instruction. + */ class SideEffectOperand extends TypedOperand { override SideEffectOperandTag tag; } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll index d9c0df44e12..b3e3a5b1195 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll @@ -1,3 +1,13 @@ +/** + * Outputs a representation of the IR as a control flow graph. + * + * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small + * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. 
For most + * uses, however, it is better to write a query that imports `PrintIR.qll`, extends + * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to + * dump. + */ + private import internal.IRInternal private import IR private import internal.PrintIRImports as Imports @@ -9,6 +19,7 @@ private newtype TPrintIRConfiguration = MkPrintIRConfiguration() * The query can extend this class to control which functions are printed. */ class PrintIRConfiguration extends TPrintIRConfiguration { + /** Gets a textual representation of this configuration. */ string toString() { result = "PrintIRConfiguration" } /** @@ -47,7 +58,7 @@ private newtype TPrintableIRNode = /** * A node to be emitted in the IR graph. */ -abstract class PrintableIRNode extends TPrintableIRNode { +abstract private class PrintableIRNode extends TPrintableIRNode { abstract string toString(); /** @@ -98,7 +109,7 @@ abstract class PrintableIRNode extends TPrintableIRNode { /** * An IR graph node representing a `IRFunction` object. */ -class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { +private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { IRFunction irFunc; PrintableIRFunction() { this = TPrintableIRFunction(irFunc) } @@ -129,7 +140,7 @@ class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { /** * An IR graph node representing an `IRBlock` object. */ -class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { +private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { IRBlock block; PrintableIRBlock() { this = TPrintableIRBlock(block) } @@ -161,7 +172,7 @@ class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { /** * An IR graph node representing an `Instruction`. 
*/ -class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { +private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { Instruction instr; PrintableInstruction() { this = TPrintableInstruction(instr) } @@ -224,6 +235,9 @@ private string getPaddingString(int n) { n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " " } +/** + * Holds if `node` belongs to the output graph, and its property `key` has the given `value`. + */ query predicate nodes(PrintableIRNode node, string key, string value) { value = node.getProperty(key) } @@ -237,6 +251,10 @@ private int getSuccessorIndex(IRBlock pred, IRBlock succ) { ) } +/** + * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key` + * has the given `value`. + */ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) { exists(EdgeKind kind, IRBlock predBlock, IRBlock succBlock | predBlock = pred.getBlock() and @@ -256,6 +274,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, ) } +/** + * Holds if `parent` is the parent node of `child` in the output graph. + */ query predicate parents(PrintableIRNode child, PrintableIRNode parent) { parent = child.getParent() } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll index badd48552a5..3fa0f1b78be 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll @@ -1,3 +1,47 @@ +/** + * Provides classes that describe the Intermediate Representation (IR) of the program. + * + * The IR is a representation of the semantics of the program, with very little dependence on the + * syntax that was used to write the program. 
For example, in C++, the statements `i += 1;`, `i++`, + * and `++i` all have the same semantic effect, but appear in the AST as three different types of + * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental + * operations similar to: + * + * ``` + * r1(int*) = VariableAddress[i] // Compute the address of variable `i` + * r2(int) = Load &:r1, m0 // Load the value of `i` + * r3(int) = Constant[1] // An integer constant with the value `1` + * r4(int) = Add r2, r3 // Add `1` to the value of `i` + * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i` + * ``` + * + * This allows IR-based analysis to focus on the fundamental operations, rather than having to be + * concerned with the various ways of expressing those operations in source code. + * + * The key classes in the IR are: + * + * - `IRFunction` - Contains the IR for an entire function definition, including all of that + * function's `Instruction`s, `IRBlock`s, and `IRVariable`s. + * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be + * performed, the operands that produce the inputs to that operation, and the type of the result + * of the operation. Control flows from an `Instruction` to one of a set of successor + * `Instruction`s. + * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly + * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has + * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction` + * that produces its value (its "definition"). + * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is + * created for each variable directly accessed by the function.
In addition, `IRVariable`s are + * created to represent certain temporary storage locations that do not have explicitly declared + * variables in the source code, such as the return value of the function. + * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a + * sequence of instructions such that control flow can only enter the block at the first + * instruction, and can only leave the block from the last instruction. + * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType` + * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all + * be represented as the `IRType` `uint4`, a four-byte unsigned integer. + */ + import IRFunction import Instruction import IRBlock @@ -11,11 +55,12 @@ import Imports::MemoryAccessKind private newtype TIRPropertyProvider = MkIRPropertyProvider() /** - * Class that provides additional properties to be dumped for IR instructions and blocks when using + * A class that provides additional properties to be dumped for IR instructions and blocks when using * the PrintIR module. Libraries that compute additional facts about IR elements can extend the * single instance of this class to specify the additional properties computed by the library. */ class IRPropertyProvider extends TIRPropertyProvider { + /** Gets a textual representation of this element. */ string toString() { result = "IRPropertyProvider" } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll index 94ef73b2769..f0ec0683bd6 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll @@ -1,3 +1,7 @@ +/** + * Provides classes describing basic blocks in the IR of a function. 
+ */ + private import internal.IRInternal import Instruction private import internal.IRBlockImports as Imports @@ -16,15 +20,23 @@ private import Cached * Most consumers should use the class `IRBlock`. */ class IRBlockBase extends TIRBlock { + /** Gets a textual representation of this block. */ final string toString() { result = getFirstInstruction(this).toString() } + /** Gets the source location of the first non-`Phi` instruction in this block. */ final Language::Location getLocation() { result = getFirstInstruction().getLocation() } + /** + * Gets a string that uniquely identifies this block within its enclosing function. + * + * This predicate is used by debugging and printing code only. + */ final string getUniqueId() { result = getFirstInstruction(this).getUniqueId() } /** - * Gets the zero-based index of the block within its function. This is used - * by debugging and printing code only. + * Gets the zero-based index of the block within its function. + * + * This predicate is used by debugging and printing code only. */ int getDisplayIndex() { exists(IRConfiguration::IRConfiguration config | @@ -42,27 +54,51 @@ class IRBlockBase extends TIRBlock { ) } + /** + * Gets the `index`th non-`Phi` instruction in this block. + */ final Instruction getInstruction(int index) { result = getInstruction(this, index) } + /** + * Gets the `Phi` instructions that appear at the start of this block. + */ final PhiInstruction getAPhiInstruction() { Construction::getPhiInstructionBlockStart(result) = getFirstInstruction() } + /** + * Gets the instructions in this block, including `Phi` instructions. + */ final Instruction getAnInstruction() { result = getInstruction(_) or result = getAPhiInstruction() } + /** + * Gets the first non-`Phi` instruction in this block. + */ final Instruction getFirstInstruction() { result = getFirstInstruction(this) } + /** + * Gets the last instruction in this block.
+ */ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) } + /** + * Gets the number of non-`Phi` instructions in this block. + */ final int getInstructionCount() { result = getInstructionCount(this) } + /** + * Gets the `IRFunction` that contains this block. + */ final IRFunction getEnclosingIRFunction() { result = getFirstInstruction(this).getEnclosingIRFunction() } + /** + * Gets the `Function` that contains this block. + */ final Language::Function getEnclosingFunction() { result = getFirstInstruction(this).getEnclosingFunction() } @@ -74,20 +110,57 @@ class IRBlockBase extends TIRBlock { * instruction of another block. */ class IRBlock extends IRBlockBase { + /** + * Gets the blocks to which control flows directly from this block. + */ final IRBlock getASuccessor() { blockSuccessor(this, result) } + /** + * Gets the blocks from which control flows directly to this block. + */ final IRBlock getAPredecessor() { blockSuccessor(result, this) } + /** + * Gets the block to which control flows directly from this block along an edge of kind `kind`. + */ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) } + /** + * Gets the block to which control flows directly from this block along a back edge of kind + * `kind`. + */ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) } + /** + * Holds if this block immediately dominates `block`. + * + * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and block `B` + * is a direct successor of block `A`. + */ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) } + /** + * Holds if this block strictly dominates `block`. + * + * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B` + * are not the same block.
+ */ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) } + /** + * Holds if this block dominates `block`. + * + * Block `A` dominates block `B` if any control flow path from the entry block of the function to + * block `B` must pass through block `A`. A block always dominates itself. + */ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block } + /** + * Gets the set of blocks on the dominance frontier of this block. + * + * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not + * dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`. + */ pragma[noinline] final IRBlock dominanceFrontier() { dominates(result.getAPredecessor()) and @@ -95,7 +168,7 @@ class IRBlock extends IRBlockBase { } /** - * Holds if this block is reachable from the entry point of its function + * Holds if this block is reachable from the entry block of its function. */ final predicate isReachableFromFunctionEntry() { this = getEnclosingIRFunction().getEntryBlock() or @@ -210,4 +283,4 @@ private module Cached { idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block) } -Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } +private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRFunction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRFunction.qll index 6b2d32af48c..5968e58f90b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRFunction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRFunction.qll @@ -1,3 +1,8 @@ +/** + * Provides the class `IRFunction`, which represents the Intermediate Representation for the + * definition of a function. 
+ */ + private import internal.IRInternal private import internal.IRFunctionImports as Imports import Imports::IRFunctionBase diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll index a01bd2dc79a..d317421c242 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent variables accessed by the IR. + */ + private import internal.IRInternal import IRFunction private import internal.IRVariableImports as Imports @@ -7,15 +11,11 @@ private import Imports::TTempVariableTag private import Imports::TIRVariable private import Imports::IRType -IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var) { - result.getVariable() = var and - result.getEnclosingFunction() = func -} - /** - * A variable referenced by the IR for a function. The variable may be a user-declared variable - * (`IRUserVariable`) or a temporary variable generated by the AST-to-IR translation - * (`IRTempVariable`). + * A variable referenced by the IR for a function. + * + * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated + * by the AST-to-IR translation (`IRTempVariable`). */ class IRVariable extends TIRVariable { Language::Function func; @@ -27,6 +27,7 @@ class IRVariable extends TIRVariable { this = TIRDynamicInitializationFlag(func, _, _) } + /** Gets a textual representation of this element. */ string toString() { none() } /** @@ -162,20 +163,26 @@ class IRGeneratedVariable extends IRVariable { override string getUniqueId() { none() } + /** + * Gets a string containing the source code location of the AST that generated this variable. + * + * This is used by debugging and printing code only. 
+ */ final string getLocationString() { result = ast.getLocation().getStartLine().toString() + ":" + ast.getLocation().getStartColumn().toString() } + /** + * Gets the string that is combined with the location of the variable to generate the string + * representation of this variable. + * + * This is used by debugging and printing code only. + */ string getBaseString() { none() } } -IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) { - result.getAST() = ast and - result.getTag() = tag -} - /** * A temporary variable introduced by IR construction. The most common examples are the variable * generated to hold the return value of a function, or the variable generated to hold the result of @@ -190,6 +197,10 @@ class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVa result = "Temp: " + Construction::getTempVariableUniqueId(this) } + /** + * Gets the "tag" object that differentiates this temporary variable from other temporary + * variables generated for the same AST. + */ final TempVariableTag getTag() { result = tag } override string getBaseString() { result = "#temp" } @@ -253,6 +264,9 @@ class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral { final override string getBaseString() { result = "#string" } + /** + * Gets the AST of the string literal represented by this `IRStringLiteral`. + */ final Language::StringLiteral getLiteral() { result = literal } } @@ -270,6 +284,9 @@ class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitial final override string toString() { result = var.toString() + "#init" } + /** + * Gets the variable whose initialization is guarded by this flag.
+ */ final Language::Variable getVariable() { result = var } final override string getUniqueId() { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll index 79516f6780d..0d2ad2d3bea 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll @@ -215,6 +215,15 @@ class Instruction extends Construction::TStageInstruction { result = Raw::getInstructionUnconvertedResultExpression(this) } + /** + * Gets the language-specific type of the result produced by this instruction. + * + * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a + * less complex, language-neutral type system in which all semantically equivalent types share the + * same `IRType` instance. For example, in C++, four different `Instruction`s might have three + * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`, + * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`. + */ final Language::LanguageType getResultLanguageType() { result = Construction::getInstructionResultType(this) } @@ -537,6 +546,18 @@ class VariableAddressInstruction extends VariableInstruction { VariableAddressInstruction() { getOpcode() instanceof Opcode::VariableAddress } } +/** + * An instruction that returns the address of a function. + * + * This instruction returns the address of a function, including non-member functions, static member + * functions, and non-static member functions. + * + * The result has an `IRFunctionAddress` type. + */ +class FunctionAddressInstruction extends FunctionInstruction { + FunctionAddressInstruction() { getOpcode() instanceof Opcode::FunctionAddress } +} + /** * An instruction that initializes a parameter of the enclosing function with the value of the * corresponding argument passed by the caller. 
@@ -553,6 +574,16 @@ class InitializeParameterInstruction extends VariableInstruction { final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() } } +/** + * An instruction that initializes all memory that existed before this function was called. + * + * This instruction provides a definition for memory that, because it was actually allocated and + * initialized elsewhere, would not otherwise have a definition in this function. + */ +class InitializeNonLocalInstruction extends Instruction { + InitializeNonLocalInstruction() { getOpcode() instanceof Opcode::InitializeNonLocal } +} + /** * An instruction that initializes the memory pointed to by a parameter of the enclosing function * with the value of that memory on entry to the function. @@ -590,6 +621,25 @@ class FieldAddressInstruction extends FieldInstruction { final Instruction getObjectAddress() { result = getObjectAddressOperand().getDef() } } +/** + * An instruction that computes the address of the first element of a managed array. + * + * This instruction is used for element access to C# arrays. + */ +class ElementsAddressInstruction extends UnaryInstruction { + ElementsAddressInstruction() { getOpcode() instanceof Opcode::ElementsAddress } + + /** + * Gets the operand that provides the address of the array object. + */ + final UnaryOperand getArrayObjectAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the array object. 
+ */ + final Instruction getArrayObjectAddress() { result = getArrayObjectAddressOperand().getDef() } +} + /** * An instruction that produces a well-defined but unknown result and has * unknown side effects, including side effects that are not conservatively @@ -1137,8 +1187,14 @@ class PointerDiffInstruction extends PointerArithmeticInstruction { class UnaryInstruction extends Instruction { UnaryInstruction() { getOpcode() instanceof UnaryOpcode } + /** + * Gets the sole operand of this instruction. + */ final UnaryOperand getUnaryOperand() { result = getAnOperand() } + /** + * Gets the instruction whose result provides the sole operand of this instruction. + */ final Instruction getUnary() { result = getUnaryOperand().getDef() } } @@ -1177,6 +1233,19 @@ class CheckedConvertOrThrowInstruction extends UnaryInstruction { CheckedConvertOrThrowInstruction() { getOpcode() instanceof Opcode::CheckedConvertOrThrow } } +/** + * An instruction that returns the address of the complete object that contains the subobject + * pointed to by its operand. + * + * If the operand holds a null address, the result is a null address. + * + * This instruction is used to represent `dynamic_cast<void*>` in C++, which returns the pointer to + * the most-derived object. + */ +class CompleteObjectAddressInstruction extends UnaryInstruction { + CompleteObjectAddressInstruction() { getOpcode() instanceof Opcode::CompleteObjectAddress } +} + /** * An instruction that converts the address of an object to the address of a different subobject of * the same object, without any type checking at runtime. @@ -1453,7 +1522,7 @@ class CallInstruction extends Instruction { * Gets the `Function` that the call targets, if this is statically known.
*/ final Language::Function getStaticCallTarget() { - result = getCallTarget().(FunctionInstruction).getFunctionSymbol() + result = getCallTarget().(FunctionAddressInstruction).getFunctionSymbol() } /** @@ -1516,9 +1585,10 @@ class CallSideEffectInstruction extends SideEffectInstruction { /** * An instruction representing the side effect of a function call on any memory - * that might be read by that call. This instruction is emitted instead of - * `CallSideEffectInstruction` when it's certain that the call target cannot - * write to escaped memory. + * that might be read by that call. + * + * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the + * call target cannot write to escaped memory. */ class CallReadSideEffectInstruction extends SideEffectInstruction { CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect } @@ -1566,7 +1636,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { getOpcode() instanceof Opcode::SizedBufferReadSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes read from the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes read from the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** @@ -1576,7 +1654,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction { WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode } - Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() } + /** + * Gets the operand that holds the address of the memory to be written.
+ */ + final AddressOperand getDestinationAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the memory to be written. + */ + Instruction getDestinationAddress() { result = getDestinationAddressOperand().getDef() } } /** @@ -1607,11 +1693,20 @@ class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstructi getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** * An instruction representing the potential write of an indirect parameter within a function call. + * * Unlike `IndirectWriteSideEffectInstruction`, the location might not be completely overwritten. * written. */ @@ -1623,6 +1718,7 @@ class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. */ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1631,6 +1727,7 @@ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. 
*/ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1638,11 +1735,19 @@ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstructio getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** - * An instruction representing the initial value of newly allocated memory, e.g. the result of a + * An instruction representing the initial value of newly allocated memory, such as the result of a * call to `malloc`. */ class InitializeDynamicAllocationInstruction extends SideEffectInstruction { @@ -1860,17 +1965,20 @@ class ChiInstruction extends Instruction { } /** - * An instruction representing unreachable code. Inserted in place of the original target - * instruction of a `ConditionalBranch` or `Switch` instruction where that particular edge is - * infeasible. + * An instruction representing unreachable code. + * + * This instruction is inserted in place of the original target instruction of a `ConditionalBranch` + * or `Switch` instruction where that particular edge is infeasible. */ class UnreachedInstruction extends Instruction { UnreachedInstruction() { getOpcode() instanceof Opcode::Unreached } } /** - * An instruction representing a built-in operation. This is used to represent - * operations such as access to variable argument lists. + * An instruction representing a built-in operation. + * + * This is used to represent a variety of intrinsic operations provided by the compiler + * implementation, such as vector arithmetic. 
*/ class BuiltInOperationInstruction extends Instruction { Language::BuiltInOperation operation; @@ -1880,6 +1988,10 @@ class BuiltInOperationInstruction extends Instruction { operation = Raw::getInstructionBuiltInOperation(this) } + /** + * Gets the language-specific `BuiltInOperation` object that specifies the operation that is + * performed by this instruction. + */ final Language::BuiltInOperation getBuiltInOperation() { result = operation } } @@ -1892,3 +2004,59 @@ class BuiltInInstruction extends BuiltInOperationInstruction { final override string getImmediateString() { result = getBuiltInOperation().toString() } } + +/** + * An instruction that returns a `va_list` to access the arguments passed to the `...` parameter. + * + * The operand specifies the address of the `IREllipsisVariable` used to represent the `...` + * parameter. The result is a `va_list` that initially refers to the first argument that was passed + * to the `...` parameter. + */ +class VarArgsStartInstruction extends UnaryInstruction { + VarArgsStartInstruction() { getOpcode() instanceof Opcode::VarArgsStart } +} + +/** + * An instruction that cleans up a `va_list` after it is no longer in use. + * + * The operand specifies the address of the `va_list` to clean up. This instruction does not return + * a result. + */ +class VarArgsEndInstruction extends UnaryInstruction { + VarArgsEndInstruction() { getOpcode() instanceof Opcode::VarArgsEnd } +} + +/** + * An instruction that returns the address of the argument currently pointed to by a `va_list`. + * + * The operand is the `va_list` that points to the argument. The result is the address of the + * argument. + */ +class VarArgInstruction extends UnaryInstruction { + VarArgInstruction() { getOpcode() instanceof Opcode::VarArg } +} + +/** + * An instruction that modifies a `va_list` to point to the next argument that was passed to the + * `...` parameter. + * + * The operand is the current `va_list`.
The result is an updated `va_list` that points to the next + * argument of the `...` parameter. + */ +class NextVarArgInstruction extends UnaryInstruction { + NextVarArgInstruction() { getOpcode() instanceof Opcode::NextVarArg } +} + +/** + * An instruction that allocates a new object on the managed heap. + * + * This instruction is used to represent the allocation of a new object in C# using the `new` + * expression. This instruction does not invoke a constructor for the object. Instead, there will be + * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the + * result of the `NewObj` as the `this` argument. + * + * The result is the address of the newly allocated object. + */ +class NewObjInstruction extends Instruction { + NewObjInstruction() { getOpcode() instanceof Opcode::NewObj } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll index f82704094c8..468687b0aca 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent the input values of IR instructions. + */ + private import internal.IRInternal private import Instruction private import IRBlock @@ -78,10 +82,17 @@ private PhiOperandBase phiOperand( * A source operand of an `Instruction`. The operand represents a value consumed by the instruction. */ class Operand extends TOperand { + /** Gets a textual representation of this element. */ string toString() { result = "Operand" } + /** + * Gets the location of the source code for this operand. + */ final Language::Location getLocation() { result = getUse().getLocation() } + /** + * Gets the function that contains this operand.
+ */ final IRFunction getEnclosingIRFunction() { result = getUse().getEnclosingIRFunction() } /** @@ -270,6 +281,9 @@ class NonPhiOperand extends Operand { final override int getDumpSortOrder() { result = tag.getSortOrder() } + /** + * Gets the `OperandTag` that specifies how this operand is used by its `Instruction`. + */ final OperandTag getOperandTag() { result = tag } } @@ -292,6 +306,9 @@ class RegisterOperand extends NonPhiOperand, RegisterOperandBase { } } +/** + * A memory operand other than the operand of a `Phi` instruction. + */ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOperandBase { override MemoryOperandTag tag; @@ -313,6 +330,9 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper } } +/** + * A memory operand whose type may be different from the type of the result of its definition. + */ class TypedOperand extends NonPhiMemoryOperand { override TypedOperandTag tag; @@ -416,6 +436,9 @@ class PositionalArgumentOperand extends ArgumentOperand { final int getIndex() { result = tag.getArgIndex() } } +/** + * An operand representing memory read as a side effect of evaluating another instruction. + */ class SideEffectOperand extends TypedOperand { override SideEffectOperandTag tag; } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll index d9c0df44e12..b3e3a5b1195 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll @@ -1,3 +1,13 @@ +/** + * Outputs a representation of the IR as a control flow graph. + * + * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small + * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. 
For most + * uses, however, it is better to write a query that imports `PrintIR.qll`, extends + * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to + * dump. + */ + private import internal.IRInternal private import IR private import internal.PrintIRImports as Imports @@ -9,6 +19,7 @@ private newtype TPrintIRConfiguration = MkPrintIRConfiguration() * The query can extend this class to control which functions are printed. */ class PrintIRConfiguration extends TPrintIRConfiguration { + /** Gets a textual representation of this configuration. */ string toString() { result = "PrintIRConfiguration" } /** @@ -47,7 +58,7 @@ private newtype TPrintableIRNode = /** * A node to be emitted in the IR graph. */ -abstract class PrintableIRNode extends TPrintableIRNode { +abstract private class PrintableIRNode extends TPrintableIRNode { abstract string toString(); /** @@ -98,7 +109,7 @@ abstract class PrintableIRNode extends TPrintableIRNode { /** * An IR graph node representing a `IRFunction` object. */ -class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { +private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { IRFunction irFunc; PrintableIRFunction() { this = TPrintableIRFunction(irFunc) } @@ -129,7 +140,7 @@ class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { /** * An IR graph node representing an `IRBlock` object. */ -class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { +private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { IRBlock block; PrintableIRBlock() { this = TPrintableIRBlock(block) } @@ -161,7 +172,7 @@ class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { /** * An IR graph node representing an `Instruction`. 
*/ -class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { +private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { Instruction instr; PrintableInstruction() { this = TPrintableInstruction(instr) } @@ -224,6 +235,9 @@ private string getPaddingString(int n) { n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " " } +/** + * Holds if `node` belongs to the output graph, and its property `key` has the given `value`. + */ query predicate nodes(PrintableIRNode node, string key, string value) { value = node.getProperty(key) } @@ -237,6 +251,10 @@ private int getSuccessorIndex(IRBlock pred, IRBlock succ) { ) } +/** + * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key` + * has the given `value`. + */ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) { exists(EdgeKind kind, IRBlock predBlock, IRBlock succBlock | predBlock = pred.getBlock() and @@ -256,6 +274,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, ) } +/** + * Holds if `parent` is the parent node of `child` in the output graph. 
+ */ query predicate parents(PrintableIRNode child, PrintableIRNode parent) { parent = child.getParent() } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll index 8bf5fa9d44b..f3c8816c19d 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedElement.qll @@ -24,6 +24,16 @@ private Element getRealParent(Expr expr) { result.(Destructor).getADestruction() = expr } +IRUserVariable getIRUserVariable(Function func, Variable var) { + result.getVariable() = var and + result.getEnclosingFunction() = func +} + +IRTempVariable getIRTempVariable(Locatable ast, TempVariableTag tag) { + result.getAST() = ast and + result.getTag() = tag +} + /** * Holds if `expr` is a constant of a type that can be replaced directly with * its value in the IR. This does not include address constants as we have no diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll index 75e70d1986f..98bcd1da8b2 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll @@ -1011,7 +1011,7 @@ class TranslatedDynamicCast extends TranslatedSingleInstructionConversion { if resultType instanceof PointerType then if resultType.(PointerType).getBaseType() instanceof VoidType - then result instanceof Opcode::DynamicCastToVoid + then result instanceof Opcode::CompleteObjectAddress else result instanceof Opcode::CheckedConvertOrNull else result instanceof Opcode::CheckedConvertOrThrow ) diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll index badd48552a5..3fa0f1b78be 
100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll @@ -1,3 +1,47 @@ +/** + * Provides classes that describe the Intermediate Representation (IR) of the program. + * + * The IR is a representation of the semantics of the program, with very little dependence on the + * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`, + * and `++i` all have the same semantic effect, but appear in the AST as three different types of + * `Expr` node. In the IR, all three statements are broken down into a sequence of fundamental + * operations similar to: + * + * ``` + * r1(int*) = VariableAddress[i] // Compute the address of variable `i` + * r2(int) = Load &:r1, m0 // Load the value of `i` + * r3(int) = Constant[1] // An integer constant with the value `1` + * r4(int) = Add r2, r3 // Add `1` to the value of `i` + * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i` + * ``` + * + * This allows IR-based analysis to focus on the fundamental operations, rather than having to be + * concerned with the various ways of expressing those operations in source code. + * + * The key classes in the IR are: + * + * - `IRFunction` - Contains the IR for an entire function definition, including all of that + * function's `Instruction`s, `IRBlock`s, and `IRVariable`s. + * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be + * performed, the operands that produce the inputs to that operation, and the type of the result + * of the operation. Control flows from an `Instruction` to one of a set of successor + * `Instruction`s. + * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly + * represented as `Operand`s, even if the input was implicit in the source code.
An `Operand` has + * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction` + * that produces its value (its "definition"). + * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is + * created for each variable directly accessed by the function. In addition, `IRVariable`s are + * created to represent certain temporary storage locations that do not have explicitly declared + * variables in the source code, such as the return value of the function. + * - `IRBlock` - A "basic block" in the control flow graph of a function. An `IRBlock` contains a + * sequence of instructions such that control flow can only enter the block at the first + * instruction, and can only leave the block from the last instruction. + * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType` + * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all + * be represented as the `IRType` `uint4`, a four-byte unsigned integer. + */ + import IRFunction import Instruction import IRBlock @@ -11,11 +55,12 @@ import Imports::MemoryAccessKind private newtype TIRPropertyProvider = MkIRPropertyProvider() /** - * Class that provides additional properties to be dumped for IR instructions and blocks when using + * A class that provides additional properties to be dumped for IR instructions and blocks when using * the PrintIR module. Libraries that compute additional facts about IR elements can extend the * single instance of this class to specify the additional properties computed by the library. */ class IRPropertyProvider extends TIRPropertyProvider { + /** Gets a textual representation of this element. 
*/ string toString() { result = "IRPropertyProvider" } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll index 94ef73b2769..f0ec0683bd6 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll @@ -1,3 +1,7 @@ +/** + * Provides classes describing basic blocks in the IR of a function. + */ + private import internal.IRInternal import Instruction private import internal.IRBlockImports as Imports @@ -16,15 +20,23 @@ private import Cached * Most consumers should use the class `IRBlock`. */ class IRBlockBase extends TIRBlock { + /** Gets a textual representation of this block. */ final string toString() { result = getFirstInstruction(this).toString() } + /** Gets the source location of the first non-`Phi` instruction in this block. */ final Language::Location getLocation() { result = getFirstInstruction().getLocation() } + /** + * Gets a string that uniquely identifies this block within its enclosing function. + * + * This predicate is used by debugging and printing code only. + */ final string getUniqueId() { result = getFirstInstruction(this).getUniqueId() } /** - * Gets the zero-based index of the block within its function. This is used - * by debugging and printing code only. + * Gets the zero-based index of the block within its function. + * + * This predicate is used by debugging and printing code only. */ int getDisplayIndex() { exists(IRConfiguration::IRConfiguration config | @@ -42,27 +54,51 @@ class IRBlockBase extends TIRBlock { ) } + /** + * Gets the `index`th non-`Phi` instruction in this block. + */ final Instruction getInstruction(int index) { result = getInstruction(this, index) } + /** + * Gets a `Phi` instruction that appears at the start of this block.
+ */ final PhiInstruction getAPhiInstruction() { Construction::getPhiInstructionBlockStart(result) = getFirstInstruction() } + /** + * Gets an instruction in this block, including `Phi` instructions. + */ final Instruction getAnInstruction() { result = getInstruction(_) or result = getAPhiInstruction() } + /** + * Gets the first non-`Phi` instruction in this block. + */ final Instruction getFirstInstruction() { result = getFirstInstruction(this) } + /** + * Gets the last instruction in this block. + */ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) } + /** + * Gets the number of non-`Phi` instructions in this block. + */ final int getInstructionCount() { result = getInstructionCount(this) } + /** + * Gets the `IRFunction` that contains this block. + */ final IRFunction getEnclosingIRFunction() { result = getFirstInstruction(this).getEnclosingIRFunction() } + /** + * Gets the `Function` that contains this block. + */ final Language::Function getEnclosingFunction() { result = getFirstInstruction(this).getEnclosingFunction() } @@ -74,20 +110,57 @@ class IRBlockBase extends TIRBlock { * instruction of another block. */ class IRBlock extends IRBlockBase { + /** + * Gets the blocks to which control flows directly from this block. + */ final IRBlock getASuccessor() { blockSuccessor(this, result) } + /** + * Gets the blocks from which control flows directly to this block. + */ final IRBlock getAPredecessor() { blockSuccessor(result, this) } + /** + * Gets the block to which control flows directly from this block along an edge of kind `kind`. + */ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) } + /** + * Gets the block to which control flows directly from this block along a back edge of kind + * `kind`. + */ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) } + /** + * Holds if this block immediately dominates `block`.
+ * + * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and there + * is no block `C` such that `A` strictly dominates `C` and `C` strictly dominates `B`. + */ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) } + /** + * Holds if this block strictly dominates `block`. + * + * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B` + * are not the same block. + */ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) } + /** + * Holds if this block dominates `block`. + * + * Block `A` dominates block `B` if any control flow path from the entry block of the function to + * block `B` must pass through block `A`. A block always dominates itself. + */ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block } + /** + * Gets the set of blocks on the dominance frontier of this block. + * + * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not + * dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`. + */ pragma[noinline] final IRBlock dominanceFrontier() { dominates(result.getAPredecessor()) and @@ -95,7 +168,7 @@ class IRBlock extends IRBlockBase { } /** - * Holds if this block is reachable from the entry point of its function + * Holds if this block is reachable from the entry block of its function.
*/ final predicate isReachableFromFunctionEntry() { this = getEnclosingIRFunction().getEntryBlock() or @@ -210,4 +283,4 @@ private module Cached { idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block) } -Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } +private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll index 6b2d32af48c..5968e58f90b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRFunction.qll @@ -1,3 +1,8 @@ +/** + * Provides the class `IRFunction`, which represents the Intermediate Representation for the + * definition of a function. + */ + private import internal.IRInternal private import internal.IRFunctionImports as Imports import Imports::IRFunctionBase diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll index a01bd2dc79a..d317421c242 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent variables accessed by the IR. + */ + private import internal.IRInternal import IRFunction private import internal.IRVariableImports as Imports @@ -7,15 +11,11 @@ private import Imports::TTempVariableTag private import Imports::TIRVariable private import Imports::IRType -IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var) { - result.getVariable() = var and - result.getEnclosingFunction() = func -} - /** - * A variable referenced by the IR for a function. 
The variable may be a user-declared variable - * (`IRUserVariable`) or a temporary variable generated by the AST-to-IR translation - * (`IRTempVariable`). + * A variable referenced by the IR for a function. + * + * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated + * by the AST-to-IR translation (`IRTempVariable`). */ class IRVariable extends TIRVariable { Language::Function func; @@ -27,6 +27,7 @@ class IRVariable extends TIRVariable { this = TIRDynamicInitializationFlag(func, _, _) } + /** Gets a textual representation of this element. */ string toString() { none() } /** @@ -162,20 +163,26 @@ class IRGeneratedVariable extends IRVariable { override string getUniqueId() { none() } + /** + * Gets a string containing the source code location of the AST that generated this variable. + * + * This is used by debugging and printing code only. + */ final string getLocationString() { result = ast.getLocation().getStartLine().toString() + ":" + ast.getLocation().getStartColumn().toString() } + /** + * Gets the string that is combined with the location of the variable to generate the string + * representation of this variable. + * + * This is used by debugging and printing code only. + */ string getBaseString() { none() } } -IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) { - result.getAST() = ast and - result.getTag() = tag -} - /** * A temporary variable introduced by IR construction. The most common examples are the variable * generated to hold the return value of a function, or the variable generated to hold the result of @@ -190,6 +197,10 @@ class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVa result = "Temp: " + Construction::getTempVariableUniqueId(this) } + /** + * Gets the "tag" object that differentiates this temporary variable from other temporary + * variables generated for the same AST. 
+ */ final TempVariableTag getTag() { result = tag } override string getBaseString() { result = "#temp" } @@ -253,6 +264,9 @@ class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral { final override string getBaseString() { result = "#string" } + /** + * Gets the AST of the string literal represented by this `IRStringLiteral`. + */ final Language::StringLiteral getLiteral() { result = literal } } @@ -270,6 +284,9 @@ class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitial final override string toString() { result = var.toString() + "#init" } + /** + * Gets the variable whose initialization is guarded by this flag. + */ final Language::Variable getVariable() { result = var } final override string getUniqueId() { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll index 79516f6780d..0d2ad2d3bea 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll @@ -215,6 +215,15 @@ class Instruction extends Construction::TStageInstruction { result = Raw::getInstructionUnconvertedResultExpression(this) } + /** + * Gets the language-specific type of the result produced by this instruction. + * + * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a + * less complex, language-neutral type system in which all semantically equivalent types share the + * same `IRType` instance. For example, in C++, four different `Instruction`s might have three + * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`, + * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`.
+ */ final Language::LanguageType getResultLanguageType() { result = Construction::getInstructionResultType(this) } @@ -537,6 +546,18 @@ class VariableAddressInstruction extends VariableInstruction { VariableAddressInstruction() { getOpcode() instanceof Opcode::VariableAddress } } +/** + * An instruction that returns the address of a function. + * + * This instruction returns the address of a function, including non-member functions, static member + * functions, and non-static member functions. + * + * The result has an `IRFunctionAddress` type. + */ +class FunctionAddressInstruction extends FunctionInstruction { + FunctionAddressInstruction() { getOpcode() instanceof Opcode::FunctionAddress } +} + /** * An instruction that initializes a parameter of the enclosing function with the value of the * corresponding argument passed by the caller. @@ -553,6 +574,16 @@ class InitializeParameterInstruction extends VariableInstruction { final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() } } +/** + * An instruction that initializes all memory that existed before this function was called. + * + * This instruction provides a definition for memory that, because it was actually allocated and + * initialized elsewhere, would not otherwise have a definition in this function. + */ +class InitializeNonLocalInstruction extends Instruction { + InitializeNonLocalInstruction() { getOpcode() instanceof Opcode::InitializeNonLocal } +} + /** * An instruction that initializes the memory pointed to by a parameter of the enclosing function * with the value of that memory on entry to the function. @@ -590,6 +621,25 @@ class FieldAddressInstruction extends FieldInstruction { final Instruction getObjectAddress() { result = getObjectAddressOperand().getDef() } } +/** + * An instruction that computes the address of the first element of a managed array. + * + * This instruction is used for element access to C# arrays. 
+ */ +class ElementsAddressInstruction extends UnaryInstruction { + ElementsAddressInstruction() { getOpcode() instanceof Opcode::ElementsAddress } + + /** + * Gets the operand that provides the address of the array object. + */ + final UnaryOperand getArrayObjectAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the array object. + */ + final Instruction getArrayObjectAddress() { result = getArrayObjectAddressOperand().getDef() } +} + /** * An instruction that produces a well-defined but unknown result and has * unknown side effects, including side effects that are not conservatively @@ -1137,8 +1187,14 @@ class PointerDiffInstruction extends PointerArithmeticInstruction { class UnaryInstruction extends Instruction { UnaryInstruction() { getOpcode() instanceof UnaryOpcode } + /** + * Gets the sole operand of this instruction. + */ final UnaryOperand getUnaryOperand() { result = getAnOperand() } + /** + * Gets the instruction whose result provides the sole operand of this instruction. + */ final Instruction getUnary() { result = getUnaryOperand().getDef() } } @@ -1177,6 +1233,19 @@ class CheckedConvertOrThrowInstruction extends UnaryInstruction { CheckedConvertOrThrowInstruction() { getOpcode() instanceof Opcode::CheckedConvertOrThrow } } +/** + * An instruction that returns the address of the complete object that contains the subobject + * pointed to by its operand. + * + * If the operand holds a null address, the result is a null address. + * + * This instruction is used to represent `dynamic_cast` in C++, which returns the pointer to + * the most-derived object. + */ +class CompleteObjectAddressInstruction extends UnaryInstruction { + CompleteObjectAddressInstruction() { getOpcode() instanceof Opcode::CompleteObjectAddress } +} + /** * An instruction that converts the address of an object to the address of a different subobject of * the same object, without any type checking at runtime.
@@ -1453,7 +1522,7 @@ class CallInstruction extends Instruction { * Gets the `Function` that the call targets, if this is statically known. */ final Language::Function getStaticCallTarget() { - result = getCallTarget().(FunctionInstruction).getFunctionSymbol() + result = getCallTarget().(FunctionAddressInstruction).getFunctionSymbol() } /** @@ -1516,9 +1585,10 @@ class CallSideEffectInstruction extends SideEffectInstruction { /** * An instruction representing the side effect of a function call on any memory - * that might be read by that call. This instruction is emitted instead of - * `CallSideEffectInstruction` when it's certain that the call target cannot - * write to escaped memory. + * that might be read by that call. + * + * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the + * call target cannot write to escaped memory. */ class CallReadSideEffectInstruction extends SideEffectInstruction { CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect } @@ -1566,7 +1636,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { getOpcode() instanceof Opcode::SizedBufferReadSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes read from the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes read from the buffer. 
+ */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** @@ -1576,7 +1654,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction { WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode } - Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() } + /** + * Gets the operand that holds the address of the memory to be written. + */ + final AddressOperand getDestinationAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the memory to be written. + */ + Instruction getDestinationAddress() { result = getDestinationAddressOperand().getDef() } } /** @@ -1607,11 +1693,20 @@ class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstructi getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** * An instruction representing the potential write of an indirect parameter within a function call. + * * Unlike `IndirectWriteSideEffectInstruction`, the location might not be completely overwritten. * written. */ @@ -1623,6 +1718,7 @@ class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. 
*/ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1631,6 +1727,7 @@ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. */ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1638,11 +1735,19 @@ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstructio getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** - * An instruction representing the initial value of newly allocated memory, e.g. the result of a + * An instruction representing the initial value of newly allocated memory, such as the result of a * call to `malloc`. */ class InitializeDynamicAllocationInstruction extends SideEffectInstruction { @@ -1860,17 +1965,20 @@ class ChiInstruction extends Instruction { } /** - * An instruction representing unreachable code. Inserted in place of the original target - * instruction of a `ConditionalBranch` or `Switch` instruction where that particular edge is - * infeasible. + * An instruction representing unreachable code. + * + * This instruction is inserted in place of the original target instruction of a `ConditionalBranch` + * or `Switch` instruction where that particular edge is infeasible. 
+ */ class UnreachedInstruction extends Instruction { UnreachedInstruction() { getOpcode() instanceof Opcode::Unreached } } /** - * An instruction representing a built-in operation. This is used to represent - * operations such as access to variable argument lists. + * An instruction representing a built-in operation. + * + * This is used to represent a variety of intrinsic operations provided by the compiler + * implementation, such as vector arithmetic. */ class BuiltInOperationInstruction extends Instruction { Language::BuiltInOperation operation; @@ -1880,6 +1988,10 @@ class BuiltInOperationInstruction extends Instruction { operation = Raw::getInstructionBuiltInOperation(this) } + /** + * Gets the language-specific `BuiltInOperation` object that specifies the operation that is + * performed by this instruction. + */ final Language::BuiltInOperation getBuiltInOperation() { result = operation } } @@ -1892,3 +2004,59 @@ class BuiltInInstruction extends BuiltInOperationInstruction { final override string getImmediateString() { result = getBuiltInOperation().toString() } } + +/** + * An instruction that returns a `va_list` to access the arguments passed to the `...` parameter. + * + * The operand specifies the address of the `IREllipsisVariable` used to represent the `...` + * parameter. The result is a `va_list` that initially refers to the first argument that was passed + * to the `...` parameter. + */ +class VarArgsStartInstruction extends UnaryInstruction { + VarArgsStartInstruction() { getOpcode() instanceof Opcode::VarArgsStart } +} + +/** + * An instruction that cleans up a `va_list` after it is no longer in use. + * + * The operand specifies the address of the `va_list` to clean up. This instruction does not return + * a result. 
+ */ +class VarArgsEndInstruction extends UnaryInstruction { + VarArgsEndInstruction() { getOpcode() instanceof Opcode::VarArgsEnd } +} + +/** + * An instruction that returns the address of the argument currently pointed to by a `va_list`. + * + * The operand is the `va_list` that points to the argument. The result is the address of the + * argument. + */ +class VarArgInstruction extends UnaryInstruction { + VarArgInstruction() { getOpcode() instanceof Opcode::VarArg } +} + +/** + * An instruction that modifies a `va_list` to point to the next argument that was passed to the + * `...` parameter. + * + * The operand is the current `va_list`. The result is an updated `va_list` that points to the next + * argument of the `...` parameter. + */ +class NextVarArgInstruction extends UnaryInstruction { + NextVarArgInstruction() { getOpcode() instanceof Opcode::NextVarArg } +} + +/** + * An instruction that allocates a new object on the managed heap. + * + * This instruction is used to represent the allocation of a new object in C# using the `new` + * expression. This instruction does not invoke a constructor for the object. Instead, there will be + * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the + * result of the `NewObj` as the `this` argument. + * + * The result is the address of the newly allocated object. + */ +class NewObjInstruction extends Instruction { + NewObjInstruction() { getOpcode() instanceof Opcode::NewObj } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll index f82704094c8..468687b0aca 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent the input values of IR instructions. 
+ */ + private import internal.IRInternal private import Instruction private import IRBlock @@ -78,10 +82,17 @@ private PhiOperandBase phiOperand( * A source operand of an `Instruction`. The operand represents a value consumed by the instruction. */ class Operand extends TOperand { + /** Gets a textual representation of this element. */ string toString() { result = "Operand" } + /** + * Gets the location of the source code for this operand. + */ final Language::Location getLocation() { result = getUse().getLocation() } + /** + * Gets the function that contains this operand. + */ final IRFunction getEnclosingIRFunction() { result = getUse().getEnclosingIRFunction() } /** @@ -270,6 +281,9 @@ class NonPhiOperand extends Operand { final override int getDumpSortOrder() { result = tag.getSortOrder() } + /** + * Gets the `OperandTag` that specifies how this operand is used by its `Instruction`. + */ final OperandTag getOperandTag() { result = tag } } @@ -292,6 +306,9 @@ class RegisterOperand extends NonPhiOperand, RegisterOperandBase { } } +/** + * A memory operand other than the operand of a `Phi` instruction. + */ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOperandBase { override MemoryOperandTag tag; @@ -313,6 +330,9 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper } } +/** + * A memory operand whose type may be different from the type of the result of its definition. + */ class TypedOperand extends NonPhiMemoryOperand { override TypedOperandTag tag; @@ -416,6 +436,9 @@ class PositionalArgumentOperand extends ArgumentOperand { final int getIndex() { result = tag.getArgIndex() } } +/** + * An operand representing memory read as a side effect of evaluating another instruction. 
+ */ class SideEffectOperand extends TypedOperand { override SideEffectOperandTag tag; } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll index d9c0df44e12..b3e3a5b1195 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll @@ -1,3 +1,13 @@ +/** + * Outputs a representation of the IR as a control flow graph. + * + * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small + * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. For most + * uses, however, it is better to write a query that imports `PrintIR.qll`, extends + * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to + * dump. + */ + private import internal.IRInternal private import IR private import internal.PrintIRImports as Imports @@ -9,6 +19,7 @@ private newtype TPrintIRConfiguration = MkPrintIRConfiguration() * The query can extend this class to control which functions are printed. */ class PrintIRConfiguration extends TPrintIRConfiguration { + /** Gets a textual representation of this configuration. */ string toString() { result = "PrintIRConfiguration" } /** @@ -47,7 +58,7 @@ private newtype TPrintableIRNode = /** * A node to be emitted in the IR graph. */ -abstract class PrintableIRNode extends TPrintableIRNode { +abstract private class PrintableIRNode extends TPrintableIRNode { abstract string toString(); /** @@ -98,7 +109,7 @@ abstract class PrintableIRNode extends TPrintableIRNode { /** * An IR graph node representing a `IRFunction` object. 
*/ -class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { +private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { IRFunction irFunc; PrintableIRFunction() { this = TPrintableIRFunction(irFunc) } @@ -129,7 +140,7 @@ class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { /** * An IR graph node representing an `IRBlock` object. */ -class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { +private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { IRBlock block; PrintableIRBlock() { this = TPrintableIRBlock(block) } @@ -161,7 +172,7 @@ class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { /** * An IR graph node representing an `Instruction`. */ -class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { +private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { Instruction instr; PrintableInstruction() { this = TPrintableInstruction(instr) } @@ -224,6 +235,9 @@ private string getPaddingString(int n) { n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " " } +/** + * Holds if `node` belongs to the output graph, and its property `key` has the given `value`. + */ query predicate nodes(PrintableIRNode node, string key, string value) { value = node.getProperty(key) } @@ -237,6 +251,10 @@ private int getSuccessorIndex(IRBlock pred, IRBlock succ) { ) } +/** + * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key` + * has the given `value`. + */ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) { exists(EdgeKind kind, IRBlock predBlock, IRBlock succBlock | predBlock = pred.getBlock() and @@ -256,6 +274,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, ) } +/** + * Holds if `parent` is the parent node of `child` in the output graph. 
+ */ query predicate parents(PrintableIRNode child, PrintableIRNode parent) { parent = child.getParent() } diff --git a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected index 999df2c5202..a2be838ae48 100644 --- a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected @@ -4867,12 +4867,12 @@ ir.cpp: # 863| r863_1(glval) = VariableAddress[pv] : # 863| r863_2(glval) = VariableAddress[pb] : # 863| r863_3(PolymorphicBase *) = Load : &:r863_2, ~m? -# 863| r863_4(void *) = DynamicCastToVoid : r863_3 +# 863| r863_4(void *) = CompleteObjectAddress : r863_3 # 863| mu863_5(void *) = Store : &:r863_1, r863_4 # 864| r864_1(glval) = VariableAddress[pcv] : # 864| r864_2(glval) = VariableAddress[pd] : # 864| r864_3(PolymorphicDerived *) = Load : &:r864_2, ~m? -# 864| r864_4(void *) = DynamicCastToVoid : r864_3 +# 864| r864_4(void *) = CompleteObjectAddress : r864_3 # 864| mu864_5(void *) = Store : &:r864_1, r864_4 # 865| v865_1(void) = NoOp : # 849| v849_4(void) = ReturnVoid : diff --git a/csharp/ql/src/experimental/ir/implementation/EdgeKind.qll b/csharp/ql/src/experimental/ir/implementation/EdgeKind.qll index 54059fb5b82..32e36bb6787 100644 --- a/csharp/ql/src/experimental/ir/implementation/EdgeKind.qll +++ b/csharp/ql/src/experimental/ir/implementation/EdgeKind.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that specify the conditions under which control flows along a given edge. + */ + private import internal.EdgeKindInternal private newtype TEdgeKind = @@ -77,9 +81,15 @@ class CaseEdge extends EdgeKind, TCaseEdge { else result = "Case[" + minValue + ".." + maxValue + "]" } - string getMinValue() { result = minValue } + /** + * Gets the smallest value of the switch expression for which control will flow along this edge. 
+ */ + final string getMinValue() { result = minValue } - string getMaxValue() { result = maxValue } + /** + * Gets the largest value of the switch expression for which control will flow along this edge. + */ + final string getMaxValue() { result = maxValue } } /** diff --git a/csharp/ql/src/experimental/ir/implementation/IRConfiguration.qll b/csharp/ql/src/experimental/ir/implementation/IRConfiguration.qll index 71bc8ec2b0f..37ac2fccdd9 100644 --- a/csharp/ql/src/experimental/ir/implementation/IRConfiguration.qll +++ b/csharp/ql/src/experimental/ir/implementation/IRConfiguration.qll @@ -10,6 +10,7 @@ private newtype TIRConfiguration = MkIRConfiguration() * The query can extend this class to control which functions have IR generated for them. */ class IRConfiguration extends TIRConfiguration { + /** Gets a textual representation of this element. */ string toString() { result = "IRConfiguration" } /** @@ -17,6 +18,13 @@ class IRConfiguration extends TIRConfiguration { */ predicate shouldCreateIRForFunction(Language::Function func) { any() } + /** + * Holds if the strings used as part of an IR dump should be generated for function `func`. + * + * This predicate is overridden in `PrintIR.qll` to avoid the expense of generating a large number + * of debug strings for IR that will not be dumped. We still generate the actual IR for these + * functions, however, to preserve the results of any interprocedural analysis. + */ predicate shouldEvaluateDebugStringsForFunction(Language::Function func) { any() } } @@ -26,6 +34,7 @@ private newtype TIREscapeAnalysisConfiguration = MkIREscapeAnalysisConfiguration * The query can extend this class to control what escape analysis is used when generating SSA. */ class IREscapeAnalysisConfiguration extends TIREscapeAnalysisConfiguration { + /** Gets a textual representation of this element. 
*/ string toString() { result = "IREscapeAnalysisConfiguration" } /** diff --git a/csharp/ql/src/experimental/ir/implementation/IRType.qll b/csharp/ql/src/experimental/ir/implementation/IRType.qll index dec78b413b3..41c9ac06d82 100644 --- a/csharp/ql/src/experimental/ir/implementation/IRType.qll +++ b/csharp/ql/src/experimental/ir/implementation/IRType.qll @@ -32,6 +32,7 @@ private newtype TIRType = * all pointer types map to the same instance of `IRAddressType`. */ class IRType extends TIRType { + /** Gets a textual representation of this type. */ string toString() { none() } /** diff --git a/csharp/ql/src/experimental/ir/implementation/MemoryAccessKind.qll b/csharp/ql/src/experimental/ir/implementation/MemoryAccessKind.qll index 6852a965401..5e11a310e2f 100644 --- a/csharp/ql/src/experimental/ir/implementation/MemoryAccessKind.qll +++ b/csharp/ql/src/experimental/ir/implementation/MemoryAccessKind.qll @@ -1,3 +1,9 @@ +/** + * Provides classes that describe how a particular `Instruction` or its operands access memory. + */ + +private import IRConfiguration + private newtype TMemoryAccessKind = TIndirectMemoryAccess() or TBufferMemoryAccess() or @@ -14,6 +20,7 @@ private newtype TMemoryAccessKind = * memory result. */ class MemoryAccessKind extends TMemoryAccessKind { + /** Gets a textual representation of this access kind. */ string toString() { none() } /** diff --git a/csharp/ql/src/experimental/ir/implementation/Opcode.qll b/csharp/ql/src/experimental/ir/implementation/Opcode.qll index c0b8adbe56b..c4134d240ab 100644 --- a/csharp/ql/src/experimental/ir/implementation/Opcode.qll +++ b/csharp/ql/src/experimental/ir/implementation/Opcode.qll @@ -1,3 +1,8 @@ +/** + * Provides `Opcode`s that specify the operation performed by an `Instruction`, as well as metadata + * about those opcodes, such as operand kinds and memory accesses. 
+ */ + private import internal.OpcodeImports as Imports private import internal.OperandTag import Imports::MemoryAccessKind @@ -45,7 +50,7 @@ private newtype TOpcode = TConvertToDerived() or TCheckedConvertOrNull() or TCheckedConvertOrThrow() or - TDynamicCastToVoid() or + TCompleteObjectAddress() or TVariableAddress() or TFieldAddress() or TFunctionAddress() or @@ -86,7 +91,11 @@ private newtype TOpcode = TUnreached() or TNewObj() +/** + * An opcode that specifies the operation performed by an `Instruction`. + */ class Opcode extends TOpcode { + /** Gets a textual representation of this element. */ string toString() { result = "UnknownOpcode" } /** @@ -139,10 +148,20 @@ class Opcode extends TOpcode { predicate hasOperandInternal(OperandTag tag) { none() } } +/** + * The `Opcode` for a `UnaryInstruction`. + * + * See the `UnaryInstruction` documentation for more details. + */ abstract class UnaryOpcode extends Opcode { final override predicate hasOperandInternal(OperandTag tag) { tag instanceof UnaryOperandTag } } +/** + * The `Opcode` for a `BinaryInstruction`. + * + * See the `BinaryInstruction` documentation for more details. + */ abstract class BinaryOpcode extends Opcode { final override predicate hasOperandInternal(OperandTag tag) { tag instanceof LeftOperandTag or @@ -150,44 +169,127 @@ abstract class BinaryOpcode extends Opcode { } } +/** + * The `Opcode` for a `PointerArithmeticInstruction`. + * + * See the `PointerArithmeticInstruction` documentation for more details. + */ abstract class PointerArithmeticOpcode extends BinaryOpcode { } +/** + * The `Opcode` for a `PointerOffsetInstruction`. + * + * See the `PointerOffsetInstruction` documentation for more details. + */ abstract class PointerOffsetOpcode extends PointerArithmeticOpcode { } +/** + * The `Opcode` for an `ArithmeticInstruction`. + * + * See the `ArithmeticInstruction` documentation for more details. 
+ */ abstract class ArithmeticOpcode extends Opcode { } +/** + * The `Opcode` for a `BinaryArithmeticInstruction`. + * + * See the `BinaryArithmeticInstruction` documentation for more details. + */ abstract class BinaryArithmeticOpcode extends BinaryOpcode, ArithmeticOpcode { } +/** + * The `Opcode` for a `UnaryArithmeticInstruction`. + * + * See the `UnaryArithmeticInstruction` documentation for more details. + */ abstract class UnaryArithmeticOpcode extends UnaryOpcode, ArithmeticOpcode { } +/** + * The `Opcode` for a `BitwiseInstruction`. + * + * See the `BitwiseInstruction` documentation for more details. + */ abstract class BitwiseOpcode extends Opcode { } +/** + * The `Opcode` for a `BinaryBitwiseInstruction`. + * + * See the `BinaryBitwiseInstruction` documentation for more details. + */ abstract class BinaryBitwiseOpcode extends BinaryOpcode, BitwiseOpcode { } +/** + * The `Opcode` for a `UnaryBitwiseInstruction`. + * + * See the `UnaryBitwiseInstruction` documentation for more details. + */ abstract class UnaryBitwiseOpcode extends UnaryOpcode, BitwiseOpcode { } +/** + * The `Opcode` for a `CompareInstruction`. + * + * See the `CompareInstruction` documentation for more details. + */ abstract class CompareOpcode extends BinaryOpcode { } +/** + * The `Opcode` for a `RelationalInstruction`. + * + * See the `RelationalInstruction` documentation for more details. + */ abstract class RelationalOpcode extends CompareOpcode { } +/** + * The `Opcode` for a `CopyInstruction`. + * + * See the `CopyInstruction` documentation for more details. + */ abstract class CopyOpcode extends Opcode { } +/** + * The `Opcode` for a `ConvertToBaseInstruction`. + * + * See the `ConvertToBaseInstruction` documentation for more details. + */ abstract class ConvertToBaseOpcode extends UnaryOpcode { } -abstract class MemoryAccessOpcode extends Opcode { } - +/** + * The `Opcode` for a `ReturnInstruction`. + * + * See the `ReturnInstruction` documentation for more details. 
+ */ abstract class ReturnOpcode extends Opcode { } +/** + * The `Opcode` for a `ThrowInstruction`. + * + * See the `ThrowInstruction` documentation for more details. + */ abstract class ThrowOpcode extends Opcode { } +/** + * The `Opcode` for a `CatchInstruction`. + * + * See the `CatchInstruction` documentation for more details. + */ abstract class CatchOpcode extends Opcode { } -abstract class OpcodeWithCondition extends Opcode { +abstract private class OpcodeWithCondition extends Opcode { final override predicate hasOperandInternal(OperandTag tag) { tag instanceof ConditionOperandTag } } +/** + * The `Opcode` for a `BuiltInOperationInstruction`. + * + * See the `BuiltInOperationInstruction` documentation for more details. + */ abstract class BuiltInOperationOpcode extends Opcode { } +/** + * The `Opcode` for a `SideEffectInstruction`. + * + * See the `SideEffectInstruction` documentation for more details. + */ abstract class SideEffectOpcode extends Opcode { } /** @@ -323,7 +425,9 @@ abstract class OpcodeWithLoad extends IndirectReadOpcode { } /** - * An opcode that reads from a set of memory locations as a side effect. + * The `Opcode` for a `ReadSideEffectInstruction`. + * + * See the `ReadSideEffectInstruction` documentation for more details. */ abstract class ReadSideEffectOpcode extends SideEffectOpcode { final override predicate hasOperandInternal(OperandTag tag) { @@ -332,51 +436,111 @@ abstract class ReadSideEffectOpcode extends SideEffectOpcode { } /** - * An opcode that writes to a set of memory locations as a side effect. + * The `Opcode` for a `WriteSideEffectInstruction`. + * + * See the `WriteSideEffectInstruction` documentation for more details. */ abstract class WriteSideEffectOpcode extends SideEffectOpcode { } +/** + * Provides `Opcode`s that specify the operation performed by an `Instruction`. + */ module Opcode { + /** + * The `Opcode` for a `NoOpInstruction`. + * + * See the `NoOpInstruction` documentation for more details. 
+ */ class NoOp extends Opcode, TNoOp { final override string toString() { result = "NoOp" } } + /** + * The `Opcode` for an `UninitializedInstruction`. + * + * See the `UninitializedInstruction` documentation for more details. + */ class Uninitialized extends IndirectWriteOpcode, TUninitialized { final override string toString() { result = "Uninitialized" } } + /** + * The `Opcode` for an `ErrorInstruction`. + * + * See the `ErrorInstruction` documentation for more details. + */ class Error extends Opcode, TError { final override string toString() { result = "Error" } } + /** + * The `Opcode` for an `InitializeParameterInstruction`. + * + * See the `InitializeParameterInstruction` documentation for more details. + */ class InitializeParameter extends IndirectWriteOpcode, TInitializeParameter { final override string toString() { result = "InitializeParameter" } } + /** + * The `Opcode` for an `InitializeIndirectionInstruction`. + * + * See the `InitializeIndirectionInstruction` documentation for more details. + */ class InitializeIndirection extends EntireAllocationWriteOpcode, TInitializeIndirection { final override string toString() { result = "InitializeIndirection" } } + /** + * The `Opcode` for an `InitializeThisInstruction`. + * + * See the `InitializeThisInstruction` documentation for more details. + */ class InitializeThis extends Opcode, TInitializeThis { final override string toString() { result = "InitializeThis" } } + /** + * The `Opcode` for an `EnterFunctionInstruction`. + * + * See the `EnterFunctionInstruction` documentation for more details. + */ class EnterFunction extends Opcode, TEnterFunction { final override string toString() { result = "EnterFunction" } } + /** + * The `Opcode` for an `ExitFunctionInstruction`. + * + * See the `ExitFunctionInstruction` documentation for more details. 
+ */ class ExitFunction extends Opcode, TExitFunction { final override string toString() { result = "ExitFunction" } } + /** + * The `Opcode` for a `ReturnValueInstruction`. + * + * See the `ReturnValueInstruction` documentation for more details. + */ class ReturnValue extends ReturnOpcode, OpcodeWithLoad, TReturnValue { final override string toString() { result = "ReturnValue" } } + /** + * The `Opcode` for a `ReturnVoidInstruction`. + * + * See the `ReturnVoidInstruction` documentation for more details. + */ class ReturnVoid extends ReturnOpcode, TReturnVoid { final override string toString() { result = "ReturnVoid" } } + /** + * The `Opcode` for a `ReturnIndirectionInstruction`. + * + * See the `ReturnIndirectionInstruction` documentation for more details. + */ class ReturnIndirection extends EntireAllocationReadOpcode, TReturnIndirection { final override string toString() { result = "ReturnIndirection" } @@ -385,14 +549,29 @@ module Opcode { } } + /** + * The `Opcode` for a `CopyValueInstruction`. + * + * See the `CopyValueInstruction` documentation for more details. + */ class CopyValue extends UnaryOpcode, CopyOpcode, TCopyValue { final override string toString() { result = "CopyValue" } } + /** + * The `Opcode` for a `LoadInstruction`. + * + * See the `LoadInstruction` documentation for more details. + */ class Load extends CopyOpcode, OpcodeWithLoad, TLoad { final override string toString() { result = "Load" } } + /** + * The `Opcode` for a `StoreInstruction`. + * + * See the `StoreInstruction` documentation for more details. + */ class Store extends CopyOpcode, IndirectWriteOpcode, TStore { final override string toString() { result = "Store" } @@ -401,154 +580,344 @@ module Opcode { } } + /** + * The `Opcode` for an `AddInstruction`. + * + * See the `AddInstruction` documentation for more details. + */ class Add extends BinaryArithmeticOpcode, TAdd { final override string toString() { result = "Add" } } + /** + * The `Opcode` for a `SubInstruction`. 
+ * + * See the `SubInstruction` documentation for more details. + */ class Sub extends BinaryArithmeticOpcode, TSub { final override string toString() { result = "Sub" } } + /** + * The `Opcode` for a `MulInstruction`. + * + * See the `MulInstruction` documentation for more details. + */ class Mul extends BinaryArithmeticOpcode, TMul { final override string toString() { result = "Mul" } } + /** + * The `Opcode` for a `DivInstruction`. + * + * See the `DivInstruction` documentation for more details. + */ class Div extends BinaryArithmeticOpcode, TDiv { final override string toString() { result = "Div" } } + /** + * The `Opcode` for a `RemInstruction`. + * + * See the `RemInstruction` documentation for more details. + */ class Rem extends BinaryArithmeticOpcode, TRem { final override string toString() { result = "Rem" } } + /** + * The `Opcode` for a `NegateInstruction`. + * + * See the `NegateInstruction` documentation for more details. + */ class Negate extends UnaryArithmeticOpcode, TNegate { final override string toString() { result = "Negate" } } + /** + * The `Opcode` for a `ShiftLeftInstruction`. + * + * See the `ShiftLeftInstruction` documentation for more details. + */ class ShiftLeft extends BinaryBitwiseOpcode, TShiftLeft { final override string toString() { result = "ShiftLeft" } } + /** + * The `Opcode` for a `ShiftRightInstruction`. + * + * See the `ShiftRightInstruction` documentation for more details. + */ class ShiftRight extends BinaryBitwiseOpcode, TShiftRight { final override string toString() { result = "ShiftRight" } } + /** + * The `Opcode` for a `BitAndInstruction`. + * + * See the `BitAndInstruction` documentation for more details. + */ class BitAnd extends BinaryBitwiseOpcode, TBitAnd { final override string toString() { result = "BitAnd" } } + /** + * The `Opcode` for a `BitOrInstruction`. + * + * See the `BitOrInstruction` documentation for more details. 
+ */ class BitOr extends BinaryBitwiseOpcode, TBitOr { final override string toString() { result = "BitOr" } } + /** + * The `Opcode` for a `BitXorInstruction`. + * + * See the `BitXorInstruction` documentation for more details. + */ class BitXor extends BinaryBitwiseOpcode, TBitXor { final override string toString() { result = "BitXor" } } + /** + * The `Opcode` for a `BitComplementInstruction`. + * + * See the `BitComplementInstruction` documentation for more details. + */ class BitComplement extends UnaryBitwiseOpcode, TBitComplement { final override string toString() { result = "BitComplement" } } + /** + * The `Opcode` for a `LogicalNotInstruction`. + * + * See the `LogicalNotInstruction` documentation for more details. + */ class LogicalNot extends UnaryOpcode, TLogicalNot { final override string toString() { result = "LogicalNot" } } + /** + * The `Opcode` for a `CompareEQInstruction`. + * + * See the `CompareEQInstruction` documentation for more details. + */ class CompareEQ extends CompareOpcode, TCompareEQ { final override string toString() { result = "CompareEQ" } } + /** + * The `Opcode` for a `CompareNEInstruction`. + * + * See the `CompareNEInstruction` documentation for more details. + */ class CompareNE extends CompareOpcode, TCompareNE { final override string toString() { result = "CompareNE" } } + /** + * The `Opcode` for a `CompareLTInstruction`. + * + * See the `CompareLTInstruction` documentation for more details. + */ class CompareLT extends RelationalOpcode, TCompareLT { final override string toString() { result = "CompareLT" } } + /** + * The `Opcode` for a `CompareGTInstruction`. + * + * See the `CompareGTInstruction` documentation for more details. + */ class CompareGT extends RelationalOpcode, TCompareGT { final override string toString() { result = "CompareGT" } } + /** + * The `Opcode` for a `CompareLEInstruction`. + * + * See the `CompareLEInstruction` documentation for more details. 
+ */ class CompareLE extends RelationalOpcode, TCompareLE { final override string toString() { result = "CompareLE" } } + /** + * The `Opcode` for a `CompareGEInstruction`. + * + * See the `CompareGEInstruction` documentation for more details. + */ class CompareGE extends RelationalOpcode, TCompareGE { final override string toString() { result = "CompareGE" } } + /** + * The `Opcode` for a `PointerAddInstruction`. + * + * See the `PointerAddInstruction` documentation for more details. + */ class PointerAdd extends PointerOffsetOpcode, TPointerAdd { final override string toString() { result = "PointerAdd" } } + /** + * The `Opcode` for a `PointerSubInstruction`. + * + * See the `PointerSubInstruction` documentation for more details. + */ class PointerSub extends PointerOffsetOpcode, TPointerSub { final override string toString() { result = "PointerSub" } } + /** + * The `Opcode` for a `PointerDiffInstruction`. + * + * See the `PointerDiffInstruction` documentation for more details. + */ class PointerDiff extends PointerArithmeticOpcode, TPointerDiff { final override string toString() { result = "PointerDiff" } } + /** + * The `Opcode` for a `ConvertInstruction`. + * + * See the `ConvertInstruction` documentation for more details. + */ class Convert extends UnaryOpcode, TConvert { final override string toString() { result = "Convert" } } + /** + * The `Opcode` for a `ConvertToNonVirtualBaseInstruction`. + * + * See the `ConvertToNonVirtualBaseInstruction` documentation for more details. + */ class ConvertToNonVirtualBase extends ConvertToBaseOpcode, TConvertToNonVirtualBase { final override string toString() { result = "ConvertToNonVirtualBase" } } + /** + * The `Opcode` for a `ConvertToVirtualBaseInstruction`. + * + * See the `ConvertToVirtualBaseInstruction` documentation for more details. 
+ */ class ConvertToVirtualBase extends ConvertToBaseOpcode, TConvertToVirtualBase { final override string toString() { result = "ConvertToVirtualBase" } } + /** + * The `Opcode` for a `ConvertToDerivedInstruction`. + * + * See the `ConvertToDerivedInstruction` documentation for more details. + */ class ConvertToDerived extends UnaryOpcode, TConvertToDerived { final override string toString() { result = "ConvertToDerived" } } + /** + * The `Opcode` for a `CheckedConvertOrNullInstruction`. + * + * See the `CheckedConvertOrNullInstruction` documentation for more details. + */ class CheckedConvertOrNull extends UnaryOpcode, TCheckedConvertOrNull { final override string toString() { result = "CheckedConvertOrNull" } } + /** + * The `Opcode` for a `CheckedConvertOrThrowInstruction`. + * + * See the `CheckedConvertOrThrowInstruction` documentation for more details. + */ class CheckedConvertOrThrow extends UnaryOpcode, TCheckedConvertOrThrow { final override string toString() { result = "CheckedConvertOrThrow" } } - class DynamicCastToVoid extends UnaryOpcode, TDynamicCastToVoid { - final override string toString() { result = "DynamicCastToVoid" } + /** + * The `Opcode` for a `CompleteObjectAddressInstruction`. + * + * See the `CompleteObjectAddressInstruction` documentation for more details. + */ + class CompleteObjectAddress extends UnaryOpcode, TCompleteObjectAddress { + final override string toString() { result = "CompleteObjectAddress" } } + /** + * The `Opcode` for a `VariableAddressInstruction`. + * + * See the `VariableAddressInstruction` documentation for more details. + */ class VariableAddress extends Opcode, TVariableAddress { final override string toString() { result = "VariableAddress" } } + /** + * The `Opcode` for a `FieldAddressInstruction`. + * + * See the `FieldAddressInstruction` documentation for more details. 
+ */ class FieldAddress extends UnaryOpcode, TFieldAddress { final override string toString() { result = "FieldAddress" } } + /** + * The `Opcode` for an `ElementsAddressInstruction`. + * + * See the `ElementsAddressInstruction` documentation for more details. + */ class ElementsAddress extends UnaryOpcode, TElementsAddress { final override string toString() { result = "ElementsAddress" } } + /** + * The `Opcode` for a `FunctionAddressInstruction`. + * + * See the `FunctionAddressInstruction` documentation for more details. + */ class FunctionAddress extends Opcode, TFunctionAddress { final override string toString() { result = "FunctionAddress" } } + /** + * The `Opcode` for a `ConstantInstruction`. + * + * See the `ConstantInstruction` documentation for more details. + */ class Constant extends Opcode, TConstant { final override string toString() { result = "Constant" } } + /** + * The `Opcode` for a `StringConstantInstruction`. + * + * See the `StringConstantInstruction` documentation for more details. + */ class StringConstant extends Opcode, TStringConstant { final override string toString() { result = "StringConstant" } } + /** + * The `Opcode` for a `ConditionalBranchInstruction`. + * + * See the `ConditionalBranchInstruction` documentation for more details. + */ class ConditionalBranch extends OpcodeWithCondition, TConditionalBranch { final override string toString() { result = "ConditionalBranch" } } + /** + * The `Opcode` for a `SwitchInstruction`. + * + * See the `SwitchInstruction` documentation for more details. + */ class Switch extends OpcodeWithCondition, TSwitch { final override string toString() { result = "Switch" } } + /** + * The `Opcode` for a `CallInstruction`. + * + * See the `CallInstruction` documentation for more details. + */ class Call extends Opcode, TCall { final override string toString() { result = "Call" } @@ -557,32 +926,67 @@ module Opcode { } } + /** + * The `Opcode` for a `CatchByTypeInstruction`. 
+ * + * See the `CatchByTypeInstruction` documentation for more details. + */ class CatchByType extends CatchOpcode, TCatchByType { final override string toString() { result = "CatchByType" } } + /** + * The `Opcode` for a `CatchAnyInstruction`. + * + * See the `CatchAnyInstruction` documentation for more details. + */ class CatchAny extends CatchOpcode, TCatchAny { final override string toString() { result = "CatchAny" } } + /** + * The `Opcode` for a `ThrowValueInstruction`. + * + * See the `ThrowValueInstruction` documentation for more details. + */ class ThrowValue extends ThrowOpcode, OpcodeWithLoad, TThrowValue { final override string toString() { result = "ThrowValue" } } + /** + * The `Opcode` for a `ReThrowInstruction`. + * + * See the `ReThrowInstruction` documentation for more details. + */ class ReThrow extends ThrowOpcode, TReThrow { final override string toString() { result = "ReThrow" } } + /** + * The `Opcode` for an `UnwindInstruction`. + * + * See the `UnwindInstruction` documentation for more details. + */ class Unwind extends Opcode, TUnwind { final override string toString() { result = "Unwind" } } + /** + * The `Opcode` for an `AliasedDefinitionInstruction`. + * + * See the `AliasedDefinitionInstruction` documentation for more details. + */ class AliasedDefinition extends Opcode, TAliasedDefinition { final override string toString() { result = "AliasedDefinition" } final override MemoryAccessKind getWriteMemoryAccess() { result instanceof EscapedMemoryAccess } } + /** + * The `Opcode` for an `InitializeNonLocalInstruction`. + * + * See the `InitializeNonLocalInstruction` documentation for more details. + */ class InitializeNonLocal extends Opcode, TInitializeNonLocal { final override string toString() { result = "InitializeNonLocal" } @@ -591,6 +995,11 @@ module Opcode { } } + /** + * The `Opcode` for an `AliasedUseInstruction`. + * + * See the `AliasedUseInstruction` documentation for more details. 
+ */ class AliasedUse extends Opcode, TAliasedUse { final override string toString() { result = "AliasedUse" } @@ -601,92 +1010,187 @@ module Opcode { } } + /** + * The `Opcode` for a `PhiInstruction`. + * + * See the `PhiInstruction` documentation for more details. + */ class Phi extends Opcode, TPhi { final override string toString() { result = "Phi" } final override MemoryAccessKind getWriteMemoryAccess() { result instanceof PhiMemoryAccess } } + /** + * The `Opcode` for a `BuiltInInstruction`. + * + * See the `BuiltInInstruction` documentation for more details. + */ class BuiltIn extends BuiltInOperationOpcode, TBuiltIn { final override string toString() { result = "BuiltIn" } } + /** + * The `Opcode` for a `VarArgsStartInstruction`. + * + * See the `VarArgsStartInstruction` documentation for more details. + */ class VarArgsStart extends UnaryOpcode, TVarArgsStart { final override string toString() { result = "VarArgsStart" } } + /** + * The `Opcode` for a `VarArgsEndInstruction`. + * + * See the `VarArgsEndInstruction` documentation for more details. + */ class VarArgsEnd extends UnaryOpcode, TVarArgsEnd { final override string toString() { result = "VarArgsEnd" } } + /** + * The `Opcode` for a `VarArgInstruction`. + * + * See the `VarArgInstruction` documentation for more details. + */ class VarArg extends UnaryOpcode, TVarArg { final override string toString() { result = "VarArg" } } + /** + * The `Opcode` for a `NextVarArgInstruction`. + * + * See the `NextVarArgInstruction` documentation for more details. + */ class NextVarArg extends UnaryOpcode, TNextVarArg { final override string toString() { result = "NextVarArg" } } + /** + * The `Opcode` for a `CallSideEffectInstruction`. + * + * See the `CallSideEffectInstruction` documentation for more details. 
+ */ class CallSideEffect extends WriteSideEffectOpcode, EscapedWriteOpcode, MayWriteOpcode, ReadSideEffectOpcode, EscapedReadOpcode, MayReadOpcode, TCallSideEffect { final override string toString() { result = "CallSideEffect" } } + /** + * The `Opcode` for a `CallReadSideEffectInstruction`. + * + * See the `CallReadSideEffectInstruction` documentation for more details. + */ class CallReadSideEffect extends ReadSideEffectOpcode, EscapedReadOpcode, MayReadOpcode, TCallReadSideEffect { final override string toString() { result = "CallReadSideEffect" } } + /** + * The `Opcode` for an `IndirectReadSideEffectInstruction`. + * + * See the `IndirectReadSideEffectInstruction` documentation for more details. + */ class IndirectReadSideEffect extends ReadSideEffectOpcode, IndirectReadOpcode, TIndirectReadSideEffect { final override string toString() { result = "IndirectReadSideEffect" } } + /** + * The `Opcode` for an `IndirectMustWriteSideEffectInstruction`. + * + * See the `IndirectMustWriteSideEffectInstruction` documentation for more details. + */ class IndirectMustWriteSideEffect extends WriteSideEffectOpcode, IndirectWriteOpcode, TIndirectMustWriteSideEffect { final override string toString() { result = "IndirectMustWriteSideEffect" } } + /** + * The `Opcode` for an `IndirectMayWriteSideEffectInstruction`. + * + * See the `IndirectMayWriteSideEffectInstruction` documentation for more details. + */ class IndirectMayWriteSideEffect extends WriteSideEffectOpcode, IndirectWriteOpcode, MayWriteOpcode, TIndirectMayWriteSideEffect { final override string toString() { result = "IndirectMayWriteSideEffect" } } + /** + * The `Opcode` for a `BufferReadSideEffectInstruction`. + * + * See the `BufferReadSideEffectInstruction` documentation for more details. 
+ */ class BufferReadSideEffect extends ReadSideEffectOpcode, UnsizedBufferReadOpcode, TBufferReadSideEffect { final override string toString() { result = "BufferReadSideEffect" } } + /** + * The `Opcode` for a `BufferMustWriteSideEffectInstruction`. + * + * See the `BufferMustWriteSideEffectInstruction` documentation for more details. + */ class BufferMustWriteSideEffect extends WriteSideEffectOpcode, UnsizedBufferWriteOpcode, TBufferMustWriteSideEffect { final override string toString() { result = "BufferMustWriteSideEffect" } } + /** + * The `Opcode` for a `BufferMayWriteSideEffectInstruction`. + * + * See the `BufferMayWriteSideEffectInstruction` documentation for more details. + */ class BufferMayWriteSideEffect extends WriteSideEffectOpcode, UnsizedBufferWriteOpcode, MayWriteOpcode, TBufferMayWriteSideEffect { final override string toString() { result = "BufferMayWriteSideEffect" } } + /** + * The `Opcode` for a `SizedBufferReadSideEffectInstruction`. + * + * See the `SizedBufferReadSideEffectInstruction` documentation for more details. + */ class SizedBufferReadSideEffect extends ReadSideEffectOpcode, SizedBufferReadOpcode, TSizedBufferReadSideEffect { final override string toString() { result = "SizedBufferReadSideEffect" } } + /** + * The `Opcode` for a `SizedBufferMustWriteSideEffectInstruction`. + * + * See the `SizedBufferMustWriteSideEffectInstruction` documentation for more details. + */ class SizedBufferMustWriteSideEffect extends WriteSideEffectOpcode, SizedBufferWriteOpcode, TSizedBufferMustWriteSideEffect { final override string toString() { result = "SizedBufferMustWriteSideEffect" } } + /** + * The `Opcode` for a `SizedBufferMayWriteSideEffectInstruction`. + * + * See the `SizedBufferMayWriteSideEffectInstruction` documentation for more details. 
+ */ class SizedBufferMayWriteSideEffect extends WriteSideEffectOpcode, SizedBufferWriteOpcode, MayWriteOpcode, TSizedBufferMayWriteSideEffect { final override string toString() { result = "SizedBufferMayWriteSideEffect" } } + /** + * The `Opcode` for an `InitializeDynamicAllocationInstruction`. + * + * See the `InitializeDynamicAllocationInstruction` documentation for more details. + */ class InitializeDynamicAllocation extends SideEffectOpcode, EntireAllocationWriteOpcode, TInitializeDynamicAllocation { final override string toString() { result = "InitializeDynamicAllocation" } } + /** + * The `Opcode` for a `ChiInstruction`. + * + * See the `ChiInstruction` documentation for more details. + */ class Chi extends Opcode, TChi { final override string toString() { result = "Chi" } @@ -701,6 +1205,11 @@ module Opcode { } } + /** + * The `Opcode` for an `InlineAsmInstruction`. + * + * See the `InlineAsmInstruction` documentation for more details. + */ class InlineAsm extends Opcode, EscapedWriteOpcode, MayWriteOpcode, EscapedReadOpcode, MayReadOpcode, TInlineAsm { final override string toString() { result = "InlineAsm" } @@ -710,10 +1219,20 @@ module Opcode { } } + /** + * The `Opcode` for an `UnreachedInstruction`. + * + * See the `UnreachedInstruction` documentation for more details. + */ class Unreached extends Opcode, TUnreached { final override string toString() { result = "Unreached" } } + /** + * The `Opcode` for a `NewObjInstruction`. + * + * See the `NewObjInstruction` documentation for more details. 
+ */ class NewObj extends Opcode, TNewObj { final override string toString() { result = "NewObj" } } diff --git a/csharp/ql/src/experimental/ir/implementation/TempVariableTag.qll b/csharp/ql/src/experimental/ir/implementation/TempVariableTag.qll index a0c0ca67530..5f230de560d 100644 --- a/csharp/ql/src/experimental/ir/implementation/TempVariableTag.qll +++ b/csharp/ql/src/experimental/ir/implementation/TempVariableTag.qll @@ -12,5 +12,6 @@ private import Imports::TempVariableTag * computed on each branch. The set of possible `TempVariableTag`s is language-dependent. */ class TempVariableTag extends TTempVariableTag { + /** Gets a textual representation of this tag. */ string toString() { result = getTempVariableTagId(this) } } diff --git a/csharp/ql/src/experimental/ir/implementation/raw/IR.qll b/csharp/ql/src/experimental/ir/implementation/raw/IR.qll index badd48552a5..3fa0f1b78be 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/IR.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/IR.qll @@ -1,3 +1,47 @@ +/** + * Provides classes that describe the Intermediate Representation (IR) of the program. + * + * The IR is a representation of the semantics of the program, with very little dependence on the + * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`, + * and `++i` all have the same semantic effect, but appear in the AST as three different types of + * `Expr` node. 
In the IR, all three statements are broken down into a sequence of fundamental + * operations similar to: + * + * ``` + * r1(int*) = VariableAddress[i] // Compute the address of variable `i` + * r2(int) = Load &:r1, m0 // Load the value of `i` + * r3(int) = Constant[1] // An integer constant with the value `1` + * r4(int) = Add r2, r3 // Add `1` to the value of `i` + * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i` + * ``` + * + * This allows IR-based analysis to focus on the fundamental operations, rather than having to be + * concerned with the various ways of expressing those operations in source code. + * + * The key classes in the IR are: + * + * - `IRFunction` - Contains the IR for an entire function definition, including all of that + * function's `Instruction`s, `IRBlock`s, and `IRVariable`s. + * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be + * performed, the operands that produce the inputs to that operation, and the type of the result + * of the operation. Control flows from an `Instruction` to one of a set of successor + * `Instruction`s. + * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly + * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has + * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction` + * that produces its value (its "definition"). + * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is + * created for each variable directly accessed by the function. In addition, `IRVariable`s are + * created to represent certain temporary storage locations that do not have explicitly declared + * variables in the source code, such as the return value of the function. + * - `IRBlock` - A "basic block" in the control flow graph of a function.
An `IRBlock` contains a + * sequence of instructions such that control flow can only enter the block at the first + * instruction, and can only leave the block from the last instruction. + * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType` + * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all + * be represented as the `IRType` `uint4`, a four-byte unsigned integer. + */ + import IRFunction import Instruction import IRBlock @@ -11,11 +55,12 @@ import Imports::MemoryAccessKind private newtype TIRPropertyProvider = MkIRPropertyProvider() /** - * Class that provides additional properties to be dumped for IR instructions and blocks when using + * A class that provides additional properties to be dumped for IR instructions and blocks when using * the PrintIR module. Libraries that compute additional facts about IR elements can extend the * single instance of this class to specify the additional properties computed by the library. */ class IRPropertyProvider extends TIRPropertyProvider { + /** Gets a textual representation of this element. */ string toString() { result = "IRPropertyProvider" } /** diff --git a/csharp/ql/src/experimental/ir/implementation/raw/IRBlock.qll b/csharp/ql/src/experimental/ir/implementation/raw/IRBlock.qll index 94ef73b2769..f0ec0683bd6 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/IRBlock.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/IRBlock.qll @@ -1,3 +1,7 @@ +/** + * Provides classes describing basic blocks in the IR of a function. + */ + private import internal.IRInternal import Instruction private import internal.IRBlockImports as Imports @@ -16,15 +20,23 @@ private import Cached * Most consumers should use the class `IRBlock`. */ class IRBlockBase extends TIRBlock { + /** Gets a textual representation of this block. 
*/ final string toString() { result = getFirstInstruction(this).toString() } + /** Gets the source location of the first non-`Phi` instruction in this block. */ final Language::Location getLocation() { result = getFirstInstruction().getLocation() } + /** + * Gets a string that uniquely identifies this block within its enclosing function. + * + * This predicate is used by debugging and printing code only. + */ final string getUniqueId() { result = getFirstInstruction(this).getUniqueId() } /** - * Gets the zero-based index of the block within its function. This is used - * by debugging and printing code only. + * Gets the zero-based index of the block within its function. + * + * This predicate is used by debugging and printing code only. */ int getDisplayIndex() { exists(IRConfiguration::IRConfiguration config | @@ -42,27 +54,51 @@ class IRBlockBase extends TIRBlock { ) } + /** + * Gets the `index`th non-`Phi` instruction in this block. + */ final Instruction getInstruction(int index) { result = getInstruction(this, index) } + /** + * Gets the `Phi` instructions that appear at the start of this block. + */ final PhiInstruction getAPhiInstruction() { Construction::getPhiInstructionBlockStart(result) = getFirstInstruction() } + /** + * Gets the instructions in this block, including `Phi` instructions. + */ final Instruction getAnInstruction() { result = getInstruction(_) or result = getAPhiInstruction() } + /** + * Gets the first non-`Phi` instruction in this block. + */ final Instruction getFirstInstruction() { result = getFirstInstruction(this) } + /** + * Gets the last instruction in this block. + */ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) } + /** + * Gets the number of non-`Phi` instructions in this block. + */ final int getInstructionCount() { result = getInstructionCount(this) } + /** + * Gets the `IRFunction` that contains this block.
+ */ final IRFunction getEnclosingIRFunction() { result = getFirstInstruction(this).getEnclosingIRFunction() } + /** + * Gets the `Function` that contains this block. + */ final Language::Function getEnclosingFunction() { result = getFirstInstruction(this).getEnclosingFunction() } @@ -74,20 +110,57 @@ class IRBlockBase extends TIRBlock { * instruction of another block. */ class IRBlock extends IRBlockBase { + /** + * Gets the blocks to which control flows directly from this block. + */ final IRBlock getASuccessor() { blockSuccessor(this, result) } + /** + * Gets the blocks from which control flows directly to this block. + */ final IRBlock getAPredecessor() { blockSuccessor(result, this) } + /** + * Gets the block to which control flows directly from this block along an edge of kind `kind`. + */ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) } + /** + * Gets the block to which control flows directly from this block along a back edge of kind + * `kind`. + */ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) } + /** + * Holds if this block immediately dominates `block`. + * + * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and block `B` + * is a direct successor of block `A`. + */ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) } + /** + * Holds if this block strictly dominates `block`. + * + * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B` + * are not the same block. + */ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) } + /** + * Holds if this block dominates `block`. + * + * Block `A` dominates block `B` if any control flow path from the entry block of the function to + * block `B` must pass through block `A`. A block always dominates itself.
+ */ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block } + /** + * Gets the set of blocks on the dominance frontier of this block. + * + * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not + * dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`. + */ pragma[noinline] final IRBlock dominanceFrontier() { dominates(result.getAPredecessor()) and @@ -95,7 +168,7 @@ class IRBlock extends IRBlockBase { } /** - * Holds if this block is reachable from the entry point of its function + * Holds if this block is reachable from the entry block of its function. */ final predicate isReachableFromFunctionEntry() { this = getEnclosingIRFunction().getEntryBlock() or @@ -210,4 +283,4 @@ private module Cached { idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block) } -Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } +private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } diff --git a/csharp/ql/src/experimental/ir/implementation/raw/IRFunction.qll b/csharp/ql/src/experimental/ir/implementation/raw/IRFunction.qll index 6b2d32af48c..5968e58f90b 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/IRFunction.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/IRFunction.qll @@ -1,3 +1,8 @@ +/** + * Provides the class `IRFunction`, which represents the Intermediate Representation for the + * definition of a function. 
+ */ + private import internal.IRInternal private import internal.IRFunctionImports as Imports import Imports::IRFunctionBase diff --git a/csharp/ql/src/experimental/ir/implementation/raw/IRVariable.qll b/csharp/ql/src/experimental/ir/implementation/raw/IRVariable.qll index a01bd2dc79a..d317421c242 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/IRVariable.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/IRVariable.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent variables accessed by the IR. + */ + private import internal.IRInternal import IRFunction private import internal.IRVariableImports as Imports @@ -7,15 +11,11 @@ private import Imports::TTempVariableTag private import Imports::TIRVariable private import Imports::IRType -IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var) { - result.getVariable() = var and - result.getEnclosingFunction() = func -} - /** - * A variable referenced by the IR for a function. The variable may be a user-declared variable - * (`IRUserVariable`) or a temporary variable generated by the AST-to-IR translation - * (`IRTempVariable`). + * A variable referenced by the IR for a function. + * + * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated + * by the AST-to-IR translation (`IRTempVariable`). */ class IRVariable extends TIRVariable { Language::Function func; @@ -27,6 +27,7 @@ class IRVariable extends TIRVariable { this = TIRDynamicInitializationFlag(func, _, _) } + /** Gets a textual representation of this element. */ string toString() { none() } /** @@ -162,20 +163,26 @@ class IRGeneratedVariable extends IRVariable { override string getUniqueId() { none() } + /** + * Gets a string containing the source code location of the AST that generated this variable. + * + * This is used by debugging and printing code only. 
+ */ final string getLocationString() { result = ast.getLocation().getStartLine().toString() + ":" + ast.getLocation().getStartColumn().toString() } + /** + * Gets the string that is combined with the location of the variable to generate the string + * representation of this variable. + * + * This is used by debugging and printing code only. + */ string getBaseString() { none() } } -IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) { - result.getAST() = ast and - result.getTag() = tag -} - /** * A temporary variable introduced by IR construction. The most common examples are the variable * generated to hold the return value of a function, or the variable generated to hold the result of @@ -190,6 +197,10 @@ class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVa result = "Temp: " + Construction::getTempVariableUniqueId(this) } + /** + * Gets the "tag" object that differentiates this temporary variable from other temporary + * variables generated for the same AST. + */ final TempVariableTag getTag() { result = tag } override string getBaseString() { result = "#temp" } @@ -253,6 +264,9 @@ class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral { final override string getBaseString() { result = "#string" } + /** + * Gets the AST of the string literal represented by this `IRStringLiteral`. + */ final Language::StringLiteral getLiteral() { result = literal } } @@ -270,6 +284,9 @@ class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitial final override string toString() { result = var.toString() + "#init" } + /** + * Gets the variable whose initialization is guarded by this flag.
+ */ final Language::Variable getVariable() { result = var } final override string getUniqueId() { diff --git a/csharp/ql/src/experimental/ir/implementation/raw/Instruction.qll b/csharp/ql/src/experimental/ir/implementation/raw/Instruction.qll index 79516f6780d..0d2ad2d3bea 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/Instruction.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/Instruction.qll @@ -215,6 +215,15 @@ class Instruction extends Construction::TStageInstruction { result = Raw::getInstructionUnconvertedResultExpression(this) } + /** + * Gets the language-specific type of the result produced by this instruction. + * + * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a + * less complex, language-neutral type system in which all semantically equivalent types share the + * same `IRType` instance. For example, in C++, four different `Instruction`s might have three + * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`, + * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`. + */ final Language::LanguageType getResultLanguageType() { result = Construction::getInstructionResultType(this) } @@ -537,6 +546,18 @@ class VariableAddressInstruction extends VariableInstruction { VariableAddressInstruction() { getOpcode() instanceof Opcode::VariableAddress } } +/** + * An instruction that returns the address of a function. + * + * This instruction returns the address of a function, including non-member functions, static member + * functions, and non-static member functions. + * + * The result has an `IRFunctionAddress` type. + */ +class FunctionAddressInstruction extends FunctionInstruction { + FunctionAddressInstruction() { getOpcode() instanceof Opcode::FunctionAddress } +} + /** * An instruction that initializes a parameter of the enclosing function with the value of the * corresponding argument passed by the caller. 
@@ -553,6 +574,16 @@ class InitializeParameterInstruction extends VariableInstruction { final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() } } +/** + * An instruction that initializes all memory that existed before this function was called. + * + * This instruction provides a definition for memory that, because it was actually allocated and + * initialized elsewhere, would not otherwise have a definition in this function. + */ +class InitializeNonLocalInstruction extends Instruction { + InitializeNonLocalInstruction() { getOpcode() instanceof Opcode::InitializeNonLocal } +} + /** * An instruction that initializes the memory pointed to by a parameter of the enclosing function * with the value of that memory on entry to the function. @@ -590,6 +621,25 @@ class FieldAddressInstruction extends FieldInstruction { final Instruction getObjectAddress() { result = getObjectAddressOperand().getDef() } } +/** + * An instruction that computes the address of the first element of a managed array. + * + * This instruction is used for element access to C# arrays. + */ +class ElementsAddressInstruction extends UnaryInstruction { + ElementsAddressInstruction() { getOpcode() instanceof Opcode::ElementsAddress } + + /** + * Gets the operand that provides the address of the array object. + */ + final UnaryOperand getArrayObjectAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the array object. 
+ */ + final Instruction getArrayObjectAddress() { result = getArrayObjectAddressOperand().getDef() } +} + /** * An instruction that produces a well-defined but unknown result and has * unknown side effects, including side effects that are not conservatively @@ -1137,8 +1187,14 @@ class PointerDiffInstruction extends PointerArithmeticInstruction { class UnaryInstruction extends Instruction { UnaryInstruction() { getOpcode() instanceof UnaryOpcode } + /** + * Gets the sole operand of this instruction. + */ final UnaryOperand getUnaryOperand() { result = getAnOperand() } + /** + * Gets the instruction whose result provides the sole operand of this instruction. + */ final Instruction getUnary() { result = getUnaryOperand().getDef() } } @@ -1177,6 +1233,19 @@ class CheckedConvertOrThrowInstruction extends UnaryInstruction { CheckedConvertOrThrowInstruction() { getOpcode() instanceof Opcode::CheckedConvertOrThrow } } +/** + * An instruction that returns the address of the complete object that contains the subobject + * pointed to by its operand. + * + * If the operand holds a null address, the result is a null address. + * + * This instruction is used to represent `dynamic_cast` in C++, which returns the pointer to + * the most-derived object. + */ +class CompleteObjectAddressInstruction extends UnaryInstruction { + CompleteObjectAddressInstruction() { getOpcode() instanceof Opcode::CompleteObjectAddress } +} + /** * An instruction that converts the address of an object to the address of a different subobject of * the same object, without any type checking at runtime. @@ -1453,7 +1522,7 @@ class CallInstruction extends Instruction { * Gets the `Function` that the call targets, if this is statically known.
*/ final Language::Function getStaticCallTarget() { - result = getCallTarget().(FunctionInstruction).getFunctionSymbol() + result = getCallTarget().(FunctionAddressInstruction).getFunctionSymbol() } /** @@ -1516,9 +1585,10 @@ class CallSideEffectInstruction extends SideEffectInstruction { /** * An instruction representing the side effect of a function call on any memory - * that might be read by that call. This instruction is emitted instead of - * `CallSideEffectInstruction` when it's certain that the call target cannot - * write to escaped memory. + * that might be read by that call. + * + * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the + * call target cannot write to escaped memory. */ class CallReadSideEffectInstruction extends SideEffectInstruction { CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect } @@ -1566,7 +1636,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { getOpcode() instanceof Opcode::SizedBufferReadSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes read from the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes read from the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** @@ -1576,7 +1654,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction { WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode } - Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() } + /** + * Gets the operand that holds the address of the memory to be written.
+ */ + final AddressOperand getDestinationAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the memory to be written. + */ + Instruction getDestinationAddress() { result = getDestinationAddressOperand().getDef() } } /** @@ -1607,11 +1693,20 @@ class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstructi getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** * An instruction representing the potential write of an indirect parameter within a function call. + * * Unlike `IndirectWriteSideEffectInstruction`, the location might not be completely overwritten. * written. */ @@ -1623,6 +1718,7 @@ class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. */ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1631,6 +1727,7 @@ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. 
*/ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1638,11 +1735,19 @@ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstructio getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** - * An instruction representing the initial value of newly allocated memory, e.g. the result of a + * An instruction representing the initial value of newly allocated memory, such as the result of a * call to `malloc`. */ class InitializeDynamicAllocationInstruction extends SideEffectInstruction { @@ -1860,17 +1965,20 @@ class ChiInstruction extends Instruction { } /** - * An instruction representing unreachable code. Inserted in place of the original target - * instruction of a `ConditionalBranch` or `Switch` instruction where that particular edge is - * infeasible. + * An instruction representing unreachable code. + * + * This instruction is inserted in place of the original target instruction of a `ConditionalBranch` + * or `Switch` instruction where that particular edge is infeasible. */ class UnreachedInstruction extends Instruction { UnreachedInstruction() { getOpcode() instanceof Opcode::Unreached } } /** - * An instruction representing a built-in operation. This is used to represent - * operations such as access to variable argument lists. + * An instruction representing a built-in operation. + * + * This is used to represent a variety of intrinsic operations provided by the compiler + * implementation, such as vector arithmetic. 
*/ class BuiltInOperationInstruction extends Instruction { Language::BuiltInOperation operation; @@ -1880,6 +1988,10 @@ class BuiltInOperationInstruction extends Instruction { operation = Raw::getInstructionBuiltInOperation(this) } + /** + * Gets the language-specific `BuiltInOperation` object that specifies the operation that is + * performed by this instruction. + */ final Language::BuiltInOperation getBuiltInOperation() { result = operation } } @@ -1892,3 +2004,59 @@ class BuiltInInstruction extends BuiltInOperationInstruction { final override string getImmediateString() { result = getBuiltInOperation().toString() } } + +/** + * An instruction that returns a `va_list` to access the arguments passed to the `...` parameter. + * + * The operand specifies the address of the `IREllipsisVariable` used to represent the `...` + * parameter. The result is a `va_list` that initially refers to the first argument that was passed + * to the `...` parameter. + */ +class VarArgsStartInstruction extends UnaryInstruction { + VarArgsStartInstruction() { getOpcode() instanceof Opcode::VarArgsStart } +} + +/** + * An instruction that cleans up a `va_list` after it is no longer in use. + * + * The operand specifies the address of the `va_list` to clean up. This instruction does not return + * a result. + */ +class VarArgsEndInstruction extends UnaryInstruction { + VarArgsEndInstruction() { getOpcode() instanceof Opcode::VarArgsEnd } +} + +/** + * An instruction that returns the address of the argument currently pointed to by a `va_list`. + * + * The operand is the `va_list` that points to the argument. The result is the address of the + * argument. + */ +class VarArgInstruction extends UnaryInstruction { + VarArgInstruction() { getOpcode() instanceof Opcode::VarArg } +} + +/** + * An instruction that modifies a `va_list` to point to the next argument that was passed to the + * `...` parameter. + * + * The operand is the current `va_list`.
The result is an updated `va_list` that points to the next + * argument of the `...` parameter. + */ +class NextVarArgInstruction extends UnaryInstruction { + NextVarArgInstruction() { getOpcode() instanceof Opcode::NextVarArg } +} + +/** + * An instruction that allocates a new object on the managed heap. + * + * This instruction is used to represent the allocation of a new object in C# using the `new` + * expression. This instruction does not invoke a constructor for the object. Instead, there will be + * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the + * result of the `NewObj` as the `this` argument. + * + * The result is the address of the newly allocated object. + */ +class NewObjInstruction extends Instruction { + NewObjInstruction() { getOpcode() instanceof Opcode::NewObj } +} diff --git a/csharp/ql/src/experimental/ir/implementation/raw/Operand.qll b/csharp/ql/src/experimental/ir/implementation/raw/Operand.qll index f82704094c8..468687b0aca 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/Operand.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/Operand.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent the input values of IR instructions. + */ + private import internal.IRInternal private import Instruction private import IRBlock @@ -78,10 +82,17 @@ private PhiOperandBase phiOperand( * A source operand of an `Instruction`. The operand represents a value consumed by the instruction. */ class Operand extends TOperand { + /** Gets a textual representation of this element. */ string toString() { result = "Operand" } + /** + * Gets the location of the source code for this operand. + */ final Language::Location getLocation() { result = getUse().getLocation() } + /** + * Gets the function that contains this operand.
+ */ final IRFunction getEnclosingIRFunction() { result = getUse().getEnclosingIRFunction() } /** @@ -270,6 +281,9 @@ class NonPhiOperand extends Operand { final override int getDumpSortOrder() { result = tag.getSortOrder() } + /** + * Gets the `OperandTag` that specifies how this operand is used by its `Instruction`. + */ final OperandTag getOperandTag() { result = tag } } @@ -292,6 +306,9 @@ class RegisterOperand extends NonPhiOperand, RegisterOperandBase { } } +/** + * A memory operand other than the operand of a `Phi` instruction. + */ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOperandBase { override MemoryOperandTag tag; @@ -313,6 +330,9 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper } } +/** + * A memory operand whose type may be different from the type of the result of its definition. + */ class TypedOperand extends NonPhiMemoryOperand { override TypedOperandTag tag; @@ -416,6 +436,9 @@ class PositionalArgumentOperand extends ArgumentOperand { final int getIndex() { result = tag.getArgIndex() } } +/** + * An operand representing memory read as a side effect of evaluating another instruction. + */ class SideEffectOperand extends TypedOperand { override SideEffectOperandTag tag; } diff --git a/csharp/ql/src/experimental/ir/implementation/raw/PrintIR.qll b/csharp/ql/src/experimental/ir/implementation/raw/PrintIR.qll index d9c0df44e12..b3e3a5b1195 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/PrintIR.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/PrintIR.qll @@ -1,3 +1,13 @@ +/** + * Outputs a representation of the IR as a control flow graph. + * + * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small + * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. 
For most + * uses, however, it is better to write a query that imports `PrintIR.qll`, extends + * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to + * dump. + */ + private import internal.IRInternal private import IR private import internal.PrintIRImports as Imports @@ -9,6 +19,7 @@ private newtype TPrintIRConfiguration = MkPrintIRConfiguration() * The query can extend this class to control which functions are printed. */ class PrintIRConfiguration extends TPrintIRConfiguration { + /** Gets a textual representation of this configuration. */ string toString() { result = "PrintIRConfiguration" } /** @@ -47,7 +58,7 @@ private newtype TPrintableIRNode = /** * A node to be emitted in the IR graph. */ -abstract class PrintableIRNode extends TPrintableIRNode { +abstract private class PrintableIRNode extends TPrintableIRNode { abstract string toString(); /** @@ -98,7 +109,7 @@ abstract class PrintableIRNode extends TPrintableIRNode { /** * An IR graph node representing a `IRFunction` object. */ -class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { +private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { IRFunction irFunc; PrintableIRFunction() { this = TPrintableIRFunction(irFunc) } @@ -129,7 +140,7 @@ class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { /** * An IR graph node representing an `IRBlock` object. */ -class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { +private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { IRBlock block; PrintableIRBlock() { this = TPrintableIRBlock(block) } @@ -161,7 +172,7 @@ class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { /** * An IR graph node representing an `Instruction`. 
*/ -class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { +private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { Instruction instr; PrintableInstruction() { this = TPrintableInstruction(instr) } @@ -224,6 +235,9 @@ private string getPaddingString(int n) { n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " " } +/** + * Holds if `node` belongs to the output graph, and its property `key` has the given `value`. + */ query predicate nodes(PrintableIRNode node, string key, string value) { value = node.getProperty(key) } @@ -237,6 +251,10 @@ private int getSuccessorIndex(IRBlock pred, IRBlock succ) { ) } +/** + * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key` + * has the given `value`. + */ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) { exists(EdgeKind kind, IRBlock predBlock, IRBlock succBlock | predBlock = pred.getBlock() and @@ -256,6 +274,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, ) } +/** + * Holds if `parent` is the parent node of `child` in the output graph. 
+ */ query predicate parents(PrintableIRNode child, PrintableIRNode parent) { parent = child.getParent() } diff --git a/csharp/ql/src/experimental/ir/implementation/raw/internal/TranslatedElement.qll b/csharp/ql/src/experimental/ir/implementation/raw/internal/TranslatedElement.qll index 7171cb66c2c..0022711f79e 100644 --- a/csharp/ql/src/experimental/ir/implementation/raw/internal/TranslatedElement.qll +++ b/csharp/ql/src/experimental/ir/implementation/raw/internal/TranslatedElement.qll @@ -21,6 +21,16 @@ ArrayType getArrayOfDim(int dim, Type type) { result.getElementType() = type } +IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var) { + result.getVariable() = var and + result.getEnclosingFunction() = func +} + +IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) { + result.getAST() = ast and + result.getTag() = tag +} + private predicate canCreateCompilerGeneratedElement(Element generatedBy, int nth) { generatedBy instanceof ForeachStmt and nth in [0 .. ForeachElements::noGeneratedElements() - 1] or diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IR.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IR.qll index badd48552a5..3fa0f1b78be 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IR.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IR.qll @@ -1,3 +1,47 @@ +/** + * Provides classes that describe the Intermediate Representation (IR) of the program. + * + * The IR is a representation of the semantics of the program, with very little dependence on the + * syntax that was used to write the program. For example, in C++, the statements `i += 1;`, `i++`, + * and `++i` all have the same semantic effect, but appear in the AST as three different types of + * `Expr` node. 
In the IR, all three statements are broken down into a sequence of fundamental + * operations similar to: + * + * ``` + * r1(int*) = VariableAddress[i] // Compute the address of variable `i` + * r2(int) = Load &:r1, m0 // Load the value of `i` + * r3(int) = Constant[1] // An integer constant with the value `1` + * r4(int) = Add r2, r3 // Add `1` to the value of `i` + * r5(int) = Store &:r1, r4 // Store the new value back into the variable `i` + * ``` + * + * This allows IR-based analysis to focus on the fundamental operations, rather than having to be + * concerned with the various ways of expressing those operations in source code. + * + * The key classes in the IR are: + * + * - `IRFunction` - Contains the IR for an entire function definition, including all of that + * function's `Instruction`s, `IRBlock`s, and `IRVariable`s. + * - `Instruction` - A single operation in the IR. An instruction specifies the operation to be + * performed, the operands that produce the inputs to that operation, and the type of the result + * of the operation. Control flows from an `Instruction` to one of a set of successor + * `Instruction`s. + * - `Operand` - An input value of an `Instruction`. All inputs of an `Instruction` are explicitly + * represented as `Operand`s, even if the input was implicit in the source code. An `Operand` has + * a link to the `Instruction` that consumes its value (its "use") and a link to the `Instruction` + * that produces its value (its "definition"). + * - `IRVariable` - A variable accessed by the IR for a particular function. An `IRVariable` is + * created for each variable directly accessed by the function. In addition, `IRVariable`s are + * created to represent certain temporary storage locations that do not have explicitly declared + * variables in the source code, such as the return value of the function. + * - `IRBlock` - A "basic block" in the control flow graph of a function.
An `IRBlock` contains a + * sequence of instructions such that control flow can only enter the block at the first + * instruction, and can only leave the block from the last instruction. + * - `IRType` - The type of a value accessed in the IR. Unlike the `Type` class in the AST, `IRType` + * is language-neutral. For example, in C++, `unsigned int`, `char32_t`, and `wchar_t` might all + * be represented as the `IRType` `uint4`, a four-byte unsigned integer. + */ + import IRFunction import Instruction import IRBlock @@ -11,11 +55,12 @@ import Imports::MemoryAccessKind private newtype TIRPropertyProvider = MkIRPropertyProvider() /** - * Class that provides additional properties to be dumped for IR instructions and blocks when using + * A class that provides additional properties to be dumped for IR instructions and blocks when using * the PrintIR module. Libraries that compute additional facts about IR elements can extend the * single instance of this class to specify the additional properties computed by the library. */ class IRPropertyProvider extends TIRPropertyProvider { + /** Gets a textual representation of this element. */ string toString() { result = "IRPropertyProvider" } /** diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRBlock.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRBlock.qll index 94ef73b2769..f0ec0683bd6 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRBlock.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRBlock.qll @@ -1,3 +1,7 @@ +/** + * Provides classes describing basic blocks in the IR of a function. + */ + private import internal.IRInternal import Instruction private import internal.IRBlockImports as Imports @@ -16,15 +20,23 @@ private import Cached * Most consumers should use the class `IRBlock`. */ class IRBlockBase extends TIRBlock { + /** Gets a textual representation of this block. 
*/ final string toString() { result = getFirstInstruction(this).toString() } + /** Gets the source location of the first non-`Phi` instruction in this block. */ final Language::Location getLocation() { result = getFirstInstruction().getLocation() } + /** + * Gets a string that uniquely identifies this block within its enclosing function. + * + * This predicate is used by debugging and printing code only. + */ final string getUniqueId() { result = getFirstInstruction(this).getUniqueId() } /** - * Gets the zero-based index of the block within its function. This is used - * by debugging and printing code only. + * Gets the zero-based index of the block within its function. + * + * This predicate is used by debugging and printing code only. */ int getDisplayIndex() { exists(IRConfiguration::IRConfiguration config | @@ -42,27 +54,51 @@ class IRBlockBase extends TIRBlock { ) } + /** + * Gets the `index`th non-`Phi` instruction in this block. + */ final Instruction getInstruction(int index) { result = getInstruction(this, index) } + /** + * Gets the `Phi` instructions that appear at the start of this block. + */ final PhiInstruction getAPhiInstruction() { Construction::getPhiInstructionBlockStart(result) = getFirstInstruction() } + /** + * Gets the instructions in this block, including `Phi` instructions. + */ final Instruction getAnInstruction() { result = getInstruction(_) or result = getAPhiInstruction() } + /** + * Gets the first non-`Phi` instruction in this block. + */ final Instruction getFirstInstruction() { result = getFirstInstruction(this) } + /** + * Gets the last instruction in this block. + */ final Instruction getLastInstruction() { result = getInstruction(getInstructionCount() - 1) } + /** + * Gets the number of non-`Phi` instructions in this block. + */ final int getInstructionCount() { result = getInstructionCount(this) } + /** + * Gets the `IRFunction` that contains this block.
+ */ final IRFunction getEnclosingIRFunction() { result = getFirstInstruction(this).getEnclosingIRFunction() } + /** + * Gets the `Function` that contains this block. + */ final Language::Function getEnclosingFunction() { result = getFirstInstruction(this).getEnclosingFunction() } @@ -74,20 +110,57 @@ class IRBlockBase extends TIRBlock { * instruction of another block. */ class IRBlock extends IRBlockBase { + /** + * Gets the blocks to which control flows directly from this block. + */ final IRBlock getASuccessor() { blockSuccessor(this, result) } + /** + * Gets the blocks from which control flows directly to this block. + */ final IRBlock getAPredecessor() { blockSuccessor(result, this) } + /** + * Gets the block to which control flows directly from this block along an edge of kind `kind`. + */ final IRBlock getSuccessor(EdgeKind kind) { blockSuccessor(this, result, kind) } + /** + * Gets the block to which control flows directly from this block along a back edge of kind + * `kind`. + */ final IRBlock getBackEdgeSuccessor(EdgeKind kind) { backEdgeSuccessor(this, result, kind) } + /** + * Holds if this block immediately dominates `block`. + * + * Block `A` immediately dominates block `B` if block `A` strictly dominates block `B` and no other + * strict dominator of block `B` is strictly dominated by block `A`. + */ final predicate immediatelyDominates(IRBlock block) { blockImmediatelyDominates(this, block) } + /** + * Holds if this block strictly dominates `block`. + * + * Block `A` strictly dominates block `B` if block `A` dominates block `B` and blocks `A` and `B` + * are not the same block. + */ final predicate strictlyDominates(IRBlock block) { blockImmediatelyDominates+(this, block) } + /** + * Holds if this block dominates `block`. + * + * Block `A` dominates block `B` if any control flow path from the entry block of the function to + * block `B` must pass through block `A`. A block always dominates itself.
+ */ final predicate dominates(IRBlock block) { strictlyDominates(block) or this = block } + /** + * Gets the set of blocks on the dominance frontier of this block. + * + * The dominance frontier of block `A` is the set of blocks `B` such that block `A` does not + * dominate block `B`, but block `A` does dominate an immediate predecessor of block `B`. + */ pragma[noinline] final IRBlock dominanceFrontier() { dominates(result.getAPredecessor()) and @@ -95,7 +168,7 @@ class IRBlock extends IRBlockBase { } /** - * Holds if this block is reachable from the entry point of its function + * Holds if this block is reachable from the entry block of its function. */ final predicate isReachableFromFunctionEntry() { this = getEnclosingIRFunction().getEntryBlock() or @@ -210,4 +283,4 @@ private module Cached { idominance(isEntryBlock/1, blockSuccessor/2)(_, dominator, block) } -Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } +private Instruction getFirstInstruction(TIRBlock block) { block = MkIRBlock(result) } diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRFunction.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRFunction.qll index 6b2d32af48c..5968e58f90b 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRFunction.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRFunction.qll @@ -1,3 +1,8 @@ +/** + * Provides the class `IRFunction`, which represents the Intermediate Representation for the + * definition of a function. 
+ */ + private import internal.IRInternal private import internal.IRFunctionImports as Imports import Imports::IRFunctionBase diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRVariable.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRVariable.qll index a01bd2dc79a..d317421c242 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRVariable.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/IRVariable.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent variables accessed by the IR. + */ + private import internal.IRInternal import IRFunction private import internal.IRVariableImports as Imports @@ -7,15 +11,11 @@ private import Imports::TTempVariableTag private import Imports::TIRVariable private import Imports::IRType -IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var) { - result.getVariable() = var and - result.getEnclosingFunction() = func -} - /** - * A variable referenced by the IR for a function. The variable may be a user-declared variable - * (`IRUserVariable`) or a temporary variable generated by the AST-to-IR translation - * (`IRTempVariable`). + * A variable referenced by the IR for a function. + * + * The variable may be a user-declared variable (`IRUserVariable`) or a temporary variable generated + * by the AST-to-IR translation (`IRTempVariable`). */ class IRVariable extends TIRVariable { Language::Function func; @@ -27,6 +27,7 @@ class IRVariable extends TIRVariable { this = TIRDynamicInitializationFlag(func, _, _) } + /** Gets a textual representation of this element. */ string toString() { none() } /** @@ -162,20 +163,26 @@ class IRGeneratedVariable extends IRVariable { override string getUniqueId() { none() } + /** + * Gets a string containing the source code location of the AST that generated this variable. + * + * This is used by debugging and printing code only. 
+ */ final string getLocationString() { result = ast.getLocation().getStartLine().toString() + ":" + ast.getLocation().getStartColumn().toString() } + /** + * Gets the string that is combined with the location of the variable to generate the string + * representation of this variable. + * + * This is used by debugging and printing code only. + */ string getBaseString() { none() } } -IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) { - result.getAST() = ast and - result.getTag() = tag -} - /** * A temporary variable introduced by IR construction. The most common examples are the variable * generated to hold the return value of a function, or the variable generated to hold the result of @@ -190,6 +197,10 @@ class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVa result = "Temp: " + Construction::getTempVariableUniqueId(this) } + /** + * Gets the "tag" object that differentiates this temporary variable from other temporary + * variables generated for the same AST. + */ final TempVariableTag getTag() { result = tag } override string getBaseString() { result = "#temp" } @@ -253,6 +264,9 @@ class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral { final override string getBaseString() { result = "#string" } + /** + * Gets the AST of the string literal represented by this `IRStringLiteral`. + */ final Language::StringLiteral getLiteral() { result = literal } } @@ -270,6 +284,9 @@ class IRDynamicInitializationFlag extends IRGeneratedVariable, TIRDynamicInitial final override string toString() { result = var.toString() + "#init" } + /** + * Gets the variable whose initialization is guarded by this flag.
+ */ final Language::Variable getVariable() { result = var } final override string getUniqueId() { diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Instruction.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Instruction.qll index 79516f6780d..0d2ad2d3bea 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Instruction.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Instruction.qll @@ -215,6 +215,15 @@ class Instruction extends Construction::TStageInstruction { result = Raw::getInstructionUnconvertedResultExpression(this) } + /** + * Gets the language-specific type of the result produced by this instruction. + * + * Most consumers of the IR should use `getResultIRType()` instead. `getResultIRType()` uses a + * less complex, language-neutral type system in which all semantically equivalent types share the + * same `IRType` instance. For example, in C++, four different `Instruction`s might have three + * different values for `getResultLanguageType()`: `unsigned int`, `char32_t`, and `wchar_t`, + * whereas all four instructions would have the same value for `getResultIRType()`, `uint4`. + */ final Language::LanguageType getResultLanguageType() { result = Construction::getInstructionResultType(this) } @@ -537,6 +546,18 @@ class VariableAddressInstruction extends VariableInstruction { VariableAddressInstruction() { getOpcode() instanceof Opcode::VariableAddress } } +/** + * An instruction that returns the address of a function. + * + * This instruction returns the address of a function, including non-member functions, static member + * functions, and non-static member functions. + * + * The result has an `IRFunctionAddress` type. 
+ */ +class FunctionAddressInstruction extends FunctionInstruction { + FunctionAddressInstruction() { getOpcode() instanceof Opcode::FunctionAddress } +} + /** * An instruction that initializes a parameter of the enclosing function with the value of the * corresponding argument passed by the caller. @@ -553,6 +574,16 @@ class InitializeParameterInstruction extends VariableInstruction { final Language::Parameter getParameter() { result = var.(IRUserVariable).getVariable() } } +/** + * An instruction that initializes all memory that existed before this function was called. + * + * This instruction provides a definition for memory that, because it was actually allocated and + * initialized elsewhere, would not otherwise have a definition in this function. + */ +class InitializeNonLocalInstruction extends Instruction { + InitializeNonLocalInstruction() { getOpcode() instanceof Opcode::InitializeNonLocal } +} + /** * An instruction that initializes the memory pointed to by a parameter of the enclosing function * with the value of that memory on entry to the function. @@ -590,6 +621,25 @@ class FieldAddressInstruction extends FieldInstruction { final Instruction getObjectAddress() { result = getObjectAddressOperand().getDef() } } +/** + * An instruction that computes the address of the first element of a managed array. + * + * This instruction is used for element access to C# arrays. + */ +class ElementsAddressInstruction extends UnaryInstruction { + ElementsAddressInstruction() { getOpcode() instanceof Opcode::ElementsAddress } + + /** + * Gets the operand that provides the address of the array object. + */ + final UnaryOperand getArrayObjectAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the array object. 
+ */ + final Instruction getArrayObjectAddress() { result = getArrayObjectAddressOperand().getDef() } +} + /** * An instruction that produces a well-defined but unknown result and has * unknown side effects, including side effects that are not conservatively @@ -1137,8 +1187,14 @@ class PointerDiffInstruction extends PointerArithmeticInstruction { class UnaryInstruction extends Instruction { UnaryInstruction() { getOpcode() instanceof UnaryOpcode } + /** + * Gets the sole operand of this instruction. + */ final UnaryOperand getUnaryOperand() { result = getAnOperand() } + /** + * Gets the instruction whose result provides the sole operand of this instruction. + */ final Instruction getUnary() { result = getUnaryOperand().getDef() } } @@ -1177,6 +1233,19 @@ class CheckedConvertOrThrowInstruction extends UnaryInstruction { CheckedConvertOrThrowInstruction() { getOpcode() instanceof Opcode::CheckedConvertOrThrow } } +/** + * An instruction that returns the address of the complete object that contains the subobject + * pointed to by its operand. + * + * If the operand holds a null address, the result is a null address. + * + * This instruction is used to represent `dynamic_cast` in C++, which returns the pointer to + * the most-derived object. + */ +class CompleteObjectAddressInstruction extends UnaryInstruction { + CompleteObjectAddressInstruction() { getOpcode() instanceof Opcode::CompleteObjectAddress } +} + /** * An instruction that converts the address of an object to the address of a different subobject of * the same object, without any type checking at runtime. @@ -1453,7 +1522,7 @@ class CallInstruction extends Instruction { * Gets the `Function` that the call targets, if this is statically known.
*/ final Language::Function getStaticCallTarget() { - result = getCallTarget().(FunctionInstruction).getFunctionSymbol() + result = getCallTarget().(FunctionAddressInstruction).getFunctionSymbol() } /** @@ -1516,9 +1585,10 @@ class CallSideEffectInstruction extends SideEffectInstruction { /** * An instruction representing the side effect of a function call on any memory - * that might be read by that call. This instruction is emitted instead of - * `CallSideEffectInstruction` when it's certain that the call target cannot - * write to escaped memory. + * that might be read by that call. + * + * This instruction is emitted instead of `CallSideEffectInstruction` when it is certain that the + * call target cannot write to escaped memory. */ class CallReadSideEffectInstruction extends SideEffectInstruction { CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect } @@ -1566,7 +1636,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { getOpcode() instanceof Opcode::SizedBufferReadSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes read from the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes read from the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** @@ -1576,7 +1654,15 @@ class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction { class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction { WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode } - Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() } + /** + * Gets the operand that holds the address of the memory to be written.
+ */ + final AddressOperand getDestinationAddressOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the address of the memory to be written. + */ + Instruction getDestinationAddress() { result = getDestinationAddressOperand().getDef() } } /** @@ -1607,11 +1693,20 @@ class SizedBufferMustWriteSideEffectInstruction extends WriteSideEffectInstructi getOpcode() instanceof Opcode::SizedBufferMustWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** * An instruction representing the potential write of an indirect parameter within a function call. + * * Unlike `IndirectWriteSideEffectInstruction`, the location might not be completely overwritten. * written. */ @@ -1623,6 +1718,7 @@ class IndirectMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. */ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1631,6 +1727,7 @@ class BufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { /** * An instruction representing the write of an indirect buffer parameter within a function call. + * * Unlike `BufferWriteSideEffectInstruction`, the buffer might not be completely overwritten. 
*/ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstruction { @@ -1638,11 +1735,19 @@ class SizedBufferMayWriteSideEffectInstruction extends WriteSideEffectInstructio getOpcode() instanceof Opcode::SizedBufferMayWriteSideEffect } - Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() } + /** + * Gets the operand that holds the number of bytes written to the buffer. + */ + final BufferSizeOperand getBufferSizeOperand() { result = getAnOperand() } + + /** + * Gets the instruction whose result provides the number of bytes written to the buffer. + */ + final Instruction getBufferSize() { result = getBufferSizeOperand().getDef() } } /** - * An instruction representing the initial value of newly allocated memory, e.g. the result of a + * An instruction representing the initial value of newly allocated memory, such as the result of a * call to `malloc`. */ class InitializeDynamicAllocationInstruction extends SideEffectInstruction { @@ -1860,17 +1965,20 @@ class ChiInstruction extends Instruction { } /** - * An instruction representing unreachable code. Inserted in place of the original target - * instruction of a `ConditionalBranch` or `Switch` instruction where that particular edge is - * infeasible. + * An instruction representing unreachable code. + * + * This instruction is inserted in place of the original target instruction of a `ConditionalBranch` + * or `Switch` instruction where that particular edge is infeasible. */ class UnreachedInstruction extends Instruction { UnreachedInstruction() { getOpcode() instanceof Opcode::Unreached } } /** - * An instruction representing a built-in operation. This is used to represent - * operations such as access to variable argument lists. + * An instruction representing a built-in operation. + * + * This is used to represent a variety of intrinsic operations provided by the compiler + * implementation, such as vector arithmetic. 
+ */ class BuiltInOperationInstruction extends Instruction { Language::BuiltInOperation operation; @@ -1880,6 +1988,10 @@ class BuiltInOperationInstruction extends Instruction { operation = Raw::getInstructionBuiltInOperation(this) } + /** + * Gets the language-specific `BuiltInOperation` object that specifies the operation that is + * performed by this instruction. + */ final Language::BuiltInOperation getBuiltInOperation() { result = operation } } @@ -1892,3 +2004,59 @@ class BuiltInInstruction extends BuiltInOperationInstruction { final override string getImmediateString() { result = getBuiltInOperation().toString() } } + +/** + * An instruction that returns a `va_list` to access the arguments passed to the `...` parameter. + * + * The operand specifies the address of the `IREllipsisVariable` used to represent the `...` + * parameter. The result is a `va_list` that initially refers to the first argument that was passed + * to the `...` parameter. + */ +class VarArgsStartInstruction extends UnaryInstruction { + VarArgsStartInstruction() { getOpcode() instanceof Opcode::VarArgsStart } +} + +/** + * An instruction that cleans up a `va_list` after it is no longer in use. + * + * The operand specifies the address of the `va_list` to clean up. This instruction does not return + * a result. + */ +class VarArgsEndInstruction extends UnaryInstruction { + VarArgsEndInstruction() { getOpcode() instanceof Opcode::VarArgsEnd } +} + +/** + * An instruction that returns the address of the argument currently pointed to by a `va_list`. + * + * The operand is the `va_list` that points to the argument. The result is the address of the + * argument. + */ +class VarArgInstruction extends UnaryInstruction { + VarArgInstruction() { getOpcode() instanceof Opcode::VarArg } +} + +/** + * An instruction that modifies a `va_list` to point to the next argument that was passed to the + * `...` parameter. + * + * The operand is the current `va_list`.
The result is an updated `va_list` that points to the next + * argument of the `...` parameter. + */ +class NextVarArgInstruction extends UnaryInstruction { + NextVarArgInstruction() { getOpcode() instanceof Opcode::NextVarArg } +} + +/** + * An instruction that allocates a new object on the managed heap. + * + * This instruction is used to represent the allocation of a new object in C# using the `new` + * expression. This instruction does not invoke a constructor for the object. Instead, there will be + * a subsequent `Call` instruction to invoke the appropriate constructor directly, passing the + * result of the `NewObj` as the `this` argument. + * + * The result is the address of the newly allocated object. + */ +class NewObjInstruction extends Instruction { + NewObjInstruction() { getOpcode() instanceof Opcode::NewObj } +} diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Operand.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Operand.qll index f82704094c8..468687b0aca 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Operand.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Operand.qll @@ -1,3 +1,7 @@ +/** + * Provides classes that represent the input values of IR instructions. + */ + private import internal.IRInternal private import Instruction private import IRBlock @@ -78,10 +82,17 @@ private PhiOperandBase phiOperand( * A source operand of an `Instruction`. The operand represents a value consumed by the instruction. */ class Operand extends TOperand { + /** Gets a textual representation of this element. */ string toString() { result = "Operand" } + /** + * Gets the location of the source code for this operand. + */ final Language::Location getLocation() { result = getUse().getLocation() } + /** + * Gets the function that contains this operand.
+ */ final IRFunction getEnclosingIRFunction() { result = getUse().getEnclosingIRFunction() } /** @@ -270,6 +281,9 @@ class NonPhiOperand extends Operand { final override int getDumpSortOrder() { result = tag.getSortOrder() } + /** + * Gets the `OperandTag` that specifies how this operand is used by its `Instruction`. + */ final OperandTag getOperandTag() { result = tag } } @@ -292,6 +306,9 @@ class RegisterOperand extends NonPhiOperand, RegisterOperandBase { } } +/** + * A memory operand other than the operand of a `Phi` instruction. + */ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOperandBase { override MemoryOperandTag tag; @@ -313,6 +330,9 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper } } +/** + * A memory operand whose type may be different from the type of the result of its definition. + */ class TypedOperand extends NonPhiMemoryOperand { override TypedOperandTag tag; @@ -416,6 +436,9 @@ class PositionalArgumentOperand extends ArgumentOperand { final int getIndex() { result = tag.getArgIndex() } } +/** + * An operand representing memory read as a side effect of evaluating another instruction. + */ class SideEffectOperand extends TypedOperand { override SideEffectOperandTag tag; } diff --git a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/PrintIR.qll b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/PrintIR.qll index d9c0df44e12..b3e3a5b1195 100644 --- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/PrintIR.qll +++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/PrintIR.qll @@ -1,3 +1,13 @@ +/** + * Outputs a representation of the IR as a control flow graph. + * + * This file contains the actual implementation of `PrintIR.ql`. For test cases and very small + * databases, `PrintIR.ql` can be run directly to dump the IR for the entire database. 
For most + * uses, however, it is better to write a query that imports `PrintIR.qll`, extends + * `PrintIRConfiguration`, and overrides `shouldPrintFunction()` to select a subset of functions to + * dump. + */ + private import internal.IRInternal private import IR private import internal.PrintIRImports as Imports @@ -9,6 +19,7 @@ private newtype TPrintIRConfiguration = MkPrintIRConfiguration() * The query can extend this class to control which functions are printed. */ class PrintIRConfiguration extends TPrintIRConfiguration { + /** Gets a textual representation of this configuration. */ string toString() { result = "PrintIRConfiguration" } /** @@ -47,7 +58,7 @@ private newtype TPrintableIRNode = /** * A node to be emitted in the IR graph. */ -abstract class PrintableIRNode extends TPrintableIRNode { +abstract private class PrintableIRNode extends TPrintableIRNode { abstract string toString(); /** @@ -98,7 +109,7 @@ abstract class PrintableIRNode extends TPrintableIRNode { /** * An IR graph node representing a `IRFunction` object. */ -class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { +private class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { IRFunction irFunc; PrintableIRFunction() { this = TPrintableIRFunction(irFunc) } @@ -129,7 +140,7 @@ class PrintableIRFunction extends PrintableIRNode, TPrintableIRFunction { /** * An IR graph node representing an `IRBlock` object. */ -class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { +private class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { IRBlock block; PrintableIRBlock() { this = TPrintableIRBlock(block) } @@ -161,7 +172,7 @@ class PrintableIRBlock extends PrintableIRNode, TPrintableIRBlock { /** * An IR graph node representing an `Instruction`. 
*/ -class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { +private class PrintableInstruction extends PrintableIRNode, TPrintableInstruction { Instruction instr; PrintableInstruction() { this = TPrintableInstruction(instr) } @@ -224,6 +235,9 @@ private string getPaddingString(int n) { n > 0 and n <= maxColumnWidth() and result = getPaddingString(n - 1) + " " } +/** + * Holds if `node` belongs to the output graph, and its property `key` has the given `value`. + */ query predicate nodes(PrintableIRNode node, string key, string value) { value = node.getProperty(key) } @@ -237,6 +251,10 @@ private int getSuccessorIndex(IRBlock pred, IRBlock succ) { ) } +/** + * Holds if the output graph contains an edge from `pred` to `succ`, and that edge's property `key` + * has the given `value`. + */ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, string value) { exists(EdgeKind kind, IRBlock predBlock, IRBlock succBlock | predBlock = pred.getBlock() and @@ -256,6 +274,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key, ) } +/** + * Holds if `parent` is the parent node of `child` in the output graph. + */ query predicate parents(PrintableIRNode child, PrintableIRNode parent) { parent = child.getParent() }