diff --git a/ql/docs/learn-ql/ast.dot b/ql/docs/learn-ql/ast.dot new file mode 100644 index 00000000000..fbf32b744ed --- /dev/null +++ b/ql/docs/learn-ql/ast.dot @@ -0,0 +1,22 @@ +digraph ast { + graph [dpi=300]; + "x" [shape=rect]; + "y" [shape=rect]; + "x + y" [shape=rect]; + "(x + y)" [shape=rect]; + "z" [shape=rect]; + "(x + y) * z" [shape=rect]; + invis1 [style=invis]; + invis2 [style=invis]; + invis3 [style=invis]; + + "(x + y) * z" -> "(x + y)" [label=" 0"]; + "(x + y) * z" -> "z" [label=" 1"]; + "(x + y)" -> "x + y" [label=" 0"]; + "x + y" -> "x" [label=" 0"]; + "x + y" -> "y" [label=" 1"]; + + "z" -> invis1 [style=invis]; + invis1 -> invis2 [style=invis]; + invis1 -> invis3 [style=invis]; +} diff --git a/ql/docs/learn-ql/ast.png b/ql/docs/learn-ql/ast.png new file mode 100644 index 00000000000..61a9f29b80a Binary files /dev/null and b/ql/docs/learn-ql/ast.png differ diff --git a/ql/docs/learn-ql/cfg.dot b/ql/docs/learn-ql/cfg.dot new file mode 100644 index 00000000000..47db3df4c57 --- /dev/null +++ b/ql/docs/learn-ql/cfg.dot @@ -0,0 +1,8 @@ +digraph cfg { + graph [dpi=300]; + rankdir=LR; + "x := 0" -> "p != nil"; + "p != nil" -> "x = p.f"; + "p != nil" -> "return x"; + "x = p.f" -> "return x"; +} diff --git a/ql/docs/learn-ql/cfg.png b/ql/docs/learn-ql/cfg.png new file mode 100644 index 00000000000..1290dadb870 Binary files /dev/null and b/ql/docs/learn-ql/cfg.png differ diff --git a/ql/docs/learn-ql/cfg2.dot b/ql/docs/learn-ql/cfg2.dot new file mode 100644 index 00000000000..b8dcd71ee25 --- /dev/null +++ b/ql/docs/learn-ql/cfg2.dot @@ -0,0 +1,14 @@ +digraph cfg2 { + graph [dpi=300]; + rankdir=LR; + + "p != nil is true" [shape=box]; + "p != nil is false" [shape=box]; + + "x := 0" -> "p != nil"; + "p != nil" -> "p != nil is true"; + "p != nil is true" -> "x = p.f"; + "p != nil" -> "p != nil is false"; + "p != nil is false" -> "return x"; + "x = p.f" -> "return x"; +} diff --git a/ql/docs/learn-ql/cfg2.png b/ql/docs/learn-ql/cfg2.png new file mode 100644 index 00000000000..617fe4fe4dc Binary files /dev/null and b/ql/docs/learn-ql/cfg2.png differ diff --git a/ql/docs/learn-ql/dfg.dot b/ql/docs/learn-ql/dfg.dot new file mode 100644 index 00000000000..82253c9b133 --- /dev/null +++ b/ql/docs/learn-ql/dfg.dot @@ -0,0 +1,11 @@ +digraph dfg { + graph [dpi=300]; + rankdir=LR; + + "x" [shape=diamond]; + "return x" [label=x>]; + + "0" -> "x"; + "p.f" -> "x"; + "x" -> "return x"; +} diff --git a/ql/docs/learn-ql/dfg.png b/ql/docs/learn-ql/dfg.png new file mode 100644 index 00000000000..6727af7b4ac Binary files /dev/null and b/ql/docs/learn-ql/dfg.png differ diff --git a/ql/docs/learn-ql/introduce-libraries.rst b/ql/docs/learn-ql/introduce-libraries.rst new file mode 100644 index 00000000000..f13903b875b --- /dev/null +++ b/ql/docs/learn-ql/introduce-libraries.rst @@ -0,0 +1,621 @@ +Introducing the CodeQL libraries for Go +======================================= + +Overview +-------- + +CodeQL ships with an extensive library for analyzing Go code. The classes in this library present +the data from a CodeQL database in an object-oriented form and provide abstractions and predicates +to help you with common analysis tasks. + +The library is implemented as a set of QL modules, that is, files with the extension ``.qll``. The +module ``go.qll`` imports most other standard library modules, so you can include the complete +library by beginning your query with: + +.. code-block:: ql + + import go + +Broadly speaking, the CodeQL library for Go provides two views of a Go code base: at the `syntactic +level`, source code is represented as an `abstract syntax tree +`__ (AST), while at the `data-flow level` it is +represented as a `data-flow graph `__ (DFG). In +between, there is also an intermediate representation of the program as a control-flow graph (CFG), +though this representation is rarely useful on its own and mostly used to construct the higher-level +DFG representation. + +The AST representation captures the syntactic structure of the program. You can use it to reason +about syntactic properties such as the nesting of statements within each other, but also about the +types of expressions and which variable a name refers to. + +The DFG, on the other hand, provides an approximation of how data flows through variables and +operations at runtime. It is used, for example, by the security queries to model the way +user-controlled input can propagate through the program. Additionally, the DFG contains information +about which function may be invoked by a given call (taking virtual dispatch through interfaces into +account), as well as control-flow information about the order in which different operations may be +executed at runtime. + +As a rule of thumb, you normally want to use the AST only for superficial syntactic queries. Any +analysis involving deeper semantic properties of the program should be done on the DFG. + +The rest of this tutorial briefly summarizes the most important classes and predicates provided by +this library, including references to the `detailed API documentation +`__ where applicable. We start by giving an overview of the AST +representation, followed by an explanation of names and entities, which are used to represent +name-binding information, and of types and type information. Then we move on to control flow and the +data-flow graph, and finally the call graph and a few advanced topics. + +Abstract syntax +--------------- + +The AST presents the program as a hierarchical structure of nodes, each of which corresponds to a +syntactic element of the program source text. For example, there is an AST node for each expression +and each statement in the program. These AST nodes are arranged into a parent-child relationship +reflecting the nesting of syntactic elements and the order in which inner elements appear in +enclosing ones. + +For example, this is the AST for the expression ``(x + y) * z``: + +|ast| + +It is composed of six AST nodes, representing ``x``, ``y``, ``x + y``, ``(x + y)``, ``z`` and the +entire expression ``(x + y) * z``, respectively. The AST nodes representing ``x`` and ``y`` are +children of the AST node representing ``x + y``, ``x`` being the zeroth child and ``y`` being the +first child, reflecting their order in the program text. Similarly, ``x + y`` is the only child of +``(x + y)``, which is the zeroth child of ``(x + y) * z``, whose first child is ``z``. + +All AST nodes belong to class `AstNode +`__, which defines generic +tree traversal predicates: + +- ``getChild(i)``: returns the ``i``\ th child of this AST node. +- ``getAChild()``: returns any child of this AST node. +- ``getParent()``: returns the parent node of this AST node, if any. + +These predicates should only be used to perform generic AST traversal. To access children of +specific AST node types, the specialized predicates introduced below should be used instead. In +particular, queries should not rely on the numeric indices of child nodes relative to their parent +nodes: these are considered an implementation detail that may change between versions of the +library. + +The predicate ``toString()`` in class ``AstNode`` nodes gives a short description of the AST node, +usually just indicating what kind of node it is. The ``toString()`` predicate does `not` provide +access to the source text corresponding to an AST node. The source text is not stored in the +dataset, and hence is not directly accessible to CodeQL queries. + +The predicate ``getLocation()`` in class ``AstNode`` returns a `Location +`__ entity +describing the source location of the program element represented by the AST node. You can use its +member predicates ``getFile()``, ``getStartLine()``, ``getStartColumn``, ``getEndLine()``, and +``getEndColumn()`` to obtain information about its file, start line and column, and end line and +column. + +The most important subclasses of `AstNode +`__ are `Stmt +`__ and `Expr +`__, which represent +statements and expressions, respectively. This section briefly discusses some of their more +important subclasses and predicates. For a full reference of all the subclasses of `Stmt +`__ and `Expr +`__ and their API, see +`Stmt.qll `__ and `Expr.qll +`__. + +Statements +~~~~~~~~~~ + +- ``ExprStmt``: an expression statement; use ``getExpr()`` to access the expression itself +- ``Assignment``: an assignment statement; use ``getLhs(i)`` to access the ``i``\ th left-hand side + and ``getRhs(i)`` to access the ``i``\ th right-hand side; if there is only a single left-hand side + you can use ``getLhs()`` instead, and similar for the right-hand side + + - ``SimpleAssignStmt``: an assignment statement that does not involve a compound operator + + - ``AssignStmt``: a plain assignment statement of the form ``lhs = rhs`` + - ``DefineStmt``: a short-hand variable declaration of the form ``lhs := rhs`` + + - ``CompoundAssignStmt``: an assignment statement with a compound operator, such as ``lhs += rhs`` + +- ``IncStmt``, ``DecStmt``: an increment statement or a decrement statement, respectively; use + ``getOperand()`` to access the expression being incremented or decremented +- ``BlockStmt``: a block of statements between curly braces; use ``getStmt(i)`` to access the + ``i``\ th statement in a block +- ``IfStmt``: an ``if`` statement; use ``getInit()``, ``getCond()``, ``getThen()``, and + ``getElse()`` to access the (optional) init statement, the condition being checked, the "then" + branch to evaluate if the condition is true, and the (optional) "else" branch to evaluate + otherwise, respectively +- ``LoopStmt``: a loop; use ``getBody()`` to access its body + + - ``ForStmt``: a ``for`` statement; use ``getInit()``, ``getCond()``, and ``getPost()`` to access + the init statement, loop condition, and post statement, respectively, all of which are optional + + - ``RangeStmt``: a ``range`` statement; use ``getDomain()`` to access the iteration domain, and + ``getKey()`` and ``getValue()`` to access the expressions to which successive keys and values + are assigned, if any + +- ``GoStmt``: a ``go`` statement; use ``getCall()`` to access the call expression that is evaluated + in the new goroutine +- ``DeferStmt``: a ``defer`` statement; use ``getCall()`` to access the call expression being + deferred +- ``SendStmt``: a send statement; use ``getChannel()`` and ``getValue()`` to access the channel and + the value being sent over the channel, respectively +- ``ReturnStmt``: a ``return`` statement; use ``getExpr(i)`` to access the ``i``\ th returned + expression; if there is only a single returned expression you can use ``getExpr()`` instead +- ``BranchStmt``: a statement that interrupts structured control flow; use ``getLabel()`` to get the + optional target label + + - ``BreakStmt``: a ``break`` statement + - ``ContinueStmt``: a ``continue`` statement + - ``FallthroughStmt``: a ``fallthrough`` statement at the end of a switch case + - ``GotoStmt``: a ``goto`` statement + +- ``DeclStmt``: a declaration statement, use ``getDecl()`` to access the declaration in this + statement; note that one rarely needs to deal with declaration statements directly, since + reasoning about the entities they declare is usually easier +- ``SwitchStmt``: a ``switch`` statement; use ``getInit()`` to access the (optional) init statement, + and ``getCase(i)`` to access the ``i``\ th ``case`` or ``default`` clause + + - ``ExpressionSwitchStmt``: a ``switch`` statement examining the value of an expression + - ``TypeSwitchStmt``: a ``switch`` statement examining the type of an expression + +- ``CaseClause``: a ``case`` or ``default`` clause in a ``switch`` statement; use ``getExpr(i)`` to + access the ``i``\ th expression, and ``getStmt(i)`` to access the ``i``\ th statement in the body + of this clause +- ``SelectStmt``: a ``select`` statement; use ``getCommClause(i)`` to access the ``i``\ th ``case`` + or ``default`` clause +- ``CommClause``: a ``case`` or ``default`` clause in a ``select`` statement; use ``getComm()`` to + access the send/receive statement of this clause (not defined for ``default`` clauses), and + ``getStmt(i)`` to access the ``i``\ th statement in the body of this clause +- ``RecvStmt``: a receive statement in a ``case`` clause of a ``select`` statement; use + ``getLhs(i)`` to access the ``i``\ th left-hand side of this statement, and ``getExpr()`` to + access the underlying receive expression + +Expressions +~~~~~~~~~~~ + +Class ``Expression`` has a predicate ``isConst()`` that holds if the expression is a compile-time +constant. For such constant expressions, ``getNumericValue()`` and ``getStringValue()`` can be used +to determine their numeric value and string value, respectively. Note that these predicates are not +defined for expressions whose value cannot be determined at compile time. Also note that the result +type of ``getNumericValue()`` is the QL type ``float``. If an expression has a numeric value that +cannot be represented as a QL ``float``, this predicate is also not defined. In such cases, you can +use ``getExactValue()`` to obtain a string representation of the value of the constant. + +- ``Ident``: an identifier; use ``getName()`` to access its name +- ``SelectorExpr``: a selector of the form ``base.sel``; use ``getBase()`` to access the part before + the dot, and ``getSelector()`` for the identifier after the dot +- ``BasicLit``: a literal of a basic type; subclasses ``IntLit``, ``FloatLit``, ``ImagLit``, + ``RuneLit``, and ``StringLit`` represent various specific kinds of literals +- ``FuncLit``: a function literal; use ``getBody()`` to access the body of the function +- ``CompositeLit``: a composite literal; use ``getKey(i)`` and ``getValue(i)`` to access the + ``i``\ th key and the ``i``\ th value, respectively +- ``ParenExpr``: a parenthesized expression; use ``getExpr()`` to access the expression between the + parentheses +- ``IndexExpr``: an index expression ``base[idx]``; use ``getBase()`` and ``getIndex()`` to access + ``base`` and ``idx``, respectively +- ``SliceExpr``: a slice expression ``base[lo:hi:max]``; use ``getBase()``, ``getLow()``, + ``getHigh()``, and ``getMax()`` to access ``base``, ``lo``, ``hi``, and ``max``, respectively; + note that ``lo``, ``hi``, and ``max`` can be omitted, in which case the corresponding predicates are not defined +- ``ConversionExpr``: a conversion expression ``T(e)``; use ``getTypeExpr()`` and ``getOperand()`` + to access ``T`` and ``e``, respectively +- ``TypeAssertExpr``: a type assertion ``e.(T)``; use ``getExpr()`` and ``getTypeExpr()`` to access + ``e`` and ``T``, respectively +- ``CallExpr``: a call expression ``callee(arg0, ..., argn)``; use ``getCalleeExpr()`` to access + ``callee``, and ``getArg(i)`` to access the ``i``\ th argument +- ``StarExpr``: a star expression, which may be either a pointer-type expression or a + pointer-dereference expression, depending on context; use ``getBase()`` to access the operand of + the star +- ``TypeExpr``: an expression that denotes a type +- ``OperatorExpr``: an expression with a unary or binary operator; use ``getOperator()`` to access + the operator + + - ``UnaryExpr``: an expression with a unary operator; use ``getAnOperand()`` to access the operand + of the operator + - ``BinaryExpr``: an expression with a binary operator; use ``getLeftOperand()`` and + ``getRightOperand()`` to access the left and the right operand, respectively + + - ``ComparisonExpr``: a binary expression that performs a comparison, including both equality + tests and relational comparisons + + - ``EqualityTestExpr``: an equality test, that is, either ``==`` or ``!=``; the predicate + ``getPolarity()`` has result ``true`` for the former and ``false`` for the latter + - ``RelationalComparisonExpr``: a relational comparison; use ``getLesserOperand()`` and + ``getGreaterOperand()`` to access the lesser and greater operand of the comparison, + respectively; ``isStrict()`` holds if this is a strict comparison using ``<`` or ``>``, + as opposed to ``<=`` or ``>=`` + +Names +~~~~~ + +While ``Ident`` and ``SelectorExpr`` are very useful classes, they are often too general: ``Ident`` +covers all identifiers in a program, including both identifiers appearing in a declaration as well +as references, and does not distinguish between names referring to packages, types, variables, +constants, functions, or statement labels. Similarly, a ``SelectorExpr`` might refer to a package, a +type, a function, or a method. + +Class ``Name`` and its subclasses provide a more fine-grained mapping of this space, organized along +the two axes of structure and namespace. In terms of structure, a name can be a ``SimpleName``, +meaning that it is a simple identifier (and hence an ``Ident``), or it can be a ``QualifiedName``, +meaning that it is a qualified identifier (and hence a ``SelectorExpr``). In terms of namespacing, a +``Name`` can be a ``PackageName``, ``TypeName``, ``ValueName``, or ``LabelName``. A ``ValueName``, +in turn, can be either a ``ConstantName``, a ``VariableName``, or a ``FunctionName``, depending on +what sort of entity the name refers to. + +A related abstraction is provided by class ``ReferenceExpr``: a reference expression is an +expression that refers to a variable, a constant, a function, a field, or an element of an array or +a slice. Use predicates ``isLvalue()`` and ``isRvalue()`` to determine whether a reference +expression appears in a syntactic context where it is assigned to or read from, respectively. + +Finally, ``ValueExpr`` generalizes ``ReferenceExpr`` to include all other kinds of expressions that +can be evaluated to a value (as opposed to expressions that refer to a package, a type, or a +statement label). + +Functions +~~~~~~~~~ + +At the syntactic level, functions appear in two forms: in function declarations (represented by +class ``FuncDecl``) and as function literals (represented by class ``FuncLit``). Since it is often +convenient to reason about functions of either kind, these two classes share a common superclass +``FuncDef``, which defines a few useful member predicates: + + - ``getBody()`` provides access to the function body + - ``getName()`` gets the function name; it is undefined for function literals, which do not have a + name + - ``getParameter(i)`` gets the ``i``\ th parameter of the function + - ``getResultVar(i)`` gets the ``i``\ th result variable of the function; if there is only + one result, ``getResultVar()`` can be used to access it + - ``getACall()`` gets a data-flow node (see below) representing a call to this function + +Entities and name binding +------------------------- + +Not all elements of a code base can be represented as AST nodes. For example, functions defined in +the standard library or in a dependency do not have a source-level definition within the source code +of the program itself, and built-in functions like ``len`` do not have a definition at all. Hence +functions cannot simplify be identified with their definition, and similarly for variables, types, +and so on. + +To smooth over this difference and provide a unified view of functions no matter where they are +defined, the Go library introduces the concept of an `entity`. An entity is a named program element, +that is, a package, a type, a constant, a variable, a field, a function, or a label. All entities +belong to class ``Entity``, which defines a few useful predicates: + + - ``getName()`` gets the name of the entity + - ``hasQualifiedName(pkg, n)`` holds if this entity is declared in package ``pkg`` and has name + ``n``; this predicate is only defined for types, functions, and package-level variables and + constants (but not for methods or local variables) + - ``getDeclaration()`` connects an entity to its declaring identifier, if any + - ``getAReference()`` gets a ``Name`` that refers to this entity + +Conversely, class ``Name`` defines a predicate ``getTarget()`` that gets the entity to which the +name refers. + +Class ``Entity`` has several subclasses representing specific kinds of entities: ``PackageEntity`` +for packages; ``TypeEntity`` for types; ``ValueEntity`` for constants (``Constant``), variables +(``Variable``), and functions (``Function``); and ``Label`` for statement labels. + +Class ``Variable``, in turn, has a few subclasses representing specific kinds of variables: a +``LocalVariable`` is a variable declared in a local scope, that is, not at package level; +``ReceiverVariable``, ``Parameter`` and ``ResultVariable`` describe receivers, parameters and +results, respectively, and define a predicate ``getFunction()`` to access the corresponding +function. Finally, class ``Field`` represents struct fields, and provides a member predicate +``hasQualifiedName(pkg, tp, f)`` that holds if this field has name ``f`` and belongs to type ``tp`` +in package ``pkg``. (Note that due to embedding the same field can belong to multiple types.) + +Class ``Function`` has a subclass ``Method`` representing methods (including both interface methods +and methods defined on a named type). Similar to ``Field``, ``Method`` provides a member predicate +``hasQualifiedName(pkg, tp, m)`` that holds if this method has name ``m`` and belongs to type ``tp`` +in package ``pkg``. Predicate ``implements(m2)`` holds if this method implements method ``m2``, that +is, it has the same name and signature as ``m2`` and it belongs to a type that implements the +interface to which ``m2`` belongs. For any function, ``getACall()`` provides access to call sites +that may call this function, possibly through virtual dispatch. + +Finally, module ``Builtin`` provides a convenient way of looking up the entities corresponding to +built-in functions and types. For example, ``Builtin::len()`` is the entity representing the +built-in function ``len``, ``Builtin::bool()`` is the ``bool`` type, and ``Builtin::nil()`` is the +value ``nil``. + +Type information +---------------- + +Types are represented by class ``Type`` and its subclasses, such as ``BoolType`` for the built-in +type ``bool``; ``NumericType`` for the various numeric types including ``IntType``, ``Uint8Type``, +``Float64Type`` and others; ``StringType`` for the type ``string``; ``NamedType``, ``ArrayType``, +``SliceType``, ``StructType``, ``InterfaceType``, ``PointerType``, ``MapType``, ``ChanType`` for +named types, arrays, slices, structs, interfaces, pointers, maps, and channels, respectively. +Finally, ``SignatureType`` represents function types. + +Note that the type ``BoolType`` is distinct from the entity ``Builtin::bool()``: the latter views +``bool`` as a declared entity, the former as a type. You can, however, map from types to their +corresponding entity (if any) using the predicate ``getEntity()``. + +Class ``Expr`` and class ``Entity`` both define a predicate ``getType()`` to determine the type of +an expression or entity. If the type of an expression or entity cannot be determined (for example +because some dependency could not be found during extraction), it will be associated with an invalid +type of class ``InvalidType``. + +Control flow +------------ + +Most CodeQL query writers will rarely use the control-flow representation of a program directly, but +it is nevertheless useful to understand how it works. + +Unlike the abstract syntax tree, which views the program as a hierarchy of AST nodes, the +control-flow graph views it as a collection of `control-flow nodes`, each representing a single +operation performed at runtime. These nodes are connected to each other by (directed) edges +representing the order in which operations are performed. + +For example, consider the following code snippet: + +.. code-block:: go + + x := 0 + if p != nil { + x = p.f + } + return x + +In the AST, this is represented as an ``IfStmt`` and a ``ReturnStmt``, with the former having an +``NeqExpr`` and a ``BlockStmt`` as its children, and so on. This provides a very detailed picture of +the syntactic structure of the code, but it does not immediately help us reason about the order +in which the various operations such as the comparison and the assignment are performed. + +In the CFG, there are nodes corresponding to ``x := 0``, ``p != nil``, ``x = p.f``, and ``return +x``, as well as a few others. The edges between these nodes model the possible execution orders of +these statements and expressions, and look as follows (simplified somewhat for presentational +purposes): + +|cfg| + +For example, the edge from ``p != nil`` to ``x = p.f`` models the case where the comparison +evaluates to ``true`` and the "then" branch is evaluated, while the edge from ``p != nil`` to +``return x`` models the case where the comparison evaluates to ``false`` and the "then" branch is +skipped. + +Note, in particular, that a CFG node can have multiple outgoing edges (like from ``p != nil``) as +well as multiple incoming edges (like into ``return x``) to represent control-flow branching at +runtime. + +Also note that only AST nodes that perform some kind of operation on values have a corresponding CFG +node. This includes expressions (such as the comparison ``p != nil``), assignment statements (such +as ``x = p.f``) and return statements (such as ``return x``), but not statements that serve a purely +syntactic purpose (such as block statements) and statements whose semantics is already reflected by +the CFG edges (such as ``if`` statements). + +It is important to point out that the control-flow graph provided by the CodeQL libraries for Go +only models `local` control flow, that is, flow within a single function. Flow from function calls +to the function they invoke, for example, is not represented by control-flow edges. + +In CodeQL, control-flow nodes are represented by class ``ControlFlow::Node``, and the edges between +nodes are captured by the member predicates ``getASuccessor()`` and ``getAPredecessor()`` of +``ControlFlow::Node``. In addition to control-flow nodes representing runtime operations, each +function also has a synthetic entry node and an exit node, representing the start and end of an +execution of the function, respectively. These exist to ensure that the control-flow graph +corresponding to a function has a unique entry node and a unique exit node, which is required for +many standard control-flow analysis algorithms. + +Data flow +--------- + +At the data-flow level, the program is thought of as a collection of `data-flow nodes`. These nodes +are connected to each other by (directed) edges representing the way data flows through the program +at runtime. + +For example, there are data-flow nodes corresponding to expressions and other data-flow nodes +corresponding to variables (`SSA variables +`__, to be precise). Here is the +data-flow graph corresponding to the code snippet shown above, ignoring SSA conversion for +simplicity: + +|dfg| + +Note that unlike in the control-flow graph, the assignments ``x := 0`` and ``x = p.f`` are not +represented as nodes. Instead, they are expressed as edges between the node representing the +right-hand side of the assignment and the node representing the variable on the left-hand side. For +any subsequent uses of that variable, there is a data-flow edge from the variable to that use, so by +following the edges in the data-flow graph we can trace the flow of values through variables at +runtime. + +It is important to point out that the data-flow graph provided by the CodeQL libraries for Go only +models `local` flow, that is, flow within a single function. Flow from arguments in a function call +to the corresponding function parameters, for example, is not represented by data-flow edges. + +In CodeQL, data-flow nodes are represented by class ``DataFlow::Node``, and the edges between nodes +are captured by the predicate ``DataFlow::localFlowStep``. The predicate ``DataFlow::localFlow`` +generalizes this from a single flow step to zero or more flow steps. + +Most expressions have a corresponding data-flow node; exceptions include type expressions, statement +labels and other expressions that do not have a value, as well as short-circuiting operators. To map +from the AST node of an expression to the corresponding DFG node, use ``DataFlow::exprNode``. Note +that the AST node and the DFG node are different entities and cannot be used interchangeably. + +There is also a predicate ``asExpr()`` on ``DataFlow::Node`` that allows you to recover the +expression underlying a DFG node. However, this predicate should be used with caution, since many +data-flow nodes do not correspond to an expression, and so this predicate will not be defined for +them. + +Similar to ``Expr``, ``DataFlow::Node`` has a member predicate ``getType()`` to determine the type +of a node, as well as predicates ``getNumericValue()``, ``getStringValue()``, and +``getExactValue()`` to retrieve the value of a node if it is constant. + +Important subclasses of ``DataFlow::Node`` include: + + - ``DataFlow::CallNode``: a function call or method call; use ``getArgument(i)`` and + ``getResult(i)`` to obtain the data-flow nodes corresponding to the ``i``\ th argument and the + ``i``\ th result of this call, respectively; if there is only a single result, ``getResult()`` + will return it + - ``DataFlow::ParameterNode``: a parameter of a function; use ``asParameter()`` to access the + corresponding AST node + - ``DataFlow::BinaryOperationNode``: an operation involving a binary operator; each ``BinaryExpr`` + has a corresponding ``BinaryOperationNode``, but there are also binary operations that are not + explicit at the AST level, such as those arising from compound assignments and + increment/decrement statements; at the AST level, ``x + 1``, ``x += 1``, and ``x++`` are + represented by different kinds of AST nodes, while at the DFG level they are all modeled as a + binary operation node with operands ``x`` and ``1`` + - ``DataFlow::UnaryOperationNode``: analogous, but for unary operators + + - ``DataFlow::PointerDereferenceNode``: a pointer dereference, either explicit in an expression + of the form ``*p``, or implicit in a field or method reference through a pointer + - ``DataFlow::AddressOperationNode``: analogous, but for taking the address of an entity + - ``DataFlow::RelationalComparisonNode``, ``DataFlow::EqualityTestNode``: data-flow nodes + corresponding to ``RelationalComparisonExpr`` and ``EqualityTestExpr`` AST nodes + +Finally, classes ``Read`` and ``Write`` represent, respectively, a read or a write of a variable, a +field, or an element of an array, a slice or a map. Use their member predicates ``readsVariable``, +``writesVariable``, ``readsField``, ``writesField``, ``readsElement``, and ``writesElement`` to +determine what the read/write refers to. + +Call graph +---------- + +The call graph connects function (and method) calls to the functions they invoke. Call graph +information is made available by two member predicates on ``DataFlow::CallNode``: ``getTarget()`` +returns the declared target of a call, while ``getACallee()`` returns all possible actual functions +a call may invoke at runtime. + +These two predicates differ in how they handle calls to interface methods: while ``getTarget()`` +will return the interface method itself, ``getACallee()`` will return all concrete methods that +implement the interface method. + +Global data flow and taint tracking +----------------------------------- + +The predicates ``DataFlow::localFlowStep`` and ``DataFlow::localFlow`` are useful for reasoning +about the flow of values in a single function. However, more advanced use cases, particularly in +security analysis, will invariably require reasoning about global data flow, including flow into, +out of, and across function calls, and through fields. + +In CodeQL, such reasoning is expressed in terms of `data-flow configurations`. A data-flow +configuration has three ingredients: sources, sinks, and barriers (also called sanitizers), all of +which are sets of data-flow nodes. Given these three sets, CodeQL provides a general mechanism for +finding paths from a source to a sink, possibly going into and out of functions and fields, but +never flowing through a barrier. + +To define a data-flow configuration, you can define a subclass of ``DataFlow::Configuration``, +overriding the member predicates ``isSource``, ``isSink``, and ``isBarrier`` to define the sets of +sources, sinks, and barriers. + +Going beyond pure data flow, many security analyses need to perform more general `taint tracking`, +which also considers flow through value-transforming operations such as string operations. To track +taint, you can define a subclass of ``TaintTracking::Configuration``, which works similar to +data-flow configurations. + +A detailed exposition of global data flow and taint tracking is out of scope for this brief +introduction. See `Introduction to data flow analysis with CodeQL +`__ for a general overview of data flow +and taint tracking. + +Advanced libraries +------------------ + +Finally, we briefly describe a few concepts and libraries that are useful for advanced query +writers. + +Basic blocks and dominance +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many important control-flow analyses organize control-flow nodes into `basic blocks +`__, which are maximal straight-line sequences of +control-flow nodes without any branching. In the CodeQL libraries, basic blocks are represented by +class ``BasicBlock``. Each control-flow node belongs to a basic block. You can use the predicate +``getBasicBlock()`` in class ``ControlFlow::Node`` and the predicate ``getNode(i)`` in +``BasicBlock`` to move from one to the other. + +Dominance is a standard concept in control-flow analysis: a basic block ``dom`` is said to +`dominate` a basic block ``bb`` if any path through the control-flow graph from the entry node to +the first node of ``bb`` must pass through ``dom``. In other words, whenever program execution +reaches the beginning of ``bb``, it must have come through ``dom``. Each basic block is moreover +considered to dominate itself. + +Dually, a basic block ``postdom`` is said to `post-dominate` a basic block ``bb`` if any path +through the control-flow graph from the last node of ``bb`` to the exit node must pass through +``postdom``. In other words, after program execution leaves ``bb``, it must eventually reach +``postdom``. + +These two concepts are captured by two member predicates ``dominates`` and ``postDominates`` of class +``BasicBlock``. + +Condition guard nodes +~~~~~~~~~~~~~~~~~~~~~ + +A condition guard node is a synthetic control-flow node that records the fact that at some point in +the control-flow graph the truth value of a condition is known. For example, consider again the code snippet we saw above: + +.. code-block:: go + + x := 0 + if p != nil { + x = p.f + } + return x + +At the beginning of the "then" branch ``p`` is known not be ``nil``. This knowledge is encoded in +the control-flow graph by a condition guard node preceding the assignment to ``x``, recording the +fact that ``p != nil`` is ``true`` at this point: + +|cfg2| + +A typical use of this information would be in an analyis that looks for ``nil`` dereferences: such +an analysis would be able to conclude that the field read ``p.f`` is safe because it is immediately +preceded by a condition guard node guaranteeing that ``p`` is not ``nil``. + +In CodeQL, condition guard nodes are represented by class ``ControlFlow::ConditionGuardNode`` which +offers a variety of member predicates to reason about which conditions a guard node guarantees. + +Static single-assignment form +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Static single-assignment form `__ (SSA +form for short) is a program representation in which the original program variables are mapped onto +more fine-grained `SSA variables`. Each SSA variable has exactly one definition, so program +variables with multiple assignments correspond to multiple SSA variables. + +Most of the time query authors do not have to deal with SSA form directly. The data-flow graph uses +it under the hood, and so most of the benefits derived from SSA can be gained by simply using the +data-flow graph. + +For example, the data-flow graph for our running example actually looks more like this: + +|ssa| + +Note that the program variable ``x`` has been mapped onto three distinct SSA variables ``x1``, +``x2``, and ``x3``. In this case there is not much benefit to such a representation, but in general +SSA form has well-known advantages for data-flow analysis for which we refer to the literature. + +If you do need to work with raw SSA variables, they are represented by the class ``SsaVariable``. +Class ``SsaDefinition`` represents definitions of SSA variables, which have a one-to-one +correspondence with ``SsaVariable``\ s. Member predicates ``getDefinition()`` and ``getVariable()`` +exist to map from one to the other. You can use member predicate ``getAUse()`` of ``SsaVariable`` to +look for uses of an SSA variable. To access the program variable underlying an SSA variable, use +member predicate ``getSourceVariable()``. + +Global value numbering +~~~~~~~~~~~~~~~~~~~~~~ + +`Global value numbering `__ is a technique for +determining when two computations in a program are guaranteed to yield the same result. This is done +by associating with each data-flow node an abstract representation of its value (conventionally +called a `value number`, even though in practice it is not usually a number) such that identical +computations are represented by identical value numbers. + +Since this is an undecidable problem, global value numbering is `conservative` in the sense that if +two data-flow nodes have the same value number they are guaranteed to have the same value at +runtime, but not conversely. (That is, there may be data-flow nodes that do, in fact, always +evaluate to the same value, but their value numbers are different.) + +In the CodeQL libraries for Go, you can use the ``globalValueNumber(nd)`` predicate to compute the +global value number for a data-flow node ``nd``. Value numbers are represented as an opaque QL type +``GVN`` that provides very little information. Usually, all you need to do with global value numbers +is to compare them to each other to determine whether two data-flow nodes have the same value. + +What next? +---------- + +- Find out more about QL in the `QL language handbook `__ and `QL language specification `__. +- Learn more about the query console in `Using the query console `__. + +.. |ast| image:: ast.png +.. |cfg| image:: cfg.png +.. |dfg| image:: dfg.png +.. |cfg2| image:: cfg2.png +.. |ssa| image:: ssa.png diff --git a/ql/docs/learn-ql/ssa.dot b/ql/docs/learn-ql/ssa.dot new file mode 100644 index 00000000000..c8bfb8c62a0 --- /dev/null +++ b/ql/docs/learn-ql/ssa.dot @@ -0,0 +1,15 @@ +digraph ssa { + graph [dpi=300]; + rankdir=LR; + + "x1" [shape=diamond,label=1>]; + "x2" [shape=diamond,label=2>]; + "x3" [shape=diamond,label=3>]; + "return x" [label=x>]; + + "0" -> "x1"; + "p.f" -> "x2"; + "x1" -> "x3"; + "x2" -> "x3"; + "x3" -> "return x"; +} diff --git a/ql/docs/learn-ql/ssa.png b/ql/docs/learn-ql/ssa.png new file mode 100644 index 00000000000..cd5ba3f29de Binary files /dev/null and b/ql/docs/learn-ql/ssa.png differ diff --git a/ql/src/InconsistentCode/ConstantLengthComparison.ql b/ql/src/InconsistentCode/ConstantLengthComparison.ql index bc4c9b0aa00..6058e287dec 100644 --- a/ql/src/InconsistentCode/ConstantLengthComparison.ql +++ b/ql/src/InconsistentCode/ConstantLengthComparison.ql @@ -16,7 +16,7 @@ from ControlFlow::ConditionGuardNode cond, DataFlow::CallNode lenA where // `i` is incremented in `fs` - fs.getPost().(IncStmt).getExpr() = i.getAReference() and + fs.getPost().(IncStmt).getOperand() = i.getAReference() and // `idx` reads `a[i]` idx.reads(a.getANode(), i.getARead()) and // `lenA` is `len(a)` diff --git a/ql/src/InconsistentCode/InconsistentLoopOrientation.ql b/ql/src/InconsistentCode/InconsistentLoopOrientation.ql index 7eea9dbbeab..fa5051ed5c7 100644 --- a/ql/src/InconsistentCode/InconsistentLoopOrientation.ql +++ b/ql/src/InconsistentCode/InconsistentLoopOrientation.ql @@ -36,7 +36,7 @@ predicate bounds(RelationalComparisonExpr test, Variable v, string direction) { * downward. */ predicate updates(IncDecStmt upd, Variable v, string direction) { - upd.getExpr() = v.getAReference() and + upd.getOperand() = v.getAReference() and ( upd instanceof IncStmt and direction = "upward" or diff --git a/ql/src/InconsistentCode/WhitespaceContradictsPrecedence.ql b/ql/src/InconsistentCode/WhitespaceContradictsPrecedence.ql index c2d2f80a6b6..7dde89c445f 100644 --- a/ql/src/InconsistentCode/WhitespaceContradictsPrecedence.ql +++ b/ql/src/InconsistentCode/WhitespaceContradictsPrecedence.ql @@ -51,10 +51,10 @@ class AssocNestedExpr extends BinaryExpr { class HarmlessNestedExpr extends BinaryExpr { HarmlessNestedExpr() { exists(BinaryExpr parent | this = parent.getAChildExpr() | - parent instanceof Comparison and + parent instanceof ComparisonExpr and (this instanceof ArithmeticExpr or this instanceof ShiftExpr) or - parent instanceof LogicalExpr and this instanceof Comparison + parent instanceof LogicalExpr and this instanceof ComparisonExpr ) } } diff --git a/ql/src/RedundantCode/CompareIdenticalValues.ql b/ql/src/RedundantCode/CompareIdenticalValues.ql index 5ce12c4f395..4f63b222018 100644 --- a/ql/src/RedundantCode/CompareIdenticalValues.ql +++ b/ql/src/RedundantCode/CompareIdenticalValues.ql @@ -13,7 +13,7 @@ import go -from Comparison cmp, Expr l +from ComparisonExpr cmp, Expr l where l = cmp.getLeftOperand() and l.getGlobalValueNumber() = cmp.getRightOperand().getGlobalValueNumber() and diff --git a/ql/src/RedundantCode/NegativeLengthCheck.ql b/ql/src/RedundantCode/NegativeLengthCheck.ql index 491d286f0b9..2c55baddc9b 100644 --- a/ql/src/RedundantCode/NegativeLengthCheck.ql +++ b/ql/src/RedundantCode/NegativeLengthCheck.ql @@ -11,7 +11,7 @@ import go -from Comparison cmp, BuiltinFunction len, int ub, string r +from ComparisonExpr cmp, BuiltinFunction len, int ub, string r where (len = Builtin::len() or len = Builtin::cap()) and ( diff --git a/ql/src/semmle/go/Expr.qll b/ql/src/semmle/go/Expr.qll index 632e78246cb..bb2e16eb9e1 100644 --- a/ql/src/semmle/go/Expr.qll +++ b/ql/src/semmle/go/Expr.qll @@ -320,12 +320,12 @@ class StructLit extends CompositeLit { */ class ParenExpr extends @parenexpr, Expr { /** Gets the expression between parentheses. */ - Expr getExpression() { result = getChildExpr(0) } + Expr getExpr() { result = getChildExpr(0) } - override Expr stripParens() { result = getExpression().stripParens() } + override Expr stripParens() { result = getExpr().stripParens() } override predicate isPlatformIndependentConstant() { - getExpression().isPlatformIndependentConstant() + getExpr().isPlatformIndependentConstant() } override string toString() { result = "(...)" } @@ -394,7 +394,7 @@ class SliceExpr extends @sliceexpr, Expr { */ class TypeAssertExpr extends @typeassertexpr, Expr { /** Gets the base expression whose type is being asserted. */ - Expr getExpression() { result = getChildExpr(0) } + Expr getExpr() { result = getChildExpr(0) } /** Gets the expression representing the asserted type. */ Expr getTypeExpr() { result = getChildExpr(1) } @@ -402,7 +402,7 @@ class TypeAssertExpr extends @typeassertexpr, Expr { override predicate mayHaveOwnSideEffects() { any() } override predicate isPlatformIndependentConstant() { - getExpression().isPlatformIndependentConstant() + getExpr().isPlatformIndependentConstant() } override string toString() { result = "type assertion" } @@ -718,9 +718,9 @@ class AddressExpr extends @addressexpr, UnaryExpr { } /** - * A unary arrow expression using `<-`. + * A unary receive expression using `<-`. */ -class ArrowExpr extends @arrowexpr, UnaryExpr { +class RecvExpr extends @arrowexpr, UnaryExpr { override predicate mayHaveOwnSideEffects() { any() } override string getOperator() { result = "<-" } @@ -776,12 +776,12 @@ class ShiftExpr extends @shiftexpr, BitwiseBinaryExpr { } /** * A comparison expression, that is, `==`, `!=`, `<`, `<=`, `>=` or `>`. */ -class Comparison extends @comparison, BinaryExpr { } +class ComparisonExpr extends @comparison, BinaryExpr { } /** * An equality test, that is, `==` or `!=`. */ -class EqualityTestExpr extends @equalitytest, Comparison { +class EqualityTestExpr extends @equalitytest, ComparisonExpr { /** Gets the polarity of this equality test, that is, `true` for `==` and `false` for `!=`. */ boolean getPolarity() { none() } } @@ -789,7 +789,7 @@ class EqualityTestExpr extends @equalitytest, Comparison { /** * A relational comparison, that is, `<`, `<=`, `>=` or `>`. */ -class RelationalComparisonExpr extends @relationalcomparison, Comparison { +class RelationalComparisonExpr extends @relationalcomparison, ComparisonExpr { /** Holds if this comparison is strict, that is, it implies inequality. */ predicate isStrict() { none() } @@ -1136,7 +1136,7 @@ private predicate isTypeExprBottomUp(Expr e) { e instanceof InterfaceTypeExpr or e instanceof MapTypeExpr or e instanceof ChanTypeExpr or - isTypeExprBottomUp(e.(ParenExpr).getExpression()) or + isTypeExprBottomUp(e.(ParenExpr).getExpr()) or isTypeExprBottomUp(e.(StarExpr).getBase()) or isTypeExprBottomUp(e.(Ellipsis).getOperand()) } @@ -1184,7 +1184,7 @@ private predicate isTypeExprTopDown(Expr e) { or e = any(SelectorExpr sel | isTypeExprTopDown(sel)).getBase() or - e = any(ParenExpr pe | isTypeExprTopDown(pe)).getExpression() + e = any(ParenExpr pe | isTypeExprTopDown(pe)).getExpr() or e = any(StarExpr se | isTypeExprTopDown(se)).getBase() or @@ -1216,7 +1216,7 @@ class ReferenceExpr extends Expr { not this = any(MethodSpec md).getNameExpr() and not this = any(StructLit sl).getKey(_) or - this.(ParenExpr).getExpression() instanceof ReferenceExpr + this.(ParenExpr).getExpr() instanceof ReferenceExpr or this.(StarExpr).getBase() instanceof ReferenceExpr or @@ -1229,7 +1229,7 @@ class ReferenceExpr extends Expr { predicate isLvalue() { this = any(Assignment assgn).getLhs(_) or - this = any(IncDecStmt ids).getExpr() + this = any(IncDecStmt ids).getOperand() or exists(RangeStmt rs | this = rs.getKey() or @@ -1247,7 +1247,7 @@ class ReferenceExpr extends Expr { or this = any(CompoundAssignStmt cmp).getLhs(_) or - this = any(IncDecStmt ids).getExpr() + this = any(IncDecStmt ids).getOperand() } } @@ -1258,7 +1258,7 @@ class ValueExpr extends Expr { this instanceof BasicLit or this instanceof FuncLit or this instanceof CompositeLit or - this.(ParenExpr).getExpression() instanceof ValueExpr or + this.(ParenExpr).getExpr() instanceof ValueExpr or this instanceof SliceExpr or this instanceof TypeAssertExpr or this instanceof CallOrConversionExpr or diff --git a/ql/src/semmle/go/Stmt.qll b/ql/src/semmle/go/Stmt.qll index a0d84869556..a57373229c9 100644 --- a/ql/src/semmle/go/Stmt.qll +++ b/ql/src/semmle/go/Stmt.qll @@ -103,8 +103,8 @@ class SendStmt extends @sendstmt, Stmt { * An increment or decrement statement. */ class IncDecStmt extends @incdecstmt, Stmt { - /** Gets the expression. */ - Expr getExpr() { result = getChildExpr(0) } + /** Gets the expression being incremented or decremented. */ + Expr getOperand() { result = getChildExpr(0) } /** Gets the increment or decrement operator. */ string getOperator() { none() } @@ -530,7 +530,7 @@ class RecvStmt extends Stmt { int getNumLhs() { result = count(getLhs(_)) } /** Gets the receive expression of this receive statement. */ - ArrowExpr getExpr() { + RecvExpr getExpr() { result = this.(ExprStmt).getExpr() or result = this.(Assignment).getRhs() } diff --git a/ql/src/semmle/go/controlflow/ControlFlowGraph.qll b/ql/src/semmle/go/controlflow/ControlFlowGraph.qll index 6d5a3f618ea..fd965d35151 100644 --- a/ql/src/semmle/go/controlflow/ControlFlowGraph.qll +++ b/ql/src/semmle/go/controlflow/ControlFlowGraph.qll @@ -155,7 +155,7 @@ module ControlFlow { private predicate ensuresAux(Expr expr, boolean b) { expr = cond and b = outcome or - expr = any(ParenExpr par | ensuresAux(par, b)).getExpression() + expr = any(ParenExpr par | ensuresAux(par, b)).getExpr() or expr = any(NotExpr ne | ensuresAux(ne, b.booleanNot())).getOperand() or diff --git a/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll b/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll index c8c17af2899..d86f717dbc9 100644 --- a/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll +++ b/ql/src/semmle/go/controlflow/ControlFlowGraphImpl.qll @@ -29,7 +29,7 @@ private predicate isCondRoot(Expr e) { private predicate isCond(Expr e) { isCondRoot(e) or e = any(LogicalBinaryExpr lbe | isCond(lbe)).getRightOperand() or - e = any(ParenExpr par | isCond(par)).getExpression() + e = any(ParenExpr par | isCond(par)).getExpr() } /** @@ -309,7 +309,7 @@ newtype TWriteTarget = ) ) or - exists(IncDecStmt ids | write = MkIncDecNode(ids) | lhs = ids.getExpr().stripParens()) + exists(IncDecStmt ids | write = MkIncDecNode(ids) | lhs = ids.getOperand().stripParens()) or exists(ParameterOrReceiver parm | write = MkParameterInit(parm) | lhs = parm.getDeclaration()) or @@ -703,7 +703,7 @@ module CFG { or i = 0 and result = this.(LabeledStmt).getStmt() or - i = 0 and result = this.(ParenExpr).getExpression() + i = 0 and result = this.(ParenExpr).getExpr() or result = this.(PlainBlock).getStmt(i) } @@ -1385,7 +1385,7 @@ module CFG { } private class IncDecTree extends ControlFlowTree, IncDecStmt { - override predicate firstNode(ControlFlow::Node first) { firstNode(getExpr(), first) } + override predicate firstNode(ControlFlow::Node first) { firstNode(getOperand(), first) } override predicate lastNode(ControlFlow::Node last, Completion cmpl) { ControlFlowTree.super.lastNode(last, cmpl) @@ -1395,7 +1395,7 @@ module CFG { } override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { - lastNode(getExpr(), pred, normalCompletion()) and + lastNode(getOperand(), pred, normalCompletion()) and succ = MkImplicitOne(this) or pred = MkImplicitOne(this) and @@ -1851,7 +1851,7 @@ module CFG { result = Panic() } - override ControlFlowTree getChildTree(int i) { i = 0 and result = getExpression() } + override ControlFlowTree getChildTree(int i) { i = 0 and result = getExpr() } } private class UnaryExprTree extends ControlFlowTree, UnaryExpr { diff --git a/ql/src/semmle/go/controlflow/IR.qll b/ql/src/semmle/go/controlflow/IR.qll index c477913c62d..b3c655b12b7 100644 --- a/ql/src/semmle/go/controlflow/IR.qll +++ b/ql/src/semmle/go/controlflow/IR.qll @@ -843,7 +843,7 @@ module IR { /** Gets the corresponding increment or decrement statement. */ IncDecStmt getStmt() { result = ids } - override Type getResultType() { result = ids.getExpr().getType() } + override Type getResultType() { result = ids.getOperand().getType() } override ControlFlow::Root getRoot() { result.isRootOf(ids) } @@ -867,7 +867,7 @@ module IR { /** Gets the corresponding increment or decrement statement. */ IncDecStmt getStmt() { result = ids } - override Type getResultType() { result = ids.getExpr().getType() } + override Type getResultType() { result = ids.getOperand().getType() } override ControlFlow::Root getRoot() { result.isRootOf(ids) } @@ -1486,7 +1486,7 @@ module IR { */ Instruction evalExprInstruction(Expr e) { result = MkExprNode(e) or - result = evalExprInstruction(e.(ParenExpr).getExpression()) + result = evalExprInstruction(e.(ParenExpr).getExpr()) } /** diff --git a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll index d442bb6520b..94fc150caef 100644 --- a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -558,7 +558,7 @@ class BinaryOperationNode extends Node { exists(IR::EvalIncDecRhsInstruction rhs, IncDecStmt ids | rhs = asInstruction() and ids = rhs.getStmt() | - left = exprNode(ids.getExpr()) and + left = exprNode(ids.getOperand()) and right = instructionNode(any(IR::EvalImplicitOneInstruction one | one.getStmt() = ids)) and op = ids.getOperator().charAt(0) ) @@ -757,7 +757,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { exists(Expr pred, Expr succ | succ.(LogicalBinaryExpr).getAnOperand() = pred or succ.(ConversionExpr).getOperand() = pred or - succ.(TypeAssertExpr).getExpression() = pred + succ.(TypeAssertExpr).getExpr() = pred | nodeFrom = exprNode(pred) and nodeTo = exprNode(succ)