diff --git a/ql/src/codeql_ruby/DataFlow.qll b/ql/src/codeql_ruby/DataFlow.qll new file mode 100644 index 00000000000..1eb4a7cbb45 --- /dev/null +++ b/ql/src/codeql_ruby/DataFlow.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. + */ +module DataFlow { + import codeql_ruby.dataflow.internal.DataFlowImpl +} diff --git a/ql/src/codeql_ruby/controlflow/CfgNodes.qll b/ql/src/codeql_ruby/controlflow/CfgNodes.qll index a0098df7a90..cac0a211691 100644 --- a/ql/src/codeql_ruby/controlflow/CfgNodes.qll +++ b/ql/src/codeql_ruby/controlflow/CfgNodes.qll @@ -176,6 +176,23 @@ abstract private class ExprChildMapping extends Expr { /** Provides classes for control-flow nodes that wrap AST expressions. */ module ExprNodes { // TODO: Add more classes + private class AssignmentExprChildMapping extends ExprChildMapping, Assignment { + override predicate relevantChild(Expr e) { e = this.getAnOperand() } + } + + /** A control-flow node that wraps an `Assignment` AST expression. */ + class AssignmentCfgNode extends ExprCfgNode { + override AssignmentExprChildMapping e; + + final override Assignment getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the LHS of this assignment. */ + final ExprCfgNode getLhs() { e.hasCfgChild(e.getLhs(), this, result) } + + /** Gets the RHS of this assignment. */ + final ExprCfgNode getRhs() { e.hasCfgChild(e.getRhs(), this, result) } + } + private class BinaryOperationExprChildMapping extends ExprChildMapping, BinaryOperation { override predicate relevantChild(Expr e) { e = this.getAnOperand() } } @@ -193,6 +210,40 @@ module ExprNodes { final ExprCfgNode getRightOperand() { e.hasCfgChild(e.getRightOperand(), this, result) } } + private class CallExprChildMapping extends ExprChildMapping, Call { + override predicate relevantChild(Expr e) { e = [this.getAnArgument(), this.getReceiver()] } + } + + /** A control-flow node that wraps a `Call` AST expression. 
*/ + class CallCfgNode extends ExprCfgNode { + override CallExprChildMapping e; + + final override Call getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the `n`th argument of this call. */ + final ExprCfgNode getArgument(int n) { e.hasCfgChild(e.getArgument(n), this, result) } + + /** Gets the receiver of this call. */ + final ExprCfgNode getReceiver() { e.hasCfgChild(e.getReceiver(), this, result) } + } + + private class ExprSequenceChildMapping extends ExprChildMapping, ExprSequence { + override predicate relevantChild(Expr e) { e = this.getAnExpr() } + } + + /** A control-flow node that wraps an `ExprSequence` AST expression. */ + class ExprSequenceCfgNode extends ExprCfgNode { + override ExprSequenceChildMapping e; + + final override ExprSequence getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the last expression in this sequence, if any. */ + final ExprCfgNode getLastExpr() { e.hasCfgChild(e.getLastExpr(), this, result) } + + /** Gets the `n`th expression of this expression sequence. */ + final ExprCfgNode getExpr(int n) { e.hasCfgChild(e.getExpr(n), this, result) } + } + /** A control-flow node that wraps a `VariableReadAccess` AST expression. */ class VariableReadAccessCfgNode extends ExprCfgNode { override VariableReadAccess e; diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll new file mode 100644 index 00000000000..995143c71bf --- /dev/null +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll @@ -0,0 +1,51 @@ +private import ruby +private import codeql_ruby.CFG +private import DataFlowPrivate + +newtype TReturnKind = TNormalReturnKind() + +/** + * Gets a node that can read the value returned from `call` with return kind + * `kind`. + */ +OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) } + +/** + * A return kind. A return kind describes how a value can be returned + * from a callable. 
+ */ +abstract class ReturnKind extends TReturnKind { + /** Gets a textual representation of this return kind. */ + abstract string toString(); +} + +/** + * A value returned from a callable using a `return` statement or an expression + * body, that is, a "normal" return. + */ +class NormalReturnKind extends ReturnKind, TNormalReturnKind { + override string toString() { result = "return" } +} + +class DataFlowCallable = CfgScope; + +class DataFlowCall extends CfgNodes::ExprNodes::CallCfgNode { + DataFlowCallable getEnclosingCallable() { result = this.getScope() } +} + +/** Gets a viable run-time target for the call `call`. */ +DataFlowCallable viableCallable(DataFlowCall call) { none() } + +/** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. This is currently a stub + * that never holds (`none()`), so no call is treated as benefiting from + * call-context information about the enclosing callable `c`. + */ +predicate mayBenefitFromCallContext(DataFlowCall call, Callable c) { none() } + +/** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ +DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() } diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowImplSpecific.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowImplSpecific.qll new file mode 100644 index 00000000000..e78a0814a14 --- /dev/null +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowImplSpecific.qll @@ -0,0 +1,11 @@ +/** + * Provides Ruby-specific definitions for use in the data flow library. 
+ */ +module Private { + import DataFlowPrivate + import DataFlowDispatch +} + +module Public { + import DataFlowPublic +} diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowPrivate.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowPrivate.qll new file mode 100644 index 00000000000..1ee45be9c9c --- /dev/null +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowPrivate.qll @@ -0,0 +1,362 @@ +private import ruby +private import codeql_ruby.CFG +private import codeql_ruby.dataflow.SSA +private import DataFlowPublic +private import DataFlowDispatch + +abstract class NodeImpl extends Node { + /** Do not call: use `getEnclosingCallable()` instead. */ + abstract CfgScope getCfgScope(); + + /** Do not call: use `getLocation()` instead. */ + abstract Location getLocationImpl(); + + /** Do not call: use `toString()` instead. */ + abstract string toStringImpl(); +} + +private class ExprNodeImpl extends ExprNode, NodeImpl { + override CfgScope getCfgScope() { result = this.getExprNode().getExpr().getCfgScope() } + + override Location getLocationImpl() { result = this.getExprNode().getLocation() } + + override string toStringImpl() { result = this.getExprNode().toString() } +} + +/** Provides predicates related to local data flow. */ +module LocalFlow { + private import codeql_ruby.dataflow.internal.SsaImpl + + /** + * Holds if `nodeFrom` is a last node referencing SSA definition `def`, which + * can reach `next`. + */ + private predicate localFlowSsaInput(Node nodeFrom, Ssa::Definition def, Ssa::Definition next) { + exists(BasicBlock bb, int i | lastRefBeforeRedef(def, bb, i, next) | + def = nodeFrom.(SsaDefinitionNode).getDefinition() and + def.definesAt(_, bb, i) + or + exists(CfgNodes::ExprCfgNode e | + e = nodeFrom.asExpr() and + e = bb.getNode(i) and + e.getExpr() instanceof VariableReadAccess + ) + ) + } + + /** + * Holds if there is a local flow step from `nodeFrom` to `nodeTo` involving + * SSA definition `def`. 
+ */ + predicate localSsaFlowStep(Ssa::Definition def, Node nodeFrom, Node nodeTo) { + // Flow from assignment into SSA definition + exists(CfgNodes::ExprNodes::AssignmentCfgNode a, BasicBlock bb, int i | + def.definesAt(_, bb, i) and + a = bb.getNode(i) and + a.getExpr() instanceof AssignExpr and + nodeFrom.asExpr() = a.getRhs() and + nodeTo.(SsaDefinitionNode).getDefinition() = def + ) + or + // Flow from SSA definition to first read + def = nodeFrom.(SsaDefinitionNode).getDefinition() and + nodeTo.asExpr() = def.getAFirstRead() + or + // Flow from read to next read + exists( + CfgNodes::ExprNodes::VariableReadAccessCfgNode read1, + CfgNodes::ExprNodes::VariableReadAccessCfgNode read2 + | + def.hasAdjacentReads(read1, read2) and + nodeTo.asExpr() = read2 + | + nodeFrom.asExpr() = read1 + or + read1 = nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr() + ) + or + // Flow into phi node + exists(Ssa::PhiNode phi | + localFlowSsaInput(nodeFrom, def, phi) and + phi = nodeTo.(SsaDefinitionNode).getDefinition() and + def = phi.getAnInput() + ) + // TODO + // or + // // Flow into uncertain SSA definition + // exists(LocalFlow::UncertainExplicitSsaDefinition uncertain | + // localFlowSsaInput(nodeFrom, def, uncertain) and + // uncertain = nodeTo.(SsaDefinitionNode).getDefinition() and + // def = uncertain.getPriorDefinition() + // ) + } +} + +/** An argument of a call, that is, an expression `call.getArgument(arg)` for some call and position. */ +private class Argument extends Expr { + private Call call; + private int arg; + + Argument() { this = call.getArgument(arg) } + + /** Holds if this expression is the `i`th argument of `c`. */ + predicate isArgumentOf(Expr c, int i) { c = call and i = arg } +} + +/** A collection of cached types and predicates to be evaluated in the same stage. 
*/ +cached +private module Cached { + cached + newtype TNode = + TExprNode(CfgNodes::ExprCfgNode n) or + TSsaDefinitionNode(Ssa::Definition def) or + TParameterNode(Parameter p) or + TExprPostUpdateNode(CfgNodes::ExprCfgNode n) { n.getNode() instanceof Argument } + + /** + * This is the local flow predicate that is used as a building block in global + * data flow. It excludes SSA flow through instance fields, as flow through fields + * is handled by the global data-flow library, but includes various other steps + * that are only relevant for global flow. + */ + cached + predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { + exists(Ssa::Definition def | LocalFlow::localSsaFlowStep(def, nodeFrom, nodeTo)) + or + nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::AssignmentCfgNode).getRhs() + } + + cached + newtype TContent = TTodoContent() // stub + + /** Holds if `n` should be hidden from path explanations. */ + cached + predicate nodeIsHidden(Node n) { + exists(Ssa::Definition def | def = n.(SsaDefinitionNode).getDefinition() | + def instanceof Ssa::PhiNode + ) + } +} + +import Cached + +/** An SSA definition, viewed as a node in a data flow graph. */ +class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode { + Ssa::Definition def; + + SsaDefinitionNode() { this = TSsaDefinitionNode(def) } + + /** Gets the underlying SSA definition. */ + Ssa::Definition getDefinition() { result = def } + + override CfgScope getCfgScope() { result = def.getBasicBlock().getScope() } + + override Location getLocationImpl() { result = def.getLocation() } + + override string toStringImpl() { result = def.toString() } +} + +private module ParameterNodes { + abstract private class ParameterNodeImpl extends ParameterNode, NodeImpl { } + + /** + * The value of an explicit parameter at function entry, viewed as a node in a data + * flow graph. 
+ */ + class ExplicitParameterNode extends ParameterNodeImpl, TParameterNode { + private Parameter parameter; + + ExplicitParameterNode() { this = TParameterNode(parameter) } + + override Parameter getParameter() { result = parameter } + + override predicate isParameterOf(Callable c, int i) { c.getParameter(i) = parameter } + + override CfgScope getCfgScope() { result = parameter.getCallable() } + + override Location getLocationImpl() { result = parameter.getLocation() } + + override string toStringImpl() { result = parameter.toString() } + } +} + +import ParameterNodes + +/** A data-flow node that represents a call argument. */ +abstract class ArgumentNode extends Node { + /** Holds if this argument occurs at the given position in the given call. */ + cached + abstract predicate argumentOf(DataFlowCall call, int pos); + + /** Gets the call in which this node is an argument. */ + final DataFlowCall getCall() { this.argumentOf(result, _) } +} + +private module ArgumentNodes { + /** A data-flow node that represents an explicit call argument; `argumentOf` uses position `-1` for the receiver. */ + class ExplicitArgumentNode extends ArgumentNode { + ExplicitArgumentNode() { this.asExpr().getExpr() instanceof Argument } + + override predicate argumentOf(DataFlowCall call, int pos) { + this.asExpr() = call.getReceiver() and + pos = -1 + or + this.asExpr() = call.getArgument(pos) + } + } +} + +import ArgumentNodes + +/** A data-flow node that represents a value returned by a callable. */ +abstract class ReturnNode extends Node { + /** Gets the kind of this return node. */ + abstract ReturnKind getKind(); +} + +private module ReturnNodes { + /** + * A data-flow node that represents an expression returned by a callable. + * Currently a stub: the characteristic predicate is `none()` (see the TODO). 
+ */ + class ExprReturnNode extends ReturnNode, ExprNode { + ExprReturnNode() { + none() // TODO + } + + override ReturnKind getKind() { result instanceof NormalReturnKind } + } +} + +import ReturnNodes + +/** A data-flow node that represents the output of a call. */ +abstract class OutNode extends Node { + /** Gets the underlying call, where this node is a corresponding output of kind `kind`. */ + cached + abstract DataFlowCall getCall(ReturnKind kind); +} + +private module OutNodes { + /** + * A data-flow node that reads a value returned directly by a callable, + * that is, the expression node of a call (`DataFlowCall`). + */ + class ExprOutNode extends OutNode, ExprNode { + private DataFlowCall call; + + ExprOutNode() { call = this.getExprNode() } + + override DataFlowCall getCall(ReturnKind kind) { + result = call and + kind instanceof NormalReturnKind + } + } +} + +import OutNodes + +predicate jumpStep(Node pred, Node succ) { none() } + +predicate storeStep(Node node1, Content c, Node node2) { none() } + +predicate readStep(Node node1, Content c, Node node2) { none() } + +/** + * Holds if values stored inside content `c` are cleared at node `n`. For example, + * any value stored inside `f` is cleared at the pre-update node associated with `x` + * in `x.f = newValue`. + */ +predicate clearsContent(Node n, Content c) { storeStep(_, c, n) } + +private newtype TDataFlowType = TTodoDataFlowType() + +class DataFlowType extends TDataFlowType { + string toString() { result = "" } +} + +/** Gets the type of `n` used for type pruning. */ +DataFlowType getNodeType(NodeImpl n) { any() } + +/** Gets a string representation of a `DataFlowType`. */ +string ppReprType(DataFlowType t) { result = t.toString() } + +/** + * Holds if `t1` and `t2` are compatible, that is, whether data can flow from + * a node of type `t1` to a node of type `t2`. 
+ */ +pragma[inline] +predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() } + +/** + * A node associated with an object after an operation that might have + * changed its state. + * + * This can be either the argument to a callable after the callable returns + * (which might have mutated the argument), or the qualifier of a field after + * an update to the field. + * + * Nodes corresponding to AST elements, for example `ExprNode`, usually refer + * to the value before the update with the exception of `ObjectCreation`, + * which represents the value after the constructor has run. + */ +abstract class PostUpdateNode extends Node { + /** Gets the node before the state update. */ + abstract Node getPreUpdateNode(); +} + +private module PostUpdateNodes { + class ExprPostUpdateNode extends PostUpdateNode, NodeImpl, TExprPostUpdateNode { + private CfgNodes::ExprCfgNode e; + + ExprPostUpdateNode() { this = TExprPostUpdateNode(e) } + + override ExprNode getPreUpdateNode() { e = result.getExprNode() } + + override CfgScope getCfgScope() { result = e.getExpr().getCfgScope() } + + override Location getLocationImpl() { result = e.getLocation() } + + override string toStringImpl() { result = "[post] " + e.toString() } + } +} + +private import PostUpdateNodes + +/** A node that performs a type cast. */ +class CastNode extends Node { + CastNode() { none() } +} + +class DataFlowExpr = CfgNodes::ExprCfgNode; + +int accessPathLimit() { result = 5 } + +/** The unit type. */ +private newtype TUnit = TMkUnit() + +/** The trivial type with a single element. */ +class Unit extends TUnit { + /** Gets a textual representation of this element. */ + string toString() { result = "unit" } +} + +/** + * Holds if `n` does not require a `PostUpdateNode` as it either cannot be + * modified or its modification cannot be observed, for example if it is a + * freshly created object that is not saved in a variable. + * + * This predicate is only used for consistency checks. 
+ */ +predicate isImmutableOrUnobservable(Node n) { none() } + +/** + * Holds if the node `n` is unreachable when the call context is `call`. + */ +predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } + +class BarrierGuard extends AstNode { + BarrierGuard() { none() } + + Node getAGuardedNode() { none() } +} diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll new file mode 100644 index 00000000000..7a8d40a5e1a --- /dev/null +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll @@ -0,0 +1,110 @@ +private import ruby +private import DataFlowDispatch +private import DataFlowPrivate +private import codeql_ruby.CFG + +/** + * An element, viewed as a node in a data flow graph. Either an expression + * (`ExprNode`) or a parameter (`ParameterNode`). + */ +class Node extends TNode { + /** Gets the expression corresponding to this node, if any. */ + CfgNodes::ExprCfgNode asExpr() { result = this.(ExprNode).getExprNode() } + + /** Gets the parameter corresponding to this node, if any. */ + Parameter asParameter() { result = this.(ParameterNode).getParameter() } + + /** Gets a textual representation of this node. */ + // TODO: cache + final string toString() { result = this.(NodeImpl).toStringImpl() } + + /** Gets the location of this node. */ + // TODO: cache + final Location getLocation() { result = this.(NodeImpl).getLocationImpl() } + + final DataFlowCallable getEnclosingCallable() { result = this.(NodeImpl).getCfgScope() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). 
+ */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * An expression, viewed as a node in a data flow graph. + * + * Note that because of control-flow splitting, one `Expr` may correspond + * to multiple `ExprNode`s, just like it may correspond to multiple + * `ControlFlow::Node`s. + */ +class ExprNode extends Node, TExprNode { + private CfgNodes::ExprCfgNode n; + + ExprNode() { this = TExprNode(n) } + + /** Gets the expression corresponding to this node. */ + CfgNodes::ExprCfgNode getExprNode() { result = n } +} + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. + */ +class ParameterNode extends Node, TParameterNode { + private Parameter p; + + ParameterNode() { this = TParameterNode(p) } + + /** Gets the parameter corresponding to this node. */ + Parameter getParameter() { result = p } + + /** + * Holds if this node is the parameter of callable `c` at the specified + * (zero-based) position. + */ + predicate isParameterOf(Callable c, int i) { p = c.getParameter(i) } +} + +/** Gets a node corresponding to expression `e`. */ +ExprNode exprNode(CfgNodes::ExprCfgNode e) { result.getExprNode() = e } + +/** + * Gets the node corresponding to the value of parameter `p` at function entry. + */ +ParameterNode parameterNode(Parameter p) { result.getParameter() = p } + +predicate localFlowStep = simpleLocalFlowStep/2; + +/** + * Holds if data flows from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } + +/** + * Holds if data can flow from `e1` to `e2` in zero or more + * local (intra-procedural) steps. 
+ */ +predicate localExprFlow(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) { + localFlow(exprNode(e1), exprNode(e2)) +} + +/** + * A reference contained in an object. This is either a field, a property, + * or an element in a collection. + */ +class Content extends TContent { + /** Gets a textual representation of this content. */ + string toString() { none() } + + /** Gets the location of this content. */ + Location getLocation() { none() } +}