Add support for flow summaries

This commit is contained in:
Tom Hvitved
2021-08-25 15:49:03 +02:00
parent c183e05c49
commit 08dc6d79ef
16 changed files with 1520 additions and 86 deletions

View File

@@ -53,10 +53,8 @@ class Call extends Expr, TCall {
/** Gets a potential target of this call, if any. */
final Callable getATarget() {
exists(DataFlowCall c | this = c.getExpr() |
result = viableCallable(c)
or
result = viableCallableLambda(c, _)
exists(DataFlowCall c | this = c.asCall().getExpr() |
TCfgScope(result) = [viableCallable(c), viableCallableLambda(c, _)]
)
}

View File

@@ -0,0 +1,123 @@
/** Provides classes and predicates for defining flow summaries. */
import ruby
import codeql.ruby.DataFlow
private import internal.FlowSummaryImpl as Impl
private import internal.DataFlowDispatch
// import all instances below
private module Summaries { }
class SummaryComponent = Impl::Public::SummaryComponent;
/** Provides predicates for constructing summary components. */
module SummaryComponent {
private import Impl::Public::SummaryComponent as SC
predicate parameter = SC::parameter/1;
predicate argument = SC::argument/1;
/** Gets a summary component that represents a qualifier. */
SummaryComponent qualifier() { result = argument(-1) }
/** Gets a summary component that represents a block argument. */
SummaryComponent block() { result = argument(-2) }
/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) }
}
class SummaryComponentStack = Impl::Public::SummaryComponentStack;
/** Provides predicates for constructing stacks of summary components. */
module SummaryComponentStack {
private import Impl::Public::SummaryComponentStack as SCS
predicate singleton = SCS::singleton/1;
predicate push = SCS::push/2;
predicate argument = SCS::argument/1;
/** Gets a singleton stack representing a qualifier. */
SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) }
/** Gets a singleton stack representing a block argument. */
SummaryComponentStack block() { result = singleton(SummaryComponent::block()) }
/** Gets a singleton stack representing the return value of a call. */
SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
}
/** A callable with a flow summary, identified by a unique string. */
abstract class SummarizedCallable extends LibraryCallable {
bindingset[this]
SummarizedCallable() { any() }
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step
* or a taint-step.
*
* Input specifications are restricted to stacks that end with
* `SummaryComponent::argument(_)`, preceded by zero or more
* `SummaryComponent::return()` or `SummaryComponent::content(_)` components.
*
* Output specifications are restricted to stacks that end with
* `SummaryComponent::return()` or `SummaryComponent::argument(_)`.
*
* Output stacks ending with `SummaryComponent::return()` can be preceded by zero
* or more `SummaryComponent::content(_)` components.
*
* Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
* optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
* by zero or more `SummaryComponent::content(_)` components.
*/
pragma[nomagic]
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
none()
}
/**
* Same as
*
* ```rb
* propagatesFlow(
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
* )
* ```
*
* but uses an external (string) representation of the input and output stacks.
*/
pragma[nomagic]
predicate propagatesFlowExt(string input, string output, string kind) { none() }
/**
* Holds if values stored inside `content` are cleared on objects passed as
* the `i`th argument to this callable.
*/
pragma[nomagic]
predicate clearsContent(int i, DataFlow::Content content) { none() }
}
private class SummarizedCallableAdaptor extends Impl::Public::SummarizedCallable {
private SummarizedCallable sc;
SummarizedCallableAdaptor() { this = TLibraryCallable(sc) }
final override predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
sc.propagatesFlow(input, output, preservesValue)
}
final override predicate clearsContent(int i, DataFlow::Content content) {
sc.clearsContent(i, content)
}
}
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;

View File

@@ -2,8 +2,9 @@ private import ruby
private import codeql.ruby.CFG
private import DataFlowPrivate
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlow
private import codeql.ruby.ast.internal.Module
private import FlowSummaryImpl as FlowSummaryImpl
private import codeql.ruby.dataflow.FlowSummary
newtype TReturnKind =
TNormalReturnKind() or
@@ -39,56 +40,154 @@ class BreakReturnKind extends ReturnKind, TBreakReturnKind {
override string toString() { result = "break" }
}
class DataFlowCallable = CfgScope;
/** A callable defined in library code, identified by a unique string. */
abstract class LibraryCallable extends string {
bindingset[this]
LibraryCallable() { any() }
class DataFlowCall extends CfgNodes::ExprNodes::CallCfgNode {
DataFlowCallable getEnclosingCallable() { result = this.getScope() }
/** Gets a call to this library callable. */
abstract Call getACall();
}
pragma[nomagic]
private predicate methodCall(DataFlow::LocalSourceNode sourceNode, string method) {
exists(DataFlow::Node nodeTo |
method = this.getExpr().(MethodCall).getMethodName() and
nodeTo.asExpr() = this.getReceiver() and
sourceNode.flowsTo(nodeTo)
)
}
/**
* A callable. This includes callables from source code, as well as callables
* defined in library code.
*/
class DataFlowCallable extends TDataFlowCallable {
/** Gets the underlying source code callable, if any. */
Callable asCallable() { this = TCfgScope(result) }
private Block yieldCall() {
this.getExpr() instanceof YieldCall and
exists(BlockParameterNode node |
node = trackBlock(result) and
node.getMethod() = this.getExpr().getEnclosingMethod()
)
}
/** Get the underlying library callable, if any. */
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
pragma[nomagic]
private predicate superCall(Module superClass, string method) {
this.getExpr() instanceof SuperCall and
exists(Module tp |
tp = this.getExpr().getEnclosingModule().getModule() and
superClass = tp.getSuperClass() and
method = this.getExpr().getEnclosingMethod().getName()
)
}
/** Gets a textual representation of this callable. */
string toString() { result = [this.asCallable().toString(), this.asLibraryCallable()] }
pragma[nomagic]
private predicate instanceMethodCall(Module tp, string method) {
exists(DataFlow::LocalSourceNode sourceNode |
this.methodCall(sourceNode, method) and
sourceNode = trackInstance(tp)
)
}
/** Gets the location of this callable. */
Location getLocation() { result = this.asCallable().getLocation() }
}
/**
* A call. This includes calls from source code, as well as call(back)s
* inside library callables with a flow summary.
*/
class DataFlowCall extends TDataFlowCall {
/** Gets the enclosing callable. */
DataFlowCallable getEnclosingCallable() { none() }
/** Gets the underlying source code call, if any. */
CfgNodes::ExprNodes::CallCfgNode asCall() { none() }
/** Gets a textual representation of this call. */
string toString() { none() }
/** Gets the location of this call. */
Location getLocation() { none() }
}
/**
* A synthesized call inside a callable with a flow summary.
*
* For example, in
* ```rb
* ints.each do |i|
* puts i
* end
* ```
*
* there is a call to the block argument inside `each`.
*/
class SummaryCall extends DataFlowCall, TSummaryCall {
private FlowSummaryImpl::Public::SummarizedCallable c;
private DataFlow::Node receiver;
SummaryCall() { this = TSummaryCall(c, receiver) }
/** Gets the data flow node that this call targets. */
DataFlow::Node getReceiver() { result = receiver }
override DataFlowCallable getEnclosingCallable() { result = c }
override string toString() { result = "[summary] call to " + receiver + " in " + c }
override Location getLocation() { result = c.getLocation() }
}
private class NormalCall extends DataFlowCall, TNormalCall {
private CfgNodes::ExprNodes::CallCfgNode c;
NormalCall() { this = TNormalCall(c) }
override CfgNodes::ExprNodes::CallCfgNode asCall() { result = c }
override DataFlowCallable getEnclosingCallable() { result = TCfgScope(c.getScope()) }
override string toString() { result = c.toString() }
override Location getLocation() { result = c.getLocation() }
}
pragma[nomagic]
private predicate methodCall(
CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode, string method
) {
exists(DataFlow::Node nodeTo |
method = call.getExpr().(MethodCall).getMethodName() and
nodeTo.asExpr() = call.getReceiver() and
sourceNode.flowsTo(nodeTo)
)
}
private Block yieldCall(CfgNodes::ExprNodes::CallCfgNode call) {
call.getExpr() instanceof YieldCall and
exists(BlockParameterNode node |
node = trackBlock(result) and
node.getMethod() = call.getExpr().getEnclosingMethod()
)
}
pragma[nomagic]
private predicate superCall(CfgNodes::ExprNodes::CallCfgNode call, Module superClass, string method) {
call.getExpr() instanceof SuperCall and
exists(Module tp |
tp = call.getExpr().getEnclosingModule().getModule() and
superClass = tp.getSuperClass() and
method = call.getExpr().getEnclosingMethod().getName()
)
}
pragma[nomagic]
private predicate instanceMethodCall(CfgNodes::ExprNodes::CallCfgNode call, Module tp, string method) {
exists(DataFlow::LocalSourceNode sourceNode |
methodCall(call, sourceNode, method) and
sourceNode = trackInstance(tp)
)
}
cached
private module Cached {
cached
newtype TDataFlowCallable =
TCfgScope(CfgScope scope) or
TLibraryCallable(LibraryCallable callable)
cached
DataFlowCallable getTarget() {
newtype TDataFlowCall =
TNormalCall(CfgNodes::ExprNodes::CallCfgNode c) or
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, DataFlow::Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
cached
CfgScope getTarget(CfgNodes::ExprNodes::CallCfgNode call) {
exists(string method |
exists(Module tp |
this.instanceMethodCall(tp, method) and
instanceMethodCall(call, tp, method) and
result = lookupMethod(tp, method) and
if result.(Method).isPrivate()
then
exists(Self self |
self = this.getReceiver().getExpr() and
self = call.getReceiver().getExpr() and
pragma[only_bind_out](self.getEnclosingModule().getModule().getSuperClass*()) =
pragma[only_bind_out](result.getEnclosingModule().getModule())
) and
@@ -96,26 +195,28 @@ class DataFlowCall extends CfgNodes::ExprNodes::CallCfgNode {
// This may remove some plausible targets, but also removes a lot of
// implausible targets
if result.getEnclosingModule() instanceof Toplevel
then result.getFile() = this.getFile()
then result.getFile() = call.getFile()
else any()
else any()
)
or
exists(DataFlow::LocalSourceNode sourceNode |
this.methodCall(sourceNode, method) and
methodCall(call, sourceNode, method) and
sourceNode = trackSingletonMethod(result, method)
)
)
or
exists(Module superClass, string method |
this.superCall(superClass, method) and
superCall(call, superClass, method) and
result = lookupMethod(superClass, method)
)
or
result = this.yieldCall()
result = yieldCall(call)
}
}
import Cached
private DataFlow::LocalSourceNode trackInstance(Module tp, TypeTracker t) {
t.start() and
(
@@ -297,8 +398,13 @@ private DataFlow::LocalSourceNode trackModule(Module tp) {
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) {
result = call.getTarget() and
not call.getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall`
result = TCfgScope(getTarget(call.asCall())) and
not call.asCall().getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall`
or
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
call.asCall().getExpr() = callable.getACall()
)
}
/**
@@ -307,7 +413,7 @@ DataFlowCallable viableCallable(DataFlowCall call) {
* qualifier accesses a parameter of the enclosing callable `c` (including
* the implicit `self` parameter).
*/
predicate mayBenefitFromCallContext(DataFlowCall call, Callable c) { none() }
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is

View File

@@ -4,6 +4,7 @@ private import codeql.ruby.dataflow.SSA
private import DataFlowPublic
private import DataFlowDispatch
private import SsaImpl as SsaImpl
private import FlowSummaryImpl as FlowSummaryImpl
abstract class NodeImpl extends Node {
/** Do not call: use `getEnclosingCallable()` instead. */
@@ -124,9 +125,19 @@ private module Cached {
or
n = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver()
)
} or
TSummaryNode(
FlowSummaryImpl::Public::SummarizedCallable c,
FlowSummaryImpl::Private::SummaryNodeState state
) {
FlowSummaryImpl::Private::summaryNodeRange(c, state)
} or
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, int i) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, i)
}
class TParameterNode = TNormalParameterNode or TBlockParameterNode or TSelfParameterNode;
class TParameterNode =
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
/**
* This is the local flow predicate that is used as a building block in global
@@ -175,8 +186,13 @@ private module Cached {
) and
nodeFrom.asExpr() = for.getValue()
)
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
}
cached
predicate isLocalSourceNode(Node n) { not simpleLocalFlowStep+(any(ExprNode e), n) }
cached
newtype TContent = TTodoContent() // stub
}
@@ -188,6 +204,10 @@ predicate nodeIsHidden(Node n) {
exists(Ssa::Definition def | def = n.(SsaDefinitionNode).getDefinition() |
def instanceof Ssa::PhiNode
)
or
n instanceof SummaryNode
or
n instanceof SummaryParameterNode
}
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -229,7 +249,13 @@ class ReturningStatementNode extends NodeImpl, TReturningNode {
}
private module ParameterNodes {
abstract private class ParameterNodeImpl extends ParameterNode, NodeImpl { }
abstract class ParameterNodeImpl extends ParameterNode, NodeImpl {
abstract predicate isSourceParameterOf(Callable c, int i);
override predicate isParameterOf(DataFlowCallable c, int i) {
this.isSourceParameterOf(c.asCallable(), i)
}
}
/**
* The value of a normal parameter at function entry, viewed as a node in a data
@@ -242,7 +268,7 @@ private module ParameterNodes {
override Parameter getParameter() { result = parameter }
override predicate isParameterOf(Callable c, int i) { c.getParameter(i) = parameter }
override predicate isSourceParameterOf(Callable c, int i) { c.getParameter(i) = parameter }
override CfgScope getCfgScope() { result = parameter.getCallable() }
@@ -262,7 +288,7 @@ private module ParameterNodes {
final MethodBase getMethod() { result = method }
override predicate isParameterOf(Callable c, int i) { method = c and i = -1 }
override predicate isSourceParameterOf(Callable c, int i) { method = c and i = -1 }
override CfgScope getCfgScope() { result = method }
@@ -286,7 +312,7 @@ private module ParameterNodes {
result = method.getAParameter() and result instanceof BlockParameter
}
override predicate isParameterOf(Callable c, int i) { c = method and i = -2 }
override predicate isSourceParameterOf(Callable c, int i) { c = method and i = -2 }
override CfgScope getCfgScope() { result = method }
@@ -302,14 +328,52 @@ private module ParameterNodes {
not exists(getParameter()) and result = "&block"
}
}
/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
private FlowSummaryImpl::Public::SummarizedCallable sc;
private int pos;
SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
override predicate isSourceParameterOf(Callable c, int i) { none() }
override predicate isParameterOf(DataFlowCallable c, int i) { sc = c and i = pos }
override CfgScope getCfgScope() { none() }
override DataFlowCallable getEnclosingCallable() { result = sc }
override Location getLocationImpl() { none() }
override string toStringImpl() { result = "parameter " + pos + " of " + sc }
}
}
import ParameterNodes
/** A data-flow node used to model flow summaries. */
private class SummaryNode extends NodeImpl, TSummaryNode {
private FlowSummaryImpl::Public::SummarizedCallable c;
private FlowSummaryImpl::Private::SummaryNodeState state;
SummaryNode() { this = TSummaryNode(c, state) }
override CfgScope getCfgScope() { none() }
override DataFlowCallable getEnclosingCallable() { result = c }
override Location getLocationImpl() { none() }
override string toStringImpl() { result = "[summary] " + state + " in " + c }
}
/** A data-flow node that represents a call argument. */
abstract class ArgumentNode extends Node {
/** Holds if this argument occurs at the given position in the given call. */
abstract predicate argumentOf(DataFlowCall call, int pos);
predicate argumentOf(DataFlowCall call, int pos) { this.sourceArgumentOf(call.asCall(), pos) }
abstract predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos);
/** Gets the call in which this node is an argument. */
final DataFlowCall getCall() { this.argumentOf(result, _) }
@@ -323,7 +387,7 @@ private module ArgumentNodes {
not this.asExpr().getExpr() instanceof BlockArgument
}
override predicate argumentOf(DataFlowCall call, int pos) {
override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
this.asExpr() = call.getArgument(pos)
}
}
@@ -332,7 +396,7 @@ private module ArgumentNodes {
class SelfArgumentNode extends ArgumentNode {
SelfArgumentNode() { this.asExpr() = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver() }
override predicate argumentOf(DataFlowCall call, int pos) {
override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
this.asExpr() = call.getReceiver() and
pos = -1
}
@@ -345,7 +409,7 @@ private module ArgumentNodes {
exists(CfgNodes::ExprNodes::CallCfgNode c | c.getBlock() = this.asExpr())
}
override predicate argumentOf(DataFlowCall call, int pos) {
override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
pos = -2 and
(
this.asExpr() = call.getBlock()
@@ -358,6 +422,16 @@ private module ArgumentNodes {
)
}
}
private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { none() }
override predicate argumentOf(DataFlowCall call, int pos) {
FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
}
}
}
import ArgumentNodes
@@ -408,11 +482,19 @@ private module ReturnNodes {
class ExprReturnNode extends ReturnNode, ExprNode {
ExprReturnNode() {
this.getExprNode().getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
this.getEnclosingCallable() instanceof Callable
this.(NodeImpl).getCfgScope() instanceof Callable
}
override ReturnKind getKind() { result instanceof NormalReturnKind }
}
private class SummaryReturnNode extends SummaryNode, ReturnNode {
private ReturnKind rk;
SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
override ReturnKind getKind() { result = rk }
}
}
import ReturnNodes
@@ -431,13 +513,21 @@ private module OutNodes {
class ExprOutNode extends OutNode, ExprNode {
private DataFlowCall call;
ExprOutNode() { call = this.getExprNode() }
ExprOutNode() { call.asCall() = this.getExprNode() }
override DataFlowCall getCall(ReturnKind kind) {
result = call and
kind instanceof NormalReturnKind
}
}
private class SummaryOutNode extends SummaryNode, OutNode {
SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
override DataFlowCall getCall(ReturnKind kind) {
FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
}
}
}
import OutNodes
@@ -459,16 +549,24 @@ predicate jumpStep(Node pred, Node succ) {
succ.asExpr().getExpr().(ConstantReadAccess).getValue() = pred.asExpr().getExpr()
}
predicate storeStep(Node node1, Content c, Node node2) { none() }
predicate storeStep(Node node1, Content c, Node node2) {
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2)
}
predicate readStep(Node node1, Content c, Node node2) { none() }
predicate readStep(Node node1, Content c, Node node2) {
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2)
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) { storeStep(_, c, n) }
predicate clearsContent(Node n, Content c) {
storeStep(_, c, n)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
}
private newtype TDataFlowType = TTodoDataFlowType()
@@ -519,6 +617,14 @@ private module PostUpdateNodes {
override string toStringImpl() { result = "[post] " + e.toString() }
}
private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
private Node pre;
SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
override Node getPreUpdateNode() { result = pre }
}
}
private import PostUpdateNodes
@@ -562,15 +668,15 @@ newtype LambdaCallKind =
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
kind = TYieldCallKind() and
creation.asExpr().getExpr() = c.(Block)
creation.asExpr().getExpr() = c.asCallable().(Block)
or
kind = TLambdaCallKind() and
(
creation.asExpr().getExpr() = c.(Lambda)
creation.asExpr().getExpr() = c.asCallable().(Lambda)
or
creation.asExpr() =
any(CfgNodes::ExprNodes::MethodCallCfgNode mc |
c = mc.getBlock().getExpr() and
c.asCallable() = mc.getBlock().getExpr() and
mc.getExpr().getMethodName() = "lambda"
)
)
@@ -579,14 +685,20 @@ predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c)
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
kind = TYieldCallKind() and
receiver.(BlockParameterNode).getMethod() = call.getExpr().(YieldCall).getEnclosingMethod()
receiver.(BlockParameterNode).getMethod() =
call.asCall().getExpr().(YieldCall).getEnclosingMethod()
or
kind = TLambdaCallKind() and
call =
call.asCall() =
any(CfgNodes::ExprNodes::MethodCallCfgNode mc |
receiver.asExpr() = mc.getReceiver() and
mc.getExpr().getMethodName() = "call"
)
or
receiver = call.(SummaryCall).getReceiver() and
if receiver.(ParameterNode).isParameterOf(_, -2)
then kind = TYieldCallKind()
else kind = TLambdaCallKind()
}
/** Extra data-flow steps needed for lambda flow analysis. */

View File

@@ -4,6 +4,7 @@ private import DataFlowPrivate
private import codeql.ruby.CFG
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.dataflow.SSA
private import FlowSummaryImpl as FlowSummaryImpl
/**
* An element, viewed as a node in a data flow graph. Either an expression
@@ -24,7 +25,7 @@ class Node extends TNode {
// TODO: cache
final Location getLocation() { result = this.(NodeImpl).getLocationImpl() }
final DataFlowCallable getEnclosingCallable() { result = this.(NodeImpl).getCfgScope() }
DataFlowCallable getEnclosingCallable() { result = TCfgScope(this.(NodeImpl).getCfgScope()) }
/**
* Holds if this element is at the specified location.
@@ -89,14 +90,14 @@ class ParameterNode extends Node, TParameterNode {
* Holds if this node is the parameter of callable `c` at the specified
* (zero-based) position.
*/
predicate isParameterOf(Callable c, int i) { none() }
predicate isParameterOf(DataFlowCallable c, int i) { none() }
}
/**
* A data-flow node that is a source of local flow.
*/
class LocalSourceNode extends Node {
LocalSourceNode() { not simpleLocalFlowStep+(any(ExprNode n), this) }
LocalSourceNode() { isLocalSourceNode(this) }
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
pragma[inline]
@@ -131,7 +132,17 @@ ExprNode exprNode(CfgNodes::ExprCfgNode e) { result.getExprNode() = e }
*/
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
predicate localFlowStep = simpleLocalFlowStep/2;
/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
}
/**
* Holds if data flows from `source` to `sink` in zero or more local

View File

@@ -0,0 +1,827 @@
/**
* Provides classes and predicates for defining flow summaries.
*
* The definitions in this file are language-independent, and language-specific
* definitions are passed in via the `DataFlowImplSpecific` and
* `FlowSummaryImplSpecific` modules.
*/
private import FlowSummaryImplSpecific
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import DataFlowImplCommon
/** Provides classes and predicates for defining flow summaries. */
module Public {
private import Private
/**
* A component used in a flow summary.
*
* Either a parameter or an argument at a given position, a specific
* content type, or a return kind.
*/
class SummaryComponent extends TSummaryComponent {
/** Gets a textual representation of this summary component. */
string toString() {
exists(Content c | this = TContentSummaryComponent(c) and result = c.toString())
or
exists(int i | this = TParameterSummaryComponent(i) and result = "parameter " + i)
or
exists(int i | this = TArgumentSummaryComponent(i) and result = "argument " + i)
or
exists(ReturnKind rk | this = TReturnSummaryComponent(rk) and result = "return (" + rk + ")")
}
}
/** Provides predicates for constructing summary components. */
module SummaryComponent {
/** Gets a summary component for content `c`. */
SummaryComponent content(Content c) { result = TContentSummaryComponent(c) }
/** Gets a summary component for parameter `i`. */
SummaryComponent parameter(int i) { result = TParameterSummaryComponent(i) }
/** Gets a summary component for argument `i`. */
SummaryComponent argument(int i) { result = TArgumentSummaryComponent(i) }
/** Gets a summary component for a return of kind `rk`. */
SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) }
}
/**
* A (non-empty) stack of summary components.
*
* A stack is used to represent where data is read from (input) or where it
* is written to (output). For example, an input stack `[Field f, Argument 0]`
* means that data is read from field `f` from the `0`th argument, while an
* output stack `[Field g, Return]` means that data is written to the field
* `g` of the returned object.
*/
class SummaryComponentStack extends TSummaryComponentStack {
/** Gets the head of this stack. */
SummaryComponent head() {
this = TSingletonSummaryComponentStack(result) or
this = TConsSummaryComponentStack(result, _)
}
/** Gets the tail of this stack, if any. */
SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) }
/** Gets the length of this stack. */
int length() {
this = TSingletonSummaryComponentStack(_) and result = 1
or
result = 1 + this.tail().length()
}
/** Gets the stack obtained by dropping the first `i` elements, if any. */
SummaryComponentStack drop(int i) {
i = 0 and result = this
or
result = this.tail().drop(i - 1)
}
/** Holds if this stack contains summary component `c`. */
predicate contains(SummaryComponent c) { c = this.drop(_).head() }
/** Gets a textual representation of this stack. */
string toString() {
exists(SummaryComponent head, SummaryComponentStack tail |
head = this.head() and
tail = this.tail() and
result = head + " of " + tail
)
or
exists(SummaryComponent c |
this = TSingletonSummaryComponentStack(c) and
result = c.toString()
)
}
}
/** Provides predicates for constructing stacks of summary components. */
module SummaryComponentStack {
/** Gets a singleton stack containing `c`. */
SummaryComponentStack singleton(SummaryComponent c) {
result = TSingletonSummaryComponentStack(c)
}
/**
* Gets the stack obtained by pushing `head` onto `tail`.
*
* Make sure to override `RequiredSummaryComponentStack::required()` in order
* to ensure that the constructed stack exists.
*/
SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) {
result = TConsSummaryComponentStack(head, tail)
}
/** Gets a singleton stack for argument `i`. */
SummaryComponentStack argument(int i) { result = singleton(SummaryComponent::argument(i)) }
/** Gets a singleton stack representing a return of kind `rk`. */
SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) }
}
/**
* A class that exists for QL technical reasons only (the IPA type used
* to represent component stacks needs to be bounded).
*/
abstract class RequiredSummaryComponentStack extends SummaryComponentStack {
/**
* Holds if the stack obtained by pushing `head` onto `tail` is required.
*/
abstract predicate required(SummaryComponent c);
}
/** A callable with a flow summary. */
abstract class SummarizedCallable extends DataFlowCallable {
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step
* or a taint-step.
*
* Input specifications are restricted to stacks that end with
* `SummaryComponent::argument(_)`, preceded by zero or more
* `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components.
*
* Output specifications are restricted to stacks that end with
* `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`.
*
* Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero
* or more `SummaryComponent::content(_)` components.
*
* Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
* optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
* by zero or more `SummaryComponent::content(_)` components.
*/
pragma[nomagic]
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
none()
}
/**
* Holds if values stored inside `content` are cleared on objects passed as
* the `i`th argument to this callable.
*/
pragma[nomagic]
predicate clearsContent(int i, Content content) { none() }
}
}
/**
* Provides predicates for compiling flow summaries down to atomic local steps,
* read steps, and store steps.
*/
module Private {
private import Public
newtype TSummaryComponent =
TContentSummaryComponent(Content c) or
TParameterSummaryComponent(int i) { parameterPosition(i) } or
TArgumentSummaryComponent(int i) { parameterPosition(i) } or
TReturnSummaryComponent(ReturnKind rk)
newtype TSummaryComponentStack =
TSingletonSummaryComponentStack(SummaryComponent c) or
TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) {
tail.(RequiredSummaryComponentStack).required(head)
}
pragma[nomagic]
private predicate summary(
SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output,
boolean preservesValue
) {
c.propagatesFlow(input, output, preservesValue)
}
private newtype TSummaryNodeState =
TSummaryNodeInputState(SummaryComponentStack s) {
exists(SummaryComponentStack input |
summary(_, input, _, _) and
s = input.drop(_)
)
} or
TSummaryNodeOutputState(SummaryComponentStack s) {
exists(SummaryComponentStack output |
summary(_, _, output, _) and
s = output.drop(_)
)
}
/**
* A state used to break up (complex) flow summaries into atomic flow steps.
* For a flow summary
*
* ```ql
* propagatesFlow(
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
* )
* ```
*
* the following states are used:
*
* - `TSummaryNodeInputState(SummaryComponentStack s)`:
* this state represents that the components in `s` _have been read_ from the
* input.
* - `TSummaryNodeOutputState(SummaryComponentStack s)`:
* this state represents that the components in `s` _remain to be written_ to
* the output.
*/
class SummaryNodeState extends TSummaryNodeState {
/** Holds if this state is a valid input state for `c`. */
pragma[nomagic]
predicate isInputState(SummarizedCallable c, SummaryComponentStack s) {
this = TSummaryNodeInputState(s) and
exists(SummaryComponentStack input |
summary(c, input, _, _) and
s = input.drop(_)
)
}
/** Holds if this state is a valid output state for `c`. */
pragma[nomagic]
predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) {
this = TSummaryNodeOutputState(s) and
exists(SummaryComponentStack output |
summary(c, _, output, _) and
s = output.drop(_)
)
}
/** Gets a textual representation of this state. */
string toString() {
exists(SummaryComponentStack s |
this = TSummaryNodeInputState(s) and
result = "read: " + s
)
or
exists(SummaryComponentStack s |
this = TSummaryNodeOutputState(s) and
result = "to write: " + s
)
}
}
/**
* Holds if `state` represents having read the `i`th argument for `c`. In this case
* we are not synthesizing a data-flow node, but instead assume that a relevant
* parameter node already exists.
*/
private predicate parameterReadState(SummarizedCallable c, SummaryNodeState state, int i) {
state.isInputState(c, SummaryComponentStack::argument(i))
}
/**
* Holds if a synthesized summary node is needed for the state `state` in summarized
* callable `c`.
*/
predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) {
state.isInputState(c, _) and
not parameterReadState(c, state, _)
or
state.isOutputState(c, _)
}
pragma[noinline]
private Node summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) {
exists(SummaryNodeState state | state.isInputState(c, s) |
result = summaryNode(c, state)
or
exists(int i |
parameterReadState(c, state, i) and
result.(ParamNode).isParameterOf(c, i)
)
)
}
pragma[noinline]
private Node summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) {
exists(SummaryNodeState state |
state.isOutputState(c, s) and
result = summaryNode(c, state)
)
}
/**
* Holds if a write targets `post`, which is a post-update node for the `i`th
* parameter of `c`.
*/
private predicate isParameterPostUpdate(Node post, SummarizedCallable c, int i) {
post = summaryNodeOutputState(c, SummaryComponentStack::argument(i))
}
/** Holds if a parameter node is required for the `i`th parameter of `c`. */
predicate summaryParameterNodeRange(SummarizedCallable c, int i) {
parameterReadState(c, _, i)
or
isParameterPostUpdate(_, c, i)
}
private predicate callbackOutput(
SummarizedCallable c, SummaryComponentStack s, Node receiver, ReturnKind rk
) {
any(SummaryNodeState state).isInputState(c, s) and
s.head() = TReturnSummaryComponent(rk) and
receiver = summaryNodeInputState(c, s.drop(1))
}
private Node pre(Node post) {
summaryPostUpdateNode(post, result)
or
not summaryPostUpdateNode(post, _) and
result = post
}
private predicate callbackInput(
SummarizedCallable c, SummaryComponentStack s, Node receiver, int i
) {
any(SummaryNodeState state).isOutputState(c, s) and
s.head() = TParameterSummaryComponent(i) and
receiver = pre(summaryNodeOutputState(c, s.drop(1)))
}
/** Holds if a call targeting `receiver` should be synthesized inside `c`. */
predicate summaryCallbackRange(SummarizedCallable c, Node receiver) {
callbackOutput(c, _, receiver, _)
or
callbackInput(c, _, receiver, _)
}
/**
* Gets the type of synthesized summary node `n`.
*
* The type is computed based on the language-specific predicates
* `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and
* `getCallbackReturnType()`.
*/
DataFlowType summaryNodeType(Node n) {
exists(Node pre |
summaryPostUpdateNode(n, pre) and
result = getNodeType(pre)
)
or
exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() |
n = summaryNodeInputState(c, s) and
(
exists(Content cont |
head = TContentSummaryComponent(cont) and result = getContentType(cont)
)
or
exists(ReturnKind rk |
head = TReturnSummaryComponent(rk) and
result =
getCallbackReturnType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
s.drop(1))), rk)
)
)
or
n = summaryNodeOutputState(c, s) and
(
exists(Content cont |
head = TContentSummaryComponent(cont) and result = getContentType(cont)
)
or
s.length() = 1 and
exists(ReturnKind rk |
head = TReturnSummaryComponent(rk) and
result = getReturnType(c, rk)
)
or
exists(int i | head = TParameterSummaryComponent(i) |
result =
getCallbackParameterType(getNodeType(summaryNodeOutputState(pragma[only_bind_out](c),
s.drop(1))), i)
)
)
)
}
/** Holds if summary node `out` contains output of kind `rk` from call `c`. */
predicate summaryOutNode(DataFlowCall c, Node out, ReturnKind rk) {
exists(SummarizedCallable callable, SummaryComponentStack s, Node receiver |
callbackOutput(callable, s, receiver, rk) and
out = summaryNodeInputState(callable, s) and
c = summaryDataFlowCall(receiver)
)
}
/** Holds if summary node `arg` is the `i`th argument of call `c`. */
predicate summaryArgumentNode(DataFlowCall c, Node arg, int i) {
exists(SummarizedCallable callable, SummaryComponentStack s, Node receiver |
callbackInput(callable, s, receiver, i) and
arg = summaryNodeOutputState(callable, s) and
c = summaryDataFlowCall(receiver)
)
}
/** Holds if summary node `post` is a post-update node with pre-update node `pre`. */
predicate summaryPostUpdateNode(Node post, ParamNode pre) {
exists(SummarizedCallable c, int i |
isParameterPostUpdate(post, c, i) and
pre.isParameterOf(c, i)
)
}
/** Holds if summary node `ret` is a return node of kind `rk`. */
predicate summaryReturnNode(Node ret, ReturnKind rk) {
exists(SummarizedCallable callable, SummaryComponentStack s |
ret = summaryNodeOutputState(callable, s) and
s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk))
)
}
/** Provides a compilation of flow summaries to atomic data-flow steps. */
module Steps {
/**
* Holds if there is a local step from `pred` to `succ`, which is synthesized
* from a flow summary.
*/
predicate summaryLocalStep(Node pred, Node succ, boolean preservesValue) {
exists(
SummarizedCallable c, SummaryComponentStack inputContents,
SummaryComponentStack outputContents
|
summary(c, inputContents, outputContents, preservesValue) and
pred = summaryNodeInputState(c, inputContents) and
succ = summaryNodeOutputState(c, outputContents)
|
preservesValue = true
or
preservesValue = false and not summary(c, inputContents, outputContents, true)
)
or
// If flow through a method updates a parameter from some input A, and that
// parameter also is returned through B, then we'd like a combined flow from A
// to B as well. As an example, this simplifies modeling of fluent methods:
// for `StringBuilder.append(x)` with a specified value flow from qualifier to
// return value and taint flow from argument 0 to the qualifier, then this
// allows us to infer taint flow from argument 0 to the return value.
summaryPostUpdateNode(pred, succ) and preservesValue = true
}
/**
* Holds if there is a read step of content `c` from `pred` to `succ`, which
* is synthesized from a flow summary.
*/
predicate summaryReadStep(Node pred, Content c, Node succ) {
exists(SummarizedCallable sc, SummaryComponentStack s |
pred = summaryNodeInputState(sc, s.drop(1)) and
succ = summaryNodeInputState(sc, s) and
SummaryComponent::content(c) = s.head()
)
}
/**
* Holds if there is a store step of content `c` from `pred` to `succ`, which
* is synthesized from a flow summary.
*/
predicate summaryStoreStep(Node pred, Content c, Node succ) {
exists(SummarizedCallable sc, SummaryComponentStack s |
pred = summaryNodeOutputState(sc, s) and
succ = summaryNodeOutputState(sc, s.drop(1)) and
SummaryComponent::content(c) = s.head()
)
}
/**
* Holds if values stored inside content `c` are cleared when passed as
* input of type `input` in `call`.
*/
predicate summaryClearsContent(ArgNode arg, Content c) {
exists(DataFlowCall call, int i |
viableCallable(call).(SummarizedCallable).clearsContent(i, c) and
arg.argumentOf(call, i)
)
}
pragma[nomagic]
private ParamNode summaryArgParam(ArgNode arg, ReturnKindExt rk, OutNodeExt out) {
exists(DataFlowCall call, int pos, SummarizedCallable callable |
arg.argumentOf(call, pos) and
viableCallable(call) = callable and
result.isParameterOf(callable, pos) and
out = rk.getAnOutNode(call)
)
}
/**
* Holds if `arg` flows to `out` using a simple flow summary, that is, a flow
* summary without reads and stores.
*
* NOTE: This step should not be used in global data-flow/taint-tracking, but may
* be useful to include in the exposed local data-flow/taint-tracking relations.
*/
predicate summaryThroughStep(ArgNode arg, Node out, boolean preservesValue) {
exists(ReturnKindExt rk, ReturnNodeExt ret |
summaryLocalStep(summaryArgParam(arg, rk, out), ret, preservesValue) and
ret.getKind() = rk
)
}
/**
* Holds if there is a read(+taint) of `c` from `arg` to `out` using a
* flow summary.
*
* NOTE: This step should not be used in global data-flow/taint-tracking, but may
* be useful to include in the exposed local data-flow/taint-tracking relations.
*/
predicate summaryGetterStep(ArgNode arg, Content c, Node out) {
exists(ReturnKindExt rk, Node mid, ReturnNodeExt ret |
summaryReadStep(summaryArgParam(arg, rk, out), c, mid) and
summaryLocalStep(mid, ret, _) and
ret.getKind() = rk
)
}
/**
* Holds if there is a (taint+)store of `arg` into content `c` of `out` using a
* flow summary.
*
* NOTE: This step should not be used in global data-flow/taint-tracking, but may
* be useful to include in the exposed local data-flow/taint-tracking relations.
*/
predicate summarySetterStep(ArgNode arg, Content c, Node out) {
exists(ReturnKindExt rk, Node mid, ReturnNodeExt ret |
summaryLocalStep(summaryArgParam(arg, rk, out), mid, _) and
summaryStoreStep(mid, c, ret) and
ret.getKind() = rk
)
}
/**
* Holds if data is written into content `c` of argument `arg` using a flow summary.
*
* Depending on the type of `c`, this predicate may be relevant to include in the
* definition of `clearsContent()`.
*/
predicate summaryStoresIntoArg(Content c, Node arg) {
exists(ParamUpdateReturnKind rk, ReturnNodeExt ret, PostUpdateNode out |
exists(DataFlowCall call, SummarizedCallable callable |
getNodeEnclosingCallable(ret) = callable and
viableCallable(call) = callable and
summaryStoreStep(_, c, ret) and
ret.getKind() = pragma[only_bind_into](rk) and
out = rk.getAnOutNode(call) and
arg = out.getPreUpdateNode()
)
)
}
}
/**
* Provides a means of translating externally (e.g., CSV) defined flow
* summaries into a `SummarizedCallable`s.
*/
module External {
/** Holds if `spec` is a relevant external specification. */
private predicate relevantSpec(string spec) {
summaryElement(_, spec, _, _) or
summaryElement(_, _, spec, _) or
sourceElement(_, spec, _) or
sinkElement(_, spec, _)
}
/** Holds if the `n`th component of specification `s` is `c`. */
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
/** Holds if specification `s` has length `len`. */
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
/** Gets the last component of specification `s`. */
string specLast(string s) {
exists(int len |
specLength(s, len) and
specSplit(s, result, len - 1)
)
}
/** Holds if specification component `c` parses as parameter `n`. */
predicate parseParam(string c, int n) {
specSplit(_, c, _) and
(
c.regexpCapture("Parameter\\[([-0-9]+)\\]", 1).toInt() = n
or
exists(int n1, int n2 |
c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
n = [n1 .. n2]
)
)
}
/** Holds if specification component `c` parses as argument `n`. */
predicate parseArg(string c, int n) {
specSplit(_, c, _) and
(
c.regexpCapture("Argument\\[([-0-9]+)\\]", 1).toInt() = n
or
exists(int n1, int n2 |
c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
n = [n1 .. n2]
)
)
}
private SummaryComponent interpretComponent(string c) {
specSplit(_, c, _) and
(
exists(int pos | parseArg(c, pos) and result = SummaryComponent::argument(pos))
or
exists(int pos | parseParam(c, pos) and result = SummaryComponent::parameter(pos))
or
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
or
result = interpretComponentSpecific(c)
)
}
/**
* Holds if `spec` specifies summary component stack `stack`.
*/
predicate interpretSpec(string spec, SummaryComponentStack stack) {
interpretSpec(spec, 0, stack)
}
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
exists(string c |
relevantSpec(spec) and
specLength(spec, idx + 1) and
specSplit(spec, c, idx) and
stack = SummaryComponentStack::singleton(interpretComponent(c))
)
or
exists(SummaryComponent head, SummaryComponentStack tail |
interpretSpec(spec, idx, head, tail) and
stack = SummaryComponentStack::push(head, tail)
)
}
private predicate interpretSpec(
string output, int idx, SummaryComponent head, SummaryComponentStack tail
) {
exists(string c |
interpretSpec(output, idx + 1, tail) and
specSplit(output, c, idx) and
head = interpretComponent(c)
)
}
private class MkStack extends RequiredSummaryComponentStack {
MkStack() { interpretSpec(_, _, _, this) }
override predicate required(SummaryComponent c) { interpretSpec(_, _, c, this) }
}
private class SummarizedCallableExternal extends SummarizedCallable {
SummarizedCallableExternal() { summaryElement(this, _, _, _) }
override predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
exists(string inSpec, string outSpec, string kind |
summaryElement(this, inSpec, outSpec, kind) and
interpretSpec(inSpec, input) and
interpretSpec(outSpec, output)
|
kind = "value" and preservesValue = true
or
kind = "taint" and preservesValue = false
)
}
}
/** Holds if component `c` of specification `spec` cannot be parsed. */
predicate invalidSpecComponent(string spec, string c) {
specSplit(spec, c, _) and
not exists(interpretComponent(c))
}
private predicate inputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _)
}
private predicate outputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
c = "ReturnValue"
}
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
exists(SourceOrSinkElement e |
sourceElement(e, output, kind) and
if outputNeedsReference(specLast(output))
then e = ref.getCallTarget()
else e = ref.asElement()
)
}
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
exists(SourceOrSinkElement e |
sinkElement(e, input, kind) and
if inputNeedsReference(specLast(input))
then e = ref.getCallTarget()
else e = ref.asElement()
)
}
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
sourceElementRef(ref, output, _) and
specLength(output, idx) and
node = ref
or
exists(InterpretNode mid, string c |
interpretOutput(output, idx + 1, ref, mid) and
specSplit(output, c, idx)
|
exists(int pos |
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), pos)
|
c = "Argument" or parseArg(c, pos)
)
or
exists(int pos | node.asNode().(ParamNode).isParameterOf(mid.asCallable(), pos) |
c = "Parameter" or parseParam(c, pos)
)
or
c = "ReturnValue" and
node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind()))
or
interpretOutputSpecific(c, mid, node)
)
}
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
sinkElementRef(ref, input, _) and
specLength(input, idx) and
node = ref
or
exists(InterpretNode mid, string c |
interpretInput(input, idx + 1, ref, mid) and
specSplit(input, c, idx)
|
exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) |
c = "Argument" or parseArg(c, pos)
)
or
exists(ReturnNodeExt ret |
c = "ReturnValue" and
ret = node.asNode() and
ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and
mid.asCallable() = getNodeEnclosingCallable(ret)
)
or
interpretInputSpecific(c, mid, node)
)
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* model.
*/
predicate isSourceNode(InterpretNode node, string kind) {
exists(InterpretNode ref, string output |
sourceElementRef(ref, output, kind) and
interpretOutput(output, 0, ref, node)
)
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* model.
*/
predicate isSinkNode(InterpretNode node, string kind) {
exists(InterpretNode ref, string input |
sinkElementRef(ref, input, kind) and
interpretInput(input, 0, ref, node)
)
}
}
/** Provides a query predicate for outputting a set of relevant flow summaries. */
module TestOutput {
/** A flow summary to include in the `summary/3` query predicate. */
abstract class RelevantSummarizedCallable extends SummarizedCallable {
/** Gets the string representation of this callable used by `summary/3`. */
string getFullString() { result = this.toString() }
}
/** A query predicate for outputting flow summaries in QL tests. */
query predicate summary(string callable, string flow, boolean preservesValue) {
exists(
RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output
|
callable = c.getFullString() and
c.propagatesFlow(input, output, preservesValue) and
flow = input + " -> " + output
)
}
}
}

View File

@@ -0,0 +1,99 @@
/**
* Provides Ruby specific classes and predicates for defining flow summaries.
*/
private import ruby
private import DataFlowDispatch
private import DataFlowPrivate
private import DataFlowPublic
private import DataFlowImplCommon
private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Public
private import codeql.ruby.dataflow.FlowSummary as FlowSummary
/** Holds is `i` is a valid parameter position. */
predicate parameterPosition(int i) { i in [-2 .. 10] }
/** Gets the synthesized summary data-flow node for the given values. */
Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSummaryNode(c, state) }
/** Gets the synthesized data-flow call for `receiver`. */
SummaryCall summaryDataFlowCall(Node receiver) { receiver = result.getReceiver() }
/** Gets the type of content `c`. */
DataFlowType getContentType(Content c) { any() }
/** Gets the return type of kind `rk` for callable `c`. */
bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the `i`th parameter in a synthesized call that targets a
* callback of type `t`.
*/
bindingset[t, i]
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
* callback of type `t`.
*/
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
/**
* Holds if an external flow summary exists for `c` with input specification
* `input`, output specification `output`, and kind `kind`.
*/
predicate summaryElement(DataFlowCallable c, string input, string output, string kind) {
exists(FlowSummary::SummarizedCallable sc |
sc.propagatesFlowExt(input, output, kind) and
c.asLibraryCallable() = sc
)
}
/**
* Holds if an external source specification exists for `e` with output specification
* `output` and kind `kind`.
*/
predicate sourceElement(AstNode n, string output, string kind) { none() }
/**
* Holds if an external sink specification exists for `n` with input specification
* `input` and kind `kind`.
*/
predicate sinkElement(AstNode n, string input, string kind) { none() }
/** Gets the summary component for specification component `c`, if any. */
SummaryComponent interpretComponentSpecific(string c) {
c = "BlockArgument" and
result = FlowSummary::SummaryComponent::block()
}
class SourceOrSinkElement = AstNode;
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
NormalReturnKind getReturnValueKind() { any() }
/** An entity used to interpret a source/sink specification. */
class InterpretNode extends AstNode {
/** Gets the element that this node corresponds to, if any. */
SourceOrSinkElement asElement() { none() }
/** Gets the data-flow node that this node corresponds to, if any. */
Node asNode() { none() }
/** Gets the call that this node corresponds to, if any. */
DataFlowCall asCall() { none() }
/** Gets the callable that this node corresponds to, if any. */
DataFlowCallable asCallable() { none() }
/** Gets the target of this call, if any. */
Callable getCallTarget() { none() }
}
/** Provides additional sink specification logic required for attributes. */
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
/** Provides additional sink specification logic required for attributes. */
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) { none() }

View File

@@ -2,6 +2,7 @@ private import ruby
private import TaintTrackingPublic
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import FlowSummaryImpl as FlowSummaryImpl
/**
* Holds if `node` should be a sanitizer in all global taint flow configurations
@@ -35,4 +36,6 @@ predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nod
or
// element reference from nodeFrom
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ElementReferenceCfgNode).getReceiver()
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
}

View File

@@ -2,6 +2,7 @@ private import ruby
private import TaintTrackingPrivate
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import FlowSummaryImpl as FlowSummaryImpl
/**
* Holds if taint propagates from `source` to `sink` in zero or more local
@@ -17,4 +18,14 @@ predicate localExprTaint(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
}
predicate localTaintStep = defaultAdditionalTaintStep/2;
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
defaultAdditionalTaintStep(nodeFrom, nodeTo)
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false)
}

View File

@@ -58,6 +58,7 @@ module UrlRedirect {
// As a rough heuristic, assume that methods with these names are handlers for POST/PUT/PATCH/DELETE requests,
// which are not as vulnerable to URL redirection because browsers will not initiate them from clicking a link.
not this.getEnclosingCallable()
.asCallable()
.(Method)
.getName()
.regexpMatch(".*(create|update|destroy).*")

View File

@@ -1,10 +1,11 @@
private import codeql.ruby.AST as AST
private import codeql.ruby.CFG as CFG
private import CFG::CfgNodes
private import codeql.ruby.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlowPublic
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch
private import codeql.ruby.dataflow.internal.SsaImpl as SsaImpl
private import codeql.ruby.controlflow.CfgNodes
class Node = DataFlowPublic::Node;
@@ -22,19 +23,31 @@ predicate jumpStep = DataFlowPrivate::jumpStep/2;
*/
string getPossibleContentName() { result = getSetterCallAttributeName(_) }
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
*
* Flow into summarized library methods is not included, as that will lead to negative
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
exists(DataFlowDispatch::DataFlowCall call, DataFlowDispatch::DataFlowCallable callable, int i |
call.getTarget() = callable and
nodeFrom.argumentOf(call, i) and
nodeTo.isParameterOf(callable, i)
exists(ExprNodes::CallCfgNode call, CFG::CfgScope callable, int i |
DataFlowDispatch::getTarget(call) = callable and
nodeFrom.sourceArgumentOf(call, i) and
nodeTo.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(callable, i)
)
}
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
/**
* Holds if `nodeFrom` steps to `nodeTo` by being returned from a call.
*
* Flow out of summarized library methods is not included, as that will lead to negative
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
exists(DataFlowDispatch::DataFlowCall call |
DataFlowImplCommon::getNodeEnclosingCallable(nodeFrom) = call.getTarget() and
exists(ExprNodes::CallCfgNode call |
nodeFrom.(DataFlowPrivate::NodeImpl).getCfgScope() = DataFlowDispatch::getTarget(call) and
nodeTo.asExpr().getNode() = call.getNode()
)
}

View File

@@ -13,7 +13,6 @@
import ruby
import codeql.ruby.ast.internal.Module
import codeql.ruby.dataflow.SSA
import codeql.ruby.dataflow.internal.DataFlowDispatch
from DefLoc loc, Expr src, Expr target, string kind
where
@@ -38,9 +37,7 @@ newtype DefLoc =
/** A constant, module or class. */
ConstantDefLoc(ConstantReadAccess read, ConstantWriteAccess write) { write = definitionOf(read) } or
/** A method call. */
MethodLoc(MethodCall call, Method meth) {
exists(DataFlowCall c | c.getExpr() = call and c.getTarget() = meth)
} or
MethodLoc(MethodCall call, Method meth) { meth = call.getATarget() } or
/** A local variable. */
LocalVariableLoc(VariableReadAccess read, VariableWriteAccess write) {
exists(Ssa::WriteDefinition w |

View File

@@ -0,0 +1,34 @@
edges
| summaries.rb:1:11:1:26 | call to identity : | summaries.rb:2:6:2:12 | tainted |
| summaries.rb:1:11:1:26 | call to identity : | summaries.rb:4:24:4:30 | tainted : |
| summaries.rb:1:11:1:26 | call to identity : | summaries.rb:16:36:16:42 | tainted : |
| summaries.rb:1:20:1:26 | "taint" : | summaries.rb:1:11:1:26 | call to identity : |
| summaries.rb:4:12:7:3 | call to apply_block : | summaries.rb:9:6:9:13 | tainted2 |
| summaries.rb:4:24:4:30 | tainted : | summaries.rb:4:12:7:3 | call to apply_block : |
| summaries.rb:4:24:4:30 | tainted : | summaries.rb:4:36:4:36 | x : |
| summaries.rb:4:36:4:36 | x : | summaries.rb:5:8:5:8 | x |
| summaries.rb:11:17:11:17 | x : | summaries.rb:12:8:12:8 | x |
| summaries.rb:16:12:16:43 | call to apply_lambda : | summaries.rb:18:6:18:13 | tainted3 |
| summaries.rb:16:36:16:42 | tainted : | summaries.rb:11:17:11:17 | x : |
| summaries.rb:16:36:16:42 | tainted : | summaries.rb:16:12:16:43 | call to apply_lambda : |
nodes
| summaries.rb:1:11:1:26 | call to identity : | semmle.label | call to identity : |
| summaries.rb:1:20:1:26 | "taint" : | semmle.label | "taint" : |
| summaries.rb:2:6:2:12 | tainted | semmle.label | tainted |
| summaries.rb:4:12:7:3 | call to apply_block : | semmle.label | call to apply_block : |
| summaries.rb:4:24:4:30 | tainted : | semmle.label | tainted : |
| summaries.rb:4:36:4:36 | x : | semmle.label | x : |
| summaries.rb:5:8:5:8 | x | semmle.label | x |
| summaries.rb:9:6:9:13 | tainted2 | semmle.label | tainted2 |
| summaries.rb:11:17:11:17 | x : | semmle.label | x : |
| summaries.rb:12:8:12:8 | x | semmle.label | x |
| summaries.rb:16:12:16:43 | call to apply_lambda : | semmle.label | call to apply_lambda : |
| summaries.rb:16:36:16:42 | tainted : | semmle.label | tainted : |
| summaries.rb:18:6:18:13 | tainted3 | semmle.label | tainted3 |
invalidSpecComponent
#select
| summaries.rb:2:6:2:12 | tainted | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:2:6:2:12 | tainted | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |
| summaries.rb:5:8:5:8 | x | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:5:8:5:8 | x | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |
| summaries.rb:9:6:9:13 | tainted2 | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:9:6:9:13 | tainted2 | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |
| summaries.rb:12:8:12:8 | x | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:12:8:12:8 | x | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |
| summaries.rb:18:6:18:13 | tainted3 | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:18:6:18:13 | tainted3 | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |

View File

@@ -0,0 +1,77 @@
/**
* @kind path-problem
*/
import ruby
import codeql.ruby.dataflow.FlowSummary
import DataFlow::PathGraph
import codeql.ruby.TaintTracking
import codeql.ruby.dataflow.internal.FlowSummaryImpl
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
Private::External::invalidSpecComponent(s, c)
}
private class SummarizedCallableIdentity extends SummarizedCallable {
SummarizedCallableIdentity() { this = "identity" }
override MethodCall getACall() { result.getMethodName() = this }
override predicate propagatesFlowExt(string input, string output, string kind) {
input = "Argument[0]" and
output = "ReturnValue" and
kind = "value"
}
}
private class SummarizedCallableApplyBlock extends SummarizedCallable {
SummarizedCallableApplyBlock() { this = "apply_block" }
override MethodCall getACall() { result.getMethodName() = this }
override predicate propagatesFlowExt(string input, string output, string kind) {
input = "Argument[0]" and
output = "Parameter[0] of BlockArgument" and
kind = "value"
or
input = "ReturnValue of BlockArgument" and
output = "ReturnValue" and
kind = "value"
}
}
private class SummarizedCallableApplyLambda extends SummarizedCallable {
SummarizedCallableApplyLambda() { this = "apply_lambda" }
override MethodCall getACall() { result.getMethodName() = this }
override predicate propagatesFlowExt(string input, string output, string kind) {
input = "Argument[1]" and
output = "Parameter[0] of Argument[0]" and
kind = "value"
or
input = "ReturnValue of Argument[0]" and
output = "ReturnValue" and
kind = "value"
}
}
class Conf extends TaintTracking::Configuration {
Conf() { this = "FlowSummaries" }
override predicate isSource(DataFlow::Node src) {
src.asExpr().getExpr().(StringLiteral).getValueText() = "taint"
}
override predicate isSink(DataFlow::Node sink) {
exists(MethodCall mc |
mc.getMethodName() = "sink" and
mc.getAnArgument() = sink.asExpr().getExpr()
)
}
}
from DataFlow::PathNode source, DataFlow::PathNode sink, Conf conf
where conf.hasFlowPath(source, sink)
select sink, source, sink, "$@", source, source.toString()

View File

@@ -0,0 +1,18 @@
tainted = identity "taint"
sink tainted
tainted2 = apply_block tainted do |x|
sink x
x
end
sink tainted2
my_lambda = -> (x) {
sink x
x
}
tainted3 = apply_lambda(my_lambda, tainted)
sink(tainted3)

View File

@@ -15,6 +15,10 @@
"codeql/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll",
"ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll"
],
"DataFlow Summaries": [
"codeql/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll",
"ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll"
],
"TaintTracking": [
"codeql/csharp/ql/lib/semmle/code/csharp/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
"ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll"