Ruby: Prepare for data flow through arrays

This commit is contained in:
Tom Hvitved
2021-12-09 08:57:02 +01:00
parent 27f786b41e
commit 8c18aaae74
10 changed files with 115 additions and 23 deletions

View File

@@ -98,7 +98,7 @@ module API {
/**
* Gets a `new` call to the function represented by this API component.
*/
DataFlow::Node getAnInstantiation() { result = this.getInstance().getAnImmediateUse() }
DataFlow::ExprNode getAnInstantiation() { result = getInstance().getAnImmediateUse() }
/**
* Gets a node representing a subclass of the class represented by this node.

View File

@@ -4,6 +4,7 @@ import ruby
import codeql.ruby.DataFlow
private import internal.FlowSummaryImpl as Impl
private import internal.DataFlowDispatch
private import internal.DataFlowPrivate
// import all instances below
private module Summaries {
@@ -22,12 +23,34 @@ module SummaryComponent {
predicate content = SC::content/1;
/** Gets a summary component that represents a qualifier. */
SummaryComponent qualifier() { result = argument(any(ParameterPosition pos | pos.isSelf())) }
/** Gets a summary component that represents a `self` argument. */
SummaryComponent self() { result = argument(any(ParameterPosition pos | pos.isSelf())) }
/** Gets a summary component that represents a block argument. */
SummaryComponent block() { result = argument(any(ParameterPosition pos | pos.isBlock())) }
/** Gets a summary component that represents an element in an array at an unknown index. */
SummaryComponent arrayElementUnknown() { result = SC::content(TUnknownArrayElementContent()) }
/** Gets a summary component that represents an element in an array at a known index. */
bindingset[i]
SummaryComponent arrayElementKnown(int i) {
result = SC::content(TKnownArrayElementContent(i))
or
// `i` may be out of range
not exists(TKnownArrayElementContent(i)) and
result = arrayElementUnknown()
}
/**
* Gets a summary component that represents an element in an array at either an unknown
* index or known index. This predicate should never be used in the output specification
* of a flow summary; use `arrayElementUnknown()` instead.
*/
SummaryComponent arrayElementAny() {
result in [arrayElementUnknown(), SC::content(TKnownArrayElementContent(_))]
}
/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) }
}
@@ -44,8 +67,8 @@ module SummaryComponentStack {
predicate argument = SCS::argument/1;
/** Gets a singleton stack representing a qualifier. */
SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) }
/** Gets a singleton stack representing a `self` argument. */
SummaryComponentStack self() { result = singleton(SummaryComponent::self()) }
/** Gets a singleton stack representing a block argument. */
SummaryComponentStack block() { result = singleton(SummaryComponent::block()) }
@@ -108,6 +131,17 @@ abstract class SummarizedCallable extends LibraryCallable {
predicate clearsContent(ParameterPosition pos, DataFlow::Content content) { none() }
}
/**
* A callable with a flow summary, identified by a unique string, where all
* calls to a method with the same name are considered relevant.
*/
abstract class SimpleSummarizedCallable extends SummarizedCallable {
bindingset[this]
SimpleSummarizedCallable() { any() }
final override MethodCall getACall() { result.getMethodName() = this }
}
private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable {
private SummarizedCallable sc;

View File

@@ -250,7 +250,7 @@ private module Cached {
TPositionalParameterPosition(int pos) {
pos = any(Parameter p).getPosition()
or
pos in [0 .. 10] // TODO: remove once `Argument[_]` summaries are replaced with `Argument[i..]`
pos in [0 .. 100] // TODO: remove once `Argument[_]` summaries are replaced with `Argument[i..]`
or
FlowSummaryImplSpecific::ParsePositions::isParsedArgumentPosition(_, pos)
} or

View File

@@ -294,9 +294,13 @@ private module Cached {
}
cached
newtype TContent = TTodoContent() // stub
newtype TContent =
TKnownArrayElementContent(int i) { i in [0 .. 10] } or
TUnknownArrayElementContent()
}
class TArrayElementContent = TKnownArrayElementContent or TUnknownArrayElementContent;
import Cached
/** Holds if `n` should be hidden from path explanations. */
@@ -741,8 +745,6 @@ predicate readStep(Node node1, Content c, Node node2) {
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
storeStep(_, c, n)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
}
@@ -886,4 +888,6 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
predicate allowParameterReturnInSelf(ParameterNode p) {
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
}

View File

@@ -45,19 +45,19 @@ class Node extends TNode {
}
/** A data-flow node corresponding to a call in the control-flow graph. */
class CallNode extends LocalSourceNode {
class CallNode extends LocalSourceNode, ExprNode {
private CfgNodes::ExprNodes::CallCfgNode node;
CallNode() { node = this.asExpr() }
CallNode() { node = this.getExprNode() }
/** Gets the data-flow node corresponding to the receiver of the call corresponding to this data-flow node */
Node getReceiver() { result.asExpr() = node.getReceiver() }
ExprNode getReceiver() { result.getExprNode() = node.getReceiver() }
/** Gets the data-flow node corresponding to the `n`th argument of the call corresponding to this data-flow node */
Node getArgument(int n) { result.asExpr() = node.getArgument(n) }
ExprNode getArgument(int n) { result.getExprNode() = node.getArgument(n) }
/** Gets the data-flow node corresponding to the named argument of the call corresponding to this data-flow node */
Node getKeywordArgument(string name) { result.asExpr() = node.getKeywordArgument(name) }
ExprNode getKeywordArgument(string name) { result.getExprNode() = node.getKeywordArgument(name) }
/** Gets the name of the the method called by the method call (if any) corresponding to this data-flow node */
string getMethodName() { result = node.getExpr().(MethodCall).getMethodName() }
@@ -161,10 +161,7 @@ predicate localExprFlow(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
localFlow(exprNode(e1), exprNode(e2))
}
/**
* A reference contained in an object. This is either a field, a property,
* or an element in a collection.
*/
/** A reference contained in an object. */
class Content extends TContent {
/** Gets a textual representation of this content. */
string toString() { none() }
@@ -173,6 +170,31 @@ class Content extends TContent {
Location getLocation() { none() }
}
/** Provides different sub classes of `Content`. */
module Content {
/** An element in an array. */
class ArrayElementContent extends Content, TArrayElementContent { }
/** An element in an array at a known index. */
class KnownArrayElementContent extends ArrayElementContent, TKnownArrayElementContent {
private int i;
KnownArrayElementContent() { this = TKnownArrayElementContent(i) }
/** Gets the index in the array. */
int getIndex() { result = i }
override string toString() { result = "array element " + i }
}
/** An element in an array at an unknown index. */
class UnknownArrayElementContent extends ArrayElementContent, TUnknownArrayElementContent {
UnknownArrayElementContent() { this = TUnknownArrayElementContent() }
override string toString() { result = "array element" }
}
}
/**
* A guard that validates some expression.
*

View File

@@ -58,12 +58,33 @@ predicate summaryElement(DataFlowCallable c, string input, string output, string
* This covers all the Ruby-specific components of a flow summary, and
* is currently restricted to `"BlockArgument"`.
*/
bindingset[c]
SummaryComponent interpretComponentSpecific(string c) {
c = "Self" and
result = FlowSummary::SummaryComponent::self()
or
c = "BlockArgument" and
result = FlowSummary::SummaryComponent::block()
or
c = "Argument[_]" and
result = FlowSummary::SummaryComponent::argument(any(ParameterPosition pos | pos.isPositional(_)))
or
c = "ArrayElement" and
result = FlowSummary::SummaryComponent::arrayElementAny()
or
c = "ArrayElement[?]" and
result = FlowSummary::SummaryComponent::arrayElementUnknown()
or
exists(int i |
c.regexpCapture("ArrayElement\\[([0-9]+)\\]", 1).toInt() = i and
result = FlowSummary::SummaryComponent::arrayElementKnown(i)
)
or
exists(int i1, int i2 |
c.regexpCapture("ArrayElement\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = i1 and
c.regexpCapture("ArrayElement\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = i2 and
result = FlowSummary::SummaryComponent::arrayElementKnown([i1 .. i2])
)
}
/** Gets the textual representation of a summary component in the format used for flow summaries. */

View File

@@ -1,4 +1,5 @@
private import ruby
private import DataFlowPrivate
private import TaintTrackingPublic
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
@@ -34,8 +35,10 @@ predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nod
nodeFrom.asExpr() =
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
or
// element reference from nodeFrom
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ElementReferenceCfgNode).getReceiver()
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
or
// Although flow through arrays is modelled precisely using stores/reads, we still
// allow flow out of a _tainted_ array. This is needed in order to support taint-
// tracking configurations where the source is an array.
readStep(nodeFrom, any(DataFlow::Content::ArrayElementContent c), nodeTo)
}

View File

@@ -5,5 +5,6 @@ import codeql.ruby.dataflow.internal.DataFlowDispatch
query predicate ret(ReturningNode node) { any() }
query predicate arg(ArgumentNode n, DataFlowCall call, ArgumentPosition pos) {
n.argumentOf(call, pos)
n.argumentOf(call, pos) and
not n instanceof SummaryNode
}

View File

@@ -27,6 +27,7 @@ nodes
| summaries.rb:18:6:18:13 | tainted3 | semmle.label | tainted3 |
subpaths
invalidSpecComponent
invalidOutputSpecComponent
#select
| summaries.rb:2:6:2:12 | tainted | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:2:6:2:12 | tainted | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |
| summaries.rb:5:8:5:8 | x | summaries.rb:1:20:1:26 | "taint" : | summaries.rb:5:8:5:8 | x | $@ | summaries.rb:1:20:1:26 | "taint" : | "taint" : |

View File

@@ -13,6 +13,12 @@ query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c)
Private::External::invalidSpecComponent(s, c)
}
query predicate invalidOutputSpecComponent(SummarizedCallable sc, string s, string c) {
sc.propagatesFlowExt(_, s, _) and
Private::External::specSplit(s, c, _) and
c = "ArrayElement" // not allowed in output specs; use `ArrayElement[?] instead
}
private class SummarizedCallableIdentity extends SummarizedCallable {
SummarizedCallableIdentity() { this = "identity" }