Merge pull request #10760 from hvitved/ruby/regex-taint-flow-restrict

Ruby: Restrict regexp taint flow to `String` summaries
This commit is contained in:
Tom Hvitved
2022-10-12 11:59:08 +02:00
committed by GitHub
6 changed files with 42 additions and 202 deletions

View File

@@ -735,6 +735,9 @@ class SummaryNode extends NodeImpl, TSummaryNode {
SummaryNode() { this = TSummaryNode(c, state) }
/** Gets the summarized callable that this node belongs to. */
FlowSummaryImpl::Public::SummarizedCallable getSummarizedCallable() { result = c }
override CfgScope getCfgScope() { none() }
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }

View File

@@ -1,191 +0,0 @@
/**
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
* exposed through the `Configuration` class. For some languages, this file
* exists in several identical copies, allowing queries to use multiple
* `Configuration` classes that depend on each other without introducing
* mutual recursion among those configurations.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
* this abstract class.
*
* A taint-tracking configuration is a special data flow configuration
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
* necessarily preserve values but are still relevant from a taint tracking
* perspective. (For example, string concatenation, where one of the operands
* is tainted.)
*
* To create a configuration, extend this class with a subclass whose
* characteristic predicate is a unique singleton string. For example, write
*
* ```ql
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isSanitizer`.
* // Optionally override `isSanitizerIn`.
* // Optionally override `isSanitizerOut`.
* // Optionally override `isSanitizerGuard`.
* // Optionally override `isAdditionalTaintStep`.
* }
* ```
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but it is unsupported to depend on
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
* overridden predicates that define sources, sinks, or additional steps.
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant taint source.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/**
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
*
* Holds if taint propagation through nodes guarded by `guard` is prohibited.
*/
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
*
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
none()
}
deprecated final override predicate isBarrierGuard(
DataFlow::BarrierGuard guard, DataFlow::FlowState state
) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
(
this.isSink(node) or
this.isSink(node, _) or
this.isAdditionalTaintStep(node, _) or
this.isAdditionalTaintStep(node, _, _, _)
) and
defaultImplicitTaintRead(node, c)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/
// overridden to provide taint-tracking specific qldoc
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
super.hasFlow(source, sink)
}
}

View File

@@ -1,6 +0,0 @@
import codeql.ruby.dataflow.internal.TaintTrackingPublic as Public
module Private {
import codeql.ruby.dataflow.internal.DataFlowImplForRegExp as DataFlow
import codeql.ruby.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -3,7 +3,7 @@
private import codeql.ruby.AST
private import codeql.ruby.ApiGraphs
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.FlowSummary
private import codeql.ruby.dataflow.FlowSummary as FlowSummary
private import codeql.ruby.dataflow.internal.DataFlowDispatch
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.Regexp as RE
@@ -107,6 +107,18 @@ module String {
preservesValue = false
}
/** A `String` callable with a flow summary. */
abstract class SummarizedCallable extends FlowSummary::SummarizedCallable {
bindingset[this]
SummarizedCallable() { any() }
}
abstract private class SimpleSummarizedCallable extends SummarizedCallable,
FlowSummary::SimpleSummarizedCallable {
bindingset[this]
SimpleSummarizedCallable() { any() }
}
private class NewSummary extends SummarizedCallable {
NewSummary() { this = "String.new" }

View File

@@ -1,10 +1,15 @@
private import codeql.ruby.Regexp
private import codeql.ruby.ast.Literal as Ast
private import codeql.ruby.AST as Ast
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.dataflow.internal.tainttrackingforregexp.TaintTrackingImpl
private import codeql.ruby.dataflow.internal.DataFlowImplForRegExp
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import codeql.ruby.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import codeql.ruby.dataflow.FlowSummary as FlowSummary
private import codeql.ruby.frameworks.core.String
class RegExpConfiguration extends Configuration {
RegExpConfiguration() { this = "RegExpConfiguration" }
@@ -20,7 +25,7 @@ class RegExpConfiguration extends Configuration {
override predicate isSink(DataFlow::Node sink) { sink instanceof RegExpInterpretation::Range }
override predicate isSanitizer(DataFlow::Node node) {
override predicate isBarrier(DataFlow::Node node) {
exists(DataFlow::CallNode mce | mce.getMethodName() = ["match", "match?"] |
// receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
node = mce.getReceiver() and
@@ -31,6 +36,24 @@ class RegExpConfiguration extends Configuration {
mce.getReceiver() = trackRegexpType()
)
}
override predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// include taint flow through `String` summaries,
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false) and
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
String::SummarizedCallable
or
// string concatenations, and
exists(CfgNodes::ExprNodes::OperationCfgNode op |
op = nodeTo.asExpr() and
op.getAnOperand() = nodeFrom.asExpr() and
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
)
or
// string interpolations
nodeFrom.asExpr() =
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
}
}
private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {