Merge pull request #8332 from hvitved/ruby/regexp-taint-flow

Ruby: Use taint tracking instead of type tracking to define `regExpSource`
This commit is contained in:
Arthur Baars
2022-03-18 18:24:02 +01:00
committed by GitHub
6 changed files with 5514 additions and 21 deletions

View File

@@ -27,7 +27,8 @@
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll"
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForLibraries.qll"
],
"DataFlow Java/C++/C#/Python Common": [
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll",
@@ -54,7 +55,8 @@
"python/ql/lib/semmle/python/dataflow/new/internal/tainttracking2/TaintTrackingImpl.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/tainttracking3/TaintTrackingImpl.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/tainttracking4/TaintTrackingImpl.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll"
"ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/tainttrackingforlibraries/TaintTrackingImpl.qll"
],
"DataFlow Java/C++/C#/Python Consistency checks": [
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll",

View File

@@ -794,6 +794,20 @@ module ExprNodes {
final override VariableWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
}
/** A control-flow node that wraps a `ConstantReadAccess` AST expression. */
class ConstantReadAccessCfgNode extends ExprCfgNode {
override ConstantReadAccess e;
final override ConstantReadAccess getExpr() { result = ExprCfgNode.super.getExpr() }
}
/** A control-flow node that wraps a `ConstantWriteAccess` AST expression. */
class ConstantWriteAccessCfgNode extends ExprCfgNode {
override ConstantWriteAccess e;
final override ConstantWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
}
/** A control-flow node that wraps a `InstanceVariableWriteAccess` AST expression. */
class InstanceVariableWriteAccessCfgNode extends ExprCfgNode {
override InstanceVariableWriteAccess e;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,198 @@
/**
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
* exposed through the `Configuration` class. For some languages, this file
* exists in several identical copies, allowing queries to use multiple
* `Configuration` classes that depend on each other without introducing
* mutual recursion among those configurations.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
* this abstract class.
*
* A taint-tracking configuration is a special data flow configuration
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
* necessarily preserve values but are still relevant from a taint tracking
* perspective. (For example, string concatenation, where one of the operands
* is tainted.)
*
* To create a configuration, extend this class with a subclass whose
* characteristic predicate is a unique singleton string. For example, write
*
* ```ql
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isSanitizer`.
* // Optionally override `isSanitizerIn`.
* // Optionally override `isSanitizerOut`.
* // Optionally override `isSanitizerGuard`.
* // Optionally override `isAdditionalTaintStep`.
* }
* ```
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but it is unsupported to depend on
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
* overridden predicates that define sources, sinks, or additional steps.
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant taint source.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
/**
* Holds if taint propagation into `node` is prohibited when the flow state is
* `state`.
*/
predicate isSanitizerIn(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrierIn(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizerIn(node, state)
}
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/**
* Holds if taint propagation out of `node` is prohibited when the flow state is
* `state`.
*/
predicate isSanitizerOut(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrierOut(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizerOut(node, state)
}
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis. This step is only applicable
* in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/
// overridden to provide taint-tracking specific qldoc
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
super.hasFlow(source, sink)
}
}

View File

@@ -0,0 +1,6 @@
import codeql.ruby.dataflow.internal.TaintTrackingPublic as Public
module Private {
import codeql.ruby.dataflow.internal.DataFlowImplForLibraries as DataFlow
import codeql.ruby.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -8,9 +8,9 @@
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
private import codeql.ruby.DataFlow
private import codeql.ruby.TaintTracking
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
/**
* A `StringlikeLiteral` containing a regular expression term, that is, either
@@ -993,25 +993,35 @@ private predicate isInterpretedAsRegExp(DataFlow::Node source) {
// The argument of a call that coerces the argument to a regular expression.
exists(DataFlow::CallNode mce |
mce.getMethodName() = ["match", "match?"] and
source = mce.getArgument(0)
source = mce.getArgument(0) and
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
)
}
/**
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
* as a part of a regular expression.
*/
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
t.start() and
re = result and
isInterpretedAsRegExp(result)
or
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
t2 = t.smallstep(result, succ)
or
TaintTracking::localTaintStep(result, succ) and
t = t2
)
private class RegExpConfiguration extends Configuration {
RegExpConfiguration() { this = "RegExpConfiguration" }
override predicate isSource(DataFlow::Node source) {
source.asExpr() =
any(ExprCfgNode e |
e.getConstantValue().isString(_) and
not e instanceof ExprNodes::VariableReadAccessCfgNode and
not e instanceof ExprNodes::ConstantReadAccessCfgNode
)
}
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
override predicate isSanitizer(DataFlow::Node node) {
// stop flow if `node` is receiver of
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
exists(DataFlow::CallNode mce |
mce.getMethodName() = ["match", "match?"] and
node = mce.getReceiver() and
mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
)
}
}
/**
@@ -1019,4 +1029,6 @@ private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
* as a part of a regular expression.
*/
cached
DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }
DataFlow::Node regExpSource(DataFlow::Node re) {
exists(RegExpConfiguration c | c.hasFlow(result, re))
}