Ruby: Add alert provenance plumbing.

This commit is contained in:
Anders Schack-Mulligen
2024-02-06 15:23:26 +01:00
parent 82e6fbbd22
commit 6991f5452f
9 changed files with 167 additions and 96 deletions

View File

@@ -32,9 +32,22 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
* DEPRECATED: Use `propagatesFlow` instead.
*/
deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// Deprecated entry point that only forwards to `propagatesFlow`. The four-argument
// call passes `_` as its last argument, so that column is left unconstrained.
this.propagatesFlow(input, output, preservesValue)
this.propagatesFlow(input, output, preservesValue, _)
}
/**
* Holds if data may flow from `input` to `output` through this callable, where
* `model` is the provenance string attached to that flow.
*
* This override forwards to the three-argument `propagatesFlow` and always binds
* `model` to the empty string.
*/
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
this.propagatesFlow(input, output, preservesValue) and model = ""
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*
* The body given here is `none()`, so this definition by itself contributes no flow.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
/**
* Gets the synthesized parameter that results from an input specification
* that starts with `Argument[s]` for this library callable.
@@ -100,7 +113,9 @@ private module LibraryCallbackSummaries {
libraryCallHasLambdaArg(result.getAControlFlowNode(), _)
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
(
input = "Argument[block]" and
output = "Argument[block].Parameter[lambda-self]"
@@ -111,7 +126,8 @@ private module LibraryCallbackSummaries {
output = "Argument[" + i + "].Parameter[lambda-self]"
)
) and
preservesValue = true
preservesValue = true and
model = "heuristic-callback"
}
}
}

View File

@@ -244,10 +244,11 @@ module LocalFlow {
}
/**
* Holds if `summaryLocalStep` relates the summary nodes of `nodeFrom` and `nodeTo`
* with its boolean flag set to `true` (presumably the value-preserving variant;
* confirm against `summaryLocalStep`), and `c` is the summarized callable of `nodeFrom`.
*
* In the four-argument form, `model` is the value reported by `summaryLocalStep`.
*/
predicate flowSummaryLocalStep(
FlowSummaryNode nodeFrom, FlowSummaryNode nodeTo, FlowSummaryImpl::Public::SummarizedCallable c
FlowSummaryNode nodeFrom, FlowSummaryNode nodeTo, FlowSummaryImpl::Public::SummarizedCallable c,
string model
) {
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.getSummaryNode(),
nodeTo.getSummaryNode(), true) and
nodeTo.getSummaryNode(), true, model) and
c = nodeFrom.getSummarizedCallable()
}
@@ -271,7 +272,7 @@ module LocalFlow {
node1 =
unique(FlowSummaryNode n1 |
FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(),
node2.(FlowSummaryNode).getSummaryNode(), true)
node2.(FlowSummaryNode).getSummaryNode(), true, _)
)
}
}
@@ -606,25 +607,28 @@ private module Cached {
* data flow.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
LocalFlow::localFlowStepCommon(nodeFrom, nodeTo)
or
exists(SsaImpl::DefinitionExt def |
// captured variables are handled by the shared `VariableCapture` library
not def instanceof VariableCapture::CapturedSsaDefinitionExt
|
LocalFlow::localSsaFlowStep(def, nodeFrom, nodeTo)
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
(
LocalFlow::localFlowStepCommon(nodeFrom, nodeTo)
or
LocalFlow::localSsaFlowStepUseUse(def, nodeFrom, nodeTo) and
not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom, _)
exists(SsaImpl::DefinitionExt def |
// captured variables are handled by the shared `VariableCapture` library
not def instanceof VariableCapture::CapturedSsaDefinitionExt
|
LocalFlow::localSsaFlowStep(def, nodeFrom, nodeTo)
or
LocalFlow::localSsaFlowStepUseUse(def, nodeFrom, nodeTo) and
not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom, _)
or
LocalFlow::localFlowSsaInputFromRead(def, nodeFrom, nodeTo) and
not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom, _)
)
or
LocalFlow::localFlowSsaInputFromRead(def, nodeFrom, nodeTo) and
not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom, _)
)
VariableCapture::valueStep(nodeFrom, nodeTo)
) and
// The common, SSA and variable-capture steps grouped above are not attributed to
// any model, so `model` is bound to the empty string for them.
model = ""
or
LocalFlow::flowSummaryLocalStep(nodeFrom, nodeTo, _)
or
VariableCapture::valueStep(nodeFrom, nodeTo)
// Flow-summary steps carry the `model` reported by `flowSummaryLocalStep`.
LocalFlow::flowSummaryLocalStep(nodeFrom, nodeTo, _, model)
}
/** This is the local flow predicate that is exposed. */
@@ -656,7 +660,8 @@ private module Cached {
or
VariableCapture::flowInsensitiveStep(nodeFrom, nodeTo)
or
LocalFlow::flowSummaryLocalStep(nodeFrom, nodeTo, any(LibraryCallableToIncludeInTypeTracking c))
LocalFlow::flowSummaryLocalStep(nodeFrom, nodeTo, any(LibraryCallableToIncludeInTypeTracking c),
_)
}
/** Holds if `n` wraps an SSA definition without ingoing flow. */
@@ -752,7 +757,7 @@ private module Cached {
// external model data. This, unfortunately, does not include any field names used
// in models defined in QL code.
exists(string input, string output |
ModelOutput::relevantSummaryModel(_, _, input, output, _)
ModelOutput::relevantSummaryModel(_, _, input, output, _, _)
|
name = [input, output].regexpFind("(?<=(^|\\.)Field\\[)[^\\]]+(?=\\])", _, _).trim()
)
@@ -2241,6 +2246,14 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
/**
* Extra data-flow steps needed for lambda flow analysis.
*
* The body is `none()`, so no extra steps are defined here.
*/
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
* Holds if `source` is the `asSource()` node of some API node returned by
* `ModelOutput::getASourceNode` (for any kind), and `model` is the model string
* reported alongside it.
*/
predicate knownSourceModel(Node source, string model) {
source = ModelOutput::getASourceNode(_, model).asSource()
}
/**
* Holds if `sink` is the `asSink()` node of some API node returned by
* `ModelOutput::getASinkNode` (for any kind), and `model` is the model string
* reported alongside it.
*/
predicate knownSinkModel(Node sink, string model) {
sink = ModelOutput::getASinkNode(_, model).asSink()
}
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.

View File

@@ -77,38 +77,41 @@ private module Cached {
* in all global taint flow configurations.
*/
cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// value of `case` expression into variables in patterns
exists(
CfgNodes::ExprNodes::CaseExprCfgNode case, CfgNodes::ExprCfgNode value,
CfgNodes::ExprNodes::InClauseCfgNode clause, Ssa::Definition def
|
nodeFrom.asExpr() = value and
value = case.getValue() and
clause = case.getBranch(_) and
def = nodeTo.(SsaDefinitionExtNode).getDefinitionExt() and
def.getControlFlowNode() = variablesInPattern(clause.getPattern()) and
not LocalFlow::ssaDefAssigns(def, value)
)
or
// operation involving `nodeFrom`
exists(CfgNodes::ExprNodes::OperationCfgNode op |
op = nodeTo.asExpr() and
op.getAnOperand() = nodeFrom.asExpr() and
not op.getExpr() =
any(Expr e |
// included in normal data-flow
e instanceof AssignExpr or
e instanceof BinaryLogicalOperation or
// has flow summary
e instanceof SplatExpr
)
)
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
(
// value of `case` expression into variables in patterns
exists(
CfgNodes::ExprNodes::CaseExprCfgNode case, CfgNodes::ExprCfgNode value,
CfgNodes::ExprNodes::InClauseCfgNode clause, Ssa::Definition def
|
nodeFrom.asExpr() = value and
value = case.getValue() and
clause = case.getBranch(_) and
def = nodeTo.(SsaDefinitionExtNode).getDefinitionExt() and
def.getControlFlowNode() = variablesInPattern(clause.getPattern()) and
not LocalFlow::ssaDefAssigns(def, value)
)
or
// operation involving `nodeFrom`
exists(CfgNodes::ExprNodes::OperationCfgNode op |
op = nodeTo.asExpr() and
op.getAnOperand() = nodeFrom.asExpr() and
not op.getExpr() =
any(Expr e |
// included in normal data-flow
e instanceof AssignExpr or
e instanceof BinaryLogicalOperation or
// has flow summary
e instanceof SplatExpr
)
)
) and
model = ""
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
nodeTo.(FlowSummaryNode).getSummaryNode(), false)
nodeTo.(FlowSummaryNode).getSummaryNode(), false, model)
or
any(FlowSteps::AdditionalTaintStep s).step(nodeFrom, nodeTo)
any(FlowSteps::AdditionalTaintStep s).step(nodeFrom, nodeTo) and model = "AdditionalTaintStep"
or
// Although flow through collections is modeled precisely using stores/reads, we still
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
@@ -119,7 +122,8 @@ private module Cached {
c.isKnownOrUnknownElement(_)
or
c.isAnyElement()
)
) and
model = ""
}
cached
@@ -136,7 +140,7 @@ private module Cached {
cached
predicate localTaintStepCached(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
DataFlow::localFlowStep(nodeFrom, nodeTo) or
defaultAdditionalTaintStep(nodeFrom, nodeTo) or
defaultAdditionalTaintStep(nodeFrom, nodeTo, _) or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
summaryThroughStepTaint(nodeFrom, nodeTo, _)

View File

@@ -37,7 +37,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
string path;
SummarizedCallableFromModel() {
// One value per `(type, path)` pair that has a relevant summary model; the value
// itself is the string `type + ";" + path`. Input, output, kind (and, in the
// six-argument form, the model) are irrelevant here and left as `_`.
ModelOutput::relevantSummaryModel(type, path, _, _, _) and
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
this = type + ";" + path
}
@@ -48,8 +48,10 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
)
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) |
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or

View File

@@ -229,41 +229,55 @@ private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row
private predicate typeVariableModel(string row) { any(TypeVariableModelCsv s).row(inversePad(row)) }
/**
* Holds if a source model exists for the given parameters.
*
* In the four-argument form, `model` records where the model came from: the constant
* `"SourceModelCsv"` for a `;`-separated row accepted by `sourceModel(row)`, or
* `"MaD:"` followed by the string form of the extension id for a tuple of
* `Extensions::sourceModel`.
*/
predicate sourceModel(string type, string path, string kind) {
predicate sourceModel(string type, string path, string kind, string model) {
exists(string row |
sourceModel(row) and
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind
row.splitAt(";", 2) = kind and
model = "SourceModelCsv"
)
or
Extensions::sourceModel(type, path, kind)
exists(QlBuiltins::ExtensionId madId |
Extensions::sourceModel(type, path, kind, madId) and
model = "MaD:" + madId.toString()
)
}
/**
* Holds if a sink model exists for the given parameters.
*
* In the four-argument form, `model` records where the model came from: the constant
* `"SinkModelCsv"` for a `;`-separated row accepted by `sinkModel(row)`, or
* `"MaD:"` followed by the string form of the extension id for a tuple of
* `Extensions::sinkModel`.
*/
private predicate sinkModel(string type, string path, string kind) {
private predicate sinkModel(string type, string path, string kind, string model) {
exists(string row |
sinkModel(row) and
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind
row.splitAt(";", 2) = kind and
model = "SinkModelCsv"
)
or
Extensions::sinkModel(type, path, kind)
exists(QlBuiltins::ExtensionId madId |
Extensions::sinkModel(type, path, kind, madId) and
model = "MaD:" + madId.toString()
)
}
/**
* Holds if a summary model `row` exists for the given parameters.
*
* A row is split on `;` into `type`, `path`, `input`, `output` and `kind` (fields 0-4).
* In the six-argument form, `model` records where the model came from: the constant
* `"SummaryModelCsv"` for such a row, or `"MaD:"` followed by the string form of the
* extension id for a tuple of `Extensions::summaryModel`.
*/
private predicate summaryModel(string type, string path, string input, string output, string kind) {
private predicate summaryModel(
string type, string path, string input, string output, string kind, string model
) {
exists(string row |
summaryModel(row) and
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = input and
row.splitAt(";", 3) = output and
row.splitAt(";", 4) = kind
row.splitAt(";", 4) = kind and
model = "SummaryModelCsv"
)
or
Extensions::summaryModel(type, path, input, output, kind)
exists(QlBuiltins::ExtensionId madId |
Extensions::summaryModel(type, path, input, output, kind, madId) and
model = "MaD:" + madId.toString()
)
}
/** Holds if a type model exists for the given parameters. */
@@ -294,9 +308,9 @@ private predicate typeVariableModel(string name, string path) {
*/
predicate isRelevantType(string type) {
(
sourceModel(type, _, _) or
sinkModel(type, _, _) or
summaryModel(type, _, _, _, _) or
sourceModel(type, _, _, _) or
sinkModel(type, _, _, _) or
summaryModel(type, _, _, _, _, _) or
typeModel(_, type, _)
) and
(
@@ -319,9 +333,9 @@ pragma[nomagic]
predicate isRelevantFullPath(string type, string path) {
isRelevantType(type) and
(
sourceModel(type, path, _) or
sinkModel(type, path, _) or
summaryModel(type, path, _, _, _) or
sourceModel(type, path, _, _) or
sinkModel(type, path, _, _) or
summaryModel(type, path, _, _, _, _) or
typeModel(_, type, path)
)
}
@@ -331,8 +345,8 @@ private predicate accessPathRange(string s) {
isRelevantFullPath(_, s)
or
exists(string type | isRelevantType(type) |
summaryModel(type, _, s, _, _) or
summaryModel(type, _, _, s, _)
summaryModel(type, _, s, _, _, _) or
summaryModel(type, _, _, s, _, _)
)
or
typeVariableModel(_, s)
@@ -543,7 +557,7 @@ private API::Node getNodeFromPath(string type, AccessPath path) {
pragma[nomagic]
private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) {
// Selects summary models for `(type, basePath)` whose input is the empty string and
// whose kind is `"type"`; in the six-argument call the model column is ignored (`_`).
summaryModel(type, basePath, "", output, "type")
summaryModel(type, basePath, "", output, "type", _)
}
pragma[nomagic]
@@ -621,9 +635,9 @@ module ModelOutput {
* Holds if a CSV source model contributed `source` with the given `kind`.
*/
cached
API::Node getASourceNode(string kind) {
API::Node getASourceNode(string kind, string model) {
exists(string type, string path |
sourceModel(type, path, kind) and
sourceModel(type, path, kind, model) and
result = getNodeFromPath(type, path)
)
}
@@ -632,9 +646,9 @@ module ModelOutput {
* Holds if a CSV sink model contributed `sink` with the given `kind`.
*/
cached
API::Node getASinkNode(string kind) {
API::Node getASinkNode(string kind, string model) {
exists(string type, string path |
sinkModel(type, path, kind) and
sinkModel(type, path, kind, model) and
result = getNodeFromPath(type, path)
)
}
@@ -644,10 +658,10 @@ module ModelOutput {
*/
cached
predicate relevantSummaryModel(
string type, string path, string input, string output, string kind
string type, string path, string input, string output, string kind, string model
) {
isRelevantType(type) and
summaryModel(type, path, input, output, kind)
summaryModel(type, path, input, output, kind, model)
}
/**
@@ -655,7 +669,7 @@ module ModelOutput {
*/
cached
predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) {
summaryModel(type, path, _, _, _) and
summaryModel(type, path, _, _, _, _) and
baseNode = getInvocationFromPath(type, path)
}
@@ -664,7 +678,7 @@ module ModelOutput {
*/
cached
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
summaryModel(type, path, _, _, _) and
summaryModel(type, path, _, _, _, _) and
baseNode = getNodeFromPath(type, path)
}
@@ -680,12 +694,22 @@ module ModelOutput {
import Specific::ModelOutputSpecific
private import codeql.mad.ModelValidation as SharedModelVal
/**
* Gets a node contributed by a source model with the given `kind`.
*
* This is the two-argument `getASourceNode` with its `model` argument ignored.
*/
API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) }
/**
* Gets a node contributed by a sink model with the given `kind`.
*
* This is the two-argument `getASinkNode` with its `model` argument ignored.
*/
API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) }
/**
* Implementation of `SharedModelVal::KindValidationConfigSig` that exposes the kinds
* appearing in this file's summary, sink and source models. All other columns
* (including the model column in the longer calls) are ignored.
*/
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
// Holds if `kind` is the kind of some summary model.
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind) }
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) }
// Holds if `kind` is the kind of some sink model.
predicate sinkKind(string kind) { sinkModel(_, _, kind) }
predicate sinkKind(string kind) { sinkModel(_, _, kind, _) }
// Holds if `kind` is the kind of some source model.
predicate sourceKind(string kind) { sourceModel(_, _, kind) }
predicate sourceKind(string kind) { sourceModel(_, _, kind, _) }
}
/** The shared `KindValidation` module instantiated with `KindValConfig`. */
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;

View File

@@ -8,13 +8,15 @@
*
* The kind `remote` represents a general remote flow source.
*/
extensible predicate sourceModel(string type, string path, string kind);
extensible predicate sourceModel(
string type, string path, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if the value at `(type, path)` should be seen as a sink
* of the given `kind`.
*
* In the four-column form, `madId` is the `QlBuiltins::ExtensionId` of the tuple
* (presumably identifying the data-extension row that contributed it; confirm).
*/
extensible predicate sinkModel(string type, string path, string kind);
extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId);
/**
* Holds if in calls to `(type, path)`, the value referred to by `input`
@@ -23,7 +25,9 @@ extensible predicate sinkModel(string type, string path, string kind);
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
* respectively.
*/
extensible predicate summaryModel(string type, string path, string input, string output, string kind);
extensible predicate summaryModel(
string type, string path, string input, string output, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`.

View File

@@ -38,7 +38,7 @@ deprecated class Configuration extends DataFlow::Configuration {
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
defaultAdditionalTaintStep(nodeFrom, nodeTo) and
defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and
// This is a taint step, so the flow state becomes `taint`.
stateFrom = [FlowState::data(), FlowState::taint()] and
stateTo = FlowState::taint()
@@ -57,7 +57,7 @@ private module Config implements DataFlow::StateConfigSig {
predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
defaultAdditionalTaintStep(nodeFrom, nodeTo) and
defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and // TODO: propagate provenance
// This is a taint step, so the flow state becomes `taint`.
(
stateFrom = FlowState::Taint()

View File

@@ -217,7 +217,15 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0;
// Callables
class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl;
/**
* A summarized callable for the type-tracker input, backed by
* `FlowSummaryImpl::Private::SummarizedCallableImpl` via `instanceof`.
*
* It re-exposes `propagatesFlow` with three arguments by passing `_` for the fourth
* argument of the underlying predicate.
*/
class SummarizedCallable instanceof FlowSummaryImpl::Private::SummarizedCallableImpl {
/** Gets the string representation of the underlying callable. */
string toString() { result = super.toString() }
/**
* Holds if the underlying callable propagates flow from `input` to `output` with the
* given `preservesValue`, for any value of its fourth argument.
*/
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
super.propagatesFlow(input, output, preservesValue, _)
}
}
// Relating nodes to summaries
Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) {

View File

@@ -179,7 +179,7 @@ class SinkCallable extends DataFlow::MethodNode {
exists(string type, string path, string method |
method = path.regexpCapture("(Method\\[[^\\]]+\\]).*", 1) and
Util::pathToMethod(this, type, method) and
sinkModel(type, path, _)
sinkModel(type, path, _, _)
)
}
}
@@ -192,7 +192,7 @@ class SourceCallable extends DataFlow::CallableNode {
exists(string type, string path, string method |
method = path.regexpCapture("(Method\\[[^\\]]+\\]).*", 1) and
Util::pathToMethod(this, type, method) and
sourceModel(type, path, _)
sourceModel(type, path, _, _)
)
}
}
@@ -204,7 +204,7 @@ class SummaryCallable extends DataFlow::CallableNode {
SummaryCallable() {
// A callable node qualifies when `Util::pathToMethod` relates it to some
// `(type, path)` pair for which a summary model exists; all remaining summary
// columns are ignored.
exists(string type, string path |
Util::pathToMethod(this, type, path) and
summaryModel(type, path, _, _, _)
summaryModel(type, path, _, _, _, _)
)
}
}