Python: Add alert provenance plumbing.

This commit is contained in:
Anders Schack-Mulligen
2024-02-07 12:52:34 +01:00
parent f202661912
commit a8fc100108
13 changed files with 142 additions and 75 deletions

View File

@@ -31,8 +31,21 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
* DEPRECATED: Use `propagatesFlow` instead.
*/
deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
this.propagatesFlow(input, output, preservesValue)
this.propagatesFlow(input, output, preservesValue, _)
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* This implementation forwards to the three-argument `propagatesFlow` and
* binds the extra `model` column to `this`.
*/
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
this.propagatesFlow(input, output, preservesValue) and model = this
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
}
deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack;

View File

@@ -3,6 +3,7 @@ private import DataFlowPublic
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportResolution
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.frameworks.data.ModelsAsData
// Since we allow extra data-flow steps from modeled frameworks, we import these
// up-front, to ensure these are included. This provides a more seamless experience from
// a user point of view, since they don't need to know they need to import a specific
@@ -471,12 +472,12 @@ import StepRelationTransformations
*
* It includes flow steps from flow summaries.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo) and model = ""
or
summaryLocalStep(nodeFrom, nodeTo)
summaryLocalStep(nodeFrom, nodeTo, model)
or
variableCaptureLocalFlowStep(nodeFrom, nodeTo)
variableCaptureLocalFlowStep(nodeFrom, nodeTo) and model = ""
}
/**
@@ -490,9 +491,9 @@ predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
LocalFlow::localFlowStep(nodeFrom, nodeTo)
}
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo, string model) {
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
nodeTo.(FlowSummaryNode).getSummaryNode(), true, model)
}
predicate variableCaptureLocalFlowStep(Node nodeFrom, Node nodeTo) {
@@ -1078,6 +1079,14 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
* Holds if `source` is the `asSource()` node of a result of
* `ModelOutput::getASourceNode` (for any kind), and `model` is the model
* string reported together with that result.
*/
predicate knownSourceModel(Node source, string model) {
source = ModelOutput::getASourceNode(_, model).asSource()
}
/**
* Holds if `sink` is the `asSink()` node of a result of
* `ModelOutput::getASinkNode` (for any kind), and `model` is the model
* string reported together with that result.
*/
predicate knownSinkModel(Node sink, string model) {
sink = ModelOutput::getASinkNode(_, model).asSink()
}
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.

View File

@@ -642,7 +642,9 @@ newtype TContent =
or
//
// 2) summaries in data-extension files
exists(string input, string output | ModelOutput::relevantSummaryModel(_, _, input, output, _) |
exists(string input, string output |
ModelOutput::relevantSummaryModel(_, _, input, output, _, _)
|
attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim()
)
} or

View File

@@ -12,7 +12,7 @@ private import FlowSummaryImpl as FlowSummaryImpl
* (intra-procedural) step.
*/
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
simpleLocalFlowStep(nodeFrom, nodeTo, _)
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural.

View File

@@ -242,7 +242,7 @@ private module Cached {
*/
pragma[nomagic]
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and
simpleLocalFlowStep(nodeFrom, nodeTo, _) and
not nodeTo = any(ModuleVariableNode v).getARead()
}

View File

@@ -24,10 +24,11 @@ private module Cached {
* global taint flow configurations.
*/
cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
localAdditionalTaintStep(nodeFrom, nodeTo)
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
localAdditionalTaintStep(nodeFrom, nodeTo, model)
or
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
any(AdditionalTaintStep a).step(nodeFrom, nodeTo) and
model = "AdditionalTaintStep"
}
/**
@@ -36,30 +37,34 @@ private module Cached {
* different objects.
*/
cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
concatStep(nodeFrom, nodeTo)
or
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
(
concatStep(nodeFrom, nodeTo)
or
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
) and
model = ""
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom
.(DataFlowPrivate::FlowSummaryNode)
.getSummaryNode(), nodeTo.(DataFlowPrivate::FlowSummaryNode).getSummaryNode(), false)
.getSummaryNode(), nodeTo.(DataFlowPrivate::FlowSummaryNode).getSummaryNode(), false,
model)
}
}

View File

@@ -34,7 +34,7 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Ordinary data flow
DataFlow::localFlowStep(nodeFrom, nodeTo)
or
localAdditionalTaintStep(nodeFrom, nodeTo)
localAdditionalTaintStep(nodeFrom, nodeTo, _)
}
/**

View File

@@ -24,7 +24,15 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
ContentFilter getFilterFromWithContentStep(Content content) { none() }
// Callables
class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl;
/**
* A summarized callable as exposed to the summary type tracker. It wraps
* `FlowSummaryImpl::Private::SummarizedCallableImpl` and offers a
* three-argument `propagatesFlow`.
*/
class SummarizedCallable instanceof FlowSummaryImpl::Private::SummarizedCallableImpl {
/** Gets a textual representation of this callable, taken from the wrapped type. */
string toString() { result = super.toString() }
/**
* Holds if the wrapped callable propagates flow from `input` to `output`.
* The fourth argument of the wrapped predicate is ignored.
*/
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
super.propagatesFlow(input, output, preservesValue, _)
}
}
// Summaries and their stacks
class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent;

View File

@@ -33,7 +33,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
string path;
SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _) and
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
this = type + ";" + path
}
@@ -46,8 +46,10 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
)
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) |
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or

View File

@@ -229,41 +229,55 @@ private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row
private predicate typeVariableModel(string row) { any(TypeVariableModelCsv s).row(inversePad(row)) }
/** Holds if a source model exists for the given parameters. */
predicate sourceModel(string type, string path, string kind) {
predicate sourceModel(string type, string path, string kind, string model) {
exists(string row |
sourceModel(row) and
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind
row.splitAt(";", 2) = kind and
model = "SourceModelCsv"
)
or
Extensions::sourceModel(type, path, kind)
exists(QlBuiltins::ExtensionId madId |
Extensions::sourceModel(type, path, kind, madId) and
model = "MaD:" + madId.toString()
)
}
/** Holds if a sink model exists for the given parameters. */
private predicate sinkModel(string type, string path, string kind) {
private predicate sinkModel(string type, string path, string kind, string model) {
exists(string row |
sinkModel(row) and
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind
row.splitAt(";", 2) = kind and
model = "SinkModelCsv"
)
or
Extensions::sinkModel(type, path, kind)
exists(QlBuiltins::ExtensionId madId |
Extensions::sinkModel(type, path, kind, madId) and
model = "MaD:" + madId.toString()
)
}
/** Holds if a summary model exists for the given parameters. */
private predicate summaryModel(string type, string path, string input, string output, string kind) {
private predicate summaryModel(
string type, string path, string input, string output, string kind, string model
) {
exists(string row |
summaryModel(row) and
row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and
row.splitAt(";", 2) = input and
row.splitAt(";", 3) = output and
row.splitAt(";", 4) = kind
row.splitAt(";", 4) = kind and
model = "SummaryModelCsv"
)
or
Extensions::summaryModel(type, path, input, output, kind)
exists(QlBuiltins::ExtensionId madId |
Extensions::summaryModel(type, path, input, output, kind, madId) and
model = "MaD:" + madId.toString()
)
}
/** Holds if a type model exists for the given parameters. */
@@ -294,9 +308,9 @@ private predicate typeVariableModel(string name, string path) {
*/
predicate isRelevantType(string type) {
(
sourceModel(type, _, _) or
sinkModel(type, _, _) or
summaryModel(type, _, _, _, _) or
sourceModel(type, _, _, _) or
sinkModel(type, _, _, _) or
summaryModel(type, _, _, _, _, _) or
typeModel(_, type, _)
) and
(
@@ -319,9 +333,9 @@ pragma[nomagic]
predicate isRelevantFullPath(string type, string path) {
isRelevantType(type) and
(
sourceModel(type, path, _) or
sinkModel(type, path, _) or
summaryModel(type, path, _, _, _) or
sourceModel(type, path, _, _) or
sinkModel(type, path, _, _) or
summaryModel(type, path, _, _, _, _) or
typeModel(_, type, path)
)
}
@@ -331,8 +345,8 @@ private predicate accessPathRange(string s) {
isRelevantFullPath(_, s)
or
exists(string type | isRelevantType(type) |
summaryModel(type, _, s, _, _) or
summaryModel(type, _, _, s, _)
summaryModel(type, _, s, _, _, _) or
summaryModel(type, _, _, s, _, _)
)
or
typeVariableModel(_, s)
@@ -543,7 +557,7 @@ private API::Node getNodeFromPath(string type, AccessPath path) {
pragma[nomagic]
private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) {
summaryModel(type, basePath, "", output, "type")
summaryModel(type, basePath, "", output, "type", _)
}
pragma[nomagic]
@@ -621,9 +635,9 @@ module ModelOutput {
* Holds if a source model contributed `source` with the given `kind`, where `model` identifies the contributing model.
*/
cached
API::Node getASourceNode(string kind) {
API::Node getASourceNode(string kind, string model) {
exists(string type, string path |
sourceModel(type, path, kind) and
sourceModel(type, path, kind, model) and
result = getNodeFromPath(type, path)
)
}
@@ -632,9 +646,9 @@ module ModelOutput {
* Holds if a sink model contributed `sink` with the given `kind`, where `model` identifies the contributing model.
*/
cached
API::Node getASinkNode(string kind) {
API::Node getASinkNode(string kind, string model) {
exists(string type, string path |
sinkModel(type, path, kind) and
sinkModel(type, path, kind, model) and
result = getNodeFromPath(type, path)
)
}
@@ -644,10 +658,10 @@ module ModelOutput {
*/
cached
predicate relevantSummaryModel(
string type, string path, string input, string output, string kind
string type, string path, string input, string output, string kind, string model
) {
isRelevantType(type) and
summaryModel(type, path, input, output, kind)
summaryModel(type, path, input, output, kind, model)
}
/**
@@ -655,7 +669,7 @@ module ModelOutput {
*/
cached
predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) {
summaryModel(type, path, _, _, _) and
summaryModel(type, path, _, _, _, _) and
baseNode = getInvocationFromPath(type, path)
}
@@ -664,7 +678,7 @@ module ModelOutput {
*/
cached
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
summaryModel(type, path, _, _, _) and
summaryModel(type, path, _, _, _, _) and
baseNode = getNodeFromPath(type, path)
}
@@ -680,12 +694,22 @@ module ModelOutput {
import Specific::ModelOutputSpecific
private import codeql.mad.ModelValidation as SharedModelVal
/**
* Gets a node that a source model contributed with the given `kind`.
*
* This forwards to the two-argument `getASourceNode` and ignores which
* model contributed the node.
*/
API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) }
/**
* Gets a node that a sink model contributed with the given `kind`.
*
* This forwards to the two-argument `getASinkNode` and ignores which
* model contributed the node.
*/
API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) }
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind) }
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) }
predicate sinkKind(string kind) { sinkModel(_, _, kind) }
predicate sinkKind(string kind) { sinkModel(_, _, kind, _) }
predicate sourceKind(string kind) { sourceModel(_, _, kind) }
predicate sourceKind(string kind) { sourceModel(_, _, kind, _) }
}
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;

View File

@@ -8,13 +8,15 @@
*
* The kind `remote` represents a general remote flow source.
*/
extensible predicate sourceModel(string type, string path, string kind);
extensible predicate sourceModel(
string type, string path, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if the value at `(type, path)` should be seen as a sink
* of the given `kind`.
*/
extensible predicate sinkModel(string type, string path, string kind);
extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId);
/**
* Holds if in calls to `(type, path)`, the value referred to by `input`
@@ -23,7 +25,9 @@ extensible predicate sinkModel(string type, string path, string kind);
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
* respectively.
*/
extensible predicate summaryModel(string type, string path, string input, string output, string kind);
extensible predicate summaryModel(
string type, string path, string input, string output, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`.

View File

@@ -138,7 +138,7 @@ predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathT
pragma[nomagic]
private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) {
exists(string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(type, path, input, output, _) and
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
inputOrOutput = [input, output]
)
@@ -170,7 +170,7 @@ private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath
*/
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(string type, string path, API::CallNode base, AccessPath input, AccessPath output |
ModelOutput::relevantSummaryModel(type, path, input, output, kind) and
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and // TODO???
ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)

View File

@@ -157,12 +157,12 @@ class ExternalApiDataNode extends DataFlow::Node {
ExternalApiDataNode() {
exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
// Not already modeled as a taint step
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _, _) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
// Since we have modeled this explicitly, I don't see any cases where we would want to report this.
not exists(DataFlow::PostUpdateNode post |
post.getPreUpdateNode() = this and
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post, _)
)
}
}