Python: Add alert provenance plumbing.

This commit is contained in:
Anders Schack-Mulligen
2024-02-07 12:52:34 +01:00
parent f202661912
commit a8fc100108
13 changed files with 142 additions and 75 deletions

View File

@@ -31,8 +31,21 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
* DEPRECATED: Use `propagatesFlow` instead. * DEPRECATED: Use `propagatesFlow` instead.
*/ */
deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) { deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
this.propagatesFlow(input, output, preservesValue) this.propagatesFlow(input, output, preservesValue, _)
} }
/**
* Holds if data may flow from `input` to `output` through this callable,
* as given by the three-argument `propagatesFlow`.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
* `model` is bound to this callable itself (`model = this`).
*/
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
this.propagatesFlow(input, output, preservesValue) and model = this
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*
* As declared here the predicate is `none()`, i.e. it holds for no tuples.
* The four-argument `propagatesFlow` override in this class delegates to it.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
} }
deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack;

View File

@@ -3,6 +3,7 @@ private import DataFlowPublic
private import semmle.python.essa.SsaCompute private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportResolution private import semmle.python.dataflow.new.internal.ImportResolution
private import FlowSummaryImpl as FlowSummaryImpl private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.frameworks.data.ModelsAsData
// Since we allow extra data-flow steps from modeled frameworks, we import these // Since we allow extra data-flow steps from modeled frameworks, we import these
// up-front, to ensure these are included. This provides a more seamless experience from // up-front, to ensure these are included. This provides a more seamless experience from
// a user point of view, since they don't need to know they need to import a specific // a user point of view, since they don't need to know they need to import a specific
@@ -471,12 +472,12 @@ import StepRelationTransformations
* *
* It includes flow steps from flow summaries. * It includes flow steps from flow summaries.
*/ */
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo) simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo) and model = ""
or or
summaryLocalStep(nodeFrom, nodeTo) summaryLocalStep(nodeFrom, nodeTo, model)
or or
variableCaptureLocalFlowStep(nodeFrom, nodeTo) variableCaptureLocalFlowStep(nodeFrom, nodeTo) and model = ""
} }
/** /**
@@ -490,9 +491,9 @@ predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
LocalFlow::localFlowStep(nodeFrom, nodeTo) LocalFlow::localFlowStep(nodeFrom, nodeTo)
} }
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) { private predicate summaryLocalStep(Node nodeFrom, Node nodeTo, string model) {
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
nodeTo.(FlowSummaryNode).getSummaryNode(), true) nodeTo.(FlowSummaryNode).getSummaryNode(), true, model)
} }
predicate variableCaptureLocalFlowStep(Node nodeFrom, Node nodeTo) { predicate variableCaptureLocalFlowStep(Node nodeFrom, Node nodeTo) {
@@ -1078,6 +1079,14 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
/** Extra data-flow steps needed for lambda flow analysis. */ /** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() } predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
* Holds if `source` is the `asSource()` node of an API node returned by
* `ModelOutput::getASourceNode` for the model identified by `model`.
* The source kind is not constrained.
*/
predicate knownSourceModel(Node source, string model) {
source = ModelOutput::getASourceNode(_, model).asSource()
}
/**
* Holds if `sink` is the `asSink()` node of an API node returned by
* `ModelOutput::getASinkNode` for the model identified by `model`.
* The sink kind is not constrained.
*/
predicate knownSinkModel(Node sink, string model) {
sink = ModelOutput::getASinkNode(_, model).asSink()
}
/** /**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a * Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself. * side-effect, resulting in a summary from `p` to itself.

View File

@@ -642,7 +642,9 @@ newtype TContent =
or or
// //
// 2) summaries in data-extension files // 2) summaries in data-extension files
exists(string input, string output | ModelOutput::relevantSummaryModel(_, _, input, output, _) | exists(string input, string output |
ModelOutput::relevantSummaryModel(_, _, input, output, _, _)
|
attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim() attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim()
) )
} or } or

View File

@@ -12,7 +12,7 @@ private import FlowSummaryImpl as FlowSummaryImpl
* (intra-procedural) step. * (intra-procedural) step.
*/ */
predicate localFlowStep(Node nodeFrom, Node nodeTo) { predicate localFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) simpleLocalFlowStep(nodeFrom, nodeTo, _)
or or
// Simple flow through library code is included in the exposed local // Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural. // step relation, even though flow is technically inter-procedural.

View File

@@ -242,7 +242,7 @@ private module Cached {
*/ */
pragma[nomagic] pragma[nomagic]
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) { private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and simpleLocalFlowStep(nodeFrom, nodeTo, _) and
not nodeTo = any(ModuleVariableNode v).getARead() not nodeTo = any(ModuleVariableNode v).getARead()
} }

View File

@@ -24,10 +24,11 @@ private module Cached {
* global taint flow configurations. * global taint flow configurations.
*/ */
cached cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
localAdditionalTaintStep(nodeFrom, nodeTo) localAdditionalTaintStep(nodeFrom, nodeTo, model)
or or
any(AdditionalTaintStep a).step(nodeFrom, nodeTo) any(AdditionalTaintStep a).step(nodeFrom, nodeTo) and
model = "AdditionalTaintStep"
} }
/** /**
@@ -36,30 +37,34 @@ private module Cached {
* different objects. * different objects.
*/ */
cached cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
concatStep(nodeFrom, nodeTo) (
or concatStep(nodeFrom, nodeTo)
subscriptStep(nodeFrom, nodeTo) or
or subscriptStep(nodeFrom, nodeTo)
stringManipulation(nodeFrom, nodeTo) or
or stringManipulation(nodeFrom, nodeTo)
containerStep(nodeFrom, nodeTo) or
or containerStep(nodeFrom, nodeTo)
copyStep(nodeFrom, nodeTo) or
or copyStep(nodeFrom, nodeTo)
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo) or
or DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo) or
or DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo) or
or DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
awaitStep(nodeFrom, nodeTo) or
or awaitStep(nodeFrom, nodeTo)
asyncWithStep(nodeFrom, nodeTo) or
asyncWithStep(nodeFrom, nodeTo)
) and
model = ""
or or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom
.(DataFlowPrivate::FlowSummaryNode) .(DataFlowPrivate::FlowSummaryNode)
.getSummaryNode(), nodeTo.(DataFlowPrivate::FlowSummaryNode).getSummaryNode(), false) .getSummaryNode(), nodeTo.(DataFlowPrivate::FlowSummaryNode).getSummaryNode(), false,
model)
} }
} }

View File

@@ -34,7 +34,7 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Ordinary data flow // Ordinary data flow
DataFlow::localFlowStep(nodeFrom, nodeTo) DataFlow::localFlowStep(nodeFrom, nodeTo)
or or
localAdditionalTaintStep(nodeFrom, nodeTo) localAdditionalTaintStep(nodeFrom, nodeTo, _)
} }
/** /**

View File

@@ -24,7 +24,15 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
ContentFilter getFilterFromWithContentStep(Content content) { none() } ContentFilter getFilterFromWithContentStep(Content content) { none() }
// Callables // Callables
class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl; class SummarizedCallable instanceof FlowSummaryImpl::Private::SummarizedCallableImpl {
string toString() { result = super.toString() }
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
super.propagatesFlow(input, output, preservesValue, _)
}
}
// Summaries and their stacks // Summaries and their stacks
class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent; class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent;

View File

@@ -33,7 +33,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
string path; string path;
SummarizedCallableFromModel() { SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _) and ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
this = type + ";" + path this = type + ";" + path
} }
@@ -46,8 +46,10 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
) )
} }
override predicate propagatesFlow(string input, string output, boolean preservesValue) { override predicate propagatesFlow(
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | string input, string output, boolean preservesValue, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and kind = "value" and
preservesValue = true preservesValue = true
or or

View File

@@ -229,41 +229,55 @@ private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row
private predicate typeVariableModel(string row) { any(TypeVariableModelCsv s).row(inversePad(row)) } private predicate typeVariableModel(string row) { any(TypeVariableModelCsv s).row(inversePad(row)) }
/** Holds if a source model exists for the given parameters. */ /** Holds if a source model exists for the given parameters. */
predicate sourceModel(string type, string path, string kind) { predicate sourceModel(string type, string path, string kind, string model) {
exists(string row | exists(string row |
sourceModel(row) and sourceModel(row) and
row.splitAt(";", 0) = type and row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind row.splitAt(";", 2) = kind and
model = "SourceModelCsv"
) )
or or
Extensions::sourceModel(type, path, kind) exists(QlBuiltins::ExtensionId madId |
Extensions::sourceModel(type, path, kind, madId) and
model = "MaD:" + madId.toString()
)
} }
/** Holds if a sink model exists for the given parameters. */ /** Holds if a sink model exists for the given parameters. */
private predicate sinkModel(string type, string path, string kind) { private predicate sinkModel(string type, string path, string kind, string model) {
exists(string row | exists(string row |
sinkModel(row) and sinkModel(row) and
row.splitAt(";", 0) = type and row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and row.splitAt(";", 1) = path and
row.splitAt(";", 2) = kind row.splitAt(";", 2) = kind and
model = "SinkModelCsv"
) )
or or
Extensions::sinkModel(type, path, kind) exists(QlBuiltins::ExtensionId madId |
Extensions::sinkModel(type, path, kind, madId) and
model = "MaD:" + madId.toString()
)
} }
/** Holds if a summary model exists for the given parameters. */ /** Holds if a summary model exists for the given parameters. */
private predicate summaryModel(string type, string path, string input, string output, string kind) { private predicate summaryModel(
string type, string path, string input, string output, string kind, string model
) {
exists(string row | exists(string row |
summaryModel(row) and summaryModel(row) and
row.splitAt(";", 0) = type and row.splitAt(";", 0) = type and
row.splitAt(";", 1) = path and row.splitAt(";", 1) = path and
row.splitAt(";", 2) = input and row.splitAt(";", 2) = input and
row.splitAt(";", 3) = output and row.splitAt(";", 3) = output and
row.splitAt(";", 4) = kind row.splitAt(";", 4) = kind and
model = "SummaryModelCsv"
) )
or or
Extensions::summaryModel(type, path, input, output, kind) exists(QlBuiltins::ExtensionId madId |
Extensions::summaryModel(type, path, input, output, kind, madId) and
model = "MaD:" + madId.toString()
)
} }
/** Holds if a type model exists for the given parameters. */ /** Holds if a type model exists for the given parameters. */
@@ -294,9 +308,9 @@ private predicate typeVariableModel(string name, string path) {
*/ */
predicate isRelevantType(string type) { predicate isRelevantType(string type) {
( (
sourceModel(type, _, _) or sourceModel(type, _, _, _) or
sinkModel(type, _, _) or sinkModel(type, _, _, _) or
summaryModel(type, _, _, _, _) or summaryModel(type, _, _, _, _, _) or
typeModel(_, type, _) typeModel(_, type, _)
) and ) and
( (
@@ -319,9 +333,9 @@ pragma[nomagic]
predicate isRelevantFullPath(string type, string path) { predicate isRelevantFullPath(string type, string path) {
isRelevantType(type) and isRelevantType(type) and
( (
sourceModel(type, path, _) or sourceModel(type, path, _, _) or
sinkModel(type, path, _) or sinkModel(type, path, _, _) or
summaryModel(type, path, _, _, _) or summaryModel(type, path, _, _, _, _) or
typeModel(_, type, path) typeModel(_, type, path)
) )
} }
@@ -331,8 +345,8 @@ private predicate accessPathRange(string s) {
isRelevantFullPath(_, s) isRelevantFullPath(_, s)
or or
exists(string type | isRelevantType(type) | exists(string type | isRelevantType(type) |
summaryModel(type, _, s, _, _) or summaryModel(type, _, s, _, _, _) or
summaryModel(type, _, _, s, _) summaryModel(type, _, _, s, _, _)
) )
or or
typeVariableModel(_, s) typeVariableModel(_, s)
@@ -543,7 +557,7 @@ private API::Node getNodeFromPath(string type, AccessPath path) {
pragma[nomagic] pragma[nomagic]
private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) { private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) {
summaryModel(type, basePath, "", output, "type") summaryModel(type, basePath, "", output, "type", _)
} }
pragma[nomagic] pragma[nomagic]
@@ -621,9 +635,9 @@ module ModelOutput {
* Holds if a CSV source model contributed `source` with the given `kind`. * Holds if a CSV source model contributed `source` with the given `kind`.
*/ */
cached cached
API::Node getASourceNode(string kind) { API::Node getASourceNode(string kind, string model) {
exists(string type, string path | exists(string type, string path |
sourceModel(type, path, kind) and sourceModel(type, path, kind, model) and
result = getNodeFromPath(type, path) result = getNodeFromPath(type, path)
) )
} }
@@ -632,9 +646,9 @@ module ModelOutput {
* Holds if a CSV sink model contributed `sink` with the given `kind`. * Holds if a CSV sink model contributed `sink` with the given `kind`.
*/ */
cached cached
API::Node getASinkNode(string kind) { API::Node getASinkNode(string kind, string model) {
exists(string type, string path | exists(string type, string path |
sinkModel(type, path, kind) and sinkModel(type, path, kind, model) and
result = getNodeFromPath(type, path) result = getNodeFromPath(type, path)
) )
} }
@@ -644,10 +658,10 @@ module ModelOutput {
*/ */
cached cached
predicate relevantSummaryModel( predicate relevantSummaryModel(
string type, string path, string input, string output, string kind string type, string path, string input, string output, string kind, string model
) { ) {
isRelevantType(type) and isRelevantType(type) and
summaryModel(type, path, input, output, kind) summaryModel(type, path, input, output, kind, model)
} }
/** /**
@@ -655,7 +669,7 @@ module ModelOutput {
*/ */
cached cached
predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) { predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) {
summaryModel(type, path, _, _, _) and summaryModel(type, path, _, _, _, _) and
baseNode = getInvocationFromPath(type, path) baseNode = getInvocationFromPath(type, path)
} }
@@ -664,7 +678,7 @@ module ModelOutput {
*/ */
cached cached
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
summaryModel(type, path, _, _, _) and summaryModel(type, path, _, _, _, _) and
baseNode = getNodeFromPath(type, path) baseNode = getNodeFromPath(type, path)
} }
@@ -680,12 +694,22 @@ module ModelOutput {
import Specific::ModelOutputSpecific import Specific::ModelOutputSpecific
private import codeql.mad.ModelValidation as SharedModelVal private import codeql.mad.ModelValidation as SharedModelVal
/**
* Gets a node that a source model contributed with the given `kind`,
* regardless of which model contributed it.
*/
API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) }
/**
* Gets a node that a sink model contributed with the given `kind`,
* regardless of which model contributed it.
*/
API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) }
private module KindValConfig implements SharedModelVal::KindValidationConfigSig { private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind) } predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) }
predicate sinkKind(string kind) { sinkModel(_, _, kind) } predicate sinkKind(string kind) { sinkModel(_, _, kind, _) }
predicate sourceKind(string kind) { sourceModel(_, _, kind) } predicate sourceKind(string kind) { sourceModel(_, _, kind, _) }
} }
private module KindVal = SharedModelVal::KindValidation<KindValConfig>; private module KindVal = SharedModelVal::KindValidation<KindValConfig>;

View File

@@ -8,13 +8,15 @@
* *
* The kind `remote` represents a general remote flow source. * The kind `remote` represents a general remote flow source.
*/ */
extensible predicate sourceModel(string type, string path, string kind); extensible predicate sourceModel(
string type, string path, string kind, QlBuiltins::ExtensionId madId
);
/** /**
* Holds if the value at `(type, path)` should be seen as a sink * Holds if the value at `(type, path)` should be seen as a sink
* of the given `kind`. * of the given `kind`.
*/ */
extensible predicate sinkModel(string type, string path, string kind); extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId);
/** /**
* Holds if in calls to `(type, path)`, the value referred to by `input` * Holds if in calls to `(type, path)`, the value referred to by `input`
@@ -23,7 +25,9 @@ extensible predicate sinkModel(string type, string path, string kind);
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps, * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
* respectively. * respectively.
*/ */
extensible predicate summaryModel(string type, string path, string input, string output, string kind); extensible predicate summaryModel(
string type, string path, string input, string output, string kind, QlBuiltins::ExtensionId madId
);
/** /**
* Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`. * Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`.

View File

@@ -138,7 +138,7 @@ predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathT
pragma[nomagic] pragma[nomagic]
private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) { private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) {
exists(string type, string input, string output, string path | exists(string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(type, path, input, output, _) and ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and ModelOutput::resolvedSummaryBase(type, path, base) and
inputOrOutput = [input, output] inputOrOutput = [input, output]
) )
@@ -170,7 +170,7 @@ private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath
*/ */
predicate summaryStep(API::Node pred, API::Node succ, string kind) { predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(string type, string path, API::CallNode base, AccessPath input, AccessPath output | exists(string type, string path, API::CallNode base, AccessPath input, AccessPath output |
ModelOutput::relevantSummaryModel(type, path, input, output, kind) and ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and // TODO???
ModelOutput::resolvedSummaryBase(type, path, base) and ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output) succ = getNodeFromInputOutputPath(base, output)

View File

@@ -157,12 +157,12 @@ class ExternalApiDataNode extends DataFlow::Node {
ExternalApiDataNode() { ExternalApiDataNode() {
exists(InterestingExternalApiCall call | this = call.getArgument(_)) and exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
// Not already modeled as a taint step // Not already modeled as a taint step
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _, _) and
// for `list.append(x)`, we have an additional taint step from x -> [post] list. // for `list.append(x)`, we have an additional taint step from x -> [post] list.
// Since we have modeled this explicitly, I don't see any cases where we would want to report this. // Since we have modeled this explicitly, I don't see any cases where we would want to report this.
not exists(DataFlow::PostUpdateNode post | not exists(DataFlow::PostUpdateNode post |
post.getPreUpdateNode() = this and post.getPreUpdateNode() = this and
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post) TaintTrackingPrivate::defaultAdditionalTaintStep(_, post, _)
) )
} }
} }