Merge pull request #21051 from hvitved/shared/flow-summary-provenance-filtering

Shared: Provenance-based filtering of flow summaries
This commit is contained in:
Tom Hvitved
2026-01-26 17:24:34 +01:00
committed by GitHub
142 changed files with 15884 additions and 21193 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The predicate `SummarizedCallable.propagatesFlow` has been extended with the columns `Provenance p` and `boolean isExact`, and as a consequence the predicates `SummarizedCallable.hasProvenance` and `SummarizedCallable.hasExactModel` have been removed.

View File

@@ -22,30 +22,39 @@ deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack;
deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack;
/** A callable with a flow summary, identified by a unique string. */
abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable {
bindingset[this]
SummarizedCallable() { any() }
class Provenance = Impl::Public::Provenance;
/**
* DEPRECATED: Use `propagatesFlow` instead.
*/
deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
this.propagatesFlow(input, output, preservesValue, _)
/** Provides the `Range` class used to define the extent of `SummarizedCallable`. */
module SummarizedCallable {
/** A callable with a flow summary, identified by a unique string. */
abstract class Range extends LibraryCallable, Impl::Public::SummarizedCallable {
bindingset[this]
Range() { any() }
override predicate propagatesFlow(
string input, string output, boolean preservesValue, Provenance p, boolean isExact,
string model
) {
this.propagatesFlow(input, output, preservesValue) and
p = "manual" and
isExact = true and
model = this
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
}
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
this.propagatesFlow(input, output, preservesValue) and model = this
}
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*/
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
}
final private class SummarizedCallableFinal = SummarizedCallable::Range;
/** A callable with a flow summary, identified by a unique string. */
final class SummarizedCallable extends SummarizedCallableFinal,
Impl::Public::RelevantSummarizedCallable
{ }
deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack;

View File

@@ -18,6 +18,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
class SinkBase = Void;
predicate callableFromSource(SummarizedCallableBase c) { none() }
ArgumentPosition callbackSelfParameterPosition() { result.isLambdaSelf() }
ReturnKind getStandardReturnValueKind() { any() }

View File

@@ -30,7 +30,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
super.propagatesFlow(input, output, preservesValue, _)
super.propagatesFlow(input, output, preservesValue, _, _, _)
}
}

View File

@@ -674,7 +674,7 @@ module Flask {
*
* see https://flask.palletsprojects.com/en/2.3.x/api/#flask.render_template_string
*/
private class RenderTemplateStringSummary extends SummarizedCallable {
private class RenderTemplateStringSummary extends SummarizedCallable::Range {
RenderTemplateStringSummary() { this = "flask.render_template_string" }
override DataFlow::CallCfgNode getACall() {
@@ -700,7 +700,7 @@ module Flask {
*
* see https://flask.palletsprojects.com/en/2.3.x/api/#flask.stream_template_string
*/
private class StreamTemplateStringSummary extends SummarizedCallable {
private class StreamTemplateStringSummary extends SummarizedCallable::Range {
StreamTemplateStringSummary() { this = "flask.stream_template_string" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -3187,7 +3187,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.11/library/re.html#re-objects
*/
class RePatternSummary extends SummarizedCallable {
class RePatternSummary extends SummarizedCallable::Range {
RePatternSummary() { this = "re.Pattern" }
override DataFlow::CallCfgNode getACall() {
@@ -3227,7 +3227,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchSummary extends SummarizedCallable {
class ReMatchSummary extends SummarizedCallable::Range {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
override DataFlow::CallCfgNode getACall() {
@@ -3291,7 +3291,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchMethodsSummary extends SummarizedCallable {
class ReMatchMethodsSummary extends SummarizedCallable::Range {
string methodName;
ReMatchMethodsSummary() {
@@ -3335,7 +3335,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#functions
*/
class ReFunctionsSummary extends SummarizedCallable {
class ReFunctionsSummary extends SummarizedCallable::Range {
string methodName;
ReFunctionsSummary() {
@@ -4183,7 +4183,7 @@ module StdlibPrivate {
*
* see https://docs.python.org/3/library/stdtypes.html#dict
*/
class DictSummary extends SummarizedCallable {
class DictSummary extends SummarizedCallable::Range {
DictSummary() { this = "builtins.dict" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }
@@ -4222,7 +4222,7 @@ module StdlibPrivate {
}
/** A flow summary for `list`. */
class ListSummary extends SummarizedCallable {
class ListSummary extends SummarizedCallable::Range {
ListSummary() { this = "builtins.list" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }
@@ -4252,7 +4252,7 @@ module StdlibPrivate {
}
/** A flow summary for `tuple`. */
class TupleSummary extends SummarizedCallable {
class TupleSummary extends SummarizedCallable::Range {
TupleSummary() { this = "builtins.tuple" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }
@@ -4277,7 +4277,7 @@ module StdlibPrivate {
}
/** A flow summary for `set`. */
class SetSummary extends SummarizedCallable {
class SetSummary extends SummarizedCallable::Range {
SetSummary() { this = "builtins.set" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }
@@ -4307,7 +4307,7 @@ module StdlibPrivate {
}
/** A flow summary for `frozenset`. */
class FrozensetSummary extends SummarizedCallable {
class FrozensetSummary extends SummarizedCallable::Range {
FrozensetSummary() { this = "builtins.frozenset" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }
@@ -4325,7 +4325,7 @@ module StdlibPrivate {
// Flow summaries for functions operating on containers
// ---------------------------------------------------------------------------
/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
class ReversedSummary extends SummarizedCallable::Range {
ReversedSummary() { this = "builtins.reversed" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("reversed").getACall() }
@@ -4355,7 +4355,7 @@ module StdlibPrivate {
}
/** A flow summary for `sorted`. */
class SortedSummary extends SummarizedCallable {
class SortedSummary extends SummarizedCallable::Range {
SortedSummary() { this = "builtins.sorted" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() }
@@ -4387,7 +4387,7 @@ module StdlibPrivate {
}
/** A flow summary for `iter`. */
class IterSummary extends SummarizedCallable {
class IterSummary extends SummarizedCallable::Range {
IterSummary() { this = "builtins.iter" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() }
@@ -4417,7 +4417,7 @@ module StdlibPrivate {
}
/** A flow summary for `next`. */
class NextSummary extends SummarizedCallable {
class NextSummary extends SummarizedCallable::Range {
NextSummary() { this = "builtins.next" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() }
@@ -4447,7 +4447,7 @@ module StdlibPrivate {
}
/** A flow summary for `map`. */
class MapSummary extends SummarizedCallable {
class MapSummary extends SummarizedCallable::Range {
MapSummary() { this = "builtins.map" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("map").getACall() }
@@ -4482,7 +4482,7 @@ module StdlibPrivate {
}
/** A flow summary for `filter`. */
class FilterSummary extends SummarizedCallable {
class FilterSummary extends SummarizedCallable::Range {
FilterSummary() { this = "builtins.filter" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("filter").getACall() }
@@ -4508,7 +4508,7 @@ module StdlibPrivate {
}
/** A flow summary for `enumerate`. */
class EnumerateSummary extends SummarizedCallable {
class EnumerateSummary extends SummarizedCallable::Range {
EnumerateSummary() { this = "builtins.enumerate" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("enumerate").getACall() }
@@ -4534,7 +4534,7 @@ module StdlibPrivate {
}
/** A flow summary for `zip`. */
class ZipSummary extends SummarizedCallable {
class ZipSummary extends SummarizedCallable::Range {
ZipSummary() { this = "builtins.zip" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("zip").getACall() }
@@ -4568,7 +4568,7 @@ module StdlibPrivate {
// Flow summaries for container methods
// ---------------------------------------------------------------------------
/** A flow summary for `copy`. */
class CopySummary extends SummarizedCallable {
class CopySummary extends SummarizedCallable::Range {
CopySummary() { this = "collection.copy" }
override DataFlow::CallCfgNode getACall() {
@@ -4591,7 +4591,7 @@ module StdlibPrivate {
}
/** A flow summary for `copy.replace`. */
class ReplaceSummary extends SummarizedCallable {
class ReplaceSummary extends SummarizedCallable::Range {
ReplaceSummary() { this = "copy.replace" }
override DataFlow::CallCfgNode getACall() {
@@ -4624,7 +4624,7 @@ module StdlibPrivate {
* It also handles the default value when `pop` is called
* on a dictionary, since that also does not depend on the key.
*/
class PopSummary extends SummarizedCallable {
class PopSummary extends SummarizedCallable::Range {
PopSummary() { this = "collection.pop" }
override DataFlow::CallCfgNode getACall() {
@@ -4655,7 +4655,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.pop` */
class DictPopSummary extends SummarizedCallable {
class DictPopSummary extends SummarizedCallable::Range {
string key;
DictPopSummary() {
@@ -4678,7 +4678,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.get` at specific content. */
class DictGetSummary extends SummarizedCallable {
class DictGetSummary extends SummarizedCallable::Range {
string key;
DictGetSummary() {
@@ -4706,7 +4706,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.get` disregarding content. */
class DictGetAnySummary extends SummarizedCallable {
class DictGetAnySummary extends SummarizedCallable::Range {
DictGetAnySummary() { this = "dict.get" }
override DataFlow::CallCfgNode getACall() {
@@ -4729,7 +4729,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.popitem` */
class DictPopitemSummary extends SummarizedCallable {
class DictPopitemSummary extends SummarizedCallable::Range {
DictPopitemSummary() { this = "dict.popitem" }
override DataFlow::CallCfgNode getACall() {
@@ -4753,7 +4753,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
*/
class DictSetdefaultSummary extends SummarizedCallable {
class DictSetdefaultSummary extends SummarizedCallable::Range {
DictSetdefaultSummary() { this = "dict.setdefault" }
override DataFlow::CallCfgNode getACall() {
@@ -4778,7 +4778,7 @@ module StdlibPrivate {
* This summary handles read and store steps. See `DictSetdefaultSummary`
* for the dataflow steps.
*/
class DictSetdefaultKeySummary extends SummarizedCallable {
class DictSetdefaultKeySummary extends SummarizedCallable::Range {
string key;
DictSetdefaultKeySummary() {
@@ -4811,7 +4811,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.values
*/
class DictValues extends SummarizedCallable {
class DictValues extends SummarizedCallable::Range {
DictValues() { this = "dict.values" }
override DataFlow::CallCfgNode getACall() {
@@ -4840,7 +4840,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.keys
*/
class DictKeys extends SummarizedCallable {
class DictKeys extends SummarizedCallable::Range {
DictKeys() { this = "dict.keys" }
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "keys") }
@@ -4862,7 +4862,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.items
*/
class DictItems extends SummarizedCallable {
class DictItems extends SummarizedCallable::Range {
DictItems() { this = "dict.items" }
override DataFlow::CallCfgNode getACall() {
@@ -4892,7 +4892,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable
*/
class ListAppend extends SummarizedCallable {
class ListAppend extends SummarizedCallable::Range {
ListAppend() { this = "list.append" }
override DataFlow::CallCfgNode getACall() {
@@ -4921,7 +4921,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add
*/
class SetAdd extends SummarizedCallable {
class SetAdd extends SummarizedCallable::Range {
SetAdd() { this = "set.add" }
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "add") }
@@ -4948,7 +4948,7 @@ module StdlibPrivate {
*
* See https://devdocs.io/python~3.11/library/os#os.getenv
*/
class OsGetEnv extends SummarizedCallable {
class OsGetEnv extends SummarizedCallable::Range {
OsGetEnv() { this = "os.getenv" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -33,12 +33,16 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang
}
}
private class SummarizedCallableFromModel extends SummarizedCallable {
private class SummarizedCallableFromModel extends SummarizedCallable::Range {
string type;
string path;
string input_;
string output_;
string kind;
string model_;
SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
ModelOutput::relevantSummaryModel(type, path, input_, output_, kind, model_) and
this = type + ";" + path
}
@@ -52,14 +56,13 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
}
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
string input, string output, boolean preservesValue, Provenance p, boolean isExact, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or
kind = "taint" and
preservesValue = false
)
input = input_ and
output = output_ and
(if kind = "value" then preservesValue = true else preservesValue = false) and
p = "manual" and
isExact = true and
model = model_
}
}

View File

@@ -10,7 +10,7 @@ private import semmle.python.ApiGraphs
module RecursionGuard {
private import semmle.python.dataflow.new.internal.TypeTrackingImpl::TypeTrackingInput as TT
private class RecursionGuard extends SummarizedCallable {
private class RecursionGuard extends SummarizedCallable::Range {
RecursionGuard() { this = "RecursionGuard" }
override DataFlow::CallCfgNode getACall() {
@@ -26,7 +26,7 @@ module RecursionGuard {
}
}
private class SummarizedCallableIdentity extends SummarizedCallable {
private class SummarizedCallableIdentity extends SummarizedCallable::Range {
SummarizedCallableIdentity() { this = "identity" }
override DataFlow::CallCfgNode getACall() {
@@ -43,7 +43,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable {
}
// For lambda flow to work, implement lambdaCall and lambdaCreation
private class SummarizedCallableApplyLambda extends SummarizedCallable {
private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
SummarizedCallableApplyLambda() { this = "apply_lambda" }
override DataFlow::CallCfgNode getACall() {
@@ -63,7 +63,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
}
}
private class SummarizedCallableReversed extends SummarizedCallable {
private class SummarizedCallableReversed extends SummarizedCallable::Range {
SummarizedCallableReversed() { this = "list_reversed" }
override DataFlow::CallCfgNode getACall() {
@@ -79,7 +79,7 @@ private class SummarizedCallableReversed extends SummarizedCallable {
}
}
private class SummarizedCallableMap extends SummarizedCallable {
private class SummarizedCallableMap extends SummarizedCallable::Range {
SummarizedCallableMap() { this = "list_map" }
override DataFlow::CallCfgNode getACall() {
@@ -99,7 +99,7 @@ private class SummarizedCallableMap extends SummarizedCallable {
}
}
private class SummarizedCallableAppend extends SummarizedCallable {
private class SummarizedCallableAppend extends SummarizedCallable::Range {
SummarizedCallableAppend() { this = "append_to_list" }
override DataFlow::CallCfgNode getACall() {
@@ -119,7 +119,7 @@ private class SummarizedCallableAppend extends SummarizedCallable {
}
}
private class SummarizedCallableJsonLoads extends SummarizedCallable {
private class SummarizedCallableJsonLoads extends SummarizedCallable::Range {
SummarizedCallableJsonLoads() { this = "json.loads" }
override DataFlow::CallCfgNode getACall() {
@@ -138,7 +138,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable {
}
// Repeated summaries
private class SummarizedCallableWithSubpath extends SummarizedCallable {
private class SummarizedCallableWithSubpath extends SummarizedCallable::Range {
SummarizedCallableWithSubpath() { this = "extracted_package.functions.with_subpath" }
override DataFlow::CallCfgNode getACall() {
@@ -164,7 +164,7 @@ private class SummarizedCallableWithSubpath extends SummarizedCallable {
}
}
private class SummarizedCallableWithSubpathAgain extends SummarizedCallable {
private class SummarizedCallableWithSubpathAgain extends SummarizedCallable::Range {
SummarizedCallableWithSubpathAgain() { this = "extracted_package.functions.with_subpathII" }
override DataFlow::CallCfgNode getACall() {
@@ -190,7 +190,7 @@ private class SummarizedCallableWithSubpathAgain extends SummarizedCallable {
}
}
private class SummarizedCallableWithoutSubpath extends SummarizedCallable {
private class SummarizedCallableWithoutSubpath extends SummarizedCallable::Range {
SummarizedCallableWithoutSubpath() { this = "extracted_package.functions.without_subpath" }
override DataFlow::CallCfgNode getACall() {
@@ -216,7 +216,7 @@ private class SummarizedCallableWithoutSubpath extends SummarizedCallable {
}
}
private class SummarizedCallableWithoutSubpathAgain extends SummarizedCallable {
private class SummarizedCallableWithoutSubpathAgain extends SummarizedCallable::Range {
SummarizedCallableWithoutSubpathAgain() { this = "extracted_package.functions.without_subpathII" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -10,7 +10,7 @@ private import semmle.python.ApiGraphs
module RecursionGuard {
private import semmle.python.dataflow.new.internal.TypeTrackingImpl::TypeTrackingInput as TT
private class RecursionGuard extends SummarizedCallable {
private class RecursionGuard extends SummarizedCallable::Range {
RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
override DataFlow::CallCfgNode getACall() {
@@ -32,7 +32,7 @@ module RecursionGuard {
}
}
private class SummarizedCallableIdentity extends SummarizedCallable {
private class SummarizedCallableIdentity extends SummarizedCallable::Range {
SummarizedCallableIdentity() { this = "TTS_identity" }
override DataFlow::CallCfgNode getACall() { none() }
@@ -51,7 +51,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable {
}
// For lambda flow to work, implement lambdaCall and lambdaCreation
private class SummarizedCallableApplyLambda extends SummarizedCallable {
private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
SummarizedCallableApplyLambda() { this = "TTS_apply_lambda" }
override DataFlow::CallCfgNode getACall() { none() }
@@ -73,7 +73,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
}
}
private class SummarizedCallableReversed extends SummarizedCallable {
private class SummarizedCallableReversed extends SummarizedCallable::Range {
SummarizedCallableReversed() { this = "TTS_reversed" }
override DataFlow::CallCfgNode getACall() { none() }
@@ -91,7 +91,7 @@ private class SummarizedCallableReversed extends SummarizedCallable {
}
}
private class SummarizedCallableMap extends SummarizedCallable {
private class SummarizedCallableMap extends SummarizedCallable::Range {
SummarizedCallableMap() { this = "TTS_list_map" }
override DataFlow::CallCfgNode getACall() { none() }
@@ -113,7 +113,7 @@ private class SummarizedCallableMap extends SummarizedCallable {
}
}
private class SummarizedCallableAppend extends SummarizedCallable {
private class SummarizedCallableAppend extends SummarizedCallable::Range {
SummarizedCallableAppend() { this = "TTS_append_to_list" }
override DataFlow::CallCfgNode getACall() { none() }
@@ -135,7 +135,7 @@ private class SummarizedCallableAppend extends SummarizedCallable {
}
}
private class SummarizedCallableJsonLoads extends SummarizedCallable {
private class SummarizedCallableJsonLoads extends SummarizedCallable::Range {
SummarizedCallableJsonLoads() { this = "TTS_json.loads" }
override DataFlow::CallCfgNode getACall() {
@@ -156,7 +156,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable {
}
// read and store
private class SummarizedCallableReadSecret extends SummarizedCallable {
private class SummarizedCallableReadSecret extends SummarizedCallable::Range {
SummarizedCallableReadSecret() { this = "TTS_read_secret" }
override DataFlow::CallCfgNode getACall() { none() }
@@ -174,7 +174,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable {
}
}
private class SummarizedCallableSetSecret extends SummarizedCallable {
private class SummarizedCallableSetSecret extends SummarizedCallable::Range {
SummarizedCallableSetSecret() { this = "TTS_set_secret" }
override DataFlow::CallCfgNode getACall() { none() }