Merge pull request #19445 from asgerf/js/summaries-with-fallback

JS: Generate flow summaries from summaryModels; only generate steps as a fallback
This commit is contained in:
Asger F
2025-05-13 14:49:38 +02:00
committed by GitHub
7 changed files with 201 additions and 51 deletions

View File

@@ -264,3 +264,7 @@ module Stage {
cached
predicate backref() { optionalStep(_, _, _) }
}
predicate unsupportedCallable = Private::unsupportedCallable/1;
predicate unsupportedCallable = Private::unsupportedCallable/4;

View File

@@ -19,6 +19,7 @@
private import javascript
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate
private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput
@@ -45,12 +46,94 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang
}
}
private class SummarizedCallableFromModel extends DataFlow::SummarizedCallable {
string type;
string path;
SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
this = type + ";" + path
}
override DataFlow::InvokeNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) }
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or
kind = "taint" and
preservesValue = false
)
}
predicate hasTypeAndPath(string type_, string path_) { type = type_ and path = path_ }
predicate isUnsupportedByFlowSummaries() { unsupportedCallable(this) }
}
private predicate shouldInduceStepsFromSummary(string type, string path) {
exists(SummarizedCallableFromModel callable |
callable.isUnsupportedByFlowSummaries() and
callable.hasTypeAndPath(type, path)
)
}
/**
* Holds if `path` is an input or output spec for a summary with the given `base` node.
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
exists(string type, string input, string output, string path |
// If the summary for 'callable' could not be handled as a flow summary, we need to evaluate
// its inputs and outputs to a set of nodes, so we can generate steps instead.
shouldInduceStepsFromSummary(type, path) and
ModelOutput::resolvedSummaryBase(type, path, base) and
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
inputOrOutput = [input, output]
)
}
/**
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) {
relevantInputOutputPath(baseNode, path) and
(
n = 1 and
result = Shared::getSuccessorFromInvoke(baseNode, path.getToken(0))
or
result =
Shared::getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1),
path.getToken(n - 1))
)
}
/**
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) {
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
}
private predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output |
shouldInduceStepsFromSummary(type, path) and
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
}
/**
* Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes.
*/
private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) {
exists(API::Node predNode, API::Node succNode |
Specific::summaryStep(predNode, succNode, kind) and
summaryStep(predNode, succNode, kind) and
pred = predNode.asSink() and
succ = succNode.asSource()
)

View File

@@ -272,51 +272,6 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat
)
}
/**
* Holds if `path` is an input or output spec for a summary with the given `base` node.
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
exists(string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
inputOrOutput = [input, output]
)
}
/**
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) {
relevantInputOutputPath(baseNode, path) and
(
n = 1 and
result = getSuccessorFromInvoke(baseNode, path.getToken(0))
or
result =
getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1))
)
}
/**
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) {
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
}
/**
* Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`.
*/
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output |
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
}
class InvokeNode = API::InvokeNode;
/** Gets an `InvokeNode` corresponding to an invocation of `node`. */

View File

@@ -39,7 +39,7 @@ function strToStr() {
}
function strToArray() {
sink(s.chop(source("s1"), 3)); // $ MISSING: hasTaintFlow=s1
sink(s.chop(source("s1"), 3)); // $ hasTaintFlow=s1
sink(s.chars(source("s2"))[0]); // $ hasTaintFlow=s2
sink(s.words(source("s3"))[0]); // $ hasTaintFlow=s3
sink(s.lines(source("s7"))[0]); // $ hasTaintFlow=s7
@@ -97,7 +97,7 @@ function multiSource() {
function chaining() {
sink(s(source("s1"))
.slugify().capitalize().decapitalize().clean().cleanDiacritics()
.slugify().capitalize().decapitalize().clean().cleanDiacritics()
.swapCase().escapeHTML().unescapeHTML().wrap().dedent()
.reverse().pred().succ().titleize().camelize().classify()
.underscored().dasherize().humanize().trim().ltrim().rtrim()
@@ -119,8 +119,8 @@ function chaining() {
.q(source("s17")).ljust(10, source("s18"))
.rjust(10, source("s19"))); // $ hasTaintFlow=s16 hasTaintFlow=s17 hasTaintFlow=s18 hasTaintFlow=s19
sink(s(source("s20")).tap(function(value) {
return value + source("s21");
sink(s(source("s20")).tap(function(value) {
return value + source("s21");
}).value()); // $ hasTaintFlow=s20 hasTaintFlow=s21
}

View File

@@ -1,4 +1,21 @@
legacyDataFlowDifference
| test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | only flow with NEW data flow library |
| test.js:6:22:6:29 | source() | test.js:6:8:6:30 | preserv ... urce()) | only flow with NEW data flow library |
| test.js:7:41:7:48 | source() | test.js:7:8:7:49 | require ... urce()) | only flow with NEW data flow library |
| test.js:11:38:11:45 | source() | test.js:11:8:11:55 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:13:44:13:51 | source() | test.js:13:8:13:55 | testlib ... e(), 1) | only flow with NEW data flow library |
| test.js:17:47:17:54 | source() | test.js:17:8:17:61 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:18:50:18:57 | source() | test.js:18:8:18:61 | testlib ... e(), 1) | only flow with NEW data flow library |
| test.js:19:53:19:60 | source() | test.js:19:8:19:61 | testlib ... urce()) | only flow with NEW data flow library |
| test.js:21:34:21:41 | source() | test.js:21:8:21:51 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:22:37:22:44 | source() | test.js:22:8:22:51 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:23:40:23:47 | source() | test.js:23:8:23:51 | testlib ... e(), 1) | only flow with NEW data flow library |
| test.js:24:43:24:50 | source() | test.js:24:8:24:51 | testlib ... urce()) | only flow with NEW data flow library |
| test.js:31:29:31:36 | source() | test.js:32:10:32:10 | y | only flow with NEW data flow library |
| test.js:37:29:37:36 | source() | test.js:38:10:38:10 | y | only flow with NEW data flow library |
| test.js:43:29:43:36 | source() | test.js:44:10:44:10 | y | only flow with NEW data flow library |
| test.js:47:33:47:40 | source() | test.js:49:10:49:13 | this | only flow with NEW data flow library |
| test.js:102:16:102:34 | testlib.getSource() | test.js:104:8:104:24 | source.continue() | only flow with NEW data flow library |
consistencyIssue
taintFlow
| guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData |

View File

@@ -78,7 +78,9 @@ edges
| ReflectedXss.js:22:19:22:26 | req.body | ReflectedXss.js:22:12:22:27 | marked(req.body) | provenance | |
| ReflectedXss.js:29:7:32:4 | mytable | ReflectedXss.js:33:12:33:18 | mytable | provenance | |
| ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | ReflectedXss.js:29:7:32:4 | mytable | provenance | |
| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | |
| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | |
| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | provenance | |
| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | provenance | |
| ReflectedXss.js:41:31:41:38 | req.body | ReflectedXss.js:41:12:41:39 | convert ... q.body) | provenance | |
| ReflectedXss.js:63:14:63:21 | req.body | ReflectedXss.js:63:39:63:42 | file | provenance | |
| ReflectedXss.js:63:39:63:42 | file | ReflectedXss.js:64:16:64:19 | file | provenance | |
@@ -253,6 +255,8 @@ nodes
| ReflectedXss.js:28:12:28:19 | req.body | semmle.label | req.body |
| ReflectedXss.js:29:7:32:4 | mytable | semmle.label | mytable |
| ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | semmle.label | table([ ... ce\\n ]) |
| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | semmle.label | [\\n [ ... rce\\n ] [1, 1] |
| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | semmle.label | ['body', req.body] [1] |
| ReflectedXss.js:31:14:31:21 | req.body | semmle.label | req.body |
| ReflectedXss.js:33:12:33:18 | mytable | semmle.label | mytable |
| ReflectedXss.js:40:12:40:19 | req.body | semmle.label | req.body |

View File

@@ -530,6 +530,93 @@ module Make<
}
}
private predicate isNonLocalSummaryComponent(SummaryComponent c) {
c instanceof TArgumentSummaryComponent or
c instanceof TParameterSummaryComponent or
c instanceof TReturnSummaryComponent
}
private predicate isLocalSummaryComponent(SummaryComponent c) {
not isNonLocalSummaryComponent(c)
}
/**
* Holds if `s` is a valid input stack, in the sense that we generate a data flow graph
* that faithfully represents this flow, and lambda-tracking can be expected to track
* lambdas to the relevant callbacks in practice.
*/
private predicate isSupportedInputStack(SummaryComponentStack s) {
// Argument[n].*
s.length() = 1 and
s.head() instanceof TArgumentSummaryComponent
or
// Argument[n].ReturnValue.*
s.length() = 2 and
s.head() instanceof TReturnSummaryComponent and
s.tail().head() instanceof TArgumentSummaryComponent
or
// Argument[n].Parameter[n].Content.*
s.length() = 3 and
s.head() instanceof TContentSummaryComponent and
s.tail().head() instanceof TParameterSummaryComponent and
s.drop(2).head() instanceof TArgumentSummaryComponent
or
isSupportedInputStack(s.tail()) and
isLocalSummaryComponent(s.head())
}
private predicate isSupportedOutputStack1(SummaryComponentStack s) {
// ReturnValue.*
s.length() = 1 and
s.head() instanceof TReturnSummaryComponent
or
// Argument[n].Content.*
s.length() = 2 and
s.head() instanceof TContentSummaryComponent and
s.tail().head() instanceof TArgumentSummaryComponent
or
// Argument[n].Parameter[n].*
s.length() = 2 and
s.head() instanceof TParameterSummaryComponent and
s.tail().head() instanceof TArgumentSummaryComponent
or
isSupportedOutputStack1(s.tail()) and
isLocalSummaryComponent(s.head())
}
/** Like `isSupportedInputStack` but for output stacks. */
private predicate isSupportedOutputStack(SummaryComponentStack s) {
isSupportedOutputStack1(s)
or
// `Argument[n]` not followed by anything. Needs to be outside the recursion.
s.length() = 1 and
s.head() instanceof TArgumentSummaryComponent
}
/**
* Holds if `callable` has an unsupported flow `input -> output`.
*
* `whichOne` indicates if the `input` or `output` contains the unsupported sequence.
*/
predicate unsupportedCallable(
SummarizedCallableImpl callable, SummaryComponentStack input, SummaryComponentStack output,
string whichOne
) {
callable.propagatesFlow(input, output, _, _) and
(
not isSupportedInputStack(input) and whichOne = "input"
or
not isSupportedOutputStack(output) and whichOne = "output"
)
}
/**
* Holds if `callable` has an unsupported flow.
*/
predicate unsupportedCallable(SummarizedCallableImpl callable) {
unsupportedCallable(callable, _, _, _)
}
private predicate summarySpec(string spec) {
exists(SummarizedCallable c |
c.propagatesFlow(spec, _, _, _)