Move the definitions of isEffectiveSink and getAReasonSinkExcluded to the base class.

They can now be implemented generically for all sink types.
This commit is contained in:
tiferet
2022-11-16 11:47:24 -08:00
parent fc56c5a022
commit eab270eb84
11 changed files with 44 additions and 269 deletions

View File

@@ -6,7 +6,7 @@
private import javascript as JS
import EndpointTypes
import EndpointCharacteristics
import EndpointCharacteristics as EndpointCharacteristics
/**
* EXPERIMENTAL. This API may change in the future.
@@ -48,7 +48,7 @@ abstract class AtmConfig extends string {
final predicate isKnownSink(JS::DataFlow::Node sink) {
// If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
// known sink for the class.
exists(EndpointCharacteristic characteristic |
exists(EndpointCharacteristics::EndpointCharacteristic characteristic |
characteristic.getEndpoints(sink) and
characteristic
.getImplications(this.getASinkEndpointType(), true, characteristic.maximalConfidence())
@@ -69,7 +69,26 @@ abstract class AtmConfig extends string {
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
* an effective sink, i.e. one considered as a possible sink of flow in the boosted query.
*/
predicate isEffectiveSink(JS::DataFlow::Node candidateSink) { none() }
final predicate isEffectiveSink(JS::DataFlow::Node candidateSink) {
not exists(getAReasonSinkExcluded(candidateSink))
}
final EndpointCharacteristics::EndpointCharacteristic getAReasonSinkExcluded(
JS::DataFlow::Node candidateSink
) {
// An endpoint is an effective sink if it has neither standard endpoint filter characteristics nor endpoint filter
// characteristics that are specific to this sink type.
exists(EndpointCharacteristics::StandardEndpointFilterCharacteristic standardFilter |
standardFilter.getEndpoints(candidateSink) and
result = standardFilter
)
or
exists(EndpointCharacteristics::EndpointFilterCharacteristic specificFilter |
specificFilter.getEndpoints(candidateSink) and
specificFilter.getImplications(getASinkEndpointType(), false, _) and
result = specificFilter
)
}
/**
* EXPERIMENTAL. This API may change in the future.

View File

@@ -11,81 +11,6 @@ import AdaptiveThreatModeling
private import CoreKnowledge as CoreKnowledge
private import StandardEndpointFilters as StandardEndpointFilters
module SinkEndpointFilter {
/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
// additional databases accesses that aren't modeled yet
call.(DataFlow::MethodCallNode).getMethodName() =
["create", "createCollection", "createIndexes"] and
result = "matches database access call heuristic"
or
// Remove modeled sinks
CoreKnowledge::isArgumentToKnownLibrarySinkFunction(sinkCandidate) and
result = "modeled sink"
or
// Remove common kinds of unlikely sinks
CoreKnowledge::isKnownStepSrc(sinkCandidate) and
result = "predecessor in a modeled flow step"
or
// Remove modeled database calls. Arguments to modeled calls are very likely to be modeled
// as sinks if they are true positives. Therefore arguments that are not modeled as sinks
// are unlikely to be true positives.
call instanceof DatabaseAccess and
result = "modeled database access"
or
// Remove calls to APIs that aren't relevant to NoSQL injection
call.getReceiver() instanceof Http::RequestNode and
result = "receiver is a HTTP request expression"
or
call.getReceiver() instanceof Http::ResponseNode and
result = "receiver is a HTTP response expression"
)
or
// Require NoSQL injection sink candidates to be (a) direct arguments to external library calls
// or (b) heuristic sinks for NoSQL injection.
//
// ## Direct arguments to external library calls
//
// The `StandardEndpointFilters::flowsToArgumentOfLikelyExternalLibraryCall` endpoint filter
// allows sink candidates which are within object literals or array literals, for example
// `req.sendFile(_, { path: ENDPOINT })`.
//
// However, the NoSQL injection query deals differently with these types of sinks compared to
// other security queries. Other security queries such as SQL injection tend to treat
// `ENDPOINT` as the ground truth sink, but the NoSQL injection query instead treats
// `{ path: ENDPOINT }` as the ground truth sink and defines an additional flow step to ensure
// data flows from `ENDPOINT` to the ground truth sink `{ path: ENDPOINT }`.
//
// Therefore for the NoSQL injection boosted query, we must ignore sink candidates within object
// literals or array literals, to avoid having multiple alerts for the same security
// vulnerability (one FP where the sink is `ENDPOINT` and one TP where the sink is
// `{ path: ENDPOINT }`). We accomplish this by directly testing that the sink candidate is an
// argument of a likely external library call.
//
// ## Heuristic sinks
//
// We also allow heuristic sinks in addition to direct arguments to external library calls.
// These are copied from the `HeuristicNosqlInjectionSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
not sinkCandidate = StandardEndpointFilters::getALikelyExternalLibraryCall().getAnArgument() and
not (
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(nosql|query)") or
isArgTo(sinkCandidate, "(?i)(query)")
) and
result = "not a direct argument to a likely external library call or a heuristic sink"
}
}
class NosqlInjectionAtmConfig extends AtmConfig {
NosqlInjectionAtmConfig() { this = "NosqlInjectionATMConfig" }
@@ -93,10 +18,6 @@ class NosqlInjectionAtmConfig extends AtmConfig {
source instanceof NosqlInjection::Source or TaintedObject::isSource(source, _)
}
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
}
override EndpointType getASinkEndpointType() { result instanceof NosqlInjectionSinkType }
}

View File

@@ -10,65 +10,11 @@ import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters
/**
* This module provides logic to filter candidate sinks to those which are likely SQL injection
* sinks.
*/
module SinkEndpointFilter {
private import javascript
private import SQL
/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
// prepared statements for SQL
any(DataFlow::CallNode cn | cn.getCalleeName() = "prepare")
.getAMethodCall("run")
.getAnArgument() = sinkCandidate and
result = "prepared SQL statement"
or
sinkCandidate instanceof DataFlow::ArrayCreationNode and
result = "array creation"
or
// UI is unrelated to SQL
call.getCalleeName().regexpMatch("(?i).*(render|html).*") and
result = "HTML / rendering"
)
or
// Require SQL injection sink candidates to be (a) arguments to external library calls
// (possibly indirectly), or (b) heuristic sinks.
//
// Heuristic sinks are copied from the `HeuristicSqlInjectionSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
not StandardEndpointFilters::flowsToArgumentOfLikelyExternalLibraryCall(sinkCandidate) and
not (
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(sql|query)") or
isArgTo(sinkCandidate, "(?i)(query)") or
isConcatenatedWithString(sinkCandidate,
"(?s).*(ALTER|COUNT|CREATE|DATABASE|DELETE|DISTINCT|DROP|FROM|GROUP|INSERT|INTO|LIMIT|ORDER|SELECT|TABLE|UPDATE|WHERE).*")
) and
result = "not an argument to a likely external library call or a heuristic sink"
}
}
class SqlInjectionAtmConfig extends AtmConfig {
SqlInjectionAtmConfig() { this = "SqlInjectionATMConfig" }
override predicate isKnownSource(DataFlow::Node source) { source instanceof SqlInjection::Source }
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
}
override EndpointType getASinkEndpointType() { result instanceof SqlInjectionSinkType }
}

View File

@@ -12,14 +12,6 @@ private import semmle.javascript.heuristics.SyntacticHeuristics
private import CoreKnowledge as CoreKnowledge
import EndpointCharacteristics as EndpointCharacteristics
/** Provides a set of reasons why a given data flow node should be excluded as a sink candidate. */
string getAReasonSinkExcluded(DataFlow::Node n) {
exists(EndpointCharacteristics::StandardEndpointFilterCharacteristic characteristic |
characteristic.getEndpoints(n) and
result = characteristic
)
}
/**
* Holds if the node `n` is an argument to a function that has a manual model.
*/
@@ -77,7 +69,7 @@ private DataFlow::SourceNode getACallback(DataFlow::ParameterNode p, DataFlow::T
* Get calls for which we do not have the callee (i.e. the definition of the called function). This
* acts as a heuristic for identifying calls to external library functions.
*/
private DataFlow::CallNode getACallWithoutCallee() {
DataFlow::CallNode getACallWithoutCallee() {
forall(Function callee | callee = result.getACallee() | callee.getTopLevel().isExterns()) and
not exists(DataFlow::ParameterNode param, DataFlow::FunctionNode callback |
param.flowsTo(result.getCalleeNode()) and

View File

@@ -10,64 +10,11 @@ import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters
/**
* This module provides logic to filter candidate sinks to those which are likely path injection
* sinks.
*/
module SinkEndpointFilter {
private import javascript
private import TaintedPath
/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
// Require path injection sink candidates to be (a) arguments to external library calls
// (possibly indirectly), or (b) heuristic sinks.
//
// Heuristic sinks are mostly copied from the `HeuristicTaintedPathSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
not StandardEndpointFilters::flowsToArgumentOfLikelyExternalLibraryCall(sinkCandidate) and
not (
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(file|folder|dir|absolute)")
or
isArgTo(sinkCandidate, "(?i)(get|read)file")
or
exists(string pathPattern |
// paths with at least two parts, and either a trailing or leading slash
pathPattern = "(?i)([a-z0-9_.-]+/){2,}" or
pathPattern = "(?i)(/[a-z0-9_.-]+){2,}"
|
isConcatenatedWithString(sinkCandidate, pathPattern)
)
or
isConcatenatedWithStrings(".*/", sinkCandidate, "/.*")
or
// In addition to the names from `HeuristicTaintedPathSink` in the
// `isAssignedToOrConcatenatedWith` predicate call above, we also allow the noisier "path"
// name.
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)path")
) and
result = "not a direct argument to a likely external library call or a heuristic sink"
}
}
class TaintedPathAtmConfig extends AtmConfig {
TaintedPathAtmConfig() { this = "TaintedPathATMConfig" }
override predicate isKnownSource(DataFlow::Node source) { source instanceof TaintedPath::Source }
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
}
override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType }
}

View File

@@ -10,65 +10,11 @@ import AdaptiveThreatModeling
import CoreKnowledge as CoreKnowledge
import StandardEndpointFilters as StandardEndpointFilters
/**
* This module provides logic to filter candidate sinks to those which are likely XSS sinks.
*/
module SinkEndpointFilter {
private import javascript
private import DomBasedXss
/**
* Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
*
* If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
* effective sink.
*/
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
or
exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
call.getCalleeName() = "setState"
) and
result = "setState calls ought to be safe in react applications"
or
// Require XSS sink candidates to be (a) arguments to external library calls (possibly
// indirectly), or (b) heuristic sinks.
//
// Heuristic sinks are copied from the `HeuristicDomBasedXssSink` class defined within
// `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
// We can't reuse the class because importing that file would cause us to treat these
// heuristic sinks as known sinks.
not StandardEndpointFilters::flowsToArgumentOfLikelyExternalLibraryCall(sinkCandidate) and
not (
isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(html|innerhtml)")
or
isArgTo(sinkCandidate, "(?i)(html|render)")
or
sinkCandidate instanceof StringOps::HtmlConcatenationLeaf
or
isConcatenatedWithStrings("(?is).*<[a-z ]+.*", sinkCandidate, "(?s).*>.*")
or
// In addition to the heuristic sinks from `HeuristicDomBasedXssSink`, explicitly allow
// property writes like `elem.innerHTML = <TAINT>` that may not be picked up as HTML
// concatenation leaves.
exists(DataFlow::PropWrite pw |
pw.getPropertyName().regexpMatch("(?i).*html*") and
pw.getRhs() = sinkCandidate
)
) and
result = "not a direct argument to a likely external library call or a heuristic sink"
}
}
class DomBasedXssAtmConfig extends AtmConfig {
DomBasedXssAtmConfig() { this = "DomBasedXssATMConfig" }
override predicate isKnownSource(DataFlow::Node source) { source instanceof DomBasedXss::Source }
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
not exists(SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate))
}
override EndpointType getASinkEndpointType() { result instanceof XssSinkType }
}

View File

@@ -12,19 +12,23 @@
import javascript
import experimental.adaptivethreatmodeling.ATMConfig
import extraction.ExtractEndpointDataTraining
private import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
private import experimental.adaptivethreatmodeling.XssATM as XssAtm
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
query instanceof NosqlInjectionQuery and
result = NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
result = any(NosqlInjectionAtm::NosqlInjectionAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
or
query instanceof SqlInjectionQuery and
result = SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
result = any(SqlInjectionAtm::SqlInjectionAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
or
query instanceof TaintedPathQuery and
result = TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
result = any(TaintedPathAtm::TaintedPathAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
or
query instanceof XssQuery and
result = XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
result = any(XssAtm::DomBasedXssAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
}
pragma[inline]

View File

@@ -10,10 +10,10 @@ import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
import NoFeaturizationRestrictionsConfig
private import Exclusions as Exclusions
import Queries
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
import experimental.adaptivethreatmodeling.XssATM as XssAtm
private import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
private import experimental.adaptivethreatmodeling.XssATM as XssAtm
/**
* Gets the set of featureName-featureValue pairs for each endpoint in the training set.

View File

@@ -17,10 +17,10 @@ import extraction.NoFeaturizationRestrictionsConfig
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
(
not exists(NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)) or
not exists(SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)) or
not exists(TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)) or
not exists(XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)) or
not exists(any(NosqlInjectionAtm::NosqlInjectionAtmConfig cfg).getAReasonSinkExcluded(endpoint)) or
not exists(any(SqlInjectionAtm::SqlInjectionAtmConfig cfg).getAReasonSinkExcluded(endpoint)) or
not exists(any(TaintedPathAtm::TaintedPathAtmConfig cfg).getAReasonSinkExcluded(endpoint)) or
not exists(any(XssAtm::DomBasedXssAtmConfig cfg).getAReasonSinkExcluded(endpoint)) or
StandardEndpointFilters::isArgumentToModeledFunction(endpoint)
) and
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)

View File

@@ -24,24 +24,24 @@ import experimental.adaptivethreatmodeling.XssATM as XssAtm
query predicate nosqlFilteredTruePositives(DataFlow::Node endpoint, string reason) {
endpoint instanceof NosqlInjection::Sink and
reason = NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) and
reason = any(NosqlInjectionAtm::NosqlInjectionAtmConfig cfg).getAReasonSinkExcluded(endpoint) and
not reason = ["argument to modeled function", "modeled sink", "modeled database access"]
}
query predicate sqlFilteredTruePositives(DataFlow::Node endpoint, string reason) {
endpoint instanceof SqlInjection::Sink and
reason = SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) and
reason = any(SqlInjectionAtm::SqlInjectionAtmConfig cfg).getAReasonSinkExcluded(endpoint) and
reason != "argument to modeled function"
}
query predicate taintedPathFilteredTruePositives(DataFlow::Node endpoint, string reason) {
endpoint instanceof TaintedPath::Sink and
reason = TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) and
reason = any(TaintedPathAtm::TaintedPathAtmConfig cfg).getAReasonSinkExcluded(endpoint) and
reason != "argument to modeled function"
}
query predicate xssFilteredTruePositives(DataFlow::Node endpoint, string reason) {
endpoint instanceof DomBasedXss::Sink and
reason = XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) and
reason = any(XssAtm::DomBasedXssAtmConfig cfg).getAReasonSinkExcluded(endpoint) and
reason != "argument to modeled function"
}

View File

@@ -2,5 +2,5 @@ import javascript
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
query predicate effectiveSinks(DataFlow::Node node) {
not exists(NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(node))
not exists(any(NosqlInjectionAtm::NosqlInjectionAtmConfig cfg).getAReasonSinkExcluded(node))
}