mirror of
https://github.com/github/codeql.git
synced 2026-05-24 16:17:07 +02:00
Simplify AtmConfig:
- We no longer create new configs for each query we want to boost with ATM. - Instead the `AtmConfig` module imports the configs for the Java queries it can and copies the configs for the ones that are defined in a ql file. - The predicates that used to be defined in the `AtmConfig` class are now defined either in the candidate extraction query or (in the case of `isKnownSink`, which is used in more than one file) in `EndpointCharacteristics.qll`. - Delete all the derived classes of AtmConfig. - Surface all candidates that pass the endpoint filters, regardless of flow from a source.
This commit is contained in:
@@ -1,162 +1,89 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Configures boosting for adaptive threat modeling (ATM).
|
||||
* Collects the query configurations to boost with ATM. Imports the configurations of supported Java queries where
|
||||
* possible. Java queries that are defined in a `.ql` file get copied into this file.
|
||||
*/
|
||||
|
||||
private import java as java
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
import semmle.code.java.security.RequestForgeryConfig
|
||||
import semmle.code.java.security.SqlInjectionQuery
|
||||
import EndpointTypes
|
||||
import EndpointCharacteristics as EndpointCharacteristics
|
||||
/* Copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql */
|
||||
private import semmle.code.java.dataflow.ExternalFlow
|
||||
private import semmle.code.java.security.PathCreation
|
||||
private import semmle.code.java.security.PathSanitizer
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* A configuration class for defining known endpoints and endpoint filters for adaptive threat
|
||||
* modeling (ATM). Each boosted query must define its own extension of this abstract class.
|
||||
*
|
||||
* A configuration defines a set of known sources (`isKnownSource`) and sinks (`isKnownSink`).
|
||||
* It must also define a sink endpoint filter (`isEffectiveSink`) that filters candidate sinks
|
||||
* predicted by the machine learning model to a set of effective sinks.
|
||||
*
|
||||
* To get started with ATM, you can copy-paste an implementation of the relevant predicates from a
|
||||
* `DataFlow::Configuration` or `TaintTracking::Configuration` class for a standard security query.
|
||||
* For example, for SQL injection you can start by defining the `isKnownSource` and `isKnownSink`
|
||||
* predicates in the ATM configuration by copying and pasting the implementations of `isSource` and
|
||||
* `isSink` from `SqlInjection::Configuration`.
|
||||
*
|
||||
* Note that if the security query configuration defines additional edges beyond the standard data
|
||||
* flow edges, such as `NosqlInjection::Configuration`, you may need to replace the definition of
|
||||
* `isAdditionalFlowStep` with a more generalised definition of additional edges. See
|
||||
* `NosqlInjectionATM.qll` for an example of doing this.
|
||||
/*
|
||||
* Configurations that are copied from Java queries because they can't be directly imported.
|
||||
*/
|
||||
abstract class AtmConfig extends TaintTracking::Configuration {
|
||||
bindingset[this]
|
||||
AtmConfig() { any() }
|
||||
|
||||
/**
|
||||
* Holds if `source` is a relevant taint source. When sources are not boosted, `isSource` is equivalent to
|
||||
* `isKnownSource` (i.e. there are no "effective" sources to be classified by an ML model).
|
||||
*/
|
||||
override predicate isSource(DataFlow::Node source) { this.isKnownSource(source) }
|
||||
/* TaintedPathConfig cannot be imported directly since it is defined in a .ql file. It is therefore copied here. */
|
||||
/* Copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql */
|
||||
class TaintedPathConfig extends TaintTracking::Configuration {
|
||||
TaintedPathConfig() { this = "TaintedPathConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a known taint sink or an "effective" sink (a candidate to be classified by an ML model).
|
||||
*/
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
this.isKnownSink(sink) or this.isEffectiveSink(sink)
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if `source` is a known source of flow.
|
||||
*/
|
||||
abstract predicate isKnownSource(DataFlow::Node source);
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if `sink` is a known sink for this query.
|
||||
*/
|
||||
final predicate isKnownSink(DataFlow::Node sink) {
|
||||
// If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
|
||||
// known sink for the class.
|
||||
isKnownSink(sink, this.getASinkEndpointType())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a known sink for this query of type `sinkType`.
|
||||
*/
|
||||
final predicate isKnownSink(DataFlow::Node sink, EndpointType sinkType) {
|
||||
sinkType = this.getASinkEndpointType() and
|
||||
// If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
|
||||
// known sink for the class.
|
||||
exists(EndpointCharacteristics::EndpointCharacteristic characteristic |
|
||||
characteristic.appliesToEndpoint(sink) and
|
||||
characteristic.hasImplications(sinkType, true, characteristic.maximalConfidence())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate source `candidateSource` predicted by the machine learning model should be
|
||||
* an effective source, i.e. one considered as a possible source of flow in the boosted query.
|
||||
*/
|
||||
predicate isEffectiveSource(DataFlow::Node candidateSource) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
|
||||
* an effective sink, i.e. one considered as a possible sink of flow in the boosted query.
|
||||
*/
|
||||
predicate isEffectiveSink(DataFlow::Node candidateSink) {
|
||||
not exists(this.getAReasonSinkExcluded(candidateSink))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the list of characteristics that cause `candidateSink` to be excluded as an effective sink.
|
||||
*/
|
||||
final EndpointCharacteristics::EndpointCharacteristic getAReasonSinkExcluded(
|
||||
DataFlow::Node candidateSink
|
||||
) {
|
||||
// An endpoint is an effective sink (sink candidate) if none of its characteristics give much indication whether or
|
||||
// not it is a sink. Historically, we used endpoint filters, and scored endpoints that are filtered out neither by
|
||||
// a standard endpoint filter nor by an endpoint filter specific to this sink type.
|
||||
result.appliesToEndpoint(candidateSink) and
|
||||
// Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type.
|
||||
exists(float confidence |
|
||||
confidence >= result.mediumConfidence() and
|
||||
result.hasImplications(any(NegativeSinkType negative), true, confidence)
|
||||
)
|
||||
sink.asExpr() = any(PathCreation p).getAnInput()
|
||||
or
|
||||
// Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type,
|
||||
// for every sink type relevant to this query.
|
||||
not exists(EndpointType sinkType |
|
||||
sinkType = this.getASinkEndpointType() and
|
||||
not exists(float confidence |
|
||||
confidence >= result.mediumConfidence() and
|
||||
result.hasImplications(sinkType, false, confidence)
|
||||
)
|
||||
)
|
||||
sinkNode(sink, "create-file")
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get an endpoint type for the sources of this query. A query may have multiple applicable
|
||||
* endpoint types for its sources.
|
||||
*/
|
||||
EndpointType getASourceEndpointType() { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get all sink types that can be sinks for this query. A query may have multiple applicable
|
||||
* endpoint types for its sinks.
|
||||
*/
|
||||
abstract EndpointType getASinkEndpointType();
|
||||
|
||||
pragma[inline]
|
||||
predicate isFlowLikelyInBaseQuery(DataFlow::Node source, DataFlow::Node sink) {
|
||||
this.isKnownSource(source) and this.isKnownSink(sink)
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer.getType() instanceof BoxedType or
|
||||
sanitizer.getType() instanceof PrimitiveType or
|
||||
sanitizer.getType() instanceof NumberType or
|
||||
sanitizer instanceof PathInjectionSanitizer
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `sink` is an effective sink with flow from `source` which gets used as a sink candidate for scoring
|
||||
* with the ML model.
|
||||
*/
|
||||
predicate isSinkCandidateWithFlow(DataFlow::PathNode sink) {
|
||||
exists(DataFlow::PathNode source |
|
||||
// Note: In JavaScript there's no need to check `isEffectiveSink` here explicitly, because `hasFlowPath` calls `isSink` which
|
||||
// requires an endpoint to be either a known sink or an effective sink. Known sinks are later filtered out by
|
||||
// `isFlowLikelyInBaseQuery`, leaving only effective sinks.
|
||||
this.hasFlowPath(source, sink) and
|
||||
not this.isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
|
||||
isEffectiveSink(sink.getNode()) and
|
||||
not isKnownSink(sink.getNode()) // As long as we're not boosting sources this is already implicitly checked by `isFlowLikelyInBaseQuery`
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
any(TaintedPathAdditionalTaintStep s).step(n1, n2)
|
||||
}
|
||||
}
|
||||
|
||||
/* TaintedPathCommon cannot be imported directly due to the hyphen in `CWE-022`. It is therefore copied here. */
|
||||
/* Copied from java/ql/src/Security/CWE/CWE-022/TaintedPathCommon.qll */
|
||||
/**
|
||||
* A unit class for adding additional taint steps.
|
||||
*
|
||||
* Extend this class to add additional taint steps that should apply to tainted path flow configurations.
|
||||
*/
|
||||
class TaintedPathAdditionalTaintStep extends Unit {
|
||||
abstract predicate step(DataFlow::Node n1, DataFlow::Node n2);
|
||||
}
|
||||
|
||||
private class DefaultTaintedPathAdditionalTaintStep extends TaintedPathAdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
exists(Argument a |
|
||||
a = n1.asExpr() and
|
||||
a.getCall() = n2.asExpr() and
|
||||
a = any(TaintPreservingUriCtorParam tpp).getAnArgument()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private class TaintPreservingUriCtorParam extends Parameter {
|
||||
TaintPreservingUriCtorParam() {
|
||||
exists(Constructor ctor, int idx, int nParams |
|
||||
ctor.getDeclaringType() instanceof TypeUri and
|
||||
this = ctor.getParameter(idx) and
|
||||
nParams = ctor.getNumberOfParameters()
|
||||
|
|
||||
// URI(String scheme, String ssp, String fragment)
|
||||
idx = 1 and nParams = 3
|
||||
or
|
||||
// URI(String scheme, String host, String path, String fragment)
|
||||
idx = [1, 2] and nParams = 4
|
||||
or
|
||||
// URI(String scheme, String authority, String path, String query, String fragment)
|
||||
idx = 2 and nParams = 5
|
||||
or
|
||||
// URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)
|
||||
idx = 4 and nParams = 7
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,12 +11,26 @@ private import semmle.code.java.dataflow.ExternalFlow
|
||||
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlInjectionATM
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM
|
||||
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import semmle.code.java.Expr as Expr
|
||||
|
||||
/*
|
||||
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a known sink of type `sinkType`.
|
||||
*/
|
||||
predicate isKnownSink(DataFlow::Node sink, SinkType sinkType) {
|
||||
// If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
|
||||
// known sink for the class.
|
||||
sinkType != any(NegativeSinkType negative) and
|
||||
exists(EndpointCharacteristics::EndpointCharacteristic characteristic |
|
||||
characteristic.appliesToEndpoint(sink) and
|
||||
characteristic.hasImplications(sinkType, true, characteristic.maximalConfidence())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
|
||||
* characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
|
||||
@@ -127,6 +141,10 @@ predicate hasMetadata(DataFlow::Node n, string metadata) {
|
||||
)
|
||||
}
|
||||
|
||||
/*
|
||||
* EndpointCharacteristic classes.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
|
||||
* about whether to include the endpoint in the training set and with what label, as well as whether to score the
|
||||
@@ -379,7 +397,7 @@ private class IsSanitizerCharacteristic extends NotASinkCharacteristic {
|
||||
IsSanitizerCharacteristic() { this = "sanitizer" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
exists(AtmConfig config | config.isSanitizer(n))
|
||||
exists(TaintTracking::Configuration config | config.isSanitizer(n))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -53,8 +53,3 @@ class TaintedPathSinkType extends SinkType {
|
||||
class RequestForgerySinkType extends SinkType {
|
||||
RequestForgerySinkType() { this = "ssrf" }
|
||||
}
|
||||
|
||||
/** Other sinks modeled by a MaD `kind` but not belonging to any of the existing sink types. */
|
||||
class OtherMaDSinkType extends SinkType {
|
||||
OtherMaDSinkType() { this = "other-sink" }
|
||||
}
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* A taint-tracking configuration for reasoning about SSRF (server side request forgery) vulnerabilities.
|
||||
* Largely copied from java/ql/lib/semmle/code/java/security/RequestForgeryConfig.qll.
|
||||
*
|
||||
* Only import this directly from .ql files, to avoid the possibility of polluting the Configuration hierarchy
|
||||
* accidentally.
|
||||
*/
|
||||
|
||||
import ATMConfig
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
import semmle.code.java.security.RequestForgery
|
||||
|
||||
class RequestForgeryAtmConfig extends AtmConfig {
|
||||
RequestForgeryAtmConfig() { this = "RequestForgeryAtmConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) {
|
||||
source instanceof RemoteFlowSource and
|
||||
// Exclude results of remote HTTP requests: fetching something else based on that result
|
||||
// is no worse than following a redirect returned by the remote server, and typically
|
||||
// we're requesting a resource via https which we trust to only send us to safe URLs.
|
||||
not source.asExpr().(MethodAccess).getCallee() instanceof UrlConnectionGetInputStreamMethod
|
||||
}
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof RequestForgerySinkType }
|
||||
|
||||
/*
|
||||
* This is largely a copy of the taint tracking configuration for the standard SSRF
|
||||
* query, except additional sinks have been added using the sink endpoint filter.
|
||||
*/
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
|
||||
any(RequestForgeryAdditionalTaintStep r).propagatesTaint(pred, succ)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { node instanceof RequestForgerySanitizer }
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* A taint-tracking configuration for reasoning about SQL injection vulnerabilities.
|
||||
* Defines shared code used by the SQL injection boosted query.
|
||||
* Largely copied from semmle.code.java.security.SqlInjectionQuery.
|
||||
*/
|
||||
|
||||
import ATMConfig
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
import semmle.code.java.security.QueryInjection
|
||||
|
||||
class SqlInjectionAtmConfig extends AtmConfig {
|
||||
SqlInjectionAtmConfig() { this = "SqlInjectionAtmConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override EndpointType getASinkEndpointType() {
|
||||
result instanceof SqlSinkType or result instanceof SqlSinkType
|
||||
}
|
||||
|
||||
/*
|
||||
* This is largely a copy of the taint tracking configuration for the standard SQL injection
|
||||
* query, except additional sinks have been added using the sink endpoint filter.
|
||||
*/
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
node.getType() instanceof PrimitiveType or
|
||||
node.getType() instanceof BoxedType or
|
||||
node.getType() instanceof NumberType
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
any(AdditionalQueryInjectionTaintStep s).step(node1, node2)
|
||||
}
|
||||
}
|
||||
@@ -1,82 +0,0 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* A taint-tracking configuration for reasoning about path injection vulnerabilities.
|
||||
* Defines shared code used by the path injection boosted query.
|
||||
* Largely copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql.
|
||||
*/
|
||||
|
||||
import java
|
||||
import semmle.code.java.security.PathSanitizer
|
||||
import ATMConfig
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
|
||||
class TaintedPathAtmConfig extends AtmConfig {
|
||||
TaintedPathAtmConfig() { this = "TaintedPathAtmConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType }
|
||||
|
||||
/*
|
||||
* This is largely a copy of the taint tracking configuration for the standard path injection
|
||||
* query, except additional ATM sinks have been added to the `isSink` predicate.
|
||||
*/
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer.getType() instanceof BoxedType or
|
||||
sanitizer.getType() instanceof PrimitiveType or
|
||||
sanitizer.getType() instanceof NumberType or
|
||||
sanitizer instanceof PathInjectionSanitizer
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
any(TaintedPathAdditionalTaintStep s).step(n1, n2)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Models a very basic guard for the tainted path queries.
|
||||
* TODO: Copied from java/ql/src/Security/CWE/CWE-022/TaintedPathCommon.qll because I couldn't figure out how to import it.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A unit class for adding additional taint steps.
|
||||
*
|
||||
* Extend this class to add additional taint steps that should apply to tainted path flow configurations.
|
||||
*/
|
||||
class TaintedPathAdditionalTaintStep extends Unit {
|
||||
abstract predicate step(DataFlow::Node n1, DataFlow::Node n2);
|
||||
}
|
||||
|
||||
private class DefaultTaintedPathAdditionalTaintStep extends TaintedPathAdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
exists(Argument a |
|
||||
a = n1.asExpr() and
|
||||
a.getCall() = n2.asExpr() and
|
||||
a = any(TaintPreservingUriCtorParam tpp).getAnArgument()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private class TaintPreservingUriCtorParam extends Parameter {
|
||||
TaintPreservingUriCtorParam() {
|
||||
exists(Constructor ctor, int idx, int nParams |
|
||||
ctor.getDeclaringType() instanceof TypeUri and
|
||||
this = ctor.getParameter(idx) and
|
||||
nParams = ctor.getNumberOfParameters()
|
||||
|
|
||||
// URI(String scheme, String ssp, String fragment)
|
||||
idx = 1 and nParams = 3
|
||||
or
|
||||
// URI(String scheme, String host, String path, String fragment)
|
||||
idx = [1, 2] and nParams = 4
|
||||
or
|
||||
// URI(String scheme, String authority, String path, String query, String fragment)
|
||||
idx = 2 and nParams = 5
|
||||
or
|
||||
// URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)
|
||||
idx = 4 and nParams = 7
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -13,9 +13,6 @@ private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
|
||||
private import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
/*
|
||||
* ****** WARNING: ******
|
||||
@@ -23,18 +20,18 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestF
|
||||
* the ML-generated, noisy sinks will end up polluting the positive examples used in the prompt!
|
||||
*/
|
||||
|
||||
from DataFlow::Node sink, AtmConfig::AtmConfig config, EndpointType sinkType, string message
|
||||
from DataFlow::Node sink, EndpointType sinkType, string message
|
||||
where
|
||||
// If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query
|
||||
// when there's a codex-generated data extension file in `java/ql/lib/ext`.
|
||||
not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _) and
|
||||
// Extract positive examples of sinks belonging to the existing ATM query configurations.
|
||||
(
|
||||
config.isKnownSink(sink, sinkType) and
|
||||
EndpointCharacteristics::isKnownSink(sink, sinkType) and
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt.
|
||||
not config.isSanitizer(sink) and
|
||||
not exists(TaintTracking::Configuration config | config.isSanitizer(sink)) and
|
||||
// Include only sinks that are arguments to an external API call, because these are the sinks we are most interested
|
||||
// in.
|
||||
sink instanceof ExternalAPIs::ExternalApiDataNode and
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* Surfaces the endpoints that pass the endpoint filters and have flow from a source for each query config, and are
|
||||
* therefore used as candidates for classification with an ML model.
|
||||
* Surfaces the endpoints that pass the endpoint filters and are not already known to be sinks, and are therefore used
|
||||
* as candidates for classification with an ML model.
|
||||
*
|
||||
* Note: This query does not actually classify the endpoints using the model.
|
||||
*
|
||||
@@ -17,30 +17,62 @@ private import semmle.code.java.dataflow.ExternalFlow
|
||||
private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
|
||||
private import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
from DataFlow::Node sink, string message
|
||||
/**
|
||||
* Holds if the candidate sink `candidateSink` should be considered as a possible sink of type `sinkType`, and
|
||||
* classified by the ML model. A candidate sink is a node that cannot be excluded from `sinkType` based on its
|
||||
* characteristics.
|
||||
*/
|
||||
predicate isEffectiveSink(DataFlow::Node candidateSink, SinkType sinkType) {
|
||||
sinkType != any(NegativeSinkType negative) and
|
||||
not exists(EndpointCharacteristics::EndpointCharacteristic characteristic |
|
||||
characteristic = getAReasonSinkExcluded(candidateSink, sinkType)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the list of characteristics that cause `candidateSink` to be excluded as an effective sink for a given sink
|
||||
* type.
|
||||
*/
|
||||
EndpointCharacteristics::EndpointCharacteristic getAReasonSinkExcluded(
|
||||
DataFlow::Node candidateSink, SinkType sinkType
|
||||
) {
|
||||
// An endpoint is a sink candidate if none of its characteristics give much indication whether or not it is a sink.
|
||||
sinkType != any(NegativeSinkType negative) and
|
||||
result.appliesToEndpoint(candidateSink) and
|
||||
// Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type.
|
||||
exists(float confidence |
|
||||
confidence >= result.mediumConfidence() and
|
||||
result.hasImplications(any(NegativeSinkType negative), true, confidence)
|
||||
)
|
||||
or
|
||||
// Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type.
|
||||
exists(float confidence |
|
||||
confidence >= result.mediumConfidence() and
|
||||
result.hasImplications(sinkType, false, confidence)
|
||||
)
|
||||
}
|
||||
|
||||
from DataFlow::Node sinkCandidate, string message
|
||||
where
|
||||
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
|
||||
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
|
||||
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
|
||||
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
|
||||
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
|
||||
not exists(AtmConfig::AtmConfig config, string kind |
|
||||
config.isKnownSink(sink) and
|
||||
sinkNode(sink, kind)
|
||||
not exists(string kind |
|
||||
sinkNode(sinkCandidate, kind)
|
||||
// and EndpointCharacteristics::isKnownSink(sinkCandidate, sinkType) and kind = sinkType.getKind() // TODO: Uncomment this line once our sink types indeed correspond to MaD `kind`s.
|
||||
) and
|
||||
// The message is the concatenation of all relevant configs, and we surface only sinks that have at least one relevant
|
||||
// config.
|
||||
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
|
||||
// a non-sink, and we surface only endpoints that have at least one such sink type.
|
||||
message =
|
||||
strictconcat(AtmConfig::AtmConfig config, DataFlow::PathNode sinkPathNode |
|
||||
config.isSinkCandidateWithFlow(sinkPathNode) and
|
||||
sinkPathNode.getNode() = sink
|
||||
strictconcat(SinkType sinkType |
|
||||
not EndpointCharacteristics::isKnownSink(sinkCandidate, sinkType) and
|
||||
isEffectiveSink(sinkCandidate, sinkType)
|
||||
|
|
||||
config, ", "
|
||||
sinkType + ", "
|
||||
) + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata))
|
||||
select sink, message
|
||||
any(string metadata | EndpointCharacteristics::hasMetadata(sinkCandidate, metadata))
|
||||
select sinkCandidate, message
|
||||
|
||||
Reference in New Issue
Block a user