diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll index 5d8da8459b4..74f7590e29f 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll @@ -1,162 +1,89 @@ /** * For internal use only. * - * Configures boosting for adaptive threat modeling (ATM). + * Collects the query configurations to boost with ATM. Imports the configurations of supported Java queries where + * possible. Java queries that are defined in a `.ql` file get copied into this file. */ private import java as java private import semmle.code.java.dataflow.TaintTracking +import semmle.code.java.security.RequestForgeryConfig +import semmle.code.java.security.SqlInjectionQuery import EndpointTypes import EndpointCharacteristics as EndpointCharacteristics +/* Copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql */ +private import semmle.code.java.dataflow.ExternalFlow +private import semmle.code.java.security.PathCreation +private import semmle.code.java.security.PathSanitizer -/** - * EXPERIMENTAL. This API may change in the future. - * - * A configuration class for defining known endpoints and endpoint filters for adaptive threat - * modeling (ATM). Each boosted query must define its own extension of this abstract class. - * - * A configuration defines a set of known sources (`isKnownSource`) and sinks (`isKnownSink`). - * It must also define a sink endpoint filter (`isEffectiveSink`) that filters candidate sinks - * predicted by the machine learning model to a set of effective sinks. - * - * To get started with ATM, you can copy-paste an implementation of the relevant predicates from a - * `DataFlow::Configuration` or `TaintTracking::Configuration` class for a standard security query. 
- * For example, for SQL injection you can start by defining the `isKnownSource` and `isKnownSink` - * predicates in the ATM configuration by copying and pasting the implementations of `isSource` and - * `isSink` from `SqlInjection::Configuration`. - * - * Note that if the security query configuration defines additional edges beyond the standard data - * flow edges, such as `NosqlInjection::Configuration`, you may need to replace the definition of - * `isAdditionalFlowStep` with a more generalised definition of additional edges. See - * `NosqlInjectionATM.qll` for an example of doing this. +/* + * Configurations that are copied from Java queries because they can't be directly imported. */ -abstract class AtmConfig extends TaintTracking::Configuration { - bindingset[this] - AtmConfig() { any() } - /** - * Holds if `source` is a relevant taint source. When sources are not boosted, `isSource` is equivalent to - * `isKnownSource` (i.e there are no "effective" sources to be classified by an ML model). - */ - override predicate isSource(DataFlow::Node source) { this.isKnownSource(source) } +/* TaintedPathConfig cannot be imported directly since it is defined in a .ql file. It is therefore copied here. */ +/* Copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql */ +class TaintedPathConfig extends TaintTracking::Configuration { + TaintedPathConfig() { this = "TaintedPathConfig" } + + override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - /** - * Holds if `sink` is a known taint sink or an "effective" sink (a candidate to be classified by an ML model). - */ override predicate isSink(DataFlow::Node sink) { - this.isKnownSink(sink) or this.isEffectiveSink(sink) - } - - /** - * EXPERIMENTAL. This API may change in the future. - * - * Holds if `source` is a known source of flow. - */ - abstract predicate isKnownSource(DataFlow::Node source); - - /** - * EXPERIMENTAL. This API may change in the future. 
- * - * Holds if `sink` is a known sink of for this query - */ - final predicate isKnownSink(DataFlow::Node sink) { - // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a - // known sink for the class. - isKnownSink(sink, this.getASinkEndpointType()) - } - - /** - * Holds if `sink` is a known sink for this query of type `sinkType`. - */ - final predicate isKnownSink(DataFlow::Node sink, EndpointType sinkType) { - sinkType = this.getASinkEndpointType() and - // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a - // known sink for the class. - exists(EndpointCharacteristics::EndpointCharacteristic characteristic | - characteristic.appliesToEndpoint(sink) and - characteristic.hasImplications(sinkType, true, characteristic.maximalConfidence()) - ) - } - - /** - * EXPERIMENTAL. This API may change in the future. - * - * Holds if the candidate source `candidateSource` predicted by the machine learning model should be - * an effective source, i.e. one considered as a possible source of flow in the boosted query. - */ - predicate isEffectiveSource(DataFlow::Node candidateSource) { none() } - - /** - * EXPERIMENTAL. This API may change in the future. - * - * Holds if the candidate sink `candidateSink` predicted by the machine learning model should be - * an effective sink, i.e. one considered as a possible sink of flow in the boosted query. - */ - predicate isEffectiveSink(DataFlow::Node candidateSink) { - not exists(this.getAReasonSinkExcluded(candidateSink)) - } - - /** - * Gets the list of characteristics that cause `candidateSink` to be excluded as an effective sink. - */ - final EndpointCharacteristics::EndpointCharacteristic getAReasonSinkExcluded( - DataFlow::Node candidateSink - ) { - // An endpoint is an effective sink (sink candidate) if none of its characteristics give much indication whether or - // not it is a sink. 
Historically, we used endpoint filters, and scored endpoints that are filtered out neither by - // a standard endpoint filter nor by an endpoint filter specific to this sink type. - result.appliesToEndpoint(candidateSink) and - // Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type. - exists(float confidence | - confidence >= result.mediumConfidence() and - result.hasImplications(any(NegativeSinkType negative), true, confidence) - ) + sink.asExpr() = any(PathCreation p).getAnInput() or - // Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type, - // for every sink type relevant to this query. - not exists(EndpointType sinkType | - sinkType = this.getASinkEndpointType() and - not exists(float confidence | - confidence >= result.mediumConfidence() and - result.hasImplications(sinkType, false, confidence) - ) - ) + sinkNode(sink, "create-file") } - /** - * EXPERIMENTAL. This API may change in the future. - * - * Get an endpoint type for the sources of this query. A query may have multiple applicable - * endpoint types for its sources. - */ - EndpointType getASourceEndpointType() { none() } - - /** - * EXPERIMENTAL. This API may change in the future. - * - * Get all sink types that can be sinks for this query. A query may have multiple applicable - * endpoint types for its sinks. 
- */ - abstract EndpointType getASinkEndpointType(); - - pragma[inline] - predicate isFlowLikelyInBaseQuery(DataFlow::Node source, DataFlow::Node sink) { - this.isKnownSource(source) and this.isKnownSink(sink) + override predicate isSanitizer(DataFlow::Node sanitizer) { + sanitizer.getType() instanceof BoxedType or + sanitizer.getType() instanceof PrimitiveType or + sanitizer.getType() instanceof NumberType or + sanitizer instanceof PathInjectionSanitizer } - /** - * Holds if if `sink` is an effective sink with flow from `source` which gets used as a sink candidate for scoring - * with the ML model. - */ - predicate isSinkCandidateWithFlow(DataFlow::PathNode sink) { - exists(DataFlow::PathNode source | - // Note: In JavaScript there's no need to check `isEffectiveSink` here explicitly, because `hasFlowPath` calls `isSink` which - // requires an endpoint to be either a known sink or an effective sink. Known sinks are later filtered out by - // `isFlowLikelyInBaseQuery`, leaving only effective sinks. - this.hasFlowPath(source, sink) and - not this.isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and - isEffectiveSink(sink.getNode()) and - not isKnownSink(sink.getNode()) // As long as we're not boosting sources this is already implicitly checked by `isFlowLikelyInBaseQuery` + override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { + any(TaintedPathAdditionalTaintStep s).step(n1, n2) + } +} + +/* TaintedPathCommon cannot be imported directly due to the hyphen in `CWE-022`. It is therefore copied here. */ +/* Copied from java/ql/src/Security/CWE/CWE-022/TaintedPathCommon.qll */ +/** + * A unit class for adding additional taint steps. + * + * Extend this class to add additional taint steps that should apply to tainted path flow configurations. 
+ */ +class TaintedPathAdditionalTaintStep extends Unit { + abstract predicate step(DataFlow::Node n1, DataFlow::Node n2); +} + +private class DefaultTaintedPathAdditionalTaintStep extends TaintedPathAdditionalTaintStep { + override predicate step(DataFlow::Node n1, DataFlow::Node n2) { + exists(Argument a | + a = n1.asExpr() and + a.getCall() = n2.asExpr() and + a = any(TaintPreservingUriCtorParam tpp).getAnArgument() + ) + } +} + +private class TaintPreservingUriCtorParam extends Parameter { + TaintPreservingUriCtorParam() { + exists(Constructor ctor, int idx, int nParams | + ctor.getDeclaringType() instanceof TypeUri and + this = ctor.getParameter(idx) and + nParams = ctor.getNumberOfParameters() + | + // URI(String scheme, String ssp, String fragment) + idx = 1 and nParams = 3 + or + // URI(String scheme, String host, String path, String fragment) + idx = [1, 2] and nParams = 4 + or + // URI(String scheme, String authority, String path, String query, String fragment) + idx = 2 and nParams = 5 + or + // URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment) + idx = 4 and nParams = 7 ) } } diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll index fffc52ec21d..ae96c3e9714 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll @@ -11,12 +11,26 @@ private import semmle.code.java.dataflow.ExternalFlow private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl import experimental.adaptivethreatmodeling.EndpointTypes private import experimental.adaptivethreatmodeling.ATMConfig -private import 
experimental.adaptivethreatmodeling.SqlInjectionATM -private import experimental.adaptivethreatmodeling.TaintedPathATM -private import experimental.adaptivethreatmodeling.RequestForgeryATM private import semmle.code.java.security.ExternalAPIs as ExternalAPIs private import semmle.code.java.Expr as Expr +/* + * Predicates that are used to surface prompt examples and candidates for classification with an ML model. + */ + +/** + * Holds if `sink` is a known sink of type `sinkType`. + */ +predicate isKnownSink(DataFlow::Node sink, SinkType sinkType) { + // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a + // known sink for the class. + sinkType != any(NegativeSinkType negative) and + exists(EndpointCharacteristics::EndpointCharacteristic characteristic | + characteristic.appliesToEndpoint(sink) and + characteristic.hasImplications(sinkType, true, characteristic.maximalConfidence()) + ) +} + /** * Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint * characteristics. Lists the problematic characterisitics and their implications for all such endpoints, together with @@ -127,6 +141,10 @@ predicate hasMetadata(DataFlow::Node n, string metadata) { ) } +/* + * EndpointCharacteristic classes. + */ + /** * A set of characteristics that a particular endpoint might have. 
This set of characteristics is used to make decisions * about whether to include the endpoint in the training set and with what label, as well as whether to score the @@ -379,7 +397,7 @@ private class IsSanitizerCharacteristic extends NotASinkCharacteristic { IsSanitizerCharacteristic() { this = "sanitizer" } override predicate appliesToEndpoint(DataFlow::Node n) { - exists(AtmConfig config | config.isSanitizer(n)) + exists(TaintTracking::Configuration config | config.isSanitizer(n)) } } diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll index e3ad1a623bf..595076d9780 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll @@ -53,8 +53,3 @@ class TaintedPathSinkType extends SinkType { class RequestForgerySinkType extends SinkType { RequestForgerySinkType() { this = "ssrf" } } - -/** Other sinks modeled by a MaD `kind` but not belonging to any of the existing sink types. */ -class OtherMaDSinkType extends SinkType { - OtherMaDSinkType() { this = "other-sink" } -} diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll deleted file mode 100644 index 2ce83096433..00000000000 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll +++ /dev/null @@ -1,38 +0,0 @@ -/** - * For internal use only. - * - * A taint-tracking configuration for reasoning about SSRF (server side request forgery) vulnerabilities. - * Largely copied from java/ql/lib/semmle/code/java/security/RequestForgeryConfig.qll. 
- * - * Only import this directly from .ql files, to avoid the possibility of polluting the Configuration hierarchy - * accidentally. - */ - -import ATMConfig -import semmle.code.java.dataflow.FlowSources -import semmle.code.java.security.RequestForgery - -class RequestForgeryAtmConfig extends AtmConfig { - RequestForgeryAtmConfig() { this = "RequestForgeryAtmConfig" } - - override predicate isKnownSource(DataFlow::Node source) { - source instanceof RemoteFlowSource and - // Exclude results of remote HTTP requests: fetching something else based on that result - // is no worse than following a redirect returned by the remote server, and typically - // we're requesting a resource via https which we trust to only send us to safe URLs. - not source.asExpr().(MethodAccess).getCallee() instanceof UrlConnectionGetInputStreamMethod - } - - override EndpointType getASinkEndpointType() { result instanceof RequestForgerySinkType } - - /* - * This is largely a copy of the taint tracking configuration for the standard SSRF - * query, except additional sinks have been added using the sink endpoint filter. - */ - - override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { - any(RequestForgeryAdditionalTaintStep r).propagatesTaint(pred, succ) - } - - override predicate isSanitizer(DataFlow::Node node) { node instanceof RequestForgerySanitizer } -} diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll deleted file mode 100644 index 628cd9d0f50..00000000000 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll +++ /dev/null @@ -1,36 +0,0 @@ -/** - * For internal use only. - * - * A taint-tracking configuration for reasoning about SQL injection vulnerabilities. - * Defines shared code used by the SQL injection boosted query. 
- * Largely copied from semmle.code.java.security.SqlInjectionQuery. - */ - -import ATMConfig -import semmle.code.java.dataflow.FlowSources -import semmle.code.java.security.QueryInjection - -class SqlInjectionAtmConfig extends AtmConfig { - SqlInjectionAtmConfig() { this = "SqlInjectionAtmConfig" } - - override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override EndpointType getASinkEndpointType() { - result instanceof SqlSinkType or result instanceof SqlSinkType - } - - /* - * This is largely a copy of the taint tracking configuration for the standard SQL injection - * query, except additional sinks have been added using the sink endpoint filter. - */ - - override predicate isSanitizer(DataFlow::Node node) { - node.getType() instanceof PrimitiveType or - node.getType() instanceof BoxedType or - node.getType() instanceof NumberType - } - - override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { - any(AdditionalQueryInjectionTaintStep s).step(node1, node2) - } -} diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll deleted file mode 100644 index d216c0c9567..00000000000 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll +++ /dev/null @@ -1,82 +0,0 @@ -/** - * For internal use only. - * - * A taint-tracking configuration for reasoning about path injection vulnerabilities. - * Defines shared code used by the path injection boosted query. - * Largely copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql. 
- */ - -import java -import semmle.code.java.security.PathSanitizer -import ATMConfig -import semmle.code.java.dataflow.FlowSources - -class TaintedPathAtmConfig extends AtmConfig { - TaintedPathAtmConfig() { this = "TaintedPathAtmConfig" } - - override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType } - - /* - * This is largely a copy of the taint tracking configuration for the standard path injection - * query, except additional ATM sinks have been added to the `isSink` predicate. - */ - - override predicate isSanitizer(DataFlow::Node sanitizer) { - sanitizer.getType() instanceof BoxedType or - sanitizer.getType() instanceof PrimitiveType or - sanitizer.getType() instanceof NumberType or - sanitizer instanceof PathInjectionSanitizer - } - - override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - any(TaintedPathAdditionalTaintStep s).step(n1, n2) - } -} - -/* - * Models a very basic guard for the tainted path queries. - * TODO: Copied from java/ql/src/Security/CWE/CWE-022/TaintedPathCommon.qll because I couldn't figure out how to import it. - */ - -/** - * A unit class for adding additional taint steps. - * - * Extend this class to add additional taint steps that should apply to tainted path flow configurations. 
- */ -class TaintedPathAdditionalTaintStep extends Unit { - abstract predicate step(DataFlow::Node n1, DataFlow::Node n2); -} - -private class DefaultTaintedPathAdditionalTaintStep extends TaintedPathAdditionalTaintStep { - override predicate step(DataFlow::Node n1, DataFlow::Node n2) { - exists(Argument a | - a = n1.asExpr() and - a.getCall() = n2.asExpr() and - a = any(TaintPreservingUriCtorParam tpp).getAnArgument() - ) - } -} - -private class TaintPreservingUriCtorParam extends Parameter { - TaintPreservingUriCtorParam() { - exists(Constructor ctor, int idx, int nParams | - ctor.getDeclaringType() instanceof TypeUri and - this = ctor.getParameter(idx) and - nParams = ctor.getNumberOfParameters() - | - // URI(String scheme, String ssp, String fragment) - idx = 1 and nParams = 3 - or - // URI(String scheme, String host, String path, String fragment) - idx = [1, 2] and nParams = 4 - or - // URI(String scheme, String authority, String path, String query, String fragment) - idx = 2 and nParams = 5 - or - // URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment) - idx = 4 and nParams = 7 - ) - } -} diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql index 2c94663e12f..f6b6dd50260 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql @@ -13,9 +13,6 @@ private import semmle.code.java.security.ExternalAPIs as ExternalAPIs private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics private import experimental.adaptivethreatmodeling.EndpointTypes private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig -private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm -private import 
experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm -private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm /* * ****** WARNING: ****** @@ -23,18 +20,18 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestF * the ML-gnerarated, noisy sinks will end up poluting the positive examples used in the prompt! */ -from DataFlow::Node sink, AtmConfig::AtmConfig config, EndpointType sinkType, string message +from DataFlow::Node sink, EndpointType sinkType, string message where // If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query // when there's a codex-generated data extension file in `java/ql/lib/ext`. not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _) and // Extract positive examples of sinks belonging to the existing ATM query configurations. ( - config.isKnownSink(sink, sinkType) and + EndpointCharacteristics::isKnownSink(sink, sinkType) and // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be // treated by the actual query as a sanitizer, since the final logic is something like // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt. - not config.isSanitizer(sink) and + not exists(TaintTracking::Configuration config | config.isSanitizer(sink)) and // Include only sinks that are arguments to an external API call, because these are the sinks we are most interested // in. 
sink instanceof ExternalAPIs::ExternalApiDataNode and diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql index b9fe965623b..6d2e1336e57 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql @@ -1,6 +1,6 @@ /** - * Surfaces the endpoints that pass the endpoint filters and have flow from a source for each query config, and are - * therefore used as candidates for classification with an ML model. + * Surfaces the endpoints that pass the endpoint filters and are not already known to be sinks, and are therefore used + * as candidates for classification with an ML model. * * Note: This query does not actually classify the endpoints using the model. * @@ -17,30 +17,62 @@ private import semmle.code.java.dataflow.ExternalFlow private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics private import experimental.adaptivethreatmodeling.EndpointTypes private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig -private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm -private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm -private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm -from DataFlow::Node sink, string message +/** + * Holds if the candidate sink `candidateSink` should be considered as a possible sink of type `sinkType`, and + * classified by the ML model. A candidate sink is a node that cannot be excluded from `sinkType` based on its + * characteristics.
+ */
+predicate isEffectiveSink(DataFlow::Node candidateSink, SinkType sinkType) {
+  sinkType != any(NegativeSinkType negative) and
+  not exists(EndpointCharacteristics::EndpointCharacteristic characteristic |
+    characteristic = getAReasonSinkExcluded(candidateSink, sinkType)
+  )
+}
+
+/**
+ * Gets a characteristic that causes `candidateSink` to be excluded as an effective sink for the sink type
+ * `sinkType`; there may be several such characteristics, or none.
+ */
+EndpointCharacteristics::EndpointCharacteristic getAReasonSinkExcluded(
+  DataFlow::Node candidateSink, SinkType sinkType
+) {
+  // An endpoint is a sink candidate if none of its characteristics give much indication whether or not it is a sink.
+  sinkType != any(NegativeSinkType negative) and
+  result.appliesToEndpoint(candidateSink) and
+  // Group both exclusion criteria so the guards above apply to each (a top-level `or` binds looser than `and`).
+  exists(float confidence |
+    confidence >= result.mediumConfidence() and
+    (
+      // Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type.
+      result.hasImplications(any(NegativeSinkType negative), true, confidence)
+      or
+      // Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type.
+      result.hasImplications(sinkType, false, confidence)
+    )
+  )
+}
+
+from DataFlow::Node sinkCandidate, string message
 where
   // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
   // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
   // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
   // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
   // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
- not exists(AtmConfig::AtmConfig config, string kind | - config.isKnownSink(sink) and - sinkNode(sink, kind) + not exists(string kind | + sinkNode(sinkCandidate, kind) + // and EndpointCharacteristics::isKnownSink(sinkCandidate, sinkType) and kind = sinkType.getKind() // TODO: Uncomment this line once our sink types indeed correspond to MaD `kind`s. ) and - // The message is the concatenation of all relevant configs, and we surface only sinks that have at least one relevant - // config. + // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be + // a non-sink, and we surface only endpoints that have at least one such sink type. message = - strictconcat(AtmConfig::AtmConfig config, DataFlow::PathNode sinkPathNode | - config.isSinkCandidateWithFlow(sinkPathNode) and - sinkPathNode.getNode() = sink + strictconcat(SinkType sinkType | + not EndpointCharacteristics::isKnownSink(sinkCandidate, sinkType) and + isEffectiveSink(sinkCandidate, sinkType) | - config, ", " + sinkType + ", " ) + "\n" + // Extract the needed metadata for this endpoint. - any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata)) -select sink, message + any(string metadata | EndpointCharacteristics::hasMetadata(sinkCandidate, metadata)) +select sinkCandidate, message