mirror of
https://github.com/github/codeql.git
synced 2026-05-16 04:09:27 +02:00
Compare commits
1 Commits
codeql-cli
...
jhelie/add
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
21d258fa37 |
@@ -8,16 +8,16 @@ provide:
|
||||
- "cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml"
|
||||
- "go/ql/config/legacy-support/qlpack.yml"
|
||||
- "go/build/codeql-extractor-go/codeql-extractor.yml"
|
||||
- "javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml"
|
||||
- "*/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml"
|
||||
# This pack is explicitly excluded from the workspace since most users
|
||||
# will want to use a version of this pack from the package cache. Internal
|
||||
# users can uncomment the following line and place a custom ML model
|
||||
# in the corresponding pack to test a custom ML model within their local
|
||||
# checkout.
|
||||
# - "javascript/ql/experimental/adaptivethreatmodeling/model/qlpack.yml"
|
||||
- "javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml"
|
||||
- "javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml"
|
||||
- "javascript/ql/experimental/adaptivethreatmodeling/test/qlpack.yml"
|
||||
- "*/ql/experimental/adaptivethreatmodeling/model/qlpack.yml"
|
||||
- "*/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml"
|
||||
- "*/ql/experimental/adaptivethreatmodeling/src/qlpack.yml"
|
||||
- "*/ql/experimental/adaptivethreatmodeling/test/qlpack.yml"
|
||||
- "csharp/ql/campaigns/Solorigate/lib/qlpack.yml"
|
||||
- "csharp/ql/campaigns/Solorigate/src/qlpack.yml"
|
||||
- "csharp/ql/campaigns/Solorigate/test/qlpack.yml"
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Configures boosting for adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import java as java
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
import EndpointTypes
|
||||
import EndpointCharacteristics as EndpointCharacteristics
|
||||
import AdaptiveThreatModeling::ATM::ResultsInfo as AtmResultsInfo
|
||||
|
||||
/**
 * EXPERIMENTAL. This API may change in the future.
 *
 * A configuration class for defining known endpoints and endpoint filters for adaptive threat
 * modeling (ATM). Each boosted query must define its own extension of this abstract class.
 *
 * A configuration defines a set of known sources (`isKnownSource`) and sinks (`isKnownSink`).
 * It must also define a sink endpoint filter (`isEffectiveSink`) that filters candidate sinks
 * predicted by the machine learning model to a set of effective sinks.
 *
 * To get started with ATM, you can copy-paste an implementation of the relevant predicates from a
 * `DataFlow::Configuration` or `TaintTracking::Configuration` class for a standard security query.
 * For example, for SQL injection you can start by defining the `isKnownSource` and `isKnownSink`
 * predicates in the ATM configuration by copying and pasting the implementations of `isSource` and
 * `isSink` from `SqlInjection::Configuration`.
 *
 * Note that if the security query configuration defines additional edges beyond the standard data
 * flow edges, such as `NosqlInjection::Configuration`, you may need to replace the definition of
 * `isAdditionalFlowStep` with a more generalised definition of additional edges. See
 * `NosqlInjectionATM.qll` for an example of doing this.
 */
abstract class AtmConfig extends TaintTracking::Configuration {
  // No-op characteristic predicate: concrete subclasses supply the configuration's string value
  // in their own charpred, hence the `bindingset[this]`.
  bindingset[this]
  AtmConfig() { any() }

  /**
   * Holds if `source` is a relevant taint source. When sources are not boosted, `isSource` is
   * equivalent to `isKnownSource` (i.e. there are no "effective" sources to be classified by an
   * ML model).
   */
  override predicate isSource(DataFlow::Node source) { this.isKnownSource(source) }

  /**
   * Holds if `sink` is a known taint sink or an "effective" sink (a candidate to be classified by
   * an ML model).
   */
  override predicate isSink(DataFlow::Node sink) {
    this.isKnownSink(sink) or this.isEffectiveSink(sink)
  }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if `source` is a known source of flow. Empty by default; subclasses override this to
   * declare their known sources.
   */
  predicate isKnownSource(DataFlow::Node source) { none() }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if `sink` is a known sink of flow.
   */
  final predicate isKnownSink(DataFlow::Node sink) {
    // If the list of characteristics includes positive indicators with maximal confidence for this
    // class, then it's a known sink for the class.
    exists(EndpointCharacteristics::EndpointCharacteristic characteristic |
      characteristic.appliesToEndpoint(sink) and
      characteristic
          .hasImplications(this.getASinkEndpointType(), true, characteristic.maximalConfidence())
    )
  }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if the candidate source `candidateSource` predicted by the machine learning model should
   * be an effective source, i.e. one considered as a possible source of flow in the boosted query.
   * Empty by default; subclasses override this to boost sources.
   */
  predicate isEffectiveSource(DataFlow::Node candidateSource) { none() }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
   * an effective sink, i.e. one considered as a possible sink of flow in the boosted query.
   */
  predicate isEffectiveSink(DataFlow::Node candidateSink) {
    not exists(this.getAReasonSinkExcluded(candidateSink))
  }

  /**
   * Gets a characteristic that causes `candidateSink` to be excluded as an effective sink.
   */
  final EndpointCharacteristics::EndpointCharacteristic getAReasonSinkExcluded(
    DataFlow::Node candidateSink
  ) {
    // An endpoint is an effective sink (sink candidate) if none of its characteristics give much
    // indication whether or not it is a sink. Historically, we used endpoint filters, and scored
    // endpoints that are filtered out neither by a standard endpoint filter nor by an endpoint
    // filter specific to this sink type.
    exists(EndpointCharacteristics::EndpointCharacteristic filter, float confidence |
      filter.appliesToEndpoint(candidateSink) and
      confidence >= filter.mediumConfidence() and
      (
        // Exclude endpoints that have a characteristic that implies they're not sinks for _any_
        // sink type.
        filter.hasImplications(any(NegativeType negative), true, confidence)
        or
        // Exclude endpoints that have a characteristic that implies they're not sinks for _this
        // particular_ sink type.
        filter.hasImplications(this.getASinkEndpointType(), false, confidence)
      ) and
      result = filter
    )
  }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Gets an endpoint type for the sources of this query. A query may have multiple applicable
   * endpoint types for its sources. Empty by default.
   */
  EndpointType getASourceEndpointType() { none() }

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Gets an endpoint type for the sinks of this query. A query may have multiple applicable
   * endpoint types for its sinks.
   */
  abstract EndpointType getASinkEndpointType();

  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * Specifies the default cut-off value that controls how many alerts are produced.
   * The cut-off value must be in the range [0,1].
   * A cut-off value of 0 only produces alerts that are likely true-positives.
   * A cut-off value of 1 produces all alerts including those that are likely false-positives.
   */
  float getScoreCutoff() { result = 0.0 }

  /**
   * Holds if there's an ATM alert (a flow path from `source` to `sink` with ML-determined
   * likelihood `score`) according to this ML-boosted configuration, whereas the unboosted base
   * query does not contain this source and sink combination.
   */
  predicate hasBoostedFlowPath(DataFlow::PathNode source, DataFlow::PathNode sink, float score) {
    this.hasFlowPath(source, sink) and
    not AtmResultsInfo::isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
    score = AtmResultsInfo::getScoreForFlow(source.getNode(), sink.getNode())
  }

  /**
   * Holds if `sink` is an effective sink with flow from some source, which gets used as a sink
   * candidate for scoring with the ML model.
   */
  predicate isSinkCandidateWithFlow(DataFlow::PathNode sink) {
    exists(DataFlow::PathNode source |
      this.hasFlowPath(source, sink) and
      not AtmResultsInfo::isFlowLikelyInBaseQuery(source.getNode(), sink.getNode())
    )
  }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides information about the results of boosted queries for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
import ATMConfig
|
||||
private import BaseScoring
|
||||
private import EndpointScoring as EndpointScoring
|
||||
|
||||
module ATM {
  /**
   * EXPERIMENTAL. This API may change in the future.
   *
   * This module contains informational predicates about the results returned by adaptive threat
   * modeling (ATM).
   */
  module ResultsInfo {
    /**
     * Holds if the flow from `source` to `sink` represents a result with
     * sufficiently high likelihood of being a true-positive.
     */
    pragma[inline]
    private predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
      any(ScoringResults results).shouldResultBeIncluded(source, sink)
    }

    /**
     * EXPERIMENTAL. This API may change in the future.
     *
     * Gets the score for the flow between the source `source` and the sink `sink` in the
     * boosted query.
     */
    pragma[inline]
    float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
      any(DataFlow::Configuration cfg).hasFlow(source, sink) and
      shouldResultBeIncluded(source, sink) and
      // `unique` means this predicate has no result at all (rather than several) if the scoring
      // results yield more than one distinct score for the same source/sink pair.
      result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink))
    }

    /**
     * Pad a score returned from `getScoreForFlow` to a particular length by adding a decimal
     * point if one does not already exist, and "0"s after that decimal point.
     *
     * Note that this predicate must itself define an upper bound on `length`, so that it has a
     * finite number of results. Currently this is defined as 12.
     */
    private string paddedScore(float score, int length) {
      // In this definition, we must restrict the values that `length` and `score` can take on so
      // that the predicate has a finite number of results.
      (score = getScoreForFlow(_, _) or score = 0) and
      length = result.length() and
      (
        // We need to make sure the padded score contains a "." so lexically sorting the padded
        // scores is equivalent to numerically sorting the scores.
        score.toString().charAt(_) = "." and
        result = score.toString()
        or
        not score.toString().charAt(_) = "." and
        result = score.toString() + "."
      )
      or
      // Recursive case: extend a shorter padded score with a trailing "0", bounded at length 12.
      result = paddedScore(score, length - 1) + "0" and
      length <= 12
    }

    /**
     * EXPERIMENTAL. This API may change in the future.
     *
     * Gets a string representing the score of the flow between `source` and `sink` in the
     * boosted query.
     *
     * The returned string is a fixed length, such that lexically sorting the strings returned by
     * this predicate gives the same sort order as numerically sorting the scores of the flows.
     */
    pragma[inline]
    string getScoreStringForFlow(DataFlow::Node source, DataFlow::Node sink) {
      exists(float score |
        score = getScoreForFlow(source, sink) and
        (
          // A length of 12 is equivalent to 10 decimal places.
          score.toString().length() >= 12 and
          result = score.toString().substring(0, 12)
          or
          score.toString().length() < 12 and
          result = paddedScore(score, 12)
        )
      )
    }

    /**
     * EXPERIMENTAL. This API may change in the future.
     *
     * Holds if the flow from `source` to `sink` is likely to be reported by the base security
     * query.
     *
     * Currently this is a heuristic: it ignores potential differences in the definitions of
     * additional flow steps.
     */
    pragma[inline]
    predicate isFlowLikelyInBaseQuery(DataFlow::Node source, DataFlow::Node sink) {
      getCfg().isKnownSource(source) and getCfg().isKnownSink(sink)
    }

    /**
     * EXPERIMENTAL. This API may change in the future.
     *
     * Gets additional information about why ATM included the flow from `source` to `sink` as an
     * alert, formatted as "[Source origins: ...; sink origins: ...]", with "unknown" standing in
     * for an empty origin list.
     */
    pragma[inline]
    string getAdditionalAlertInfo(DataFlow::Node source, DataFlow::Node sink) {
      exists(string sourceOrigins, string sinkOrigins |
        sourceOrigins = concat(any(ScoringResults results).getASourceOrigin(source), ", ") and
        sinkOrigins = concat(any(ScoringResults results).getASinkOrigin(sink), ", ") and
        result =
          "[Source origins: " +
            any(string s | if sourceOrigins != "" then s = sourceOrigins else s = "unknown") +
            "; sink origins: " +
            any(string s | if sinkOrigins != "" then s = sinkOrigins else s = "unknown") + "]"
      )
    }
  }
}
|
||||
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides shared scoring functionality for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
private import ATMConfig
|
||||
|
||||
/**
 * Holds if a machine learning model with the given checksum, language, name, and type is
 * available. Being `external`, this relation is supplied at evaluation time rather than
 * computed from the database.
 */
external predicate availableMlModels(
  string modelChecksum, string modelLanguage, string modelName, string modelType
);
|
||||
|
||||
/** Gets the ATM configuration. */
AtmConfig getCfg() { result = any(AtmConfig cfg) }
|
||||
|
||||
/**
 * A string containing scoring information produced by a scoring model.
 *
 * Scoring models include embedding models and endpoint scoring models.
 */
abstract class ScoringResults extends string {
  // No-op characteristic predicate: concrete subclasses supply the string value in their own
  // charpred, hence the `bindingset[this]`.
  bindingset[this]
  ScoringResults() { any() }

  /**
   * Gets ATM's confidence that a path between `source` and `sink` represents a security
   * vulnerability. This will be a number between 0.0 and 1.0.
   */
  abstract float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink);

  /**
   * Gets a string representing why ATM included the given source in the dataflow analysis.
   *
   * In general, there may be multiple reasons why ATM included the given source, in which case
   * this predicate should have multiple results.
   */
  abstract string getASourceOrigin(DataFlow::Node source);

  /**
   * Gets a string representing why ATM included the given sink in the dataflow analysis.
   *
   * In general, there may be multiple reasons why ATM included the given sink, in which case this
   * predicate should have multiple results.
   */
  abstract string getASinkOrigin(DataFlow::Node sink);

  /**
   * Holds if the flow from `source` to `sink` represents a result with
   * sufficiently high likelihood of being a true-positive.
   */
  pragma[inline]
  abstract predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink);
}
|
||||
@@ -0,0 +1,607 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*/
|
||||
|
||||
private import java as java
|
||||
import semmle.code.java.dataflow.TaintTracking
|
||||
import semmle.code.java.security.QueryInjection
|
||||
import semmle.code.java.security.PathCreation
|
||||
import semmle.code.java.security.RequestForgery
|
||||
private import semmle.code.java.dataflow.ExternalFlow
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlTaintedATM
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM
|
||||
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import semmle.code.java.Expr as Expr
|
||||
|
||||
/**
 * Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
 * characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
 * an error message indicating why this combination is problematic.
 *
 * Copied from javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
query predicate erroneousEndpoints(
  DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass,
  float confidence, string errorMessage
) {
  // An endpoint's characteristics should not include positive indicators with medium/high confidence for more than one
  // class.
  exists(EndpointCharacteristic characteristic2, EndpointType endpointClass2, float confidence2 |
    endpointClass.getEncoding() != endpointClass2.getEncoding() and
    characteristic.appliesToEndpoint(endpoint) and
    characteristic2.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointClass, true, confidence) and
    characteristic2.hasImplications(endpointClass2, true, confidence2) and
    confidence > characteristic.mediumConfidence() and
    confidence2 > characteristic2.mediumConfidence() and
    // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
    // treated by the actual query as a sanitizer, since the final logic is something like
    // `isSink(n) and not isSanitizer(n)`.
    not (
      characteristic instanceof IsSanitizerCharacteristic or
      characteristic2 instanceof IsSanitizerCharacteristic
    )
  ) and
  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
  or
  // An endpoint's characteristics should not include positive indicators with medium/high confidence for some class and
  // also include negative indicators with medium/high confidence for this same class.
  exists(EndpointCharacteristic characteristic2, float confidence2 |
    characteristic.appliesToEndpoint(endpoint) and
    characteristic2.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointClass, true, confidence) and
    characteristic2.hasImplications(endpointClass, false, confidence2) and
    confidence > characteristic.mediumConfidence() and
    confidence2 > characteristic2.mediumConfidence()
  ) and
  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
}
|
||||
|
||||
/**
 * Holds, with an explanatory `errorMessage`, if `characteristic` declares an implication whose
 * `confidence` value lies outside the valid range [0, 1].
 */
query predicate erroneousConfidences(
  EndpointCharacteristic characteristic, float confidence, string errorMessage
) {
  errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]" and
  characteristic.hasImplications(_, _, confidence) and
  not (confidence >= 0 and confidence <= 1)
}
|
||||
|
||||
/**
 * A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
 * about whether to include the endpoint in the training set and with what label, as well as whether to score the
 * endpoint at inference time.
 */
abstract class EndpointCharacteristic extends string {
  /**
   * Holds when the string matches the name of the characteristic, which should describe some characteristic of the
   * endpoint that is meaningful for determining whether it's a sink and if so of which type.
   */
  bindingset[this]
  EndpointCharacteristic() { any() }

  /**
   * Holds for endpoints that have this characteristic. This predicate contains the logic that applies characteristics
   * to the appropriate set of dataflow nodes.
   */
  abstract predicate appliesToEndpoint(DataFlow::Node n);

  /**
   * This predicate describes what the characteristic tells us about an endpoint.
   *
   * Params:
   * endpointClass: The sink type. Each EndpointType has a predicate getEncoding, which specifies the classifier
   *   class for this sink type. Class 0 is the negative class (non-sink). Each positive int corresponds to a single
   *   sink type.
   * isPositiveIndicator: If true, this characteristic indicates that this endpoint _is_ a member of the class; if
   *   false, it indicates that it _isn't_ a member of the class.
   * confidence: A float in [0, 1], which tells us how strong an indicator this characteristic is for the endpoint
   *   belonging / not belonging to the given class. A confidence near zero means this characteristic is a very weak
   *   indicator of whether or not the endpoint belongs to the class. A confidence of 1 means that all endpoints with
   *   this characteristic definitively do/don't belong to the class.
   */
  abstract predicate hasImplications(
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  );

  /** Indicators with confidence at or above this threshold are considered to be high-confidence indicators. */
  final float getHighConfidenceThreshold() { result = 0.8 }

  // The following are some confidence values that are used in practice by the subclasses. They are defined as named
  // constants here to make it easier to change them in the future.
  /** Gets the confidence value (1.0) of a definitive indicator. */
  final float maximalConfidence() { result = 1.0 }

  /** Gets the confidence value (0.9) of a strong, but not definitive, indicator. */
  final float highConfidence() { result = 0.9 }

  /** Gets the confidence value (0.6) of a moderate indicator. */
  final float mediumConfidence() { result = 0.6 }
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Helper predicates.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
/** Holds if the data flow node `n` corresponds to an expression that is a type access. */
predicate isTypeAccess(DataFlow::Node n) { exists(TypeAccess access | access = n.asExpr()) }
|
||||
|
||||
/**
 * Holds if `n` has the given metadata.
 *
 * This is a helper function to extract and export needed information about each endpoint in the sink candidate query as
 * well as the queries that extract positive and negative examples for the prompt / training set. The metadata is
 * extracted as a string in the format of a Python dictionary.
 */
predicate hasMetadata(DataFlow::Node n, string metadata) {
  exists(
    Callable callee, Call call, string package, string type, boolean subtypes, string name,
    string signature, string ext, int input, string provenance, boolean isPublic,
    boolean isExternalApiDataNode
  |
    // `n` is the `input`-th argument of a call whose target is `callee`.
    n.asExpr() = call.getArgument(input) and
    callee = call.getCallee() and
    package = callee.getDeclaringType().getPackage().getName() and
    type = callee.getDeclaringType().getName() and // TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
    (
      // A final callee on a final type cannot be overridden, so subtypes need not be considered.
      if callee.isFinal() or callee.getDeclaringType().isFinal()
      then subtypes = false // See https://github.com/github/codeql-java-team/issues/254#issuecomment-1422296423
      else subtypes = true
    ) and
    name = callee.getName() and // TODO: Will this work for constructors?
    signature = paramsString(callee) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
    ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
    provenance = "manual" and // TODO
    (if callee.isPublic() then isPublic = true else isPublic = false) and
    (
      if n instanceof ExternalAPIs::ExternalApiDataNode
      then isExternalApiDataNode = true
      else isExternalApiDataNode = false
    ) and
    metadata =
      "{'Package': '" + package + "', 'Type': '" + type + "', 'Subtypes': " + subtypes +
        ", 'Name': '" + name + "', 'Signature': '" + signature + "', 'Ext': '" + ext +
        "', 'Argument index': " + input + ", 'Provenance': '" + provenance + "', 'Is public': " +
        isPublic + ", 'Is passed to external API': " + isExternalApiDataNode + "}" // TODO: Why are the curly braces added twice?
  )
}
|
||||
|
||||
// private predicate isKnownExternalApiQuerySink(DataFlow::Node n) {
|
||||
// n instanceof Xxe::Sink or
|
||||
// n instanceof TaintedPath::Sink or
|
||||
// n instanceof XpathInjection::Sink or
|
||||
// n instanceof Xss::Sink or
|
||||
// n instanceof ClientSideUrlRedirect::Sink or
|
||||
// n instanceof CodeInjection::Sink or
|
||||
// n instanceof RequestForgery::Sink or
|
||||
// n instanceof CorsMisconfigurationForCredentials::Sink or
|
||||
// n instanceof CommandInjection::Sink or
|
||||
// n instanceof PrototypePollution::Sink or
|
||||
// n instanceof UnvalidatedDynamicMethodCall::Sink or
|
||||
// n instanceof TaintedFormatString::Sink or
|
||||
// n instanceof NosqlInjection::Sink or
|
||||
// n instanceof PostMessageStar::Sink or
|
||||
// n instanceof RegExpInjection::Sink or
|
||||
// n instanceof SqlTainted::Sink or
|
||||
// n instanceof XmlBomb::Sink or
|
||||
// n instanceof ZipSlip::Sink or
|
||||
// n instanceof UnsafeDeserialization::Sink or
|
||||
// n instanceof ServerSideUrlRedirect::Sink or
|
||||
// n instanceof CleartextStorage::Sink or
|
||||
// n instanceof HttpToFileAccess::Sink
|
||||
// }
|
||||
// /**
|
||||
// * Holds if the node `n` is a known sink in a modeled library.
|
||||
// */
|
||||
// private predicate isKnownLibrarySink(DataFlow::Node n) {
|
||||
// isKnownExternalApiQuerySink(n) or
|
||||
// n instanceof CleartextLogging::Sink or
|
||||
// n instanceof StackTraceExposure::Sink or
|
||||
// n instanceof ShellCommandInjectionFromEnvironment::Sink or
|
||||
// n instanceof InsecureRandomness::Sink or
|
||||
// n instanceof FileAccessToHttp::Sink or
|
||||
// n instanceof IndirectCommandInjection::Sink
|
||||
// }
|
||||
// /**
|
||||
// * Holds if the node `n` is known as the predecessor in a modeled flow step.
|
||||
// */
|
||||
// private predicate isKnownStepSrc(DataFlow::Node n) {
|
||||
// TaintTracking::sharedTaintStep(n, _) or
|
||||
// DataFlow::SharedFlowStep::step(n, _) or
|
||||
// DataFlow::SharedFlowStep::step(n, _, _, _)
|
||||
// }
|
||||
// /**
|
||||
// * Holds if the data flow node is a (possibly indirect) argument of a likely external library call.
|
||||
// *
|
||||
// * This includes direct arguments of likely external library calls as well as nested object
|
||||
// * literals within those calls.
|
||||
// */
|
||||
// private predicate flowsToArgumentOfLikelyExternalLibraryCall(DataFlow::Node n) {
|
||||
// n = getACallWithoutCallee().getAnArgument()
|
||||
// or
|
||||
// exists(DataFlow::SourceNode src | flowsToArgumentOfLikelyExternalLibraryCall(src) |
|
||||
// n = src.getAPropertyWrite().getRhs()
|
||||
// )
|
||||
// or
|
||||
// exists(DataFlow::ArrayCreationNode arr | flowsToArgumentOfLikelyExternalLibraryCall(arr) |
|
||||
// n = arr.getAnElement()
|
||||
// )
|
||||
// }
|
||||
// /**
|
||||
// * Get calls for which we do not have the callee (i.e. the definition of the called function). This
|
||||
// * acts as a heuristic for identifying calls to external library functions.
|
||||
// */
|
||||
// private DataFlow::CallNode getACallWithoutCallee() {
|
||||
// forall(Function callee | callee = result.getACallee() | callee.getTopLevel().isExterns()) and
|
||||
// not exists(DataFlow::ParameterNode param, DataFlow::FunctionNode callback |
|
||||
// param.flowsTo(result.getCalleeNode()) and
|
||||
// callback = getACallback(param, DataFlow::TypeBackTracker::end())
|
||||
// )
|
||||
// }
|
||||
// /**
|
||||
// * Gets a node that flows to callback-parameter `p`.
|
||||
// */
|
||||
// private DataFlow::SourceNode getACallback(DataFlow::ParameterNode p, DataFlow::TypeBackTracker t) {
|
||||
// t.start() and
|
||||
// result = p and
|
||||
// any(DataFlow::FunctionNode f).getLastParameter() = p and
|
||||
// exists(p.getACall())
|
||||
// or
|
||||
// exists(DataFlow::TypeBackTracker t2 | result = getACallback(p, t2).backtrack(t2, t))
|
||||
// }
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Characteristics that are indicative of a sink.
|
||||
// NOTE: Initially each sink type has only one characteristic, which is that it's a sink of this type in the standard
|
||||
// Java libraries.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// /**
|
||||
// * Endpoints identified as "DomBasedXssSink" by the standard Java libraries are XSS sinks with maximal confidence.
|
||||
// */
|
||||
// private class DomBasedXssSinkCharacteristic extends EndpointCharacteristic {
|
||||
// DomBasedXssSinkCharacteristic() { this = any(XssSinkType type).getDescription() }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof DomBasedXss::Sink }
|
||||
// override predicate hasImplications(
|
||||
// EndpointType endpointClass, boolean isPositiveIndicator, float confidence
|
||||
// ) {
|
||||
// endpointClass instanceof XssSinkType and
|
||||
// isPositiveIndicator = true and
|
||||
// confidence = maximalConfidence()
|
||||
// }
|
||||
// }
|
||||
/**
 * Endpoints identified as "TaintedPathSink" by the standard Java libraries are path injection sinks
 * with maximal confidence.
 */
private class TaintedPathSinkCharacteristic extends EndpointCharacteristic {
  TaintedPathSinkCharacteristic() { this = any(TaintedPathSinkType type).getDescription() }

  override predicate appliesToEndpoint(DataFlow::Node n) {
    // Endpoints modeled as "create-file" sinks, plus inputs to path-creation operations.
    sinkNode(n, "create-file")
    or
    exists(PathCreation creation | n.asExpr() = creation.getAnInput())
  }

  override predicate hasImplications(
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  ) {
    isPositiveIndicator = true and
    confidence = maximalConfidence() and
    endpointClass instanceof TaintedPathSinkType
  }
}
|
||||
|
||||
/**
 * Endpoints identified as "SqlTaintedSink" by the standard Java libraries are SQL injection sinks
 * with maximal confidence.
 */
private class SqlTaintedSinkCharacteristic extends EndpointCharacteristic {
  SqlTaintedSinkCharacteristic() { this = any(SqlTaintedSinkType type).getDescription() }

  override predicate appliesToEndpoint(DataFlow::Node n) {
    // Any node recognised as a query-injection sink by the standard library models.
    exists(QueryInjectionSink sink | sink = n)
  }

  override predicate hasImplications(
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  ) {
    isPositiveIndicator = true and
    confidence = maximalConfidence() and
    endpointClass instanceof SqlTaintedSinkType
  }
}
|
||||
|
||||
/**
|
||||
* Endpoints identified as "RequestForgerySink" by the standard Java libraries are server-side request forgery sinks
|
||||
* with maximal confidence.
|
||||
*/
|
||||
private class RequestForgerySinkCharacteristic extends EndpointCharacteristic {
|
||||
RequestForgerySinkCharacteristic() { this = any(RequestForgerySinkType type).getDescription() }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof RequestForgerySink }
|
||||
|
||||
override predicate hasImplications(
|
||||
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
|
||||
) {
|
||||
endpointClass instanceof RequestForgerySinkType and
|
||||
isPositiveIndicator = true and
|
||||
confidence = maximalConfidence()
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Characteristics that are indicative of not being a sink of any type, and have historically been used to select
|
||||
// negative samples for training.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
/**
|
||||
* A characteristic that is an indicator of not being a sink of any type, because it's a modeled argument.
|
||||
*/
|
||||
abstract class OtherModeledArgumentCharacteristic extends EndpointCharacteristic {
|
||||
bindingset[this]
|
||||
OtherModeledArgumentCharacteristic() { any() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A characteristic that is an indicator of not being a sink of any type, because it's an argument to a function of a
|
||||
* builtin object.
|
||||
*/
|
||||
abstract private class ArgumentToBuiltinFunctionCharacteristic extends OtherModeledArgumentCharacteristic {
|
||||
bindingset[this]
|
||||
ArgumentToBuiltinFunctionCharacteristic() { any() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A high-confidence characteristic that indicates that an endpoint is not a sink of any type.
|
||||
*/
|
||||
abstract private class NotASinkCharacteristic extends EndpointCharacteristic {
|
||||
bindingset[this]
|
||||
NotASinkCharacteristic() { any() }
|
||||
|
||||
override predicate hasImplications(
|
||||
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
|
||||
) {
|
||||
endpointClass instanceof NegativeType and
|
||||
isPositiveIndicator = true and
|
||||
confidence = highConfidence()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A medium-confidence characteristic that indicates that an endpoint is not a sink of any type.
|
||||
*
|
||||
* TODO: This class is currently not private, because the current extraction logic explicitly avoids including these
|
||||
* endpoints in the training data. We might want to change this in the future.
|
||||
*/
|
||||
abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic {
|
||||
bindingset[this]
|
||||
LikelyNotASinkCharacteristic() { any() }
|
||||
|
||||
override predicate hasImplications(
|
||||
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
|
||||
) {
|
||||
endpointClass instanceof NegativeType and
|
||||
isPositiveIndicator = true and
|
||||
confidence = mediumConfidence()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is a type access. Type accesses are not sinks.
|
||||
*/
|
||||
private class IsTypeAccessCharacteristic extends NotASinkCharacteristic {
|
||||
IsTypeAccessCharacteristic() { this = "type access" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) { isTypeAccess(n) }
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is a sanitizer for some sink type. A sanitizer can
|
||||
* never be a sink.
|
||||
*/
|
||||
private class IsSanitizerCharacteristic extends NotASinkCharacteristic {
|
||||
IsSanitizerCharacteristic() { this = "is sanitizer" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
exists(AtmConfig config | config.isSanitizer(n))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is an argument to a safe external API method.
|
||||
*
|
||||
* Based on java/ql/lib/semmle/code/java/security/ExternalAPIs.qll.
|
||||
*
|
||||
* TODO: Is this correct?
|
||||
*/
|
||||
private class SafeExternalApiMethodCharacteristic extends NotASinkCharacteristic {
|
||||
string baseDescription;
|
||||
|
||||
SafeExternalApiMethodCharacteristic() {
|
||||
baseDescription = "safe external API method " and
|
||||
this = any(string s | s = baseDescription + ["org.junit", "other than org.junit"])
|
||||
}
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
exists(Expr::Call call |
|
||||
n.asExpr() = call.getAnArgument() and
|
||||
call.getCallee() instanceof ExternalAPIs::SafeExternalApiMethod and
|
||||
(
|
||||
// The vast majority of calls to safe external API methods involve junit. To get a diverse set of negative
|
||||
// examples, we break those off into a separate characteristic.
|
||||
call.getCallee().getDeclaringType().getPackage().getName().matches("org.junit%") and
|
||||
this = baseDescription + "org.junit"
|
||||
or
|
||||
not call.getCallee().getDeclaringType().getPackage().getName().matches("org.junit%") and
|
||||
this = baseDescription + "other than org.junit"
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Characteristics that have historically acted as endpoint filters to exclude endpoints from scoring at inference time.
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
/** A characteristic that has historically acted as an endpoint filter for inference-time scoring. */
|
||||
abstract class EndpointFilterCharacteristic extends EndpointCharacteristic {
|
||||
bindingset[this]
|
||||
EndpointFilterCharacteristic() { any() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is unlikely to be a sink of any type.
|
||||
*/
|
||||
abstract private class StandardEndpointFilterCharacteristic extends EndpointFilterCharacteristic {
|
||||
bindingset[this]
|
||||
StandardEndpointFilterCharacteristic() { any() }
|
||||
|
||||
override predicate hasImplications(
|
||||
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
|
||||
) {
|
||||
endpointClass instanceof NegativeType and
|
||||
isPositiveIndicator = true and
|
||||
confidence = mediumConfidence()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is a constant expression. While a constant expression
|
||||
* can be a sink, it cannot be part of a tainted flow: Constant expressions always evaluate to a constant primitive
|
||||
* value, so they can't ever appear in an alert. These endpoints are therefore excluded from scoring at inference time.
|
||||
*
|
||||
* WARNING: These endpoints should not be used as negative samples for training, because they are not necessarily
|
||||
* non-sinks. They are merely not interesting sinks to run through the ML model because they can never be part of a
|
||||
* tainted flow.
|
||||
*/
|
||||
class IsConstantExpressionCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
IsConstantExpressionCharacteristic() { this = "constant expression" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
n.asExpr() instanceof CompileTimeConstantExpr
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is not part of the source code for the project being
|
||||
* analyzed.
|
||||
*
|
||||
* WARNING: These endpoints should not be used as negative samples for training, because they are not necessarily
|
||||
* non-sinks. They are merely not interesting sinks to run through the ML model.
|
||||
*/
|
||||
private class IsExternalCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
IsExternalCharacteristic() { this = "external" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
not exists(n.getLocation().getFile().getRelativePath())
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint is not the final step in a taint propagation. This
|
||||
* prevents us from detecting expresssions near sinks that are not the sink itself.
|
||||
*
|
||||
* WARNING: These endpoints should not be used as negative samples for training, because a there are rare situations
|
||||
* where a node is both a sink and the `from` node of a flow step: when the called API uses the given value dangerously
|
||||
* and then returns the given value. Example: `stillTainted = dangerous(tainted)`, assuming that the implementation of
|
||||
* `dangerous(x)` eventually returns `x`.
|
||||
*/
|
||||
private class IsFlowStep extends StandardEndpointFilterCharacteristic {
|
||||
IsFlowStep() { this = "flow step" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) { isKnownStepSrc(n) }
|
||||
|
||||
/**
|
||||
* Holds if the node `n` is known as the predecessor in a modeled flow step.
|
||||
*/
|
||||
private predicate isKnownStepSrc(DataFlow::Node n) {
|
||||
any(TaintTracking::Configuration c).isAdditionalFlowStep(n, _) or
|
||||
TaintTracking::localTaintStep(n, _)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An EndpointFilterCharacteristic that indicates that an endpoint sits in a test file.
|
||||
*
|
||||
* WARNING: These endpoints should not be used as negative samples for training, because there can in fact be sinks in
|
||||
* test files -- we just don't care to model them because they aren't exploitable.
|
||||
*/
|
||||
private class TestFileCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
TestFileCharacteristic() { this = "test file" }
|
||||
|
||||
override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
exists(File f | f = n.getLocation().getFile() and isInTestFile(f))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `file` is a test file. Copied from java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll.
|
||||
*
|
||||
* TODO: Why can't I import utils.modelgenerator.internal.CaptureModelsSpecific?
|
||||
*/
|
||||
private predicate isInTestFile(File file) {
|
||||
file.getAbsolutePath().matches("%src/test/%") or
|
||||
file.getAbsolutePath().matches("%/guava-tests/%") or
|
||||
file.getAbsolutePath().matches("%/guava-testlib/%")
|
||||
}
|
||||
}
|
||||
// class IsArgumentToModeledFunctionCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// IsArgumentToModeledFunctionCharacteristic() { this = "argument to modeled function" }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// exists(DataFlow::InvokeNode invk, DataFlow::Node known |
|
||||
// invk.getAnArgument() = n and
|
||||
// invk.getAnArgument() = known and
|
||||
// (
|
||||
// isKnownLibrarySink(known)
|
||||
// or
|
||||
// isKnownStepSrc(known)
|
||||
// or
|
||||
// exists(OtherModeledArgumentCharacteristic characteristic |
|
||||
// characteristic.appliesToEndpoint(known)
|
||||
// )
|
||||
// )
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
// private class IsArgumentToSinklessLibraryCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// IsArgumentToSinklessLibraryCharacteristic() { this = "argument to sinkless library" }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// exists(DataFlow::InvokeNode invk, DataFlow::SourceNode commonSafeLibrary, string libraryName |
|
||||
// libraryName = ["slugify", "striptags", "marked"]
|
||||
// |
|
||||
// commonSafeLibrary = DataFlow::moduleImport(libraryName) and
|
||||
// invk = [commonSafeLibrary, commonSafeLibrary.getAPropertyRead()].getAnInvocation() and
|
||||
// n = invk.getAnArgument()
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
// private class IsSanitizerCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// IsSanitizerCharacteristic() { this = "sanitizer" }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// exists(DataFlow::CallNode call | n = call.getAnArgument() |
|
||||
// call.getCalleeName().regexpMatch("(?i).*(escape|valid(ate)?|sanitize|purify).*")
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
// private class IsPredicateCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// IsPredicateCharacteristic() { this = "predicate" }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// exists(DataFlow::CallNode call | n = call.getAnArgument() |
|
||||
// call.getCalleeName().regexpMatch("(equals|(|is|has|can)(_|[A-Z])).*")
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
// private class IsHashCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// IsHashCharacteristic() { this = "hash" }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// exists(DataFlow::CallNode call | n = call.getAnArgument() |
|
||||
// call.getCalleeName().regexpMatch("(?i)^(sha\\d*|md5|hash)$")
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
// private class IsNumericCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// IsNumericCharacteristic() { this = "numeric" }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// SyntacticHeuristics::isReadFrom(n, ".*index.*")
|
||||
// }
|
||||
// }
|
||||
// private class InIrrelevantFileCharacteristic extends StandardEndpointFilterCharacteristic {
|
||||
// private string category;
|
||||
// InIrrelevantFileCharacteristic() {
|
||||
// this = "in " + category + " file" and category = ["externs", "generated", "library", "test"]
|
||||
// }
|
||||
// override predicate appliesToEndpoint(DataFlow::Node n) {
|
||||
// // Ignore candidate sinks within externs, generated, library, and test code
|
||||
// ClassifyFiles::classify(n.getFile(), category)
|
||||
// }
|
||||
// }
|
||||
@@ -0,0 +1,139 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts data about the database for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
private import FeaturizationConfig
|
||||
|
||||
/**
|
||||
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
|
||||
*
|
||||
* This is a single string containing a space-separated list of tokens.
|
||||
*/
|
||||
private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
|
||||
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
|
||||
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
|
||||
exists(EndpointFeature f | f.getName() = featureName and result = f.getValue(endpoint)) and
|
||||
featureName = getASupportedFeatureName()
|
||||
}
|
||||
|
||||
/** Get a name of a supported generic token-based feature. */
|
||||
string getASupportedFeatureName() { result = any(EndpointFeature f).getName() }
|
||||
|
||||
/**
|
||||
* Generic token-based features for ATM.
|
||||
*
|
||||
* This predicate holds if the generic token-based feature named `featureName` has the value
|
||||
* `featureValue` for the endpoint `endpoint`.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
|
||||
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
|
||||
featureValue = getTokenFeature(endpoint, featureName)
|
||||
}
|
||||
|
||||
/**
|
||||
* See EndpointFeature
|
||||
*/
|
||||
private newtype TEndpointFeature =
|
||||
TEnclosingFunctionName() or
|
||||
TInputArgumentIndex() or
|
||||
TCalleeFlexibleAccessPath() or
|
||||
TEnclosingFunctionSignature() or
|
||||
TContextFunctionInterfaces()
|
||||
|
||||
/**
|
||||
* An implementation of an endpoint feature: defines feature-name/value tuples for use in ML.
|
||||
*/
|
||||
abstract class EndpointFeature extends TEndpointFeature {
|
||||
/**
|
||||
* Gets the name of the feature. Used by the ML model.
|
||||
* Names are coupled to models: changing the name of a feature requires retraining the model.
|
||||
*/
|
||||
abstract string getName();
|
||||
|
||||
/**
|
||||
* Gets the value of the feature. Used by the ML model.
|
||||
* Models are trained based on feature values, so changing the value of a feature requires retraining the model.
|
||||
*/
|
||||
abstract string getValue(DataFlow::Node endpoint);
|
||||
|
||||
string toString() { result = this.getName() }
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Feature: EnclosingFunctionName
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
/**
|
||||
* The feature for the name of the function that encloses the endpoint.
|
||||
*/
|
||||
class EnclosingFunctionName extends EndpointFeature, TEnclosingFunctionName {
|
||||
override string getName() { result = "enclosingFunctionName" }
|
||||
|
||||
override string getValue(DataFlow::Node endpoint) {
|
||||
result = endpoint.getEnclosingCallable().getName()
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Feature: InputArgumentIndex
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
class InputArgumentIndex extends EndpointFeature, TInputArgumentIndex {
|
||||
override string getName() { result = "InputArgumentIndex" }
|
||||
|
||||
override string getValue(DataFlow::Node endpoint) {
|
||||
exists(Argument arg | endpoint.asExpr() = arg and result = arg.getPosition().toString())
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Feature: CalleeFlexibleAccessPath
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
class CalleeFlexibleAccessPath extends EndpointFeature, TCalleeFlexibleAccessPath {
|
||||
override string getName() { result = "CalleeFlexibleAccessPath" }
|
||||
|
||||
override string getValue(DataFlow::Node endpoint) {
|
||||
exists(Callable callee, Call call, string package, string type, string name |
|
||||
endpoint.asExpr() = call.getAnArgument() and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
name = callee.getName() and
|
||||
result = package + "." + type + "." + name
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Feature: EnclosingFunctionSignature
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
class EnclosingFunctionSignature extends EndpointFeature, TEnclosingFunctionSignature {
|
||||
override string getName() { result = "enclosingFunctionSignature" }
|
||||
|
||||
override string getValue(DataFlow::Node endpoint) {
|
||||
exists(Callable callee |
|
||||
callee = endpoint.getEnclosingCallable() and
|
||||
result = callee.paramsString()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
// Feature: ContextFunctionInterfaces
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
class ContextFunctionInterfaces extends EndpointFeature, TContextFunctionInterfaces {
|
||||
override string getName() { result = "contextFunctionInterfaces" }
|
||||
|
||||
override string getValue(DataFlow::Node endpoint) {
|
||||
result =
|
||||
concat(Method method, string line |
|
||||
method.getLocation().getFile() = endpoint.getLocation().getFile() and
|
||||
line = method.getStringSignature()
|
||||
|
|
||||
line, "\n" order by line
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides an implementation of scoring alerts for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
private import BaseScoring
|
||||
private import EndpointFeatures as EndpointFeatures
|
||||
private import FeaturizationConfig
|
||||
private import EndpointTypes
|
||||
|
||||
private string getACompatibleModelChecksum() {
|
||||
availableMlModels(result, "java", _, "atm-endpoint-scoring")
|
||||
}
|
||||
|
||||
module ModelScoring {
|
||||
/**
|
||||
* A featurization config that only featurizes new candidate endpoints that are part of a flow
|
||||
* path.
|
||||
*/
|
||||
class RelevantFeaturizationConfig extends FeaturizationConfig {
|
||||
RelevantFeaturizationConfig() { this = "RelevantFeaturization" }
|
||||
|
||||
override DataFlow::Node getAnEndpointToFeaturize() {
|
||||
getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _)
|
||||
or
|
||||
getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result)
|
||||
}
|
||||
}
|
||||
|
||||
DataFlow::Node getARequestedEndpoint() {
|
||||
result = any(FeaturizationConfig cfg).getAnEndpointToFeaturize()
|
||||
}
|
||||
|
||||
private int getARequestedEndpointType() { result = any(EndpointType type).getEncoding() }
|
||||
|
||||
predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
|
||||
scoreEndpoints(getARequestedEndpoint/0, EndpointFeatures::tokenFeatures/3,
|
||||
EndpointFeatures::getASupportedFeatureName/0, getARequestedEndpointType/0,
|
||||
getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
|
||||
}
|
||||
|
||||
/**
|
||||
* Return ATM's confidence that `source` is a source for the given security query. This will be a
|
||||
* number between 0.0 and 1.0.
|
||||
*/
|
||||
private float getScoreForSource(DataFlow::Node source) {
|
||||
if getCfg().isKnownSource(source)
|
||||
then result = 1.0
|
||||
else (
|
||||
// This restriction on `source` has no semantic effect but improves performance.
|
||||
getCfg().isEffectiveSource(source) and
|
||||
ModelScoring::endpointScores(source, getCfg().getASourceEndpointType().getEncoding(), result)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Return ATM's confidence that `sink` is a sink for the given security query. This will be a
|
||||
* number between 0.0 and 1.0.
|
||||
*/
|
||||
private float getScoreForSink(DataFlow::Node sink) {
|
||||
if getCfg().isKnownSink(sink)
|
||||
then result = 1.0
|
||||
else (
|
||||
// This restriction on `sink` has no semantic effect but improves performance.
|
||||
getCfg().isEffectiveSink(sink) and
|
||||
ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), result)
|
||||
)
|
||||
}
|
||||
|
||||
class EndpointScoringResults extends ScoringResults {
|
||||
EndpointScoringResults() {
|
||||
this = "EndpointScoringResults" and exists(getACompatibleModelChecksum())
|
||||
}
|
||||
|
||||
/**
|
||||
* Get ATM's confidence that a path between `source` and `sink` represents a security
|
||||
* vulnerability. This will be a number between 0.0 and 1.0.
|
||||
*/
|
||||
override float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
result = getScoreForSource(source) * getScoreForSink(sink)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given source in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given source, in which case
|
||||
* this predicate should have multiple results.
|
||||
*/
|
||||
pragma[inline]
|
||||
override string getASourceOrigin(DataFlow::Node source) {
|
||||
result = "known" and getCfg().isKnownSource(source)
|
||||
or
|
||||
result = "predicted" and getCfg().isEffectiveSource(source)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given sink in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given sink, in which case
|
||||
* this predicate should have multiple results.
|
||||
*/
|
||||
pragma[inline]
|
||||
override string getASinkOrigin(DataFlow::Node sink) {
|
||||
result = "known" and getCfg().isKnownSink(sink)
|
||||
or
|
||||
not getCfg().isKnownSink(sink) and
|
||||
result =
|
||||
"predicted (scores: " +
|
||||
concat(EndpointType type, float score |
|
||||
ModelScoring::endpointScores(sink, type.getEncoding(), score)
|
||||
|
|
||||
type.getDescription() + "=" + score.toString(), ", " order by type.getEncoding()
|
||||
) + ")" and
|
||||
getCfg().isEffectiveSink(sink)
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
override predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(source) and
|
||||
if getCfg().isKnownSink(sink)
|
||||
then any()
|
||||
else (
|
||||
// This restriction on `sink` has no semantic effect but improves performance.
|
||||
getCfg().isEffectiveSink(sink) and
|
||||
exists(float sinkScore |
|
||||
ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), sinkScore) and
|
||||
// Include the endpoint if (a) the query endpoint type scores higher than all other
|
||||
// endpoint types, or (b) the query endpoint type scores at least
|
||||
// 0.5 - (getCfg().getScoreCutoff() / 2).
|
||||
sinkScore >=
|
||||
[
|
||||
max(float s | ModelScoring::endpointScores(sink, _, s)),
|
||||
0.5 - getCfg().getScoreCutoff() / 2
|
||||
]
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
module Debugging {
|
||||
query predicate hopInputEndpoints(DataFlow::Node endpoint) {
|
||||
endpoint = ModelScoring::getARequestedEndpoint()
|
||||
}
|
||||
|
||||
query predicate endpointScores = ModelScoring::endpointScores/3;
|
||||
|
||||
query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and
|
||||
any(DataFlow::Configuration cfg).hasFlow(source, sink)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
|
||||
* only predict classes defined within this file. This file is the source of truth for the integer
|
||||
* representation of each of these classes.
|
||||
*/
|
||||
newtype TEndpointType =
|
||||
TNegativeType() or
|
||||
TXssSinkType() or
|
||||
TNosqlInjectionSinkType() or
|
||||
TSqlTaintedSinkType() or
|
||||
TTaintedPathSinkType() or
|
||||
TRequestForgerySinkType()
|
||||
|
||||
/** A class that can be predicted by endpoint scoring models. */
|
||||
abstract class EndpointType extends TEndpointType {
|
||||
abstract string getDescription();
|
||||
|
||||
/**
|
||||
* Gets the integer representation of this endpoint type. This integer representation specifies the class number
|
||||
* used by the endpoint scoring model (the classifier) to represent this endpoint type. Class 0 is the negative
|
||||
* class (non-sink). Each positive int corresponds to a single sink type.
|
||||
*/
|
||||
abstract int getEncoding();
|
||||
|
||||
/**
|
||||
* Gets the name of the sink/source kind for this endpoint type as used in Models as Data.
|
||||
*
|
||||
* See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31
|
||||
*/
|
||||
abstract string getKind();
|
||||
|
||||
string toString() { result = getDescription() }
|
||||
}
|
||||
|
||||
/** The `Negative` class that can be predicted by endpoint scoring models. */
|
||||
class NegativeType extends EndpointType, TNegativeType {
|
||||
override string getDescription() { result = "Negative" }
|
||||
|
||||
override int getEncoding() { result = 0 }
|
||||
|
||||
override string getKind() { result = "" }
|
||||
}
|
||||
|
||||
/** The `SqlTaintedSink` class that can be predicted by endpoint scoring models. */
|
||||
class SqlTaintedSinkType extends EndpointType, TSqlTaintedSinkType {
|
||||
override string getDescription() { result = "SqlTaintedSink" }
|
||||
|
||||
override int getEncoding() { result = 1 }
|
||||
|
||||
override string getKind() { result = "sql" }
|
||||
}
|
||||
|
||||
/** The `TaintedPathSink` class that can be predicted by endpoint scoring models. */
|
||||
class TaintedPathSinkType extends EndpointType, TTaintedPathSinkType {
|
||||
override string getDescription() { result = "TaintedPathSink" }
|
||||
|
||||
override int getEncoding() { result = 2 }
|
||||
|
||||
override string getKind() { result = "create-file" }
|
||||
}
|
||||
|
||||
/** The `RequestForgerySinkType` class that can be predicted by endpoint scoring models. */
|
||||
class RequestForgerySinkType extends EndpointType, TRequestForgerySinkType {
|
||||
override string getDescription() { result = "RequestForgerySink" }
|
||||
|
||||
override int getEncoding() { result = 3 }
|
||||
|
||||
override string getKind() { result = "open-url" } // TODO: is this correct, or should it be “jdbc-url”?
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
import java
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
|
||||
/**
|
||||
* A configuration that defines which endpoints should be featurized.
|
||||
*
|
||||
* This is used as a performance optimization to ensure that we only featurize the endpoints we need
|
||||
* to featurize.
|
||||
*/
|
||||
abstract class FeaturizationConfig extends string {
|
||||
bindingset[this]
|
||||
FeaturizationConfig() { any() }
|
||||
|
||||
abstract DataFlow::Node getAnEndpointToFeaturize();
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* FunctionBodyFeatures.qll
|
||||
*
|
||||
* Contains logic relating to the `enclosingFunctionBody` and `enclosingFunctionName` features.
|
||||
*/
|
||||
|
||||
import java
|
||||
private import FeaturizationConfig
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
|
||||
/**
|
||||
* Gets a tokenized representation of the AST node for use in the `enclosingFunctionBody` feature.
|
||||
*/
|
||||
string getTokenizedAstNode(Top top) {
|
||||
result = top.(Variable).getName()
|
||||
or
|
||||
result = top.(Field).getName()
|
||||
or
|
||||
result = top.(Literal).getValue()
|
||||
}
|
||||
|
||||
/** Gets an AST node within the function `f` that we should featurize. */
|
||||
pragma[inline]
|
||||
Element getAnAstNodeToFeaturize(Callable c) {
|
||||
result.(Stmt).getEnclosingCallable() = c or
|
||||
result.(Expr).getEnclosingCallable() = c
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for getAnAstNodeToFeaturize */
|
||||
deprecated Top getAnASTNodeToFeaturize(Callable c) { result = getAnAstNodeToFeaturize(c) }
|
||||
|
||||
/**
|
||||
* Get the enclosing function for an endpoint.
|
||||
*
|
||||
* This is used to compute the `enclosingFunctionBody` and `enclosingFunctionName` features.
|
||||
*/
|
||||
Callable getRepresentativeFunctionForEndpoint(DataFlow::Node endpoint) {
|
||||
// Performance optimization: Restrict the set of endpoints to the endpoints to featurize.
|
||||
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
|
||||
result = endpoint.getEnclosingCallable()
|
||||
}
|
||||
|
||||
/** Returns an AST node within the function `f` that an associated token feature. */
|
||||
Element getAnAstNodeWithAFeature(Callable c) {
|
||||
// Performance optimization: Restrict the set of functions to those containing an endpoint to featurize.
|
||||
c = getRepresentativeFunctionForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
|
||||
result = getAnAstNodeToFeaturize(c)
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for getAnAstNodeWithAFeature */
|
||||
deprecated Element getAnASTNodeWithAFeature(Callable c) { result = getAnAstNodeWithAFeature(c) }
|
||||
|
||||
/** Returns the number of source-code characters in a function. */
|
||||
int getNumCharsInFunction(Callable c) {
|
||||
result =
|
||||
strictsum(Element element |
|
||||
element = getAnAstNodeWithAFeature(c)
|
||||
|
|
||||
getTokenizedAstNode(element).length()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the maximum number of characters a feature can be.
|
||||
* The evaluator string limit is 5395415 characters. We choose a limit lower than this.
|
||||
*/
|
||||
private int getMaxChars() { result = 1000000 }
|
||||
|
||||
/**
|
||||
* Returns a featurized representation of the function that can be used to populate the
|
||||
* `enclosingFunctionBody` feature for an endpoint.
|
||||
*/
|
||||
string getBodyTokensFeature(Callable c) {
|
||||
// Performance optimization: If a function has more than 256 body subtokens, then featurize it as
|
||||
// absent. This approximates the behavior of the classifier on non-generic body features where
|
||||
// large body features are replaced by the absent token.
|
||||
//
|
||||
// We count nodes instead of tokens because tokens are often not unique.
|
||||
strictcount(Element element |
|
||||
element = getAnAstNodeToFeaturize(c) and
|
||||
exists(getTokenizedAstNode(element))
|
||||
) <= 256 and
|
||||
// Performance optimization: If a function has more than getMaxChars() characters in its body subtokens,
|
||||
// then featurize it as absent.
|
||||
getNumCharsInFunction(c) <= getMaxChars() and
|
||||
result =
|
||||
strictconcat(Location l, string token |
|
||||
// The use of a nested exists here allows us to avoid duplicates due to two AST nodes in the
|
||||
// same location featurizing to the same token. By using a nested exists, we take only unique
|
||||
// (location, token) pairs.
|
||||
exists(Element element |
|
||||
element = getAnAstNodeToFeaturize(c) and
|
||||
token = getTokenizedAstNode(element) and
|
||||
l = element.getLocation()
|
||||
)
|
||||
|
|
||||
token, " "
|
||||
order by
|
||||
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
||||
l.getEndColumn(), token
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* A taint-tracking configuration for reasoning about SSRF (server side request forgery) vulnerabilities.
|
||||
* Largely copied from java/ql/lib/semmle/code/java/security/RequestForgeryConfig.qll.
|
||||
*
|
||||
* Only import this directly from .ql files, to avoid the possibility of polluting the Configuration hierarchy
|
||||
* accidentally.
|
||||
*/
|
||||
|
||||
import ATMConfig
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
import semmle.code.java.security.RequestForgery
|
||||
|
||||
class RequestForgeryAtmConfig extends AtmConfig {
|
||||
RequestForgeryAtmConfig() { this = "RequestForgeryAtmConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) {
|
||||
source instanceof RemoteFlowSource and
|
||||
// Exclude results of remote HTTP requests: fetching something else based on that result
|
||||
// is no worse than following a redirect returned by the remote server, and typically
|
||||
// we're requesting a resource via https which we trust to only send us to safe URLs.
|
||||
not source.asExpr().(MethodAccess).getCallee() instanceof UrlConnectionGetInputStreamMethod
|
||||
}
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof RequestForgerySinkType }
|
||||
|
||||
/*
|
||||
* This is largely a copy of the taint tracking configuration for the standard SSRF
|
||||
* query, except additional sinks have been added using the sink endpoint filter.
|
||||
*/
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
|
||||
any(RequestForgeryAdditionalTaintStep r).propagatesTaint(pred, succ)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { node instanceof RequestForgerySanitizer }
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* A taint-tracking configuration for reasoning about SQL injection vulnerabilities.
|
||||
* Defines shared code used by the SQL injection boosted query.
|
||||
* Largely copied from semmle.code.java.security.SqlInjectionQuery.
|
||||
*/
|
||||
|
||||
import ATMConfig
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
import semmle.code.java.security.QueryInjection
|
||||
|
||||
class SqlTaintedAtmConfig extends AtmConfig {
|
||||
SqlTaintedAtmConfig() { this = "SqlTaintedAtmConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof SqlTaintedSinkType }
|
||||
|
||||
/*
|
||||
* This is largely a copy of the taint tracking configuration for the standard SQL injection
|
||||
* query, except additional sinks have been added using the sink endpoint filter.
|
||||
*/
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
node.getType() instanceof PrimitiveType or
|
||||
node.getType() instanceof BoxedType or
|
||||
node.getType() instanceof NumberType
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
any(AdditionalQueryInjectionTaintStep s).step(node1, node2)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* A taint-tracking configuration for reasoning about path injection vulnerabilities.
|
||||
* Defines shared code used by the path injection boosted query.
|
||||
* Largely copied from java/ql/src/Security/CWE/CWE-022/TaintedPath.ql.
|
||||
*/
|
||||
|
||||
import java
|
||||
import semmle.code.java.security.PathSanitizer
|
||||
import ATMConfig
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
|
||||
class TaintedPathAtmConfig extends AtmConfig {
|
||||
TaintedPathAtmConfig() { this = "TaintedPathAtmConfig" }
|
||||
|
||||
override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType }
|
||||
|
||||
/*
|
||||
* This is largely a copy of the taint tracking configuration for the standard path injection
|
||||
* query, except additional ATM sinks have been added to the `isSink` predicate.
|
||||
*/
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer.getType() instanceof BoxedType or
|
||||
sanitizer.getType() instanceof PrimitiveType or
|
||||
sanitizer.getType() instanceof NumberType or
|
||||
sanitizer instanceof PathInjectionSanitizer
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
any(TaintedPathAdditionalTaintStep s).step(n1, n2)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Models a very basic guard for the tainted path queries.
|
||||
* TODO: Copied from java/ql/src/Security/CWE/CWE-022/TaintedPathCommon.qll because I couldn't figure out how to import it.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A unit class for adding additional taint steps.
|
||||
*
|
||||
* Extend this class to add additional taint steps that should apply to tainted path flow configurations.
|
||||
*/
|
||||
class TaintedPathAdditionalTaintStep extends Unit {
|
||||
abstract predicate step(DataFlow::Node n1, DataFlow::Node n2);
|
||||
}
|
||||
|
||||
private class DefaultTaintedPathAdditionalTaintStep extends TaintedPathAdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
exists(Argument a |
|
||||
a = n1.asExpr() and
|
||||
a.getCall() = n2.asExpr() and
|
||||
a = any(TaintPreservingUriCtorParam tpp).getAnArgument()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private class TaintPreservingUriCtorParam extends Parameter {
|
||||
TaintPreservingUriCtorParam() {
|
||||
exists(Constructor ctor, int idx, int nParams |
|
||||
ctor.getDeclaringType() instanceof TypeUri and
|
||||
this = ctor.getParameter(idx) and
|
||||
nParams = ctor.getNumberOfParameters()
|
||||
|
|
||||
// URI(String scheme, String ssp, String fragment)
|
||||
idx = 1 and nParams = 3
|
||||
or
|
||||
// URI(String scheme, String host, String path, String fragment)
|
||||
idx = [1, 2] and nParams = 4
|
||||
or
|
||||
// URI(String scheme, String authority, String path, String query, String fragment)
|
||||
idx = 2 and nParams = 5
|
||||
or
|
||||
// URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)
|
||||
idx = 4 and nParams = 7
|
||||
)
|
||||
}
|
||||
}
|
||||
10
java/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml
Normal file
10
java/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
name: codeql/java-experimental-atm-lib
|
||||
description: CodeQL libraries for the experimental ML-powered queries
|
||||
version: 0.4.5
|
||||
extractor: java
|
||||
library: true
|
||||
groups:
|
||||
- java
|
||||
- experimental
|
||||
dependencies:
|
||||
codeql/java-all: ${workspace}
|
||||
3
java/ql/experimental/adaptivethreatmodeling/model/.gitignore
vendored
Normal file
3
java/ql/experimental/adaptivethreatmodeling/model/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# Avoid checking in ML models
|
||||
# This matches the mlModels property of qlpack.yml.
|
||||
resources/*.codeqlmodel
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -0,0 +1,8 @@
|
||||
name: codeql/java-experimental-atm-model
|
||||
description: Machine learning model supporting the experimental ML-powered queries
|
||||
version: 0.0.1
|
||||
groups:
|
||||
- java
|
||||
- experimental
|
||||
mlModels:
|
||||
- "resources/*.codeqlmodel"
|
||||
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* @name Debug result inclusion
|
||||
* @description Use this query to understand why some alerts are included or excluded from the
|
||||
* results of boosted queries. The results for this query are the union of the alerts
|
||||
* generated by each boosted query. Each alert includes an explanation why it was
|
||||
* included or excluded for each of the four security queries.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @id adaptive-threat-modeling/java/debug-result-inclusion
|
||||
*/
|
||||
|
||||
import java
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import extraction.ExtractEndpointDataTraining
|
||||
private import experimental.adaptivethreatmodeling.SqlTaintedATM as SqlTaintedAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
|
||||
query instanceof SqlTaintedQuery and
|
||||
result = any(SqlTaintedAtm::SqlTaintedAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
query instanceof TaintedPathQuery and
|
||||
result = any(TaintedPathAtm::TaintedPathAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
query instanceof RequestForgeryQuery and
|
||||
result = any(RequestForgeryAtm::RequestForgeryAtmConfig cfg).getAReasonSinkExcluded(sinkCandidate)
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
string getDescriptionForAlertCandidate(
|
||||
DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate, Query query
|
||||
) {
|
||||
result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
|
||||
or
|
||||
getDataFlowCfg(query).(AtmConfig).isKnownSink(sinkCandidate) and
|
||||
result = "excluded[reason=known-sink]"
|
||||
or
|
||||
not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
|
||||
not getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
|
||||
(
|
||||
if
|
||||
getDataFlowCfg(query).isSource(sourceCandidate) or
|
||||
getDataFlowCfg(query).isSource(sourceCandidate, _)
|
||||
then result = "no flow"
|
||||
else result = "not a known source"
|
||||
)
|
||||
or
|
||||
getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
|
||||
result = "included"
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
string getDescriptionForAlert(DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate) {
|
||||
result =
|
||||
concat(Query query |
|
||||
|
|
||||
query.getName() + ": " +
|
||||
getDescriptionForAlertCandidate(sourceCandidate, sinkCandidate, query), ", "
|
||||
)
|
||||
}
|
||||
|
||||
from DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink
|
||||
where cfg.hasFlow(source, sink)
|
||||
select sink,
|
||||
"This is an ATM result that may depend on $@ [" + getDescriptionForAlert(source, sink) + "]",
|
||||
source, "a user-provided value"
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
dependencies:
|
||||
codeql/java-experimental-atm-model:
|
||||
version: 0.0.1
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
*
|
||||
* Count the number of sinks and alerts for a particular dataflow config.
|
||||
*/
|
||||
|
||||
import java
|
||||
import evaluation.EndToEndEvaluation
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
|
||||
query predicate countAlertsAndSinks(int numAlerts, int numSinks) {
|
||||
numAlerts =
|
||||
count(DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink |
|
||||
cfg.hasFlow(source, sink) and not isFlowExcluded(source, sink)
|
||||
) and
|
||||
numSinks =
|
||||
count(DataFlow::Node sink |
|
||||
exists(DataFlow::Configuration cfg | cfg.isSink(sink) or cfg.isSink(sink, _))
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
*
|
||||
* Count the number of sinks and alerts for the `RequestForgery` security query.
|
||||
*/
|
||||
|
||||
import semmle.code.java.security.RequestForgery
|
||||
import CountAlertsAndSinks
|
||||
@@ -0,0 +1,9 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
*
|
||||
* Count the number of sinks and alerts for the `SqlTainted` security query.
|
||||
*/
|
||||
|
||||
import semmle.code.java.security.SqlInjectionQuery
|
||||
import CountAlertsAndSinks
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
*
|
||||
* Count the number of sinks and alerts for the `TaintedPath` security query.
|
||||
*/
|
||||
|
||||
//TODO no libraries for TaintedPath so we copy paste the config used in the TaintedPath.ql query.
|
||||
import java
|
||||
import DataFlow::PathGraph
|
||||
private import semmle.code.java.dataflow.ExternalFlow
|
||||
import semmle.code.java.security.PathCreation
|
||||
import semmle.code.java.security.PathSanitizer
|
||||
import semmle.code.java.dataflow.FlowSources
|
||||
import CountAlertsAndSinks
|
||||
|
||||
class TaintedPathConfig extends TaintTracking::Configuration {
|
||||
TaintedPathConfig() { this = "TaintedPathConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink.asExpr() = any(PathCreation p).getAnInput()
|
||||
or
|
||||
sinkNode(sink, "create-file")
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer.getType() instanceof BoxedType or
|
||||
sanitizer.getType() instanceof PrimitiveType or
|
||||
sanitizer.getType() instanceof NumberType or
|
||||
sanitizer instanceof PathInjectionSanitizer
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
any(TaintedPathAdditionalTaintStep s).step(n1, n2)
|
||||
}
|
||||
}
|
||||
|
||||
class TaintedPathAdditionalTaintStep extends Unit {
|
||||
abstract predicate step(DataFlow::Node n1, DataFlow::Node n2);
|
||||
}
|
||||
|
||||
private class DefaultTaintedPathAdditionalTaintStep extends TaintedPathAdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
exists(Argument a |
|
||||
a = n1.asExpr() and
|
||||
a.getCall() = n2.asExpr() and
|
||||
a = any(TaintPreservingUriCtorParam tpp).getAnArgument()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private class TaintPreservingUriCtorParam extends Parameter {
|
||||
TaintPreservingUriCtorParam() {
|
||||
exists(Constructor ctor, int idx, int nParams |
|
||||
ctor.getDeclaringType() instanceof TypeUri and
|
||||
this = ctor.getParameter(idx) and
|
||||
nParams = ctor.getNumberOfParameters()
|
||||
|
|
||||
// URI(String scheme, String ssp, String fragment)
|
||||
idx = 1 and nParams = 3
|
||||
or
|
||||
// URI(String scheme, String host, String path, String fragment)
|
||||
idx = [1, 2] and nParams = 4
|
||||
or
|
||||
// URI(String scheme, String authority, String path, String query, String fragment)
|
||||
idx = 2 and nParams = 5
|
||||
or
|
||||
// URI(String scheme, String userInfo, String host, int port, String path, String query, String fragment)
|
||||
idx = 4 and nParams = 7
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
private import java
|
||||
private import extraction.Exclusions as Exclusions
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
|
||||
/**
|
||||
* Holds if the flow from `source` to `sink` should be excluded from the results of an end-to-end
|
||||
* evaluation query.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isFlowExcluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
Exclusions::isFileExcluded([source.getLocation().getFile(), sink.getLocation().getFile()])
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* EndpointScoresIntegrationTest.ql
|
||||
*
|
||||
* Extract scores for each test endpoint that is an argument to a function call in the database.
|
||||
* This is used by integration tests to verify that QL and the modeling codebase agree on the scores
|
||||
* of a set of test endpoints.
|
||||
*/
|
||||
|
||||
import java
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import experimental.adaptivethreatmodeling.FeaturizationConfig
|
||||
import experimental.adaptivethreatmodeling.EndpointScoring::ModelScoring as ModelScoring
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
private import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
/**
|
||||
* A featurization config that featurizes endpoints that are arguments to function calls.
|
||||
*
|
||||
* This should only be used in extraction queries and tests.
|
||||
*/
|
||||
class FunctionArgumentFeaturizationConfig extends FeaturizationConfig {
|
||||
FunctionArgumentFeaturizationConfig() { this = "FunctionArgumentFeaturization" }
|
||||
|
||||
override DataFlow::Node getAnEndpointToFeaturize() {
|
||||
exists(Call call | result.asExpr() = call.getAnArgument())
|
||||
}
|
||||
}
|
||||
|
||||
query predicate endpointScores = ModelScoring::endpointScores/3;
|
||||
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* ModelCheck.ql
|
||||
*
|
||||
* Returns checksums of ATM models.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The `availableMlModels` template predicate.
|
||||
*
|
||||
* This is populated by the evaluator with metadata for the available machine learning models.
|
||||
*/
|
||||
external predicate availableMlModels(
|
||||
string modelChecksum, string modelLanguage, string modelName, string modelType
|
||||
);
|
||||
|
||||
select any(string checksum | availableMlModels(checksum, "java", _, _))
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Counts sources and sinks for Java security queries.
|
||||
*/
|
||||
|
||||
import java
|
||||
import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
import semmle.code.java.dataflow.TaintTracking::TaintTracking as TaintTracking
|
||||
// java/ql/lib/semmle/code/java/security$ ls *Query.qll | sed -e 's/\(.*\)Query.qll/import semmle.code.java.security.\1Query as \1/'
|
||||
import semmle.code.java.security.AndroidIntentRedirectionQuery as AndroidIntentRedirection
|
||||
import semmle.code.java.security.AndroidSensitiveCommunicationQuery as AndroidSensitiveCommunication
|
||||
import semmle.code.java.security.AndroidWebViewCertificateValidationQuery as AndroidWebViewCertificateValidation
|
||||
import semmle.code.java.security.CleartextStorageAndroidDatabaseQuery as CleartextStorageAndroidDatabase
|
||||
import semmle.code.java.security.CleartextStorageAndroidFilesystemQuery as CleartextStorageAndroidFilesystem
|
||||
import semmle.code.java.security.CleartextStorageClassQuery as CleartextStorageClass
|
||||
import semmle.code.java.security.CleartextStorageCookieQuery as CleartextStorageCookie
|
||||
import semmle.code.java.security.CleartextStoragePropertiesQuery as CleartextStorageProperties
|
||||
import semmle.code.java.security.CleartextStorageQuery as CleartextStorage
|
||||
import semmle.code.java.security.CleartextStorageSharedPrefsQuery as CleartextStorageSharedPrefs
|
||||
import semmle.code.java.security.CommandLineQuery as CommandLine
|
||||
import semmle.code.java.security.ConditionalBypassQuery as ConditionalBypass
|
||||
import semmle.code.java.security.FragmentInjectionQuery as FragmentInjection
|
||||
import semmle.code.java.security.GroovyInjectionQuery as GroovyInjection
|
||||
import semmle.code.java.security.HardcodedCredentialsApiCallQuery as HardcodedCredentialsApiCall
|
||||
import semmle.code.java.security.HardcodedCredentialsSourceCallQuery as HardcodedCredentialsSourceCall
|
||||
import semmle.code.java.security.HttpsUrlsQuery as HttpsUrls
|
||||
import semmle.code.java.security.ImplicitPendingIntentsQuery as ImplicitPendingIntents
|
||||
import semmle.code.java.security.ImproperIntentVerificationQuery as ImproperIntentVerification
|
||||
import semmle.code.java.security.InsecureBasicAuthQuery as InsecureBasicAuth
|
||||
import semmle.code.java.security.InsecureTrustManagerQuery as InsecureTrustManager
|
||||
import semmle.code.java.security.InsufficientKeySizeQuery as InsufficientKeySize
|
||||
import semmle.code.java.security.IntentUriPermissionManipulationQuery as IntentUriPermissionManipulation
|
||||
import semmle.code.java.security.JexlInjectionQuery as JexlInjection
|
||||
import semmle.code.java.security.JndiInjectionQuery as JndiInjection
|
||||
import semmle.code.java.security.LogInjectionQuery as LogInjection
|
||||
import semmle.code.java.security.MissingJWTSignatureCheckQuery as MissingJWTSignatureCheck
|
||||
import semmle.code.java.security.MvelInjectionQuery as MvelInjection
|
||||
import semmle.code.java.security.OgnlInjectionQuery as OgnlInjection
|
||||
import semmle.code.java.security.OverlyLargeRangeQuery as OverlyLargeRange
|
||||
import semmle.code.java.security.PartialPathTraversalQuery as PartialPathTraversal
|
||||
import semmle.code.java.security.RandomQuery as Random
|
||||
import semmle.code.java.security.RsaWithoutOaepQuery as RsaWithoutOaep
|
||||
import semmle.code.java.security.SensitiveKeyboardCacheQuery as SensitiveKeyboardCache
|
||||
import semmle.code.java.security.SensitiveLoggingQuery as SensitiveLogging
|
||||
import semmle.code.java.security.SpelInjectionQuery as SpelInjection
|
||||
import semmle.code.java.security.SqlInjectionQuery as SqlInjection
|
||||
import semmle.code.java.security.StaticInitializationVectorQuery as StaticInitializationVector
|
||||
import semmle.code.java.security.TemplateInjectionQuery as TemplateInjection
|
||||
import semmle.code.java.security.UnsafeAndroidAccessQuery as UnsafeAndroidAccess
|
||||
import semmle.code.java.security.UnsafeCertTrustQuery as UnsafeCertTrust
|
||||
import semmle.code.java.security.UnsafeContentUriResolutionQuery as UnsafeContentUriResolution
|
||||
import semmle.code.java.security.UnsafeDeserializationQuery as UnsafeDeserialization
|
||||
import semmle.code.java.security.WebviewDubuggingEnabledQuery as WebviewDubuggingEnabled
|
||||
import semmle.code.java.security.XsltInjectionQuery as XsltInjection
|
||||
|
||||
DataFlow::Node getASink(TaintTracking::Configuration cfg) {
|
||||
cfg.isSink(result) or cfg.isSink(result, _)
|
||||
}
|
||||
|
||||
DataFlow::Node getASource(TaintTracking::Configuration cfg) {
|
||||
cfg.isSource(result) or cfg.isSource(result, _)
|
||||
}
|
||||
|
||||
from TaintTracking::Configuration cfg, int sources, int sinks
|
||||
where count(getASource(cfg)) = sources and count(getASink(cfg)) = sinks
|
||||
select cfg, sources, sinks
|
||||
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines files that should be excluded from the evaluation of ML models.
|
||||
*/
|
||||
|
||||
private import java
|
||||
|
||||
//TODO Couldn't find a library for the classifier so copy pasted predicate in java/ql/src/filters/ClassifyFiles.ql
|
||||
predicate classify(File f, string tag) {
|
||||
f instanceof GeneratedFile and tag = "generated"
|
||||
or
|
||||
exists(GeneratedClass gc | gc.getFile() = f | tag = "generated")
|
||||
or
|
||||
exists(TestClass tc | tc.getFile() = f | tag = "test")
|
||||
or
|
||||
exists(TestMethod tm | tm.getFile() = f | tag = "test")
|
||||
}
|
||||
|
||||
/** Holds if the file should be excluded from end-to-end evaluation. */
|
||||
predicate isFileExcluded(File file) {
|
||||
// Ignore files that are outside the root folder of the analyzed source location.
|
||||
//
|
||||
// If the file doesn't have a relative path, then the source file is located outside the root
|
||||
// folder of the analyzed source location, meaning that the files are additional files added to
|
||||
// the database like standard library files that we would like to ignore.
|
||||
not exists(file.getRelativePath())
|
||||
or
|
||||
// Ignore files based on their path.
|
||||
exists(string ignorePattern, string separator |
|
||||
ignorePattern =
|
||||
// Exclude test files
|
||||
"(tests?|test[_-]?case|" +
|
||||
// Exclude library files
|
||||
//
|
||||
// - The Bower and npm package managers store packages in bower_components and node_modules
|
||||
// folders respectively.
|
||||
// - Specific exclusion for end-to-end: `applications/examples/static/epydoc` contains
|
||||
// library code from Epydoc.
|
||||
"3rd[_-]?party|bower_components|extern(s|al)?|node_modules|resources|third[_-]?party|_?vendor|"
|
||||
+ "applications" + separator + "examples" + separator + "static" + separator + "epydoc|" +
|
||||
// Exclude generated code
|
||||
"gen|\\.?generated|" +
|
||||
// Exclude benchmarks
|
||||
"benchmarks?|" +
|
||||
// Exclude documentation
|
||||
"docs?|documentation)" and
|
||||
separator = "(\\/|\\.)" and
|
||||
exists(
|
||||
file.getRelativePath()
|
||||
.toLowerCase()
|
||||
.regexpFind(separator + ignorePattern + separator + "|" + "^" + ignorePattern + separator +
|
||||
"|" + separator + ignorePattern + "$", _, _)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Ignore generated, library, and test files.
|
||||
classify(file, ["externs", "generated", "library", "test"])
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts training data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
private import ExtractEndpointDataTraining as ExtractEndpointDataTraining
|
||||
|
||||
query predicate endpoints = ExtractEndpointDataTraining::reformattedTrainingEndpoints/5;
|
||||
|
||||
query predicate tokenFeatures = ExtractEndpointDataTraining::tokenFeatures/3;
|
||||
@@ -0,0 +1,251 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts training data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import java
|
||||
import experimental.adaptivethreatmodeling.EndpointCharacteristics
|
||||
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
|
||||
import NoFeaturizationRestrictionsConfig
|
||||
private import Exclusions as Exclusions
|
||||
import Queries
|
||||
private import experimental.adaptivethreatmodeling.SqlTaintedATM as SqlTaintedAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
/**
|
||||
* Gets the set of featureName-featureValue pairs for each endpoint in the training set.
|
||||
*
|
||||
* `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
|
||||
* `endpoint`. To preserve compatibility with the data pipeline, this relation will instead set
|
||||
* `featureValue` to the empty string in this case.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
trainingEndpoints(endpoint, _, _) and
|
||||
(
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
or
|
||||
// Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
|
||||
featureName = EndpointFeatures::getASupportedFeatureName() and
|
||||
not exists(string value | EndpointFeatures::tokenFeatures(endpoint, featureName, value)) and
|
||||
featureValue = ""
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the given endpoint should be included in the training set as a sample belonging to endpointClass, and has
|
||||
* the given characteristic. This query uses the endpoint characteristics to select and label endpoints for the training
|
||||
* set, and provides a list of characteristics for each endpoint in the training set, which is used in the modeling
|
||||
* code.
|
||||
*
|
||||
* Params:
|
||||
* endpoint: The endpoint to include / exclude.
|
||||
* endpointClass: The sink type. See the documentation of EndpointType.getEncoding for details about the relationship
|
||||
* between an EndpointType and a class in the classifier.
|
||||
* characteristic: Provides the list of characteristics that apply to the endpoint, which the modeling code currently
|
||||
* uses for type balancing.
|
||||
*
|
||||
* Note: This predicate will produce multiple tuples for endpoints that have multiple characteristics, which we must
|
||||
* then group together into a list of characteristics.
|
||||
*/
|
||||
query predicate trainingEndpoints(
|
||||
DataFlow::Node endpoint, EndpointType endpointClass, EndpointCharacteristic characteristic
|
||||
) {
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(endpoint.getLocation().getFile().getRelativePath()) and
|
||||
// Only select endpoints that can be part of a tainted flow: Constant expressions always evaluate to a constant
|
||||
// primitive value. Therefore they can't ever appear in an alert, making them less interesting training examples.
|
||||
// TODO: Experiment with removing this requirement.
|
||||
// not endpoint.asExpr() instanceof CompileTimeConstantExpr and
|
||||
not exists(EndpointFilterCharacteristic efc | efc.appliesToEndpoint(endpoint)) and
|
||||
// Do not select endpoints filtered out by end-to-end evaluation.
|
||||
// TODO: Experiment with removing this requirement.
|
||||
not Exclusions::isFileExcluded(endpoint.getLocation().getFile()) and
|
||||
// Filter out negative examples that also have a LikelyNotASinkReason, because this is currently done here
|
||||
// https://github.com/github/codeql/blob/387e57546bf7352f7c1cfe781daa1a3799b7063e/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll#L77
|
||||
// TODO: Experiment with removing this requirement.
|
||||
not (
|
||||
endpointClass instanceof NegativeType and
|
||||
exists(EndpointCharacteristic c |
|
||||
c.appliesToEndpoint(endpoint) and
|
||||
c instanceof LikelyNotASinkCharacteristic
|
||||
)
|
||||
) and
|
||||
// Don't surface endpoint filters as characteristics, because they were previously not surfaced.
|
||||
// TODO: Experiment with surfacing these to the modeling code by removing the following line (and then make
|
||||
// EndpointFilterCharacteristic private).
|
||||
not characteristic instanceof EndpointFilterCharacteristic and
|
||||
(
|
||||
// If the list of characteristics includes positive indicators with high confidence for this class, select this as a
|
||||
// training sample belonging to the class.
|
||||
exists(EndpointCharacteristic characteristic2, float confidence |
|
||||
characteristic2.appliesToEndpoint(endpoint) and
|
||||
characteristic2.hasImplications(endpointClass, true, confidence) and
|
||||
confidence >= characteristic2.getHighConfidenceThreshold()
|
||||
) and
|
||||
(
|
||||
// Temporarily limit this only to positive classes. For negative classes, additionally select only endpoints that
|
||||
// have no high confidence indicators that they are sinks, because this is what was previously done.
|
||||
// TODO: Experiment with removing this requirement, and instead ensuring that an endpoint never has both a high
|
||||
// confidence indicator that it _is_ a sink and a high confidence indicator that it is _not_ a sink.
|
||||
not endpointClass instanceof NegativeType
|
||||
or
|
||||
not exists(EndpointCharacteristic characteristic3, float confidence3, EndpointType posClass |
|
||||
characteristic3.appliesToEndpoint(endpoint) and
|
||||
characteristic3.hasImplications(posClass, true, confidence3) and
|
||||
confidence3 >= characteristic3.getHighConfidenceThreshold() and
|
||||
not posClass instanceof NegativeType
|
||||
)
|
||||
)
|
||||
or
|
||||
// If the list of characteristics includes negative indicators with high confidence for all classes other than 0,
|
||||
// select this as a training sample of class 0 (this means we had query-specific characteristics to decide this
|
||||
// endpoint isn't a sink for each of our sink types).
|
||||
endpointClass instanceof NegativeType and
|
||||
forall(EndpointType otherClass | not otherClass instanceof NegativeType |
|
||||
exists(EndpointCharacteristic characteristic2, float confidence |
|
||||
characteristic2.appliesToEndpoint(endpoint) and
|
||||
characteristic2.hasImplications(otherClass, false, confidence) and
|
||||
confidence >= characteristic2.getHighConfidenceThreshold()
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Temporary:
|
||||
* Reformat the training data that was extracted with the new logic to match the format produced by the old predicate.
|
||||
* This is the format expected by the endpoint pipeline.
|
||||
*/
|
||||
query predicate reformattedTrainingEndpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
trainingEndpoints(endpoint, _, _) and
|
||||
exists(Query query |
|
||||
queryName = query.getName() and
|
||||
// For sinks, only list that sink type, but for non-sinks, list all sink types.
|
||||
(
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass.getDescription().matches(queryName + "%") and
|
||||
not endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
or
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
) and
|
||||
(
|
||||
// NOTE: We don't use hasFlowFromSource in training, so we could just hardcode it to be false.
|
||||
key = "hasFlowFromSource" and
|
||||
(
|
||||
if FlowFromSource::hasFlowFromSource(endpoint, query)
|
||||
then value = "true"
|
||||
else value = "false"
|
||||
) and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
|
||||
// appear in an alert, making them less interesting training examples.
|
||||
key = "isConstantExpression" and
|
||||
(
|
||||
if endpoint.asExpr() instanceof CompileTimeConstantExpr
|
||||
then value = "true"
|
||||
else value = "false"
|
||||
) and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
|
||||
key = "isExcludedFromEndToEndEvaluation" and
|
||||
(
|
||||
if Exclusions::isFileExcluded(endpoint.getLocation().getFile())
|
||||
then value = "true"
|
||||
else value = "false"
|
||||
) and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// The label for this query, considering the endpoint as a sink.
|
||||
key = "sinkLabel" and
|
||||
valueType = "string" and
|
||||
value = "Sink" and
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass.getDescription().matches(queryName + "%") and
|
||||
not endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
or
|
||||
key = "sinkLabel" and
|
||||
valueType = "string" and
|
||||
value = "NotASink" and
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
or
|
||||
// The reason, or reasons, why the endpoint was labeled NotASink for this query, only for negative examples.
|
||||
key = "notASinkReason" and
|
||||
exists(EndpointCharacteristic characteristic, EndpointType endpointClass |
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
characteristic.hasImplications(endpointClass, true, _) and
|
||||
endpointClass instanceof NegativeType and
|
||||
value = characteristic
|
||||
) and
|
||||
// Don't include a notASinkReason for endpoints that are also known sinks.
|
||||
not exists(EndpointCharacteristic characteristic3, float confidence3, EndpointType posClass |
|
||||
characteristic3.appliesToEndpoint(endpoint) and
|
||||
characteristic3.hasImplications(posClass, true, confidence3) and
|
||||
confidence3 >= characteristic3.getHighConfidenceThreshold() and
|
||||
not posClass instanceof NegativeType
|
||||
) and
|
||||
// Don't surface endpoint filters as notASinkReasons, because they were previously not surfaced.
|
||||
// TODO: Experiment with surfacing these to the modeling code by removing the following line (and then make
|
||||
// EndpointFilterCharacteristic private).
|
||||
not value instanceof EndpointFilterCharacteristic and
|
||||
valueType = "string"
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the ATM data flow configuration for the specified query.
|
||||
* TODO: Delete this once we are no longer surfacing `hasFlowFromSource`.
|
||||
*/
|
||||
DataFlow::Configuration getDataFlowCfg(Query query) {
|
||||
query instanceof SqlTaintedQuery and result instanceof SqlTaintedAtm::SqlTaintedAtmConfig
|
||||
or
|
||||
query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::TaintedPathAtmConfig
|
||||
or
|
||||
query instanceof RequestForgeryQuery and
|
||||
result instanceof RequestForgeryAtm::RequestForgeryAtmConfig
|
||||
}
|
||||
|
||||
// TODO: Delete this once we are no longer surfacing `hasFlowFromSource`.
|
||||
private module FlowFromSource {
|
||||
predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
|
||||
exists(Configuration cfg | cfg.getQuery() = q | cfg.hasFlow(_, endpoint))
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow configuration that replicates the data flow configuration for a specific query, but
|
||||
* replaces the set of sinks with the set of endpoints we're extracting.
|
||||
*
|
||||
* We use this to find out when there is flow to a particular endpoint from a known source.
|
||||
*
|
||||
* This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
|
||||
* from the CodeQL standard libraries for JavaScript.
|
||||
*/
|
||||
private class Configuration extends DataFlow::Configuration {
|
||||
Query q;
|
||||
|
||||
Configuration() { this = getDataFlowCfg(q) }
|
||||
|
||||
Query getQuery() { result = q }
|
||||
|
||||
/** Holds if `sink` is an endpoint we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink) { any() }
|
||||
// override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) { exists(lbl) }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
/**
|
||||
* @name Endpoint types
|
||||
* @description Maps endpoint type encodings to human-readable descriptions.
|
||||
* @kind table
|
||||
* @id java/ml-powered/model-building/endpoint-type-encodings
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
|
||||
from EndpointType type
|
||||
select type.getEncoding() as label, type.getDescription() as labelName order by label
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Maps ML-powered queries to their `EndpointType` for clearer labelling while evaluating ML model during training.
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.SqlTaintedATM as SqlTaintedAtm
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
import experimental.adaptivethreatmodeling.AdaptiveThreatModeling
|
||||
|
||||
from string queryName, AtmConfig c, EndpointType e
|
||||
where
|
||||
(
|
||||
queryName = "SqlTainted" and
|
||||
c instanceof SqlTaintedAtm::SqlTaintedAtmConfig
|
||||
or
|
||||
queryName = "TaintedPath" and
|
||||
c instanceof TaintedPathAtm::TaintedPathAtmConfig
|
||||
or
|
||||
queryName = "RequestForgery" and
|
||||
c instanceof RequestForgeryAtm::RequestForgeryAtmConfig
|
||||
) and
|
||||
e = c.getASinkEndpointType()
|
||||
select queryName, e.getEncoding() as label
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Query for finding misclassified endpoints which we can use to debug ML-powered queries.
|
||||
*/
|
||||
|
||||
import java
|
||||
import experimental.adaptivethreatmodeling.AdaptiveThreatModeling
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import experimental.adaptivethreatmodeling.BaseScoring
|
||||
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
import semmle.code.java.security.QueryInjection
|
||||
|
||||
/** Gets the positive endpoint type for which you wish to find misclassified examples. */
|
||||
EndpointType getEndpointType() { result instanceof SqlTaintedSinkType }
|
||||
|
||||
/** Get a positive endpoint. This will be run through the classifier to determine whether it is misclassified. */
|
||||
DataFlow::Node getAPositiveEndpoint() { result instanceof QueryInjectionSink }
|
||||
|
||||
/** An ATM configuration to find misclassified endpoints of type `getEndpointType()`. */
|
||||
class ExtractMisclassifiedEndpointsAtmConfig extends AtmConfig {
|
||||
ExtractMisclassifiedEndpointsAtmConfig() { this = "ExtractMisclassifiedEndpointsATMConfig" }
|
||||
|
||||
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
|
||||
sinkCandidate = getAPositiveEndpoint()
|
||||
}
|
||||
|
||||
override EndpointType getASinkEndpointType() { result = getEndpointType() }
|
||||
}
|
||||
|
||||
/** Get an endpoint from `getAPositiveEndpoint()` that is incorrectly excluded from the results. */
|
||||
DataFlow::Node getAMisclassifedEndpoint() {
|
||||
any(ExtractMisclassifiedEndpointsAtmConfig config).isEffectiveSink(result) and
|
||||
not any(ScoringResults results).shouldResultBeIncluded(_, result)
|
||||
}
|
||||
|
||||
/** The token features for each misclassified endpoint. */
|
||||
query predicate tokenFeaturesForMisclassifiedEndpoints(
|
||||
DataFlow::Node endpoint, string featureName, string featureValue
|
||||
) {
|
||||
endpoint = getAMisclassifedEndpoint() and
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Surfaces endpoints are non-sinks with high confidence, for use as negative examples in the prompt.
|
||||
*
|
||||
* @name Negative examples (experimental)
|
||||
* @kind problem
|
||||
* @id java/ml-powered/non-sink
|
||||
* @tags experimental security
|
||||
*/
|
||||
|
||||
private import java
|
||||
import semmle.code.java.dataflow.TaintTracking
|
||||
private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
|
||||
private import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
|
||||
bindingset[rate]
|
||||
DataFlow::Node getSampleFromSampleRate(float rate) {
|
||||
exists(int r |
|
||||
result =
|
||||
rank[r](DataFlow::Node n, string path, int a, int b, int c, int d |
|
||||
n.asExpr().getLocation().hasLocationInfo(path, a, b, c, d)
|
||||
|
|
||||
n order by path, a, b, c, d
|
||||
) and
|
||||
r % (1 / rate).ceil() = 0
|
||||
)
|
||||
}
|
||||
|
||||
from
|
||||
DataFlow::Node endpoint, EndpointCharacteristics::EndpointCharacteristic characteristic,
|
||||
float confidence
|
||||
where
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
confidence >= characteristic.highConfidence() and
|
||||
characteristic.hasImplications(any(NegativeType negative), true, confidence) and
|
||||
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
|
||||
// certain about in the prompt.
|
||||
not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
|
||||
// they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples.
|
||||
not exists(
|
||||
EndpointCharacteristics::EndpointCharacteristic characteristic2, float confidence2,
|
||||
EndpointType positiveType
|
||||
|
|
||||
characteristic2.appliesToEndpoint(endpoint) and
|
||||
confidence2 >= characteristic2.maximalConfidence() and
|
||||
not positiveType instanceof NegativeType and
|
||||
characteristic2.hasImplications(positiveType, true, confidence2)
|
||||
) and
|
||||
endpoint = getSampleFromSampleRate(0.01)
|
||||
select endpoint, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()
|
||||
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Surfaces endpoints are sinks with high confidence, for use as positive examples in the prompt.
|
||||
*
|
||||
* @name Positive examples (experimental)
|
||||
* @kind problem
|
||||
* @id java/ml-powered/known-sink
|
||||
* @tags experimental security
|
||||
*/
|
||||
|
||||
private import java
|
||||
import semmle.code.java.dataflow.TaintTracking
|
||||
private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlTaintedATM as SqlTaintednAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
/*
|
||||
* ****** WARNING: ******
|
||||
* Before calling this query, make sure there's no codex-generated data extension file in `java/ql/lib/ext`. Otherwise,
|
||||
* the ML-gnerarated, noisy sinks will end up poluting the positive examples used in the prompt!
|
||||
*/
|
||||
|
||||
from DataFlow::Node sink, AtmConfig::AtmConfig config
|
||||
where
|
||||
config.isKnownSink(sink) and
|
||||
// If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query
|
||||
// when there's a codex-generated data extension file in `java/ql/lib/ext`.
|
||||
not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _) and
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt.
|
||||
not config.isSanitizer(sink)
|
||||
select sink, config.getASinkEndpointType().getDescription()
|
||||
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Surfaces the endpoints that pass the endpoint filters and have flow from a source for each query config, and are
|
||||
* therefore used as candidates for classification with an ML model.
|
||||
*
|
||||
* Note: This query does not actually classify the endpoints using the model.
|
||||
*
|
||||
* @name Sink candidates with flow (experimental)
|
||||
* @description Sink candidates with flow from a source
|
||||
* @kind problem
|
||||
* @id java/ml-powered/sink-candidates-with-flow
|
||||
* @tags experimental security
|
||||
*/
|
||||
|
||||
private import java
|
||||
import semmle.code.java.dataflow.TaintTracking
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlTaintedATM as SqlTaintedAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
from
|
||||
DataFlow::Node sink, string message, string package, string type, boolean subtypes, string name,
|
||||
string signature, string ext, string input, string provenance
|
||||
where
|
||||
exists(Callable callee, Call call, int index |
|
||||
sink.asExpr() = call.getArgument(index) and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
subtypes = true and // TODO
|
||||
name = callee.getName() and // TODO: Will this work for constructors?
|
||||
signature = callee.paramsString() and
|
||||
ext = "" and // TODO
|
||||
input = "Argument[" + index + "]" and // TODO: why are slashes added?
|
||||
provenance = "manual" // TODO
|
||||
) and
|
||||
// The message is the concatenation of all relevant configs, and we surface only sinks that have at least one relevant
|
||||
// config.
|
||||
message =
|
||||
strictconcat(AtmConfig::AtmConfig config, DataFlow::PathNode sinkPathNode |
|
||||
config.isSinkCandidateWithFlow(sinkPathNode) and
|
||||
sinkPathNode.getNode() = sink
|
||||
|
|
||||
config.getASinkEndpointType().getDescription(), ", "
|
||||
) + "\n{'package': '" + package + "', 'type': '" + type + "', 'subtypes': " + subtypes +
|
||||
", 'name': '" + name + "', 'signature': '" + signature + "', 'ext': '" + ext + "', 'input': '"
|
||||
+ input + "', 'provenance': '" + provenance + "'}" // TODO: Why are the curly braces added twice?
|
||||
select sink, message
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Labels used in training and evaluation data to indicate knowledge about whether an endpoint is a
|
||||
* sink for a particular security query.
|
||||
*/
|
||||
|
||||
newtype TEndpointLabel =
|
||||
TSinkLabel() or
|
||||
TNotASinkLabel() or
|
||||
TUnknownLabel()
|
||||
|
||||
abstract class EndpointLabel extends TEndpointLabel {
|
||||
abstract string getEncoding();
|
||||
|
||||
string toString() { result = getEncoding() }
|
||||
}
|
||||
|
||||
class SinkLabel extends EndpointLabel, TSinkLabel {
|
||||
override string getEncoding() { result = "Sink" }
|
||||
}
|
||||
|
||||
class NotASinkLabel extends EndpointLabel, TNotASinkLabel {
|
||||
override string getEncoding() { result = "NotASink" }
|
||||
}
|
||||
|
||||
class UnknownLabel extends EndpointLabel, TUnknownLabel {
|
||||
override string getEncoding() { result = "Unknown" }
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*/
|
||||
|
||||
private import experimental.adaptivethreatmodeling.FeaturizationConfig
|
||||
private import semmle.code.java.dataflow.DataFlow::DataFlow as DataFlow
|
||||
|
||||
/**
|
||||
* A featurization config that featurizes all endpoints.
|
||||
*
|
||||
* This should only be used in extraction queries and tests.
|
||||
*/
|
||||
class NoRestrictionsFeaturizationConfig extends FeaturizationConfig {
|
||||
NoRestrictionsFeaturizationConfig() { this = "NoRestrictionsFeaturization" }
|
||||
|
||||
override DataFlow::Node getAnEndpointToFeaturize() { any() }
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Represents the security queries for which we currently have ML-powered versions.
|
||||
*/
|
||||
|
||||
newtype TQuery =
|
||||
TSqlTaintedQuery() or
|
||||
TTaintedPathQuery() or
|
||||
TRequestForgeryQuery()
|
||||
|
||||
abstract class Query extends TQuery {
|
||||
abstract string getName();
|
||||
|
||||
string toString() { result = getName() }
|
||||
}
|
||||
|
||||
class SqlTaintedQuery extends Query, TSqlTaintedQuery {
|
||||
override string getName() { result = "SqlTainted" }
|
||||
}
|
||||
|
||||
class TaintedPathQuery extends Query, TTaintedPathQuery {
|
||||
override string getName() { result = "TaintedPath" }
|
||||
}
|
||||
|
||||
class RequestForgeryQuery extends Query, TRequestForgeryQuery {
|
||||
override string getName() { result = "RequestForgery" }
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
name: codeql/java-experimental-atm-model-building
|
||||
description: CodeQL libraries for building machine learning models for the experimental ML-powered queries
|
||||
extractor: java
|
||||
library: false
|
||||
groups:
|
||||
- java
|
||||
- experimental
|
||||
dependencies:
|
||||
codeql/java-experimental-atm-lib: ${workspace}
|
||||
codeql/java-experimental-atm-model: "0.0.0"
|
||||
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name Server-side request forgery (experimental)
|
||||
* @description Making web requests based on unvalidated user-input
|
||||
* may cause the server to communicate with malicious servers.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 9.1
|
||||
* @precision high
|
||||
* @id java/ml-powered/ssrf
|
||||
* @tags experimental security
|
||||
* external/cwe/cwe-918
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.RequestForgeryATM
|
||||
import AtmResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score
|
||||
where cfg.hasBoostedFlowPath(source, sink, score)
|
||||
select sink.getNode(), source, sink,
|
||||
"(Experimental) Potential server-side request forgery due to a $@.", source.getNode(),
|
||||
"user-provided value", score
|
||||
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name Query built from user-controlled sources (experimental)
|
||||
* @description Building a SQL or Java Persistence query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious code by the user.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 8.8
|
||||
* @precision high
|
||||
* @id java/ml-powered/sql-injection
|
||||
* @tags experimental security
|
||||
* external/cwe/cwe-089
|
||||
* external/cwe/cwe-564
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.SqlTaintedATM
|
||||
import AtmResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score
|
||||
where cfg.hasBoostedFlowPath(source, sink, score)
|
||||
select sink.getNode(), source, sink, "(Experimental) This query depends on a $@.", source.getNode(),
|
||||
"user-provided value", score
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name Uncontrolled data used in path expression (experimental)
|
||||
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @id java/ml-powered/path-injection
|
||||
* @tags experimental security
|
||||
* external/cwe/cwe-022
|
||||
* external/cwe/cwe-023
|
||||
* external/cwe/cwe-036
|
||||
* external/cwe/cwe-073
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM
|
||||
import AtmResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score
|
||||
where cfg.hasBoostedFlowPath(source, sink, score)
|
||||
select sink.getNode(), source, sink, "(Experimental) This path depends on a $@.", source.getNode(),
|
||||
"user-provided value", score
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
dependencies:
|
||||
codeql/java-experimental-atm-model:
|
||||
version: 0.0.1
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -0,0 +1,2 @@
|
||||
- description: ATM boosted Code Scanning queries for Java
|
||||
- queries: .
|
||||
12
java/ql/experimental/adaptivethreatmodeling/src/qlpack.yml
Normal file
12
java/ql/experimental/adaptivethreatmodeling/src/qlpack.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
name: codeql/java-experimental-atm-queries
|
||||
description: Experimental ML-powered queries for Java
|
||||
language: java
|
||||
version: 0.4.5
|
||||
suites: codeql-suites
|
||||
defaultSuiteFile: codeql-suites/java-atm-code-scanning.qls
|
||||
groups:
|
||||
- java
|
||||
- experimental
|
||||
dependencies:
|
||||
codeql/java-experimental-atm-lib: ${workspace}
|
||||
codeql/java-experimental-atm-model: "0.0.1"
|
||||
2
java/ql/experimental/adaptivethreatmodeling/test/.gitignore
vendored
Normal file
2
java/ql/experimental/adaptivethreatmodeling/test/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
**/*.testproj
|
||||
**/*.actual
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
dependencies:
|
||||
codeql/java-experimental-atm-model:
|
||||
version: 0.3.0
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -0,0 +1,4 @@
|
||||
name: codeql/java-experimental-atm-tests
|
||||
extractor: java
|
||||
dependencies:
|
||||
codeql/java-experimental-atm-model-building: ${workspace}
|
||||
@@ -5,4 +5,4 @@ groups:
|
||||
- javascript
|
||||
- experimental
|
||||
mlModels:
|
||||
- "resources/*.codeqlmodel"
|
||||
- "resources/shellcommand.codeqlmodel"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
dependencies:
|
||||
codeql/javascript-experimental-atm-model:
|
||||
version: 0.3.0
|
||||
dsp-testing/javascript-experimental-atm-model:
|
||||
version: 0.3.1-2022-12-21-01h55m24s.gray-roof-szzhgkwk.689231edea6179400bcffbcb0e7f6eb2bacd29c6be27a2930dd4f63ccdb64f34
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
|
||||
Reference in New Issue
Block a user