Merge pull request #7800 from github/henrymercer/js-atm-add-model-building-pack

JS: Add model building pack for ML-powered queries
2025-12-16 16:53:25 +01:00 · 2022-02-01 20:51:19 +00:00
parent fb00a6c61b 14601316a5
commit e622e517d9
30 changed files with 993 additions and 0 deletions
--- a/.codeqlmanifest.json
+++ b/.codeqlmanifest.json
@@ -6,6 +6,7 @@
        "*/ql/examples/qlpack.yml",
        "cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
        "javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
+        "javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml",
        "javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
        "csharp/ql/campaigns/Solorigate/lib/qlpack.yml",
        "csharp/ql/campaigns/Solorigate/src/qlpack.yml",
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/DebugResultInclusion.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/DebugResultInclusion.ql
@@ -0,0 +1,90 @@
+/**
+ * @name Debug result inclusion
+ * @description Use this query to understand why some alerts are included or excluded from the
+ *              results of boosted queries. The results for this query are the union of the alerts
+ *              generated by each boosted query. Each alert includes an explanation why it was
+ *              included or excluded for each of the four security queries.
+ * @kind problem
+ * @problem.severity error
+ * @id adaptive-threat-modeling/js/debug-result-inclusion
+ */
+
+import javascript
+import experimental.adaptivethreatmodeling.ATMConfig
+import extraction.ExtractEndpointData
+
+/**
+ * Gets a reason why the sink endpoint filter of the boosted query that corresponds to `query`
+ * excludes `sinkCandidate`.
+ */
+string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
+  result = NosqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
+  query instanceof NosqlInjectionQuery
+  or
+  result = SqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
+  query instanceof SqlInjectionQuery
+  or
+  result = TaintedPathATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
+  query instanceof TaintedPathQuery
+  or
+  result = XssATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
+  query instanceof XssQuery
+}
+
+/**
+ * Gets a description of how `query` treats the alert candidate from `sourceCandidate` to
+ * `sinkCandidate`: why it is excluded, why it has no flow, or that it is included.
+ */
+pragma[inline]
+string getDescriptionForAlertCandidate(
+  DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate, Query query
+) {
+  // The sink endpoint filter rejects the sink candidate.
+  exists(string reason | reason = getAReasonSinkExcluded(sinkCandidate, query) |
+    result = "excluded[reason=" + reason + "]"
+  )
+  or
+  // The ATM configuration for `query` reports the sink candidate as a known sink.
+  result = "excluded[reason=known-sink]" and
+  getATMCfg(query).isKnownSink(sinkCandidate)
+  or
+  // The data flow configuration for `query` finds flow between the two candidates.
+  result = "included" and
+  getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate)
+  or
+  // The sink candidate is not filtered out, yet there is no flow: report whether the source
+  // candidate is a source of the data flow configuration at all.
+  not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
+  not getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
+  (
+    result = "no flow" and
+    (
+      getDataFlowCfg(query).isSource(sourceCandidate) or
+      getDataFlowCfg(query).isSource(sourceCandidate, _)
+    )
+    or
+    result = "not a known source" and
+    not getDataFlowCfg(query).isSource(sourceCandidate) and
+    not getDataFlowCfg(query).isSource(sourceCandidate, _)
+  )
+}
+
+/**
+ * Gets the descriptions from `getDescriptionForAlertCandidate` for all queries, each prefixed
+ * with the name of its query and joined by ", ".
+ */
+pragma[inline]
+string getDescriptionForAlert(DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate) {
+  result =
+    concat(Query query, string description |
+      description = getDescriptionForAlertCandidate(sourceCandidate, sinkCandidate, query)
+    |
+      query.getName() + ": " + description, ", "
+    )
+}
+
+from DataFlow::Node source, DataFlow::Node sink
+where any(DataFlow::Configuration cfg).hasFlow(source, sink)
+select sink,
+  "This is an ATM result that may depend on $@ [" + getDescriptionForAlert(source, sink) + "]",
+  source, "a user-provided value"
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/EndToEndEvaluation.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/EndToEndEvaluation.qll
@@ -0,0 +1,13 @@
+private import javascript
+private import extraction.Exclusions as Exclusions
+
+/**
+ * Holds if the flow from `source` to `sink` must be left out of the results of an end-to-end
+ * evaluation query, because `Exclusions::isFileExcluded` holds for the file of either endpoint.
+ */
+pragma[inline]
+predicate isFlowExcluded(DataFlow::Node source, DataFlow::Node sink) {
+  Exclusions::isFileExcluded(source.getFile())
+  or
+  Exclusions::isFileExcluded(sink.getFile())
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/EndpointScoresIntegrationTest.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/EndpointScoresIntegrationTest.ql
@@ -0,0 +1,28 @@
+/**
+ * EndpointScoresIntegrationTest.ql
+ *
+ * Extracts the score of every test endpoint that is an argument to a function call in the
+ * database. Integration tests use this to verify that QL and the modeling codebase agree on the
+ * scores of a set of test endpoints.
+ */
+
+import javascript
+import experimental.adaptivethreatmodeling.ATMConfig
+import experimental.adaptivethreatmodeling.FeaturizationConfig
+import experimental.adaptivethreatmodeling.EndpointScoring::ModelScoring as ModelScoring
+
+/**
+ * A featurization config whose endpoints to featurize are the arguments of function calls.
+ *
+ * This should only be used in extraction queries and tests.
+ */
+class FunctionArgumentFeaturizationConfig extends FeaturizationConfig {
+  FunctionArgumentFeaturizationConfig() { this = "FunctionArgumentFeaturization" }
+
+  override DataFlow::Node getAnEndpointToFeaturize() {
+    result = any(DataFlow::CallNode call).getAnArgument()
+  }
+}
+
+// Expose `ModelScoring::endpointScores` as the output relation of this query.
+query predicate endpointScores = ModelScoring::endpointScores/3;
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/ModelCheck.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/ModelCheck.ql
@@ -0,0 +1,17 @@
+/**
+ * ModelCheck.ql
+ *
+ * Returns the checksums of the available ATM models for JavaScript.
+ */
+
+/**
+ * The `availableMlModels` template predicate.
+ *
+ * The evaluator populates this with metadata for the available machine learning models.
+ */
+external predicate availableMlModels(
+  string modelChecksum, string modelLanguage, string modelName, string modelType
+);
+
+// Select the checksum of every available model whose language is "javascript".
+select any(string jsChecksum | availableMlModels(jsChecksum, "javascript", _, _))
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/NosqlInjection.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/NosqlInjection.ql
@@ -0,0 +1,26 @@
+/**
+ * NosqlInjection.ql
+ *
+ * A variant of the standard NoSQL injection query whose output relation can be fed directly into
+ * the evaluation pipeline.
+ */
+
+import semmle.javascript.security.dataflow.NosqlInjection
+import EndToEndEvaluation as EndToEndEvaluation
+
+from
+  NosqlInjection::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
+  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
+  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
+  int endLineSink, int endColumnSink
+where
+  // Keep only flows of the standard configuration that are not excluded from evaluation.
+  cfg.hasFlow(source, sink) and
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/NosqlInjectionATM.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/NosqlInjectionATM.ql
@@ -0,0 +1,30 @@
+/**
+ * NosqlInjectionATM.ql
+ *
+ * A variant of the boosted NoSQL injection query whose output relation can be fed directly into
+ * the evaluation pipeline. Results are ordered by descending score.
+ */
+
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.NosqlInjectionATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  score = getScoreForFlow(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/NosqlInjectionATMLite.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/NosqlInjectionATMLite.ql
@@ -0,0 +1,33 @@
+/**
+ * NosqlInjectionATMLite.ql
+ *
+ * An arbitrarily ranked variant of the boosted NoSQL injection query: every result is given a
+ * score of 0. Its output relation can be fed directly into the evaluation pipeline.
+ * This is useful (a) for evaluating the performance of endpoint filters, and (b) as a baseline
+ * to compare the model against.
+ */
+
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.NosqlInjectionATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  // Every result gets the same constant score.
+  score = 0 and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/SqlInjection.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/SqlInjection.ql
@@ -0,0 +1,26 @@
+/**
+ * SqlInjection.ql
+ *
+ * A variant of the standard SQL injection query whose output relation can be fed directly into
+ * the evaluation pipeline.
+ */
+
+import semmle.javascript.security.dataflow.SqlInjection
+import EndToEndEvaluation as EndToEndEvaluation
+
+from
+  SqlInjection::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
+  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
+  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
+  int endLineSink, int endColumnSink
+where
+  // Keep only flows of the standard configuration that are not excluded from evaluation.
+  cfg.hasFlow(source, sink) and
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/SqlInjectionATM.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/SqlInjectionATM.ql
@@ -0,0 +1,30 @@
+/**
+ * SqlInjectionATM.ql
+ *
+ * A variant of the boosted SQL injection query whose output relation can be fed directly into
+ * the evaluation pipeline. Results are ordered by descending score.
+ */
+
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.SqlInjectionATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  score = getScoreForFlow(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/SqlInjectionATMLite.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/SqlInjectionATMLite.ql
@@ -0,0 +1,33 @@
+/**
+ * SqlInjectionATMLite.ql
+ *
+ * An arbitrarily ranked variant of the boosted SQL injection query: every result is given a
+ * score of 0. Its output relation can be fed directly into the evaluation pipeline.
+ * This is useful (a) for evaluating the performance of endpoint filters, and (b) as a baseline
+ * to compare the model against.
+ */
+
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.SqlInjectionATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  // Every result gets the same constant score.
+  score = 0 and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/TaintedPath.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/TaintedPath.ql
@@ -0,0 +1,26 @@
+/**
+ * TaintedPath.ql
+ *
+ * A variant of the standard path injection query whose output relation can be fed directly into
+ * the evaluation pipeline.
+ */
+
+import semmle.javascript.security.dataflow.TaintedPath
+import EndToEndEvaluation as EndToEndEvaluation
+
+from
+  TaintedPath::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
+  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
+  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
+  int endLineSink, int endColumnSink
+where
+  // Keep only flows of the standard configuration that are not excluded from evaluation.
+  cfg.hasFlow(source, sink) and
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/TaintedPathATM.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/TaintedPathATM.ql
@@ -0,0 +1,30 @@
+/**
+ * TaintedPathATM.ql
+ *
+ * A variant of the boosted path injection query whose output relation can be fed directly into
+ * the evaluation pipeline. Results are ordered by descending score.
+ */
+
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.TaintedPathATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  score = getScoreForFlow(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/TaintedPathATMLite.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/TaintedPathATMLite.ql
@@ -0,0 +1,33 @@
+/**
+ * TaintedPathATMLite.ql
+ *
+ * An arbitrarily ranked variant of the boosted path injection query: every result is given a
+ * score of 0. Its output relation can be fed directly into the evaluation pipeline.
+ * This is useful (a) for evaluating the performance of endpoint filters, and (b) as a baseline
+ * to compare the model against.
+ */
+
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.TaintedPathATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  // Every result gets the same constant score.
+  score = 0 and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/Xss.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/Xss.ql
@@ -0,0 +1,26 @@
+/**
+ * Xss.ql
+ *
+ * A variant of the standard XSS query whose output relation can be fed directly into the
+ * evaluation pipeline.
+ */
+
+import semmle.javascript.security.dataflow.DomBasedXss
+import EndToEndEvaluation as EndToEndEvaluation
+
+from
+  DomBasedXss::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
+  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
+  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
+  int endLineSink, int endColumnSink
+where
+  // Keep only flows of the standard configuration that are not excluded from evaluation.
+  cfg.hasFlow(source, sink) and
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/XssATM.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/XssATM.ql
@@ -0,0 +1,31 @@
+/**
+ * XssATM.ql
+ *
+ * A variant of the boosted XSS query whose output relation can be fed directly into the
+ * evaluation pipeline. Results are ordered by descending score.
+ */
+
+import javascript
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.XssATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  score = getScoreForFlow(source, sink) and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/XssATMLite.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/evaluation/XssATMLite.ql
@@ -0,0 +1,34 @@
+/**
+ * XssATMLite.ql
+ *
+ * An arbitrarily ranked variant of the boosted XSS query: every result is given a score of 0.
+ * Its output relation can be fed directly into the evaluation pipeline.
+ * This is useful (a) for evaluating the performance of endpoint filters, and (b) as a baseline
+ * to compare the model against.
+ */
+
+import javascript
+import ATM::ResultsInfo
+import EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.XssATM
+
+from
+  DataFlow::Node source, DataFlow::Node sink, float score, string filePathSource,
+  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
+  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
+where
+  any(DataFlow::Configuration cfg).hasFlow(source, sink) and
+  // Skip flows excluded from evaluation and flows that are likely in the base query already.
+  not EndToEndEvaluation::isFlowExcluded(source, sink) and
+  not isFlowLikelyInBaseQuery(source, sink) and
+  // Every result gets the same constant score.
+  score = 0 and
+  // Bind the location columns of both endpoints.
+  source
+      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
+        endColumnSource) and
+  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink)
+select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
+    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
+    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/CountAlertsAndEndpoints.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/CountAlertsAndEndpoints.ql
@@ -0,0 +1,29 @@
+/*
+ * For internal use only.
+ *
+ * [DEPRECATED] Counts alerts and sinks for JavaScript security queries.
+ *
+ * This query is deprecated: bringing in data flow configurations from multiple queries has
+ * performance implications. Use `CountSourcesAndSinks.ql` instead to count sinks for JavaScript
+ * security queries, and count alerts by running the standard or evaluation queries for each
+ * security vulnerability.
+ */
+
+import semmle.javascript.security.dataflow.NosqlInjection
+import semmle.javascript.security.dataflow.SqlInjection
+import semmle.javascript.security.dataflow.TaintedPath
+import semmle.javascript.security.dataflow.DomBasedXss
+
+/** Gets the number of distinct source/sink pairs for which `cfg` has flow. */
+int numAlerts(DataFlow::Configuration cfg) {
+  result = count(DataFlow::Node src, DataFlow::Node snk | cfg.hasFlow(src, snk))
+}
+
+select numAlerts(any(NosqlInjection::Configuration c)) as numNosqlAlerts,
+  numAlerts(any(SqlInjection::Configuration c)) as numSqlAlerts,
+  numAlerts(any(TaintedPath::Configuration c)) as numTaintedPathAlerts,
+  numAlerts(any(DomBasedXss::Configuration c)) as numXssAlerts,
+  count(NosqlInjection::Sink s) as numNosqlSinks,
+  count(SqlInjection::Sink s) as numSqlSinks,
+  count(TaintedPath::Sink s) as numTaintedPathSinks,
+  count(DomBasedXss::Sink s) as numXssSinks
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/CountSourcesAndSinks.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/CountSourcesAndSinks.ql
@@ -0,0 +1,84 @@
+/*
+ * For internal use only.
+ *
+ * Counts the sources and sinks of each data flow configuration for JavaScript security queries.
+ */
+
+import javascript
+import semmle.javascript.dataflow.Configuration
+// javascript/ql/lib/semmle/javascript/security/dataflow$ ls *Query.qll | sed -e 's/\(.*\)Query.qll/import semmle.javascript.security.dataflow.\1Query as \1/'
+import semmle.javascript.security.dataflow.BrokenCryptoAlgorithmQuery as BrokenCryptoAlgorithm
+import semmle.javascript.security.dataflow.BuildArtifactLeakQuery as BuildArtifactLeak
+import semmle.javascript.security.dataflow.CleartextLoggingQuery as CleartextLogging
+import semmle.javascript.security.dataflow.CleartextStorageQuery as CleartextStorage
+import semmle.javascript.security.dataflow.ClientSideUrlRedirectQuery as ClientSideUrlRedirect
+import semmle.javascript.security.dataflow.CodeInjectionQuery as CodeInjection
+import semmle.javascript.security.dataflow.CommandInjectionQuery as CommandInjection
+import semmle.javascript.security.dataflow.ConditionalBypassQuery as ConditionalBypass
+import semmle.javascript.security.dataflow.CorsMisconfigurationForCredentialsQuery as CorsMisconfigurationForCredentials
+import semmle.javascript.security.dataflow.DeepObjectResourceExhaustionQuery as DeepObjectResourceExhaustion
+import semmle.javascript.security.dataflow.DifferentKindsComparisonBypassQuery as DifferentKindsComparisonBypass
+import semmle.javascript.security.dataflow.DomBasedXssQuery as DomBasedXss
+import semmle.javascript.security.dataflow.ExceptionXssQuery as ExceptionXss
+import semmle.javascript.security.dataflow.ExternalAPIUsedWithUntrustedDataQuery as ExternalAPIUsedWithUntrustedData
+import semmle.javascript.security.dataflow.FileAccessToHttpQuery as FileAccessToHttp
+import semmle.javascript.security.dataflow.HardcodedCredentialsQuery as HardcodedCredentials
+import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCodeQuery as HardcodedDataInterpretedAsCode
+import semmle.javascript.security.dataflow.HostHeaderPoisoningInEmailGenerationQuery as HostHeaderPoisoningInEmailGeneration
+import semmle.javascript.security.dataflow.HttpToFileAccessQuery as HttpToFileAccess
+import semmle.javascript.security.dataflow.ImproperCodeSanitizationQuery as ImproperCodeSanitization
+import semmle.javascript.security.dataflow.IncompleteHtmlAttributeSanitizationQuery as IncompleteHtmlAttributeSanitization
+import semmle.javascript.security.dataflow.IndirectCommandInjectionQuery as IndirectCommandInjection
+import semmle.javascript.security.dataflow.InsecureDownloadQuery as InsecureDownload
+import semmle.javascript.security.dataflow.InsecureRandomnessQuery as InsecureRandomness
+import semmle.javascript.security.dataflow.InsufficientPasswordHashQuery as InsufficientPasswordHash
+import semmle.javascript.security.dataflow.LogInjectionQuery as LogInjection
+import semmle.javascript.security.dataflow.LoopBoundInjectionQuery as LoopBoundInjection
+import semmle.javascript.security.dataflow.NosqlInjectionQuery as NosqlInjection
+import semmle.javascript.security.dataflow.PostMessageStarQuery as PostMessageStar
+import semmle.javascript.security.dataflow.PrototypePollutingAssignmentQuery as PrototypePollutingAssignment
+import semmle.javascript.security.dataflow.PrototypePollutionQuery as PrototypePollution
+import semmle.javascript.security.dataflow.ReflectedXssQuery as ReflectedXss
+import semmle.javascript.security.dataflow.RegExpInjectionQuery as RegExpInjection
+import semmle.javascript.security.dataflow.RemotePropertyInjectionQuery as RemotePropertyInjection
+import semmle.javascript.security.dataflow.RequestForgeryQuery as RequestForgery
+import semmle.javascript.security.dataflow.ServerSideUrlRedirectQuery as ServerSideUrlRedirect
+import semmle.javascript.security.dataflow.ShellCommandInjectionFromEnvironmentQuery as ShellCommandInjectionFromEnvironment
+import semmle.javascript.security.dataflow.SqlInjectionQuery as SqlInjection
+import semmle.javascript.security.dataflow.StackTraceExposureQuery as StackTraceExposure
+import semmle.javascript.security.dataflow.StoredXssQuery as StoredXss
+import semmle.javascript.security.dataflow.TaintedFormatStringQuery as TaintedFormatString
+import semmle.javascript.security.dataflow.TaintedPathQuery as TaintedPath
+import semmle.javascript.security.dataflow.TemplateObjectInjectionQuery as TemplateObjectInjection
+import semmle.javascript.security.dataflow.TypeConfusionThroughParameterTamperingQuery as TypeConfusionThroughParameterTampering
+import semmle.javascript.security.dataflow.UnsafeDeserializationQuery as UnsafeDeserialization
+import semmle.javascript.security.dataflow.UnsafeDynamicMethodAccessQuery as UnsafeDynamicMethodAccess
+import semmle.javascript.security.dataflow.UnsafeHtmlConstructionQuery as UnsafeHtmlConstruction
+import semmle.javascript.security.dataflow.UnsafeJQueryPluginQuery as UnsafeJQueryPlugin
+import semmle.javascript.security.dataflow.UnsafeShellCommandConstructionQuery as UnsafeShellCommandConstruction
+import semmle.javascript.security.dataflow.UnvalidatedDynamicMethodCallQuery as UnvalidatedDynamicMethodCall
+import semmle.javascript.security.dataflow.XmlBombQuery as XmlBomb
+import semmle.javascript.security.dataflow.XpathInjectionQuery as XpathInjection
+import semmle.javascript.security.dataflow.XssThroughDomQuery as XssThroughDom
+import semmle.javascript.security.dataflow.XxeQuery as Xxe
+import semmle.javascript.security.dataflow.ZipSlipQuery as ZipSlip
+
+/** Gets a node for which the one- or two-argument `isSink` predicate of `cfg` holds. */
+DataFlow::Node getASink(Configuration cfg) {
+  cfg.isSink(result)
+  or
+  cfg.isSink(result, _)
+}
+
+/** Gets a node for which the one- or two-argument `isSource` predicate of `cfg` holds. */
+DataFlow::Node getASource(Configuration cfg) {
+  cfg.isSource(result)
+  or
+  cfg.isSource(result, _)
+}
+
+from Configuration cfg, int sources, int sinks
+where
+  sources = count(getASource(cfg)) and
+  sinks = count(getASink(cfg))
+select cfg, sources, sinks
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/Exclusions.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/Exclusions.qll
@@ -0,0 +1,49 @@
+/*
+ * For internal use only.
+ *
+ * Defines files that should be excluded from the evaluation of ML models.
+ */
+
+private import javascript
+private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
+
+/**
+ * Holds if the file should be excluded from end-to-end evaluation.
+ *
+ * A file is excluded if it has no relative path, if its lower-cased relative path contains a
+ * segment matching one of the names listed in `ignorePattern`, or if `ClassifyFiles::classify`
+ * holds for it with one of the tags `externs`, `generated`, `library`, or `test`.
+ */
+predicate isFileExcluded(File file) {
+  // Ignore files that are outside the root folder of the analyzed source location.
+  //
+  // If the file doesn't have a relative path, then the source file is located outside the root
+  // folder of the analyzed source location, meaning that the files are additional files added to
+  // the database like standard library files that we would like to ignore.
+  not exists(file.getRelativePath())
+  or
+  // Ignore files based on their path.
+  exists(string ignorePattern, string separator |
+    ignorePattern =
+      // Exclude test files
+      "(tests?|test[_-]?case|" +
+        // Exclude library files
+        //
+        // - The Bower and npm package managers store packages in bower_components and node_modules
+        //   folders respectively.
+        // - Specific exclusion for end-to-end: `applications/examples/static/epydoc` contains
+        //   library code from Epydoc.
+        "3rd[_-]?party|bower_components|extern(s|al)?|node_modules|resources|third[_-]?party|_?vendor|"
+        + "applications" + separator + "examples" + separator + "static" + separator + "epydoc|" +
+        // Exclude generated code
+        "gen|\\.?generated|" +
+        // Exclude benchmarks
+        "benchmarks?|" +
+        // Exclude documentation
+        "docs?|documentation)" and
+    // A segment boundary is either `/` or `.`, so both `test/a.js` and `a.test.js` are matched.
+    separator = "(\\/|\\.)" and
+    // The pattern must cover a whole segment: in the middle, at the start, or at the end of the
+    // path.
+    exists(
+      file.getRelativePath()
+          .toLowerCase()
+          .regexpFind(separator + ignorePattern + separator + "|" + "^" + ignorePattern + separator +
+              "|" + separator + ignorePattern + "$", _, _)
+    )
+  )
+  or
+  // Ignore externs, generated, library, and test files.
+  ClassifyFiles::classify(file, ["externs", "generated", "library", "test"])
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.ql
@@ -0,0 +1,11 @@
+/*
+ * For internal use only.
+ *
+ * Extracts training and evaluation data we can use to train ML models for ML-powered queries.
+ */
+
+import ExtractEndpointData as ExtractEndpointData
+
+/** Endpoint metadata: an alias for the five-column `endpoints` predicate of the library. */
+query predicate endpoints = ExtractEndpointData::endpoints/5;
+
+/** Token features: an alias for the three-column `tokenFeatures` predicate of the library. */
+query predicate tokenFeatures = ExtractEndpointData::tokenFeatures/3;
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll
@@ -0,0 +1,195 @@
+/*
+ * For internal use only.
+ *
+ * Library code for training and evaluation data we can use to train ML models for ML-powered
+ * queries.
+ */
+
+import javascript
+import Exclusions as Exclusions
+import evaluation.EndToEndEvaluation as EndToEndEvaluation
+import experimental.adaptivethreatmodeling.ATMConfig
+import experimental.adaptivethreatmodeling.CoreKnowledge as CoreKnowledge
+import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
+import experimental.adaptivethreatmodeling.EndpointScoring as EndpointScoring
+import experimental.adaptivethreatmodeling.EndpointTypes
+import experimental.adaptivethreatmodeling.FilteringReasons
+import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionATM
+import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionATM
+import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathATM
+import experimental.adaptivethreatmodeling.XssATM as XssATM
+import Labels
+import NoFeaturizationRestrictionsConfig
+import Queries
+
+/** Gets the ATM configuration that belongs to `query`. */
+ATMConfig getATMCfg(Query query) {
+  query instanceof XssQuery and
+  result instanceof XssATM::DomBasedXssATMConfig
+  or
+  query instanceof TaintedPathQuery and
+  result instanceof TaintedPathATM::TaintedPathATMConfig
+  or
+  query instanceof SqlInjectionQuery and
+  result instanceof SqlInjectionATM::SqlInjectionATMConfig
+  or
+  query instanceof NosqlInjectionQuery and
+  result instanceof NosqlInjectionATM::NosqlInjectionATMConfig
+}
+
+/** Gets the ATM data flow configuration that belongs to `query`. */
+DataFlow::Configuration getDataFlowCfg(Query query) {
+  query instanceof XssQuery and
+  result instanceof XssATM::Configuration
+  or
+  query instanceof TaintedPathQuery and
+  result instanceof TaintedPathATM::Configuration
+  or
+  query instanceof SqlInjectionQuery and
+  result instanceof SqlInjectionATM::Configuration
+  or
+  query instanceof NosqlInjectionQuery and
+  result instanceof NosqlInjectionATM::Configuration
+}
+
+/** Gets a sink that the ATM configuration for `query` reports as known. */
+private DataFlow::Node getASink(Query query) {
+  // Only consider the source code for the project being analyzed.
+  exists(result.getFile().getRelativePath()) and
+  getATMCfg(query).isKnownSink(result)
+}
+
+/** Gets a data flow node that is known not to be a sink, for the given `reason`. */
+private DataFlow::Node getANotASink(NotASinkReason reason) {
+  // Only consider the source code for the project being analyzed.
+  exists(result.getFile().getRelativePath()) and
+  CoreKnowledge::isOtherModeledArgument(result, reason) and
+  // An endpoint may carry both a `NotASinkReason` and a `LikelyNotASinkReason`. Such endpoints
+  // count as `LikelyNotASink`, so they are removed from `NotASink` here.
+  not exists(LikelyNotASinkReason likely | CoreKnowledge::isOtherModeledArgument(result, likely)) and
+  // A known sink of any query is never reported as `NotASink`.
+  not result = getASink(_)
+}
+
+/**
+ * Gets a data flow node whose label is unknown for the specified query.
+ *
+ * This is an effective sink of the query's ATM configuration (with or without an overriding
+ * score) that is neither a known `Sink` for the query nor a `NotASink`.
+ * NOTE(review): `LikelyNotASink` endpoints are presumably already removed by the ATM
+ * configuration's own endpoint filters; that is not visible from this file.
+ */
+private DataFlow::Node getAnUnknown(Query query) {
+  exists(ATMConfig cfg | cfg = getATMCfg(query) |
+    cfg.isEffectiveSink(result) or cfg.isEffectiveSinkWithOverridingScore(result, _, _)
+  ) and
+  not result = getASink(query) and
+  // Defensive: without this, an endpoint that is both an effective sink and a `NotASink` would be
+  // extracted with two contradictory `sinkLabel` values.
+  not result = getANotASink(_) and
+  // Only consider the source code for the project being analyzed.
+  exists(result.getFile().getRelativePath())
+}
+
+/** Gets the label that `endpoint` receives when viewed as a sink for `query`, if there is one. */
+private EndpointLabel getSinkLabelForEndpoint(DataFlow::Node endpoint, Query query) {
+  result instanceof UnknownLabel and endpoint = getAnUnknown(query)
+  or
+  result instanceof NotASinkLabel and endpoint = getANotASink(_)
+  or
+  result instanceof SinkLabel and endpoint = getASink(query)
+}
+
+/** Gets an endpoint to extract for `query`, that is, a node with a sink label for that query. */
+DataFlow::Node getAnEndpoint(Query query) {
+  exists(EndpointLabel label | label = getSinkLabelForEndpoint(result, query))
+}
+
+/**
+ * Endpoints and associated metadata.
+ *
+ * Note that we draw a distinction between _features_, that are provided to the model at training
+ * and query time, and _metadata_, that is only provided to the model at training time.
+ *
+ * Each row is one key/value pair for one labelled `endpoint` and one query:
+ * - `queryName` is the name of the query, as returned by `Query::getName()`.
+ * - `key` is one of `hasFlowFromSource`, `isConstantExpression`,
+ *   `isExcludedFromEndToEndEvaluation`, `sinkLabel`, or `notASinkReason`.
+ * - `value` is the value for `key`, encoded as a string.
+ * - `valueType` is `boolean` when `value` is `true` or `false`, and `string` otherwise.
+ *
+ * The `notASinkReason` key only has rows for endpoints returned by `getANotASink`, one row per
+ * reason.
+ *
+ * Internal: See the design document for
+ * [extensible extraction queries](https://docs.google.com/document/d/1g3ci2Nf1hGMG6ZUP0Y4PqCy_8elcoC_dhBvgTxdAWpg)
+ * for technical information about the design of this predicate.
+ */
+predicate endpoints(
+  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
+) {
+  exists(Query query |
+    // Only provide metadata for labelled endpoints, since we do not extract all endpoints.
+    endpoint = getAnEndpoint(query) and
+    queryName = query.getName() and
+    (
+      // Holds if there is a taint flow path from a known source to the endpoint
+      key = "hasFlowFromSource" and
+      (
+        if FlowFromSource::hasFlowFromSource(endpoint, query)
+        then value = "true"
+        else value = "false"
+      ) and
+      valueType = "boolean"
+      or
+      // Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
+      // appear in an alert, making them less interesting training examples.
+      key = "isConstantExpression" and
+      (if endpoint.asExpr() instanceof ConstantExpr then value = "true" else value = "false") and
+      valueType = "boolean"
+      or
+      // Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
+      key = "isExcludedFromEndToEndEvaluation" and
+      (if Exclusions::isFileExcluded(endpoint.getFile()) then value = "true" else value = "false") and
+      valueType = "boolean"
+      or
+      // The label for this query, considering the endpoint as a sink.
+      key = "sinkLabel" and
+      value = getSinkLabelForEndpoint(endpoint, query).getEncoding() and
+      valueType = "string"
+      or
+      // The reason, or reasons, why the endpoint was labeled NotASink for this query.
+      key = "notASinkReason" and
+      exists(FilteringReason reason |
+        endpoint = getANotASink(reason) and
+        value = reason.getDescription()
+      ) and
+      valueType = "string"
+    )
+  )
+}
+
+/**
+ * Token features for every extracted endpoint, with one row per supported feature.
+ *
+ * `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
+ * `endpoint`. To preserve compatibility with the data pipeline, this relation instead reports the
+ * empty string as `featureValue` in that case.
+ */
+predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
+  endpoints(endpoint, _, _, _, _) and
+  (
+    // Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
+    featureValue = "" and
+    featureName = EndpointFeatures::getASupportedFeatureName() and
+    not EndpointFeatures::tokenFeatures(endpoint, featureName, _)
+    or
+    EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
+  )
+}
+
+/** Provides a check for whether an extracted endpoint is reached by flow from a source. */
+module FlowFromSource {
+  /** Holds if the flow configuration for `q` finds flow from some node to `endpoint`. */
+  predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
+    any(Configuration cfg | cfg.getQuery() = q).hasFlow(_, endpoint)
+  }
+
+  /**
+   * A data flow configuration that replicates the data flow configuration for a specific query, but
+   * uses the set of endpoints we're extracting as its sinks.
+   *
+   * We use this to find out when there is flow to a particular endpoint from a known source.
+   *
+   * This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
+   * from the CodeQL standard libraries for JavaScript.
+   */
+  private class Configuration extends DataFlow::Configuration {
+    Query query;
+
+    Configuration() { this = getDataFlowCfg(query) }
+
+    /** Gets the query whose data flow configuration this is. */
+    Query getQuery() { result = query }
+
+    /** The sinks are the endpoints we're extracting. */
+    override predicate isSink(DataFlow::Node sink) { sink = getAnEndpoint(query) }
+
+    /** The sinks are the endpoints we're extracting, whatever the flow label. */
+    override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) {
+      sink = getAnEndpoint(query)
+    }
+  }
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataEvaluation.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataEvaluation.ql
@@ -0,0 +1,25 @@
+/*
+ * For internal use only.
+ *
+ * Extracts evaluation data we can use to evaluate ML models for ML-powered queries.
+ */
+
+import javascript
+import ExtractEndpointData as ExtractEndpointData
+
+/** Endpoint metadata, restricted to the endpoints that are used for evaluation. */
+query predicate endpoints(
+  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
+) {
+  // do not select endpoints filtered out by end-to-end evaluation
+  ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
+    "boolean") and
+  // only select endpoints that are either Sink, NotASink or Unknown
+  exists(string label | label = ["Sink", "NotASink", "Unknown"] |
+    ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", label, "string")
+  ) and
+  ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType)
+}
+
+/** Token features for the endpoints that appear in the `endpoints` relation of this query. */
+query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
+  ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue) and
+  endpoints(endpoint, _, _, _, _)
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.ql
@@ -0,0 +1,26 @@
+/*
+ * For internal use only.
+ *
+ * Extracts training data we can use to train ML models for ML-powered queries.
+ */
+
+import javascript
+import ExtractEndpointData as ExtractEndpointData
+
+/** Endpoint metadata, restricted to the endpoints that are used for training. */
+query predicate endpoints(
+  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
+) {
+  // only select endpoints that can be part of a tainted flow
+  ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean") and
+  // do not select endpoints filtered out by end-to-end evaluation
+  ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
+    "boolean") and
+  // only select endpoints that are either Sink or NotASink
+  exists(string label | label = ["Sink", "NotASink"] |
+    ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", label, "string")
+  ) and
+  ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType)
+}
+
+/** Token features for the endpoints that appear in the `endpoints` relation of this query. */
+query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
+  ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue) and
+  endpoints(endpoint, _, _, _, _)
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointLabelEncoding.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointLabelEncoding.ql
@@ -0,0 +1,10 @@
+/**
+ * @name Endpoint types
+ * @description Maps endpoint type encodings to human-readable descriptions.
+ * @kind table
+ */
+
+import experimental.adaptivethreatmodeling.EndpointTypes
+
+// One row per endpoint type, sorted by encoding. The `as` clauses label the two result columns.
+from EndpointType type
+select type.getEncoding() as encoding, type.getDescription() as description order by encoding
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractMisclassifiedEndpointFeatures.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractMisclassifiedEndpointFeatures.ql
@@ -0,0 +1,44 @@
+/*
+ * For internal use only.
+ *
+ * Query for finding misclassified endpoints which we can use to debug ML-powered queries.
+ */
+
+import javascript
+import experimental.adaptivethreatmodeling.AdaptiveThreatModeling
+import experimental.adaptivethreatmodeling.ATMConfig
+import experimental.adaptivethreatmodeling.BaseScoring
+import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
+import experimental.adaptivethreatmodeling.EndpointTypes
+import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
+
+/** Gets the positive endpoint type for which you wish to find misclassified examples. */
+EndpointType getEndpointType() { result = any(NosqlInjectionSinkType sinkType) }
+
+/**
+ * Gets a positive endpoint. This will be run through the classifier to determine whether it is
+ * misclassified.
+ */
+DataFlow::Node getAPositiveEndpoint() { result = any(NosqlInjection::Sink sink) }
+
+/**
+ * An ATM configuration to find misclassified endpoints of type `getEndpointType()`. Its effective
+ * sinks are exactly the endpoints returned by `getAPositiveEndpoint()`.
+ */
+class ExtractMisclassifiedEndpointsATMConfig extends ATMConfig {
+  ExtractMisclassifiedEndpointsATMConfig() { this = "ExtractMisclassifiedEndpointsATMConfig" }
+
+  override EndpointType getASinkEndpointType() { result = getEndpointType() }
+
+  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
+    getAPositiveEndpoint() = sinkCandidate
+  }
+}
+
+/**
+ * Gets an endpoint from `getAPositiveEndpoint()` that is incorrectly excluded from the results.
+ */
+DataFlow::Node getAMisclassifedEndpoint() {
+  exists(ExtractMisclassifiedEndpointsATMConfig config | config.isEffectiveSink(result)) and
+  not exists(ScoringResults results | results.shouldResultBeIncluded(_, result))
+}
+
+/** The token features of every endpoint returned by `getAMisclassifedEndpoint()`. */
+query predicate tokenFeaturesForMisclassifiedEndpoints(
+  DataFlow::Node endpoint, string featureName, string featureValue
+) {
+  EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue) and
+  endpoint = getAMisclassifedEndpoint()
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/Labels.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/Labels.qll
@@ -0,0 +1,29 @@
+/*
+ * For internal use only.
+ *
+ * Labels used in training and evaluation data to indicate knowledge about whether an endpoint is a
+ * sink for a particular security query.
+ */
+
+/** The algebraic type behind `EndpointLabel`, with one branch per label. */
+newtype TEndpointLabel =
+  TSinkLabel() or
+  TNotASinkLabel() or
+  TUnknownLabel()
+
+/** A label attached to an endpoint in the training and evaluation data. */
+abstract class EndpointLabel extends TEndpointLabel {
+  /** Gets the string that encodes this label in the extracted data. */
+  abstract string getEncoding();
+
+  /** Gets a textual representation of this label, which is its encoding. */
+  string toString() { result = this.getEncoding() }
+}
+
+/** The label encoded as `Sink`. */
+class SinkLabel extends EndpointLabel, TSinkLabel {
+  override string getEncoding() { result = "Sink" }
+}
+
+/** The label encoded as `NotASink`. */
+class NotASinkLabel extends EndpointLabel, TNotASinkLabel {
+  override string getEncoding() { result = "NotASink" }
+}
+
+/** The label encoded as `Unknown`. */
+class UnknownLabel extends EndpointLabel, TUnknownLabel {
+  override string getEncoding() { result = "Unknown" }
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/NoFeaturizationRestrictionsConfig.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/NoFeaturizationRestrictionsConfig.qll
@@ -0,0 +1,16 @@
+/*
+ * For internal use only.
+ */
+
+private import experimental.adaptivethreatmodeling.FeaturizationConfig
+
+/**
+ * A featurization config that featurizes all endpoints.
+ *
+ * This should only be used in extraction queries and tests.
+ */
+class NoRestrictionsFeaturizationConfig extends FeaturizationConfig {
+  NoRestrictionsFeaturizationConfig() { this = "NoRestrictionsFeaturization" }
+
+  // `any()` always holds and places no constraint on `result`, so every node is selected.
+  override DataFlow::Node getAnEndpointToFeaturize() { any() }
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/Queries.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/Queries.qll
@@ -0,0 +1,33 @@
+/*
+ * For internal use only.
+ *
+ * Represents the security queries for which we currently have ML-powered versions.
+ */
+
+/** The algebraic type behind `Query`, with one branch per supported security query. */
+newtype TQuery =
+  TNosqlInjectionQuery() or
+  TSqlInjectionQuery() or
+  TTaintedPathQuery() or
+  TXssQuery()
+
+/** A security query for which an ML-powered version exists. */
+abstract class Query extends TQuery {
+  /** Gets the name that identifies this query in the extracted data. */
+  abstract string getName();
+
+  /** Gets a textual representation of this query, which is its name. */
+  string toString() { result = this.getName() }
+}
+
+/** The query named `NosqlInjection`. */
+class NosqlInjectionQuery extends Query, TNosqlInjectionQuery {
+  override string getName() { result = "NosqlInjection" }
+}
+
+/** The query named `SqlInjection`. */
+class SqlInjectionQuery extends Query, TSqlInjectionQuery {
+  override string getName() { result = "SqlInjection" }
+}
+
+/** The query named `TaintedPath`. */
+class TaintedPathQuery extends Query, TTaintedPathQuery {
+  override string getName() { result = "TaintedPath" }
+}
+
+/** The query named `Xss`. */
+class XssQuery extends Query, TXssQuery {
+  override string getName() { result = "Xss" }
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml
@@ -0,0 +1,9 @@
+name: codeql/javascript-experimental-atm-model-building
+version: 0.0.0
+extractor: javascript
+library: false
+groups:
+    - javascript
+    - experimental
+dependencies:
+    codeql/javascript-experimental-atm-lib: "*"