Merge pull request #11263 from github/tiferet/extract-training-data

ATM: Extract training data
2026-04-29 10:45:15 +02:00 · 2022-11-15 12:08:13 -08:00
parent 2ffb4b6480 fc078a47fd
commit 710b215c38
8 changed files with 246 additions and 248 deletions
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
@@ -45,6 +45,9 @@ abstract class EndpointCharacteristic extends string {
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  );

+  /** Indicators with confidence at or above this threshold are considered to be high-confidence indicators. */
+  final float getHighConfidenceThreshold() { result = 0.8 }
+
  // The following are some confidence values that are used in practice by the subclasses. They are defined as named
  // constants here to make it easier to change them in the future.
  final float maximalConfidence() { result = 1.0 }
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/DebugResultInclusion.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/DebugResultInclusion.ql
@@ -11,7 +11,7 @@

 import javascript
 import experimental.adaptivethreatmodeling.ATMConfig
-import extraction.ExtractEndpointData
+import extraction.ExtractEndpointDataTraining

 string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
  query instanceof NosqlInjectionQuery and
@@ -33,7 +33,7 @@ string getDescriptionForAlertCandidate(
 ) {
  result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
  or
-  getAtmCfg(query).isKnownSink(sinkCandidate) and
+  getDataFlowCfg(query).(AtmConfig).isKnownSink(sinkCandidate) and
  result = "excluded[reason=known-sink]"
  or
  not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.ql
@@ -1,11 +0,0 @@
-/*
- * For internal use only.
- *
- * Extracts training and evaluation data we can use to train ML models for ML-powered queries.
- */
-
-import ExtractEndpointData as ExtractEndpointData
-
-query predicate endpoints = ExtractEndpointData::endpoints/5;
-
-query predicate tokenFeatures = ExtractEndpointData::tokenFeatures/3;
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll
@@ -1,215 +0,0 @@
-/*
- * For internal use only.
- *
- * Library code for training and evaluation data we can use to train ML models for ML-powered
- * queries.
- */
-
-import javascript
-import Exclusions as Exclusions
-import evaluation.EndToEndEvaluation as EndToEndEvaluation
-import experimental.adaptivethreatmodeling.ATMConfig
-import experimental.adaptivethreatmodeling.CoreKnowledge as CoreKnowledge
-import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
-import experimental.adaptivethreatmodeling.EndpointScoring as EndpointScoring
-import experimental.adaptivethreatmodeling.EndpointTypes
-import experimental.adaptivethreatmodeling.FilteringReasons
-import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
-
-/** DEPRECATED: Alias for NosqlInjectionAtm */
-deprecated module NosqlInjectionATM = NosqlInjectionAtm;
-
-import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
-
-/** DEPRECATED: Alias for SqlInjectionAtm */
-deprecated module SqlInjectionATM = SqlInjectionAtm;
-
-import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
-
-/** DEPRECATED: Alias for TaintedPathAtm */
-deprecated module TaintedPathATM = TaintedPathAtm;
-
-import experimental.adaptivethreatmodeling.XssATM as XssAtm
-
-/** DEPRECATED: Alias for XssAtm */
-deprecated module XssATM = XssAtm;
-
-import Labels
-import NoFeaturizationRestrictionsConfig
-import Queries
-
-/** Gets the ATM configuration object for the specified query. */
-AtmConfig getAtmCfg(Query query) {
-  query instanceof NosqlInjectionQuery and
-  result instanceof NosqlInjectionAtm::NosqlInjectionAtmConfig
-  or
-  query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::SqlInjectionAtmConfig
-  or
-  query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::TaintedPathAtmConfig
-  or
-  query instanceof XssQuery and result instanceof XssAtm::DomBasedXssAtmConfig
-}
-
-/** DEPRECATED: Alias for getAtmCfg */
-deprecated ATMConfig getATMCfg(Query query) { result = getAtmCfg(query) }
-
-/** Gets the ATM data flow configuration for the specified query. */
-DataFlow::Configuration getDataFlowCfg(Query query) {
-  query instanceof NosqlInjectionQuery and result instanceof NosqlInjectionAtm::Configuration
-  or
-  query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::Configuration
-  or
-  query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::Configuration
-  or
-  query instanceof XssQuery and result instanceof XssAtm::Configuration
-}
-
-/** Gets a known sink for the specified query. */
-private DataFlow::Node getASink(Query query) {
-  getAtmCfg(query).isKnownSink(result) and
-  // Only consider the source code for the project being analyzed.
-  exists(result.getFile().getRelativePath())
-}
-
-/** Gets a data flow node that is known not to be a sink for the specified query. */
-private DataFlow::Node getANotASink(NotASinkReason reason) {
-  CoreKnowledge::isOtherModeledArgument(result, reason) and
-  // Some endpoints can be assigned both a `NotASinkReason` and a `LikelyNotASinkReason`. We
-  // consider these endpoints to be `LikelyNotASink`, therefore this line excludes them from the
-  // definition of `NotASink`.
-  not CoreKnowledge::isOtherModeledArgument(result, any(LikelyNotASinkReason t)) and
-  not result = getASink(_) and
-  // Only consider the source code for the project being analyzed.
-  exists(result.getFile().getRelativePath())
-}
-
-/**
- * Gets a data flow node whose label is unknown for the specified query.
- *
- * In other words, this is an endpoint that is not `Sink`, `NotASink`, or `LikelyNotASink` for the
- * specified query.
- */
-private DataFlow::Node getAnUnknown(Query query) {
-  getAtmCfg(query).isEffectiveSink(result) and
-  // Effective sinks should exclude sinks but this is a defensive requirement
-  not result = getASink(query) and
-  // Effective sinks should exclude NotASink but for some queries (e.g. Xss) this is currently not always the case and
-  // so this is a defensive requirement
-  not result = getANotASink(_) and
-  // Only consider the source code for the project being analyzed.
-  exists(result.getFile().getRelativePath())
-}
-
-/** Gets the query-specific sink label for the given endpoint, if such a label exists. */
-private EndpointLabel getSinkLabelForEndpoint(DataFlow::Node endpoint, Query query) {
-  endpoint = getASink(query) and result instanceof SinkLabel
-  or
-  endpoint = getANotASink(_) and result instanceof NotASinkLabel
-  or
-  endpoint = getAnUnknown(query) and result instanceof UnknownLabel
-}
-
-/** Gets an endpoint that should be extracted. */
-DataFlow::Node getAnEndpoint(Query query) { exists(getSinkLabelForEndpoint(result, query)) }
-
-/**
- * Endpoints and associated metadata.
- *
- * Note that we draw a distinction between _features_, that are provided to the model at training
- * and query time, and _metadata_, that is only provided to the model at training time.
- *
- * Internal: See the design document for
- * [extensible extraction queries](https://docs.google.com/document/d/1g3ci2Nf1hGMG6ZUP0Y4PqCy_8elcoC_dhBvgTxdAWpg)
- * for technical information about the design of this predicate.
- */
-predicate endpoints(
-  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
-) {
-  exists(Query query |
-    // Only provide metadata for labelled endpoints, since we do not extract all endpoints.
-    endpoint = getAnEndpoint(query) and
-    queryName = query.getName() and
-    (
-      // Holds if there is a taint flow path from a known source to the endpoint
-      key = "hasFlowFromSource" and
-      (
-        if FlowFromSource::hasFlowFromSource(endpoint, query)
-        then value = "true"
-        else value = "false"
-      ) and
-      valueType = "boolean"
-      or
-      // Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
-      // appear in an alert, making them less interesting training examples.
-      key = "isConstantExpression" and
-      (if endpoint.asExpr() instanceof ConstantExpr then value = "true" else value = "false") and
-      valueType = "boolean"
-      or
-      // Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
-      key = "isExcludedFromEndToEndEvaluation" and
-      (if Exclusions::isFileExcluded(endpoint.getFile()) then value = "true" else value = "false") and
-      valueType = "boolean"
-      or
-      // The label for this query, considering the endpoint as a sink.
-      key = "sinkLabel" and
-      value = getSinkLabelForEndpoint(endpoint, query).getEncoding() and
-      valueType = "string"
-      or
-      // The reason, or reasons, why the endpoint was labeled NotASink for this query.
-      key = "notASinkReason" and
-      exists(FilteringReason reason |
-        endpoint = getANotASink(reason) and
-        value = reason.getDescription()
-      ) and
-      valueType = "string"
-    )
-  )
-}
-
-/**
- * `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
- * `endpoint`. To preserve compatibility with the data pipeline, this relation will instead set
- * `featureValue` to the empty string in this case.
- */
-predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
-  endpoints(endpoint, _, _, _, _) and
-  (
-    EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
-    or
-    // Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
-    featureName = EndpointFeatures::getASupportedFeatureName() and
-    not exists(string value | EndpointFeatures::tokenFeatures(endpoint, featureName, value)) and
-    featureValue = ""
-  )
-}
-
-module FlowFromSource {
-  predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
-    exists(Configuration cfg | cfg.getQuery() = q | cfg.hasFlow(_, endpoint))
-  }
-
-  /**
-   * A data flow configuration that replicates the data flow configuration for a specific query, but
-   * replaces the set of sinks with the set of endpoints we're extracting.
-   *
-   * We use this to find out when there is flow to a particular endpoint from a known source.
-   *
-   * This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
-   * from the CodeQL standard libraries for JavaScript.
-   */
-  private class Configuration extends DataFlow::Configuration {
-    Query q;
-
-    Configuration() { this = getDataFlowCfg(q) }
-
-    Query getQuery() { result = q }
-
-    /** Holds if `sink` is an endpoint we're extracting. */
-    override predicate isSink(DataFlow::Node sink) { sink = getAnEndpoint(q) }
-
-    /** Holds if `sink` is an endpoint we're extracting. */
-    override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) {
-      sink = getAnEndpoint(q) and exists(lbl)
-    }
-  }
-}
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.ql
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.ql
@@ -4,23 +4,8 @@
 * Extracts training data we can use to train ML models for ML-powered queries.
 */

-import javascript
-import ExtractEndpointData as ExtractEndpointData
+private import ExtractEndpointDataTraining as ExtractEndpointDataTraining

-query predicate endpoints(
-  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
-) {
-  ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
-  // only select endpoints that are either Sink or NotASink
-  ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink"], "string") and
-  // do not select endpoints filtered out by end-to-end evaluation
-  ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
-    "boolean") and
-  // only select endpoints that can be part of a tainted flow
-  ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean")
-}
+query predicate endpoints = ExtractEndpointDataTraining::reformattedTrainingEndpoints/5;

-query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
-  endpoints(endpoint, _, _, _, _) and
-  ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue)
-}
+query predicate tokenFeatures = ExtractEndpointDataTraining::tokenFeatures/3;
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll
@@ -0,0 +1,238 @@
+/*
+ * For internal use only.
+ *
+ * Extracts training data we can use to train ML models for ML-powered queries.
+ */
+
+import javascript
+import experimental.adaptivethreatmodeling.EndpointCharacteristics
+import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
+import NoFeaturizationRestrictionsConfig
+private import Exclusions as Exclusions
+import Queries
+import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
+import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
+import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
+import experimental.adaptivethreatmodeling.XssATM as XssAtm
+
+/**
+ * Holds if `endpoint` is in the training set and has value `featureValue` for token feature `featureName`.
+ *
+ * `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
+ * `endpoint`. To preserve compatibility with the data pipeline, this relation will instead set
+ * `featureValue` to the empty string in this case.
+ */
+predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
+  trainingEndpoints(endpoint, _, _) and
+  (
+    EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
+    or
+    // Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
+    featureName = EndpointFeatures::getASupportedFeatureName() and
+    not exists(string value | EndpointFeatures::tokenFeatures(endpoint, featureName, value)) and
+    featureValue = ""
+  )
+}
+
+/**
+ * Holds if the given endpoint should be included in the training set as a sample belonging to endpointClass, and has
+ * the given characteristic. This predicate uses the endpoint characteristics to select and label endpoints for the
+ * training set, and provides a list of characteristics for each endpoint in the training set, which is used in the
+ * modeling code.
+ *
+ * Params:
+ * endpoint: The endpoint to include / exclude.
+ * endpointClass: The sink type. See the documentation of EndpointType.getEncoding for details about the relationship
+ * between an EndpointType and a class in the classifier.
+ * characteristic: Provides the list of characteristics that apply to the endpoint, which the modeling code currently
+ * uses for type balancing.
+ *
+ * Note: This predicate will produce multiple tuples for endpoints that have multiple characteristics, which we must
+ * then group together into a list of characteristics.
+ */
+query predicate trainingEndpoints(
+  DataFlow::Node endpoint, EndpointType endpointClass, EndpointCharacteristic characteristic
+) {
+  characteristic.getEndpoints(endpoint) and
+  // Only consider the source code for the project being analyzed.
+  exists(endpoint.getFile().getRelativePath()) and
+  // Only select endpoints that can be part of a tainted flow: Constant expressions always evaluate to a constant
+  // primitive value. Therefore they can't ever appear in an alert, making them less interesting training examples.
+  // TODO: Experiment with removing this requirement.
+  not endpoint.asExpr() instanceof ConstantExpr and
+  // Do not select endpoints filtered out by end-to-end evaluation.
+  // TODO: Experiment with removing this requirement.
+  not Exclusions::isFileExcluded(endpoint.getFile()) and
+  // Filter out negative examples that also have a `LikelyNotASinkCharacteristic`, to match the earlier logic at
+  // https://github.com/github/codeql/blob/387e57546bf7352f7c1cfe781daa1a3799b7063e/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll#L77
+  // TODO: Experiment with removing this requirement.
+  not (
+    endpointClass instanceof NegativeType and
+    exists(EndpointCharacteristic c |
+      c.getEndpoints(endpoint) and
+      c instanceof LikelyNotASinkCharacteristic
+    )
+  ) and
+  (
+    // If the list of characteristics includes positive indicators with high confidence for this class, select this as a
+    // training sample belonging to the class.
+    exists(EndpointCharacteristic characteristic2, float confidence |
+      characteristic2.getEndpoints(endpoint) and
+      characteristic2.getImplications(endpointClass, true, confidence) and
+      confidence >= characteristic2.getHighConfidenceThreshold()
+    ) and
+    (
+      // Temporarily limit this only to positive classes. For negative classes, additionally select only endpoints that
+      // have no high confidence indicators that they are sinks, because this is what was previously done.
+      // TODO: Experiment with removing this requirement, and instead ensuring that an endpoint never has both a high
+      // confidence indicator that it _is_ a sink and a high confidence indicator that it is _not_ a sink.
+      not endpointClass instanceof NegativeType
+      or
+      not exists(EndpointCharacteristic characteristic3, float confidence3, EndpointType posClass |
+        characteristic3.getEndpoints(endpoint) and
+        characteristic3.getImplications(posClass, true, confidence3) and
+        confidence3 >= characteristic3.getHighConfidenceThreshold() and
+        not posClass instanceof NegativeType
+      )
+    )
+    or
+    // If the list of characteristics includes high-confidence negative indicators for every class other than the
+    // negative class (class 0), select this as a training sample of the negative class (this means we had
+    // query-specific characteristics to decide this endpoint isn't a sink for each of our sink types).
+    endpointClass instanceof NegativeType and
+    forall(EndpointType otherClass | not otherClass instanceof NegativeType |
+      exists(EndpointCharacteristic characteristic2, float confidence |
+        characteristic2.getEndpoints(endpoint) and
+        characteristic2.getImplications(otherClass, false, confidence) and
+        confidence >= characteristic2.getHighConfidenceThreshold()
+      )
+    )
+  )
+}
+
+/**
+ * Temporary:
+ * Reformat the training data that was extracted with the new logic to match the format produced by the old predicate.
+ * This is the format expected by the endpoint pipeline.
+ */
+query predicate reformattedTrainingEndpoints(
+  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
+) {
+  trainingEndpoints(endpoint, _, _) and
+  exists(Query query |
+    queryName = query.getName() and
+    // For sinks, only emit rows for the query whose name prefixes the sink type; for non-sinks, emit rows for every query.
+    (
+      exists(EndpointType endpointClass |
+        endpointClass.getDescription().matches(queryName + "%") and
+        not endpointClass instanceof NegativeType and
+        trainingEndpoints(endpoint, endpointClass, _)
+      )
+      or
+      exists(EndpointType endpointClass |
+        endpointClass instanceof NegativeType and
+        trainingEndpoints(endpoint, endpointClass, _)
+      )
+    ) and
+    (
+      // NOTE: We don't use hasFlowFromSource in training, so we could just hardcode it to be false.
+      key = "hasFlowFromSource" and
+      (
+        if FlowFromSource::hasFlowFromSource(endpoint, query)
+        then value = "true"
+        else value = "false"
+      ) and
+      valueType = "boolean"
+      or
+      // Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
+      // appear in an alert, making them less interesting training examples.
+      key = "isConstantExpression" and
+      (if endpoint.asExpr() instanceof ConstantExpr then value = "true" else value = "false") and
+      valueType = "boolean"
+      or
+      // Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
+      key = "isExcludedFromEndToEndEvaluation" and
+      (if Exclusions::isFileExcluded(endpoint.getFile()) then value = "true" else value = "false") and
+      valueType = "boolean"
+      or
+      // The label for this query: "Sink" (this disjunct, positive sample) or "NotASink" (next disjunct, negative sample).
+      key = "sinkLabel" and
+      valueType = "string" and
+      value = "Sink" and
+      exists(EndpointType endpointClass |
+        endpointClass.getDescription().matches(queryName + "%") and
+        not endpointClass instanceof NegativeType and
+        trainingEndpoints(endpoint, endpointClass, _)
+      )
+      or
+      key = "sinkLabel" and
+      valueType = "string" and
+      value = "NotASink" and
+      exists(EndpointType endpointClass |
+        endpointClass instanceof NegativeType and
+        trainingEndpoints(endpoint, endpointClass, _)
+      )
+      or
+      // The reason, or reasons, why the endpoint was labeled NotASink for this query, only for negative examples.
+      key = "notASinkReason" and
+      exists(EndpointCharacteristic characteristic, EndpointType endpointClass |
+        characteristic.getEndpoints(endpoint) and
+        characteristic.getImplications(endpointClass, true, _) and
+        endpointClass instanceof NegativeType and
+        value = characteristic
+      ) and
+      // Don't include a notASinkReason for endpoints that also have a high-confidence positive indicator for a sink class.
+      not exists(EndpointCharacteristic characteristic3, float confidence3, EndpointType posClass |
+        characteristic3.getEndpoints(endpoint) and
+        characteristic3.getImplications(posClass, true, confidence3) and
+        confidence3 >= characteristic3.getHighConfidenceThreshold() and
+        not posClass instanceof NegativeType
+      ) and
+      valueType = "string"
+    )
+  )
+}
+
+/**
+ * Gets the ATM data flow configuration for `query` (one of NoSQL injection, SQL injection, tainted path, or XSS).
+ * TODO: Delete this once we are no longer surfacing `hasFlowFromSource`.
+ */
+DataFlow::Configuration getDataFlowCfg(Query query) {
+  query instanceof NosqlInjectionQuery and result instanceof NosqlInjectionAtm::Configuration
+  or
+  query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::Configuration
+  or
+  query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::Configuration
+  or
+  query instanceof XssQuery and result instanceof XssAtm::Configuration
+}
+
+// TODO: Delete this once we are no longer surfacing `hasFlowFromSource`.
+private module FlowFromSource {
+  predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
+    exists(Configuration cfg | cfg.getQuery() = q | cfg.hasFlow(_, endpoint))
+  }
+
+  /**
+   * A data flow configuration that replicates the data flow configuration for a specific query, but
+   * treats every data flow node as a sink (not just the endpoints we're extracting).
+   *
+   * We use this to find out when there is flow to a particular endpoint from a known source.
+   *
+   * This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
+   * from the CodeQL standard libraries for JavaScript.
+   */
+  private class Configuration extends DataFlow::Configuration {
+    Query q;
+
+    Configuration() { this = getDataFlowCfg(q) }
+
+    Query getQuery() { result = q }
+
+    /** Holds for every node: all data flow nodes are treated as sinks. */
+    override predicate isSink(DataFlow::Node sink) { any() }
+
+    /** Holds for every node and every flow label `lbl`. */
+    override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) { exists(lbl) }
+  }
+}
--- a/javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ExtractEndpointData.qlref
+++ b/javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ExtractEndpointData.qlref
@@ -1 +0,0 @@
-extraction/ExtractEndpointData.ql
--- a/javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_unit_tests/ExtractEndpointData.qlref
+++ b/javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_unit_tests/ExtractEndpointData.qlref
@@ -1 +0,0 @@
-extraction/ExtractEndpointData.ql