Refactor framework-mode queries to make them more easily testable.

2026-04-25 08:45:14 +02:00 · 2024-01-17 12:05:58 +00:00
parent adea805546
commit 6c47a5d5f9
5 changed files with 105 additions and 63 deletions
--- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
@@ -372,7 +372,7 @@ class ApplicationModeMetadataExtractor extends string {
 }

 /**
- * Holds if the given `endpoint` should be considered a candidate for the `extensibleType.
+ * Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
 *
 * The other parameters record various other properties of interest.
 */
@@ -380,21 +380,20 @@ predicate isCandidate(
  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
  string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
 ) {
-  exists(ApplicationModeMetadataExtractor meta |
-    CharacteristicsImpl::isCandidate(endpoint, _) and
-    not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
-      u.appliesToEndpoint(endpoint)
-    ) and
-    meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
-      alreadyAiModeled, extensibleType) and
-    // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
-    // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
-    // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
-    // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
-    // types, and we don't need to reexamine it.
-    alreadyAiModeled.matches(["", "%ai-%"]) and
-    AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
-  )
+  CharacteristicsImpl::isCandidate(endpoint, _) and
+  not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
+    u.appliesToEndpoint(endpoint)
+  ) and
+  any(ApplicationModeMetadataExtractor meta)
+      .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
+        alreadyAiModeled, extensibleType) and
+  // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
+  // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
+  // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
+  // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
+  // types, and we don't need to reexamine it.
+  alreadyAiModeled.matches(["", "%ai-%"]) and
+  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
 }

 /**
--- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
@@ -312,6 +312,85 @@ class FrameworkModeMetadataExtractor extends string {
  }
 }

+/**
+ * Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
+ *
+ * The other parameters are metadata reported for `endpoint` by `FrameworkModeMetadataExtractor`.
+ */
+predicate isCandidate(
+  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
+  string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
+) {
+  CharacteristicsImpl::isCandidate(endpoint, _) and
+  not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
+    u.appliesToEndpoint(endpoint)
+  ) and
+  any(FrameworkModeMetadataExtractor meta)
+      .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
+        alreadyAiModeled, extensibleType) and
+  // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
+  // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
+  // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
+  // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
+  // types, and we don't need to reexamine it. Hence `alreadyAiModeled` must be empty or contain `ai-`.
+  alreadyAiModeled.matches(["", "%ai-%"]) and
+  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
+}
+
+/**
+ * Holds if the given `endpoint` is a negative example for the `extensibleType` because of the
+ * `characteristic`. `confidence` is the lowest confidence of its negative implications for the
+ * endpoint's potential types. The remaining parameters are metadata reported for `endpoint` by
+ * `FrameworkModeMetadataExtractor`.
+ */
+predicate isNegativeExample(
+  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
+  string type, string subtypes, string name, string signature, string input, string output,
+  string parameterName, string extensibleType
+) {
+  characteristic.appliesToEndpoint(endpoint) and
+  // the node is known not to be an endpoint of any appropriate type
+  forall(AutomodelEndpointTypes::EndpointType tp |
+    tp = CharacteristicsImpl::getAPotentialType(endpoint)
+  |
+    characteristic.hasImplications(tp, false, _)
+  ) and
+  // the lowest confidence across all endpoint types should be at least highConfidence
+  confidence =
+    min(float c |
+      characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
+    ) and
+  confidence >= SharedCharacteristics::highConfidence() and
+  any(FrameworkModeMetadataExtractor meta)
+      .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
+        _, extensibleType) and
+  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
+  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
+  not exists(EndpointCharacteristic characteristic2, float confidence2 |
+    characteristic2 != characteristic
+  |
+    characteristic2.appliesToEndpoint(endpoint) and
+    confidence2 >= SharedCharacteristics::maximalConfidence() and
+    characteristic2
+        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
+  )
+}
+
+/**
+ * Holds if the given `endpoint` is a positive example for the `endpointType`.
+ *
+ * The other parameters are metadata reported for `endpoint` by `FrameworkModeMetadataExtractor`.
+ */
+predicate isPositiveExample(
+  Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
+  string signature, string input, string output, string parameterName, string extensibleType
+) {
+  any(FrameworkModeMetadataExtractor meta)
+      .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
+        _, extensibleType) and
+  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
+}
+
 /*
 * EndpointCharacteristic classes that are specific to Automodel for Java.
 */
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql
@@ -16,24 +16,12 @@ private import AutomodelFrameworkModeCharacteristics
 private import AutomodelJavaUtil

 from
-  Endpoint endpoint, FrameworkModeMetadataExtractor meta, DollarAtString package,
+  Endpoint endpoint, DollarAtString package,
  DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
  DollarAtString input, DollarAtString output, DollarAtString parameterName,
  DollarAtString alreadyAiModeled, DollarAtString extensibleType
 where
-  not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
-    u.appliesToEndpoint(endpoint)
-  ) and
-  CharacteristicsImpl::isCandidate(endpoint, _) and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
-    alreadyAiModeled, extensibleType) and
-  // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
-  // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
-  // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
-  // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
-  // types, and we don't need to reexamine it.
-  alreadyAiModeled.matches(["", "%ai-%"]) and
-  includeAutomodelCandidate(package, type, name, signature)
+  isCandidate(endpoint, package, type, subtypes, name, signature, input, output, parameterName, extensibleType, alreadyAiModeled)
 select endpoint,
  "Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -14,37 +14,15 @@ private import AutomodelJavaUtil

 from
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence,
-  DollarAtString message, FrameworkModeMetadataExtractor meta, DollarAtString package,
-  DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
-  DollarAtString input, DollarAtString output, DollarAtString parameterName,
-  DollarAtString extensibleType
+  DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
+  DollarAtString signature, DollarAtString input, DollarAtString output,
+  DollarAtString parameterName, DollarAtString extensibleType
 where
-  characteristic.appliesToEndpoint(endpoint) and
-  // the node is known not to be an endpoint of any appropriate type
-  forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
-    characteristic.hasImplications(tp, false, _)
-  ) and
-  // the lowest confidence across all endpoint types should be at least highConfidence
-  confidence =
-    min(float c |
-      characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
-    ) and
-  confidence >= SharedCharacteristics::highConfidence() and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
-    _, extensibleType) and
-  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
-  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
-  not exists(EndpointCharacteristic characteristic2, float confidence2 |
-    characteristic2 != characteristic
-  |
-    characteristic2.appliesToEndpoint(endpoint) and
-    confidence2 >= SharedCharacteristics::maximalConfidence() and
-    characteristic2
-        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
-  ) and
-  message = characteristic
+  isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
+    input, output, parameterName, extensibleType)
 select endpoint,
-  message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
+  characteristic + "\nrelated locations: $@, $@." +
+    "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql
@@ -13,14 +13,12 @@ private import AutomodelEndpointTypes
 private import AutomodelJavaUtil

 from
-  Endpoint endpoint, EndpointType endpointType, FrameworkModeMetadataExtractor meta,
+  Endpoint endpoint, EndpointType endpointType,
  DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString parameterName, DollarAtString extensibleType
 where
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
-    _, extensibleType) and
-  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
+  isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output, parameterName, extensibleType)
 select endpoint,
  endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //