Address review feedback.

2026-04-28 18:25:24 +02:00 · 2024-01-17 14:27:24 +00:00
parent 90a4552c4f
commit 8614d7bddb
7 changed files with 44 additions and 41 deletions
--- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
@@ -93,18 +93,6 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint
      else none() // if both exist, it would be a summaryModel (not yet supported)
  }

-  /**
-   * Gets a potential type of this endpoint to make sure that sources are
-   * associated with source types and sinks with sink types.
-   */
-  AutomodelEndpointTypes::EndpointType getAPotentialType() {
-    this.getExtensibleType() = "sourceModel" and
-    result instanceof AutomodelEndpointTypes::SourceType
-    or
-    this.getExtensibleType() = "sinkModel" and
-    result instanceof AutomodelEndpointTypes::SinkType
-  }
-
  abstract string toString();
 }

--- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql
@@ -45,22 +45,28 @@ predicate candidate(
  string type, string subtypes, string name, string signature, string input, string output,
  string isVarargsArray, string extensibleType
 ) {
-  // the node is know not to be an endpoint of any appropriate type
-  forall(EndpointType tp | tp = endpoint.getAPotentialType() |
+  // the node is known not to be an endpoint of any appropriate type
+  forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
    characteristic.hasImplications(tp, false, _)
  ) and
  // the lowest confidence across all endpoint types should be at least highConfidence
-  confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
+  confidence =
+    min(float c |
+      characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
+    ) and
  confidence >= SharedCharacteristics::highConfidence() and
  any(ApplicationModeMetadataExtractor meta)
      .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
        isVarargsArray, _, extensibleType) and
  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
-  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
+  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
  not exists(EndpointCharacteristic characteristic2, float confidence2 |
+    characteristic2 != characteristic
+  |
    characteristic2.appliesToEndpoint(endpoint) and
    confidence2 >= SharedCharacteristics::maximalConfidence() and
-    characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2)
+    characteristic2
+        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
  )
 }

--- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
@@ -89,18 +89,6 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {

  abstract string getExtensibleType();

-  /**
-   * Gets a potential type of this endpoint to make sure that sources are
-   * associated with source types and sinks with sink types.
-   */
-  AutomodelEndpointTypes::EndpointType getAPotentialType() {
-    this.getExtensibleType() = "sourceModel" and
-    result instanceof AutomodelEndpointTypes::SourceType
-    or
-    this.getExtensibleType() = "sinkModel" and
-    result instanceof AutomodelEndpointTypes::SinkType
-  }
-
  string toString() { result = this.asTop().toString() }

  Location getLocation() { result = this.asTop().getLocation() }
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -21,20 +21,26 @@ from
 where
  characteristic.appliesToEndpoint(endpoint) and
  // the node is known not to be an endpoint of any appropriate type
-  forall(EndpointType tp | tp = endpoint.getAPotentialType() |
+  forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
    characteristic.hasImplications(tp, false, _)
  ) and
  // the lowest confidence across all endpoint types should be at least highConfidence
-  confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
+  confidence =
+    min(float c |
+      characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
+    ) and
  confidence >= SharedCharacteristics::highConfidence() and
  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
    _, extensibleType) and
  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
-  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
+  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
  not exists(EndpointCharacteristic characteristic2, float confidence2 |
+    characteristic2 != characteristic
+  |
    characteristic2.appliesToEndpoint(endpoint) and
    confidence2 >= SharedCharacteristics::maximalConfidence() and
-    characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2)
+    characteristic2
+        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
  ) and
  message = characteristic
 select endpoint,
--- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll
@@ -17,7 +17,10 @@ signature module CandidateSig {
   * DataFlow node class, or a subtype thereof.
   */
  class Endpoint {
-    EndpointType getAPotentialType();
+    /**
+     * Gets the kind of this endpoint, either "sourceModel" or "sinkModel".
+     */
+    string getExtensibleType();
  }

  /**
@@ -122,6 +125,18 @@ module SharedCharacteristics<CandidateSig Candidate> {
    characteristic.hasImplications(endpointType, true, maximalConfidence())
  }

+  /**
+   * Gets a potential type of the given `endpoint`, ensuring that sources are
+   * associated with source types and sinks with sink types.
+   */
+  Candidate::EndpointType getAPotentialType(Candidate::Endpoint endpoint) {
+    endpoint.getExtensibleType() = "sourceModel" and
+    result instanceof Candidate::SourceType
+    or
+    endpoint.getExtensibleType() = "sinkModel" and
+    result instanceof Candidate::SinkType
+  }
+
  /**
   * Holds if the given `endpoint` should be considered as a candidate for type `endpointType`,
   * and classified by the ML model.
@@ -129,7 +144,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
   * A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics.
   */
  predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) {
-    endpointType = endpoint.getAPotentialType() and
+    endpointType = getAPotentialType(endpoint) and
    not exists(getAnExcludingCharacteristic(endpoint, endpointType))
  }

@@ -375,7 +390,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
     * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
     */
    private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
-      NeutralModelCharacteristic() { this = "known non-endpoint" }
+      NeutralModelCharacteristic() { this = "known non-sink" }

      override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }
    }