Java: Automodel, remove erroneousEndpoints predicate from production queries

2026-04-23 15:55:18 +02:00 · 2023-09-18 12:19:47 +00:00
parent 5d608acd0b
commit 32502d5e2c
6 changed files with 0 additions and 150 deletions
--- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
@@ -569,72 +569,3 @@ private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyN
    FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e.asNode(), _)
  }
 }
-
-/**
- * Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
- * characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
- * an error message indicating why this combination is problematic.
- *
- * Copied from
- *   javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
- */
-predicate erroneousEndpoints(
-  Endpoint endpoint, EndpointCharacteristic characteristic,
-  AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
-  boolean ignoreKnownModelingErrors
-) {
-  // An endpoint's characteristics should not include positive indicators with medium/high confidence for more than one
-  // sink/source type (including the negative type).
-  exists(
-    EndpointCharacteristic characteristic2, AutomodelEndpointTypes::EndpointType endpointClass2,
-    float confidence2
-  |
-    endpointType != endpointClass2 and
-    (
-      endpointType instanceof AutomodelEndpointTypes::SinkType and
-      endpointClass2 instanceof AutomodelEndpointTypes::SinkType
-      or
-      endpointType instanceof AutomodelEndpointTypes::SourceType and
-      endpointClass2 instanceof AutomodelEndpointTypes::SourceType
-    ) and
-    characteristic.appliesToEndpoint(endpoint) and
-    characteristic2.appliesToEndpoint(endpoint) and
-    characteristic.hasImplications(endpointType, true, confidence) and
-    characteristic2.hasImplications(endpointClass2, true, confidence2) and
-    confidence > SharedCharacteristics::mediumConfidence() and
-    confidence2 > SharedCharacteristics::mediumConfidence() and
-    (
-      ignoreKnownModelingErrors = true and
-      not knownOverlappingCharacteristics(characteristic, characteristic2)
-      or
-      ignoreKnownModelingErrors = false
-    )
-  ) and
-  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
-  or
-  // An endpoint's characteristics should not include positive indicators with medium/high confidence for some class and
-  // also include negative indicators with medium/high confidence for this same class.
-  exists(EndpointCharacteristic characteristic2, float confidence2 |
-    characteristic.appliesToEndpoint(endpoint) and
-    characteristic2.appliesToEndpoint(endpoint) and
-    characteristic.hasImplications(endpointType, true, confidence) and
-    characteristic2.hasImplications(endpointType, false, confidence2) and
-    confidence > SharedCharacteristics::mediumConfidence() and
-    confidence2 > SharedCharacteristics::mediumConfidence()
-  ) and
-  ignoreKnownModelingErrors = false and
-  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
-}
-
-/**
- * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that
- * have some overlap in their results. This indicates a problem with the underlying Java modeling. Specifically,
- * `PathCreation` is prone to FPs.
- */
-private predicate knownOverlappingCharacteristics(
-  EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
-) {
-  characteristic1 != characteristic2 and
-  characteristic1 = ["mad taint step", "create path", "read file", "known non-sink"] and
-  characteristic2 = ["mad taint step", "create path", "read file", "known non-sink"]
-}
--- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql
@@ -50,9 +50,6 @@ where
  extensibleType = endpoint.getExtensibleType() and
  confidence >= SharedCharacteristics::highConfidence() and
  characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
-  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
-  // certain about in the prompt.
-  not erroneousEndpoints(endpoint, _, _, _, _, false) and
  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
  // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
  // treated by the actual query as a sanitizer, since the final logic is something like
--- a/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql
+++ b/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql
@@ -18,9 +18,6 @@ from
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString isVarargsArray, DollarAtString extensibleType
 where
-  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
-  // certain about in the prompt.
-  not erroneousEndpoints(endpoint, _, _, _, _, false) and
  extensibleType = endpoint.getExtensibleType() and
  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
  // Extract positive examples of sinks belonging to the existing ATM query configurations.
--- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
@@ -385,72 +385,3 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter

  override predicate appliesToEndpoint(Endpoint e) { not e.getEnclosingCallable().isPublic() }
 }
-
-/**
- * Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
- * characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
- * an error message indicating why this combination is problematic.
- *
- * Copied from
- *   javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
- */
-predicate erroneousEndpoints(
-  Endpoint endpoint, EndpointCharacteristic characteristic,
-  AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
-  boolean ignoreKnownModelingErrors
-) {
-  // An endpoint's characteristics should not include positive indicators with medium/high confidence for more than one
-  // sink/source type (including the negative type).
-  exists(
-    EndpointCharacteristic characteristic2, AutomodelEndpointTypes::EndpointType endpointClass2,
-    float confidence2
-  |
-    endpointType != endpointClass2 and
-    (
-      endpointType instanceof AutomodelEndpointTypes::SinkType and
-      endpointClass2 instanceof AutomodelEndpointTypes::SinkType
-      or
-      endpointType instanceof AutomodelEndpointTypes::SourceType and
-      endpointClass2 instanceof AutomodelEndpointTypes::SourceType
-    ) and
-    characteristic.appliesToEndpoint(endpoint) and
-    characteristic2.appliesToEndpoint(endpoint) and
-    characteristic.hasImplications(endpointType, true, confidence) and
-    characteristic2.hasImplications(endpointClass2, true, confidence2) and
-    confidence > SharedCharacteristics::mediumConfidence() and
-    confidence2 > SharedCharacteristics::mediumConfidence() and
-    (
-      ignoreKnownModelingErrors = true and
-      not knownOverlappingCharacteristics(characteristic, characteristic2)
-      or
-      ignoreKnownModelingErrors = false
-    )
-  ) and
-  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
-  or
-  // An endpoint's characteristics should not include positive indicators with medium/high confidence for some class and
-  // also include negative indicators with medium/high confidence for this same class.
-  exists(EndpointCharacteristic characteristic2, float confidence2 |
-    characteristic.appliesToEndpoint(endpoint) and
-    characteristic2.appliesToEndpoint(endpoint) and
-    characteristic.hasImplications(endpointType, true, confidence) and
-    characteristic2.hasImplications(endpointType, false, confidence2) and
-    confidence > SharedCharacteristics::mediumConfidence() and
-    confidence2 > SharedCharacteristics::mediumConfidence()
-  ) and
-  ignoreKnownModelingErrors = false and
-  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
-}
-
-/**
- * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that
- * have some overlap in their results. This indicates a problem with the underlying Java modeling. Specifically,
- * `PathCreation` is prone to FPs.
- */
-private predicate knownOverlappingCharacteristics(
-  EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
-) {
-  characteristic1 != characteristic2 and
-  characteristic1 = ["mad taint step", "create path", "read file", "known non-sink"] and
-  characteristic2 = ["mad taint step", "create path", "read file", "known non-sink"]
-}
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -23,9 +23,6 @@ where
  characteristic.appliesToEndpoint(endpoint) and
  confidence >= SharedCharacteristics::highConfidence() and
  characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
-  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
-  // certain about in the prompt.
-  not erroneousEndpoints(endpoint, _, _, _, _, false) and
  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
  // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
  // treated by the actual query as a sanitizer, since the final logic is something like
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql
@@ -19,9 +19,6 @@ from
  DollarAtString extensibleType
 where
  endpoint.getExtensibleType() = extensibleType and
-  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
-  // certain about in the prompt.
-  not erroneousEndpoints(endpoint, _, _, _, _, false) and
  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
  // Extract positive examples of sinks belonging to the existing ATM query configurations.
  CharacteristicsImpl::isKnownAs(endpoint, sinkType, _)