Refactor to avoid bad join order.

2026-04-22 23:35:14 +02:00 · 2024-01-12 14:57:05 +00:00
parent 45ca301593
commit bb63fcde43
9 changed files with 74 additions and 62 deletions
--- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
@@ -356,10 +356,10 @@ class ApplicationModeMetadataExtractor extends string {

  predicate hasMetadata(
    Endpoint e, string package, string type, string subtypes, string name, string signature,
-    string input, string output, string isVarargsArray
+    string input, string output, string isVarargsArray, string alreadyAiModeled,
+    string extensibleType
  ) {
-    exists(Callable callable |
-      e.getCallable() = callable and
+    exists(Callable callable | e.getCallable() = callable |
      (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
      (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
      package = callable.getDeclaringType().getPackage().getName() and
@@ -369,9 +369,17 @@ class ApplicationModeMetadataExtractor extends string {
      subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable) and
-      if e instanceof ImplicitVarargsArray
-      then isVarargsArray = "true"
-      else isVarargsArray = "false"
+      (
+        if e instanceof ImplicitVarargsArray
+        then isVarargsArray = "true"
+        else isVarargsArray = "false"
+      ) and
+      extensibleType = e.getExtensibleType()
+    ) and
+    (
+      not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
+      or
+      CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
    )
  }
 }
@@ -416,7 +424,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
 * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
 * dangerous/interesting thing, so we want the latter to be modeled as the sink.
 */
-private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
+private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
+{
  UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
@@ -439,7 +448,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
 * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
 * and its return value should not be considered a source.
 */
-private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
+private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
+{
  ExceptionCharacteristic() { this = "exception" }

  override predicate appliesToEndpoint(Endpoint e) {
--- a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql
+++ b/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql
@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
 bindingset[limit]
 private Endpoint getSampleForSignature(
  int limit, string package, string type, string subtypes, string name, string signature,
-  string input, string output, string isVarargs, string extensibleType
+  string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
 ) {
  exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
    num_endpoints =
      count(Endpoint e |
-        e.getExtensibleType() = extensibleType and
-        meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
+        meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
+          alreadyAiModeled, extensibleType)
      )
  |
    result =
      rank[n](Endpoint e, Location loc |
        loc = e.asTop().getLocation() and
-        e.getExtensibleType() = extensibleType and
-        meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
+        meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
+          alreadyAiModeled, extensibleType)
      |
        e
        order by
@@ -66,19 +66,15 @@ where
  CharacteristicsImpl::isCandidate(endpoint, _) and
  endpoint =
    getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
-      isVarargsArray, extensibleType) and
+      isVarargsArray, extensibleType, alreadyAiModeled) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
+    isVarargsArray, alreadyAiModeled, extensibleType) and
  // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
  // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
  // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
  // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
  // types, and we don't need to reexamine it.
-  (
-    not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
-    or
-    alreadyAiModeled.matches("%ai-%") and
-    CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
-  ) and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
+  alreadyAiModeled.matches(["", "%ai-%"]) and
  includeAutomodelCandidate(package, type, name, signature)
 select endpoint.asNode(),
  "Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
--- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql
@@ -47,7 +47,6 @@ from
  DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
 where
  endpoint = getSampleForCharacteristic(characteristic, 100) and
-  extensibleType = endpoint.getExtensibleType() and
  // the node is known not to be an endpoint of any appropriate type
  forall(EndpointType tp | tp = endpoint.getAPotentialType() |
    characteristic.hasImplications(tp, false, _)
@@ -55,7 +54,8 @@ where
  // the lowest confidence across all endpoint types should be at least highConfidence
  confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
  confidence >= SharedCharacteristics::highConfidence() and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
+    isVarargsArray, _, extensibleType) and
  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
  not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |
--- a/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql
+++ b/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql
@@ -18,9 +18,8 @@ from
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString isVarargsArray, DollarAtString extensibleType
 where
-  extensibleType = endpoint.getExtensibleType() and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
-  // Extract positive examples of sinks belonging to the existing ATM query configurations.
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
+    isVarargsArray, _, extensibleType) and
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
  exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
 select endpoint.asNode(),
--- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
@@ -305,16 +305,27 @@ class FrameworkModeMetadataExtractor extends string {

  predicate hasMetadata(
    Endpoint e, string package, string type, string subtypes, string name, string signature,
-    string input, string output, string parameterName
+    string input, string output, string parameterName, string alreadyAiModeled,
+    string extensibleType
  ) {
-    (if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
-    name = e.getCallable().getName() and
-    (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
-    (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
-    package = e.getCallable().getDeclaringType().getPackage().getName() and
-    type = e.getCallable().getDeclaringType().getErasure().(RefType).nestedName() and
-    subtypes = AutomodelJavaUtil::considerSubtypes(e.getCallable()).toString() and
-    signature = ExternalFlow::paramsString(e.getCallable())
+    exists(Callable callable | e.getCallable() = callable |
+      (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
+      (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
+      package = callable.getDeclaringType().getPackage().getName() and
+      // we're using the erased types because the MaD convention is to not specify type parameters.
+      // Whether something is or isn't a sink doesn't usually depend on the type parameters.
+      type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
+      subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
+      name = callable.getName() and
+      signature = ExternalFlow::paramsString(callable) and
+      (if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
+      e.getExtensibleType() = extensibleType
+    ) and
+    (
+      not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
+      or
+      CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
+    )
  }
 }

@@ -332,7 +343,8 @@ class FrameworkModeMetadataExtractor extends string {
 *
 * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
 */
-private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
+private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
+{
  UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
@@ -357,7 +369,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
 * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
 * dangerous/interesting thing, so we want the latter to be modeled as the sink.
 */
-private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
+private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
+{
  UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
@@ -380,7 +393,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
 * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
 * and its return value should not be considered a source.
 */
-private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
+private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
+{
  ExceptionCharacteristic() { this = "exception" }

  override predicate appliesToEndpoint(Endpoint e) {
@@ -396,7 +410,6 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSource
  }
 }

-
 /**
 * A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, i.e., APIs that
 * are considered worth modeling.
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql
@@ -21,23 +21,18 @@ from
  DollarAtString input, DollarAtString output, DollarAtString parameterName,
  DollarAtString alreadyAiModeled, DollarAtString extensibleType
 where
-  endpoint.getExtensibleType() = extensibleType and
  not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
    u.appliesToEndpoint(endpoint)
  ) and
  CharacteristicsImpl::isCandidate(endpoint, _) and
-  // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
-  // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
-  // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
-  // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
-  // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
-  (
-    not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
-    or
-    alreadyAiModeled.matches("%ai-%") and
-    CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
-  ) and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
+    alreadyAiModeled, extensibleType) and
+  // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
+  // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
+  // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
+  // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
+  // types, and we don't need to reexamine it.
+  alreadyAiModeled.matches(["", "%ai-%"]) and
  includeAutomodelCandidate(package, type, name, signature)
 select endpoint,
  "Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -19,7 +19,6 @@ from
  DollarAtString input, DollarAtString output, DollarAtString parameterName,
  DollarAtString extensibleType
 where
-  endpoint.getExtensibleType() = extensibleType and
  characteristic.appliesToEndpoint(endpoint) and
  // the node is known not to be an endpoint of any appropriate type
  forall(EndpointType tp | tp = endpoint.getAPotentialType() |
@@ -28,7 +27,8 @@ where
  // the lowest confidence across all endpoint types should be at least highConfidence
  confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
  confidence >= SharedCharacteristics::highConfidence() and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
+    _, extensibleType) and
  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
  not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |
--- a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql
+++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql
@@ -18,9 +18,8 @@ from
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString parameterName, DollarAtString extensibleType
 where
-  endpoint.getExtensibleType() = extensibleType and
-  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
-  // Extract positive examples of sinks belonging to the existing ATM query configurations.
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
+    _, extensibleType) and
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
 select endpoint,
  endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
--- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll
+++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll
@@ -272,7 +272,9 @@ module SharedCharacteristics<CandidateSig Candidate> {
  /**
   * A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type.
   */
-  abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, NotASourceCharacteristic {
+  abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic,
+    NotASourceCharacteristic
+  {
    bindingset[this]
    NeitherSourceNorSinkCharacteristic() { any() }

@@ -280,8 +282,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
      Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
    ) {
      NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or
-      NotASourceCharacteristic.super
-          .hasImplications(endpointType, isPositiveIndicator, confidence)
+      NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
    }
  }

@@ -373,8 +374,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
    /**
     * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
     */
-    private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic
-    {
+    private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
      NeutralModelCharacteristic() { this = "known non-endpoint" }

      override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }