Exclude negative examples that are type access nodes.

These will never be on a flow path so they're not useful negative examples.
2026-05-21 14:47:10 +02:00 · 2023-01-13 16:26:45 -08:00
parent 3229b37436
commit e06bcc3112
2 changed files with 15 additions and 2 deletions
--- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
+++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
@@ -23,7 +23,7 @@ private import semmle.code.java.Expr as Expr
 *
 * Copied from javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
-query predicate erroneousEndpoints(
+predicate erroneousEndpoints(
  DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass,
  float confidence, string errorMessage
 ) {
@@ -60,7 +60,7 @@ query predicate erroneousEndpoints(
  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
 }

-query predicate erroneousConfidences(
+predicate erroneousConfidences(
  EndpointCharacteristic characteristic, float confidence, string errorMessage
 ) {
  characteristic.hasImplications(_, _, confidence) and
@@ -68,6 +68,8 @@ query predicate erroneousConfidences(
  errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]"
 }

+predicate isTypeAccess(DataFlow::Node n) { n.asExpr() instanceof TypeAccess } // Holds if `n`'s expression is a `TypeAccess`.
+
 /**
 * A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
 * about whether to include the endpoint in the training set and with what label, as well as whether to score the
@@ -359,6 +361,15 @@ abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic {
  }
 }

+/**
+ * A NotASinkCharacteristic that applies to endpoints whose expression is a `TypeAccess`. Type accesses are not sinks.
+ */
+private class IsTypeAccessCharacteristic extends NotASinkCharacteristic {
+  IsTypeAccessCharacteristic() { this = "is type access" }
+
+  override predicate appliesToEndpoint(DataFlow::Node n) { isTypeAccess(n) }
+}
+
 /**
 * An EndpointFilterCharacteristic that indicates that an endpoint is a sanitizer for some sink type. A sanitizer can
 * never be a sink.
--- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql
+++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql
@@ -35,6 +35,8 @@ where
  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
  // certain about in the prompt.
  not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and
+  // Exclude type access nodes because they will never be on a flow path so they're not useful negative examples.
+  not EndpointCharacteristics::isTypeAccess(endpoint) and
  // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
  // treated by the actual query as a sanitizer, since the final logic is something like
  // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because