diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll index 82b549b5968..b7732ca8f38 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll @@ -23,7 +23,7 @@ private import semmle.code.java.Expr as Expr * * Copied from javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql */ -query predicate erroneousEndpoints( +predicate erroneousEndpoints( DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass, float confidence, string errorMessage ) { @@ -60,7 +60,7 @@ query predicate erroneousEndpoints( errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class" } -query predicate erroneousConfidences( +predicate erroneousConfidences( EndpointCharacteristic characteristic, float confidence, string errorMessage ) { characteristic.hasImplications(_, _, confidence) and @@ -68,6 +68,8 @@ query predicate erroneousConfidences( errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]" } +predicate isTypeAccess(DataFlow::Node n) { n.asExpr() instanceof TypeAccess } + /** * A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions * about whether to include the endpoint in the training set and with what label, as well as whether to score the @@ -359,6 +361,15 @@ abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic { } } +/** + * A NotASinkCharacteristic that indicates that an endpoint is a type access. Type accesses are not sinks. 
+ */ +private class IsTypeAccessCharacteristic extends NotASinkCharacteristic { + IsTypeAccessCharacteristic() { this = "is type access" } + + override predicate appliesToEndpoint(DataFlow::Node n) { isTypeAccess(n) } +} + /** * An EndpointFilterCharacteristic that indicates that an endpoint is a sanitizer for some sink type. A sanitizer can * never be a sink. diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql index e27a565d173..84111f67164 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql @@ -35,6 +35,8 @@ where // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly // certain about in the prompt. not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and + // Exclude type access nodes because they will never be on a flow path, so they're not useful negative examples. + not EndpointCharacteristics::isTypeAccess(endpoint) and // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be // treated by the actual query as a sanitizer, since the final logic is something like // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because