Java: update extraction query metadata

2025-12-24 04:36:35 +01:00 · 2023-05-23 12:17:17 +00:00
parent 7c3bc26c41
commit 6e21f14c09
6 changed files with 89 additions and 12 deletions
--- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql
@@ -4,12 +4,12 @@
 *
 * Note: This query does not actually classify the endpoints using the model.
 *
- * @name Automodel candidates
- * @description A query to extract automodel candidates.
+ * @name Automodel candidates (application mode)
+ * @description A query to extract automodel candidates in application mode.
 * @kind problem
 * @severity info
 * @id java/ml/extract-automodel-application-candidates
- * @tags internal automodel extract candidates application-mode
+ * @tags internal extract automodel application-mode candidates
 */

 private import AutomodelApplicationModeCharacteristics
--- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql
@@ -0,0 +1,45 @@
+/**
+ * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
+ *
+ * @name Negative examples (application mode)
+ * @kind problem
+ * @problem.severity recommendation
+ * @id java/ml/extract-automodel-application-negative-examples
+ * @tags internal extract automodel application-mode negative examples
+ */
+
+private import AutomodelApplicationModeCharacteristics
+private import AutomodelEndpointTypes
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
+  MetadataExtractor meta, string package, string type, boolean subtypes, string name,
+  string signature, string input
+where
+  characteristic.appliesToEndpoint(endpoint) and
+  confidence >= SharedCharacteristics::highConfidence() and
+  characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
+  // Skip endpoints whose endpoint characteristics contradict each other: only examples we are highly certain about
+  // belong in the prompt.
+  not erroneousEndpoints(endpoint, _, _, _, _, false) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // A node may satisfy the logic for both `isSink` and `isSanitizer`; the actual query then treats it as a sanitizer,
+  // since its final logic is something like `isSink(n) and not isSanitizer(n)`. Such nodes are ambiguous and might
+  // confuse the model, so every endpoint with a maximal-confidence characteristic implying a non-negative sink type
+  // (i.e. every known sink) is explicitly excluded from the negative examples.
+  not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType |
+    not positiveType instanceof NegativeSinkType and
+    characteristic2.appliesToEndpoint(endpoint) and
+    confidence2 >= SharedCharacteristics::maximalConfidence() and
+    characteristic2.hasImplications(positiveType, true, confidence2)
+  ) and
+  message = characteristic
+select endpoint, message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.(DollarAtString), "input" //
--- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql
@@ -0,0 +1,32 @@
+/**
+ * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
+ *
+ * @name Positive examples (application mode)
+ * @kind problem
+ * @problem.severity recommendation
+ * @id java/ml/extract-automodel-application-positive-examples
+ * @tags internal extract automodel application-mode positive examples
+ */
+
+private import AutomodelApplicationModeCharacteristics
+private import AutomodelEndpointTypes
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, SinkType sinkType, MetadataExtractor meta, string package, string type,
+  boolean subtypes, string name, string signature, string input
+where
+  // Skip endpoints whose endpoint characteristics contradict each other: only examples we are highly certain about
+  // belong in the prompt.
+  not erroneousEndpoints(endpoint, _, _, _, _, false) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // Only keep known sinks belonging to the existing ATM query configurations; `sinkType` prefixes the result message.
+  CharacteristicsImpl::isKnownSink(endpoint, sinkType)
+select endpoint, sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.(DollarAtString), "input" //
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
@@ -4,12 +4,12 @@
 *
 * Note: This query does not actually classify the endpoints using the model.
 *
- * @name Automodel candidates
- * @description A query to extract automodel candidates.
+ * @name Automodel candidates (framework mode)
+ * @description A query to extract automodel candidates in framework mode.
 * @kind problem
 * @severity info
 * @id java/ml/extract-automodel-framework-candidates
- * @tags internal automodel extract candidates framework-mode
+ * @tags internal extract automodel framework-mode candidates
 */

 private import AutomodelFrameworkModeCharacteristics
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -1,11 +1,11 @@
 /**
 * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
 *
- * @name Negative examples (experimental)
+ * @name Negative examples (framework mode)
 * @kind problem
 * @severity info
- * @id java/ml/non-sink
- * @tags internal automodel extract examples negative framework-mode
+ * @id java/ml/extract-automodel-framework-negative-examples
+ * @tags internal extract automodel framework-mode negative examples
 */

 private import AutomodelFrameworkModeCharacteristics
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
@@ -1,11 +1,11 @@
 /**
 * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
 *
- * @name Positive examples (experimental)
+ * @name Positive examples (framework mode)
 * @kind problem
 * @severity info
- * @id java/ml/known-sink
- * @tags internal automodel extract examples positive framework-mode
+ * @id java/ml/extract-automodel-framework-positive-examples
+ * @tags internal extract automodel framework-mode positive examples
 */

 private import AutomodelFrameworkModeCharacteristics