diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql
index 43b2c2d626c..1f4d12abbbc 100644
--- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql
@@ -4,12 +4,12 @@
  *
  * Note: This query does not actually classify the endpoints using the model.
  *
- * @name Automodel candidates
- * @description A query to extract automodel candidates.
+ * @name Automodel candidates (application mode)
+ * @description A query to extract automodel candidates in application mode.
  * @kind problem
  * @severity info
  * @id java/ml/extract-automodel-application-candidates
- * @tags internal automodel extract candidates application-mode
+ * @tags internal extract automodel application-mode candidates
  */
 
 private import AutomodelApplicationModeCharacteristics
diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql
new file mode 100644
index 00000000000..3b3f1cd9c6f
--- /dev/null
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql
@@ -0,0 +1,45 @@
+/**
+ * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
+ *
+ * @name Negative examples (application mode)
+ * @kind problem
+ * @severity info
+ * @id java/ml/extract-automodel-application-negative-examples
+ * @tags internal extract automodel application-mode negative examples
+ */
+
+private import AutomodelApplicationModeCharacteristics
+private import AutomodelEndpointTypes
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
+  MetadataExtractor meta, string package, string type, boolean subtypes, string name,
+  string signature, string input
+where
+  characteristic.appliesToEndpoint(endpoint) and
+  confidence >= SharedCharacteristics::highConfidence() and
+  characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
+  // Skip endpoints that have contradictory endpoint characteristics: only examples we are highly certain about
+  // belong in the prompt.
+  not erroneousEndpoints(endpoint, _, _, _, _, false) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // A node may satisfy the logic for both `isSink` and `isSanitizer`; the actual query then treats it as a
+  // sanitizer, since the final logic is something like `isSink(n) and not isSanitizer(n)`. Such nodes are ambiguous
+  // and might confuse the model, so every endpoint with a characteristic that implies a non-negative sink type at
+  // `maximalConfidence()` or above (i.e. every known sink) is kept out of the negative examples.
+  not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType |
+    not positiveType instanceof NegativeSinkType and
+    characteristic2.appliesToEndpoint(endpoint) and
+    confidence2 >= SharedCharacteristics::maximalConfidence() and
+    characteristic2.hasImplications(positiveType, true, confidence2)
+  ) and
+  message = characteristic
+select endpoint, message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.(DollarAtString), "input" //
diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql
new file mode 100644
index 00000000000..37f3bb5cd69
--- /dev/null
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql
@@ -0,0 +1,32 @@
+/**
+ * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
+ *
+ * @name Positive examples (application mode)
+ * @kind problem
+ * @severity info
+ * @id java/ml/extract-automodel-application-positive-examples
+ * @tags internal extract automodel application-mode positive examples
+ */
+
+private import AutomodelApplicationModeCharacteristics
+private import AutomodelEndpointTypes
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, SinkType sinkType, MetadataExtractor meta, string package, string type,
+  boolean subtypes, string name, string signature, string input
+where
+  // Skip endpoints that have contradictory endpoint characteristics: only examples we are highly certain about
+  // belong in the prompt.
+  not erroneousEndpoints(endpoint, _, _, _, _, false) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // Keep only known sinks of type `sinkType`, i.e. sinks belonging to the existing ATM query configurations.
+  CharacteristicsImpl::isKnownSink(endpoint, sinkType)
+select endpoint, sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.(DollarAtString), "input" //
diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
index 488ba532920..0f53399d2e3 100644
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
@@ -4,12 +4,12 @@
  *
  * Note: This query does not actually classify the endpoints using the model.
  *
- * @name Automodel candidates
- * @description A query to extract automodel candidates.
+ * @name Automodel candidates (framework mode)
+ * @description A query to extract automodel candidates in framework mode.
  * @kind problem
  * @severity info
  * @id java/ml/extract-automodel-framework-candidates
- * @tags internal automodel extract candidates framework-mode
+ * @tags internal extract automodel framework-mode candidates
  */
 
 private import AutomodelFrameworkModeCharacteristics
diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
index 9c1076ae05d..9ecc1636c60 100644
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -1,11 +1,11 @@
 /**
  * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
  *
- * @name Negative examples (experimental)
+ * @name Negative examples (framework mode)
  * @kind problem
  * @severity info
- * @id java/ml/non-sink
- * @tags internal automodel extract examples negative framework-mode
+ * @id java/ml/extract-automodel-framework-negative-examples
+ * @tags internal extract automodel framework-mode negative examples
  */
 
 private import AutomodelFrameworkModeCharacteristics
diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
index d6c4926bbac..f03bb995a19 100644
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
@@ -1,11 +1,11 @@
 /**
  * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
  *
- * @name Positive examples (experimental)
+ * @name Positive examples (framework mode)
  * @kind problem
  * @severity info
- * @id java/ml/known-sink
- * @tags internal automodel extract examples positive framework-mode
+ * @id java/ml/extract-automodel-framework-positive-examples
+ * @tags internal extract automodel framework-mode positive examples
  */
 
 private import AutomodelFrameworkModeCharacteristics