Java: update extraction query metadata

This commit is contained in:
Stephan Brandauer
2023-05-23 12:17:17 +00:00
parent 7c3bc26c41
commit 6e21f14c09
6 changed files with 89 additions and 12 deletions

View File

@@ -4,12 +4,12 @@
*
* Note: This query does not actually classify the endpoints using the model.
*
* @name Automodel candidates
* @description A query to extract automodel candidates.
* @name Automodel candidates (application mode)
* @description A query to extract automodel candidates in application mode.
* @kind problem
* @severity info
* @id java/ml/extract-automodel-application-candidates
* @tags internal automodel extract candidates application-mode
* @tags internal extract automodel application-mode candidates
*/
private import AutomodelApplicationModeCharacteristics

View File

@@ -0,0 +1,45 @@
/**
* Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
*
* @name Negative examples (application mode)
* @kind problem
* @severity info
* @id java/ml/extract-automodel-application-negative-examples
* @tags internal extract automodel application-mode negative examples
*/
private import AutomodelApplicationModeCharacteristics
private import AutomodelEndpointTypes
private import AutomodelSharedUtil
from
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
  MetadataExtractor extractor, string package, string type, boolean subtypes, string name,
  string signature, string input
where
  // The alert message is the characteristic that marks this endpoint as a non-sink.
  message = characteristic and
  // Select a characteristic that applies to the endpoint and implies a negative sink type with at least
  // high confidence.
  characteristic.appliesToEndpoint(endpoint) and
  characteristic.hasImplications(any(NegativeSinkType nonSinkType), true, confidence) and
  confidence >= SharedCharacteristics::highConfidence() and
  // Metadata columns reported alongside the endpoint.
  extractor.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
  // Endpoints with contradictory characteristics are dropped: only examples we are highly certain about
  // belong in the prompt.
  not erroneousEndpoints(endpoint, _, _, _, _, false) and
  // A node may legitimately satisfy both `isSink` and `isSanitizer`; the actual query then treats it as a
  // sanitizer, since its final logic is roughly `isSink(n) and not isSanitizer(n)`. Such nodes are ambiguous
  // and might confuse the model, so every endpoint that is a known sink (i.e. has a maximal-confidence
  // characteristic implying a non-negative sink type) is excluded from the negative examples.
  not exists(
    EndpointCharacteristic sinkCharacteristic, float sinkConfidence, SinkType knownSinkType
  |
    sinkCharacteristic.appliesToEndpoint(endpoint) and
    sinkCharacteristic.hasImplications(knownSinkType, true, sinkConfidence) and
    sinkConfidence >= SharedCharacteristics::maximalConfidence() and
    not knownSinkType instanceof NegativeSinkType
  )
select endpoint, message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
  package.(DollarAtString), "package", //
  type.(DollarAtString), "type", //
  subtypes.toString().(DollarAtString), "subtypes", //
  name.(DollarAtString), "name", //
  signature.(DollarAtString), "signature", //
  input.(DollarAtString), "input" //

View File

@@ -0,0 +1,32 @@
/**
* Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
*
* @name Positive examples (application mode)
* @kind problem
* @severity info
* @id java/ml/extract-automodel-application-positive-examples
* @tags internal extract automodel application-mode positive examples
*/
private import AutomodelApplicationModeCharacteristics
private import AutomodelEndpointTypes
private import AutomodelSharedUtil
from
  Endpoint endpoint, SinkType knownSinkType, MetadataExtractor extractor, string package,
  string type, boolean subtypes, string name, string signature, string input
where
  // Positive examples are the sinks already known to the existing ATM query configurations.
  CharacteristicsImpl::isKnownSink(endpoint, knownSinkType) and
  // Metadata columns reported alongside the endpoint.
  extractor.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
  // Endpoints with contradictory characteristics are dropped: only examples we are highly certain about
  // belong in the prompt.
  not erroneousEndpoints(endpoint, _, _, _, _, false)
select endpoint, knownSinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
  package.(DollarAtString), "package", //
  type.(DollarAtString), "type", //
  subtypes.toString().(DollarAtString), "subtypes", //
  name.(DollarAtString), "name", //
  signature.(DollarAtString), "signature", //
  input.(DollarAtString), "input" //

View File

@@ -4,12 +4,12 @@
*
* Note: This query does not actually classify the endpoints using the model.
*
* @name Automodel candidates
* @description A query to extract automodel candidates.
* @name Automodel candidates (framework mode)
* @description A query to extract automodel candidates in framework mode.
* @kind problem
* @severity info
* @id java/ml/extract-automodel-framework-candidates
* @tags internal automodel extract candidates framework-mode
* @tags internal extract automodel framework-mode candidates
*/
private import AutomodelFrameworkModeCharacteristics

View File

@@ -1,11 +1,11 @@
/**
* Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
*
* @name Negative examples (experimental)
* @name Negative examples (framework mode)
* @kind problem
* @severity info
* @id java/ml/non-sink
* @tags internal automodel extract examples negative framework-mode
* @id java/ml/extract-automodel-framework-negative-examples
* @tags internal extract automodel framework-mode negative examples
*/
private import AutomodelFrameworkModeCharacteristics

View File

@@ -1,11 +1,11 @@
/**
* Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
*
* @name Positive examples (experimental)
* @name Positive examples (framework mode)
* @kind problem
* @severity info
* @id java/ml/known-sink
* @tags internal automodel extract examples positive framework-mode
* @id java/ml/extract-automodel-framework-positive-examples
* @tags internal extract automodel framework-mode positive examples
*/
private import AutomodelFrameworkModeCharacteristics