mirror of
https://github.com/github/codeql.git
synced 2026-05-21 22:57:11 +02:00
If a node satisfies the logic for both `isSink` and `isSanitizer`, don't include it as either a positive or a negative example in the prompt: such a node is ambiguous and would confuse the model.
This commit is contained in:
@@ -35,5 +35,18 @@ where
|
||||
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
|
||||
// certain about in the prompt.
|
||||
not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
|
||||
// they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples.
|
||||
not exists(
|
||||
EndpointCharacteristics::EndpointCharacteristic characteristic2, float confidence2,
|
||||
EndpointType positiveType
|
||||
|
|
||||
characteristic2.appliesToEndpoint(endpoint) and
|
||||
confidence2 >= characteristic2.maximalConfidence() and
|
||||
not positiveType instanceof NegativeType and
|
||||
characteristic2.hasImplications(positiveType, true, confidence2)
|
||||
) and
|
||||
endpoint = getSampleFromSampleRate(0.01)
|
||||
select endpoint, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()
|
||||
|
||||
@@ -26,5 +26,9 @@ where
|
||||
config.isKnownSink(sink) and
|
||||
// If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query
|
||||
// when there's a codex-generated data extension file in `java/ql/lib/ext`.
|
||||
not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _)
|
||||
not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _) and
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt.
|
||||
not config.isSanitizer(sink)
|
||||
select sink, config.getASinkEndpointType().getDescription()
|
||||
|
||||
Reference in New Issue
Block a user