mirror of
https://github.com/github/codeql.git
synced 2026-05-21 22:57:11 +02:00
Add a predicate that finds endpoints with logically-inconsistent characteristics, and exclude such endpoints from both positive and negative examples extracted for the codex prompt.
This commit is contained in:
@@ -16,6 +16,51 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM
|
||||
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import semmle.code.java.Expr as Expr
|
||||
|
||||
/**
 * Holds if `endpoint` has a self-contradictory combination of characteristics, which indicates an error in our
 * endpoint characteristics.
 *
 * Each result row reports one offending `characteristic` that applies to `endpoint`, the `endpointClass` it is a
 * positive indicator for, the `confidence` of that implication, and an `errorMessage` explaining why the combination
 * is problematic.
 *
 * Only implications whose confidence is strictly greater than the characteristic's `mediumConfidence()` are
 * considered.
 *
 * Copied from javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
query predicate erroneousEndpoints(
  DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass,
  float confidence, string errorMessage
) {
  exists(EndpointCharacteristic conflicting, float conflictingConfidence |
    // Conditions shared by both kinds of contradiction: the reported characteristic is a positive indicator for
    // `endpointClass`, and both it and the conflicting characteristic apply to the endpoint with a confidence
    // above medium.
    characteristic.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointClass, true, confidence) and
    confidence > characteristic.mediumConfidence() and
    conflicting.appliesToEndpoint(endpoint) and
    conflictingConfidence > conflicting.mediumConfidence() and
    (
      // An endpoint's characteristics should not include confident positive indicators for more than one class
      // (classes are told apart by their encoding).
      exists(EndpointType conflictingClass |
        conflicting.hasImplications(conflictingClass, true, conflictingConfidence) and
        endpointClass.getEncoding() != conflictingClass.getEncoding()
      ) and
      errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
      or
      // An endpoint's characteristics should not include a confident positive indicator for some class and also a
      // confident negative indicator for that same class.
      conflicting.hasImplications(endpointClass, false, conflictingConfidence) and
      errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
    )
  )
}
|
||||
|
||||
/**
 * Holds if `characteristic` has an implication (for any endpoint class, positive or negative) whose `confidence`
 * falls outside the interval [0, 1]. `errorMessage` describes the problem.
 */
query predicate erroneousConfidences(
  EndpointCharacteristic characteristic, float confidence, string errorMessage
) {
  errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]" and
  characteristic.hasImplications(_, _, confidence) and
  (
    confidence > 1
    or
    confidence < 0
  )
}
|
||||
|
||||
/**
|
||||
* A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
|
||||
* about whether to include the endpoint in the training set and with what label, as well as whether to score the
|
||||
|
||||
@@ -26,11 +26,14 @@ DataFlow::Node getSampleFromSampleRate(float rate) {
|
||||
}
|
||||
|
||||
from
|
||||
DataFlow::Node sink, EndpointCharacteristics::EndpointCharacteristic characteristic,
|
||||
DataFlow::Node endpoint, EndpointCharacteristics::EndpointCharacteristic characteristic,
|
||||
float confidence
|
||||
where
|
||||
characteristic.appliesToEndpoint(sink) and
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
confidence >= characteristic.highConfidence() and
|
||||
characteristic.hasImplications(any(NegativeType negative), true, confidence) and
|
||||
sink = getSampleFromSampleRate(0.01)
|
||||
select sink, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()
|
||||
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
|
||||
// certain about in the prompt.
|
||||
not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and
|
||||
endpoint = getSampleFromSampleRate(0.01)
|
||||
select endpoint, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()
|
||||
|
||||
@@ -21,5 +21,8 @@ from
|
||||
where
|
||||
characteristic.appliesToEndpoint(sink) and
|
||||
confidence >= characteristic.maximalConfidence() and
|
||||
characteristic.hasImplications(config.getASinkEndpointType(), true, confidence)
|
||||
characteristic.hasImplications(config.getASinkEndpointType(), true, confidence) and
|
||||
// Exclude sinks that have contradictory endpoint characteristics, because we only want examples we're highly certain
|
||||
// about in the prompt.
|
||||
not EndpointCharacteristics::erroneousEndpoints(sink, _, _, _, _)
|
||||
select sink, characteristic.toString()
|
||||
|
||||
Reference in New Issue
Block a user