Add a predicate that finds endpoints with logically-inconsistent characteristics, and exclude such endpoints from both positive and negative examples extracted for the codex prompt.

This commit is contained in:
tiferet
2023-01-11 16:56:11 -08:00
parent 1211197914
commit 0d4e85ff93
3 changed files with 56 additions and 5 deletions

View File

@@ -16,6 +16,51 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
private import semmle.code.java.Expr as Expr
/**
 * Holds if `characteristic` applies to `endpoint` and is an indicator for `endpointClass` -- a positive one when
 * `isPositive` is `true`, a negative one when it is `false` -- whose `confidence` is strictly greater than
 * `characteristic.mediumConfidence()`.
 */
private predicate hasConfidentIndicator(
  DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass,
  boolean isPositive, float confidence
) {
  characteristic.appliesToEndpoint(endpoint) and
  characteristic.hasImplications(endpointClass, isPositive, confidence) and
  confidence > characteristic.mediumConfidence()
}

/**
 * Holds if `endpoint` has a self-contradictory combination of characteristics, which points at a mistake in our
 * endpoint characteristics. Each result reports one offending `characteristic`, the `endpointClass` it is a positive
 * indicator for and the `confidence` of that indicator, together with an `errorMessage` explaining why the combination
 * is problematic.
 *
 * Copied from javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
query predicate erroneousEndpoints(
  DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass,
  float confidence, string errorMessage
) {
  // Both kinds of contradiction start from a positive indicator, with confidence above medium, for `endpointClass`.
  hasConfidentIndicator(endpoint, characteristic, endpointClass, true, confidence) and
  (
    // Contradiction 1: some characteristic of the same endpoint is also a positive indicator, with confidence above
    // medium, for a class with a different encoding.
    errorMessage = "Endpoint has high-confidence positive indicators for multiple classes" and
    exists(EndpointType otherClass |
      otherClass.getEncoding() != endpointClass.getEncoding() and
      hasConfidentIndicator(endpoint, _, otherClass, true, _)
    )
    or
    // Contradiction 2: some characteristic of the same endpoint is a negative indicator, with confidence above
    // medium, for this very class.
    errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class" and
    hasConfidentIndicator(endpoint, _, endpointClass, false, _)
  )
}
/**
 * Holds if `characteristic` has an implication whose `confidence` is below 0 or above 1. `errorMessage` describes the
 * problem.
 */
query predicate erroneousConfidences(
  EndpointCharacteristic characteristic, float confidence, string errorMessage
) {
  errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]" and
  characteristic.hasImplications(_, _, confidence) and
  (
    confidence > 1
    or
    confidence < 0
  )
}
/**
* A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
* about whether to include the endpoint in the training set and with what label, as well as whether to score the

View File

@@ -26,11 +26,14 @@ DataFlow::Node getSampleFromSampleRate(float rate) {
}
from
DataFlow::Node sink, EndpointCharacteristics::EndpointCharacteristic characteristic,
DataFlow::Node endpoint, EndpointCharacteristics::EndpointCharacteristic characteristic,
float confidence
where
characteristic.appliesToEndpoint(sink) and
characteristic.appliesToEndpoint(endpoint) and
confidence >= characteristic.highConfidence() and
characteristic.hasImplications(any(NegativeType negative), true, confidence) and
sink = getSampleFromSampleRate(0.01)
select sink, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
// certain about in the prompt.
not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and
endpoint = getSampleFromSampleRate(0.01)
select endpoint, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()

View File

@@ -21,5 +21,8 @@ from
where
characteristic.appliesToEndpoint(sink) and
confidence >= characteristic.maximalConfidence() and
characteristic.hasImplications(config.getASinkEndpointType(), true, confidence)
characteristic.hasImplications(config.getASinkEndpointType(), true, confidence) and
// Exclude sinks that have contradictory endpoint characteristics, because we only want examples we're highly certain
// about in the prompt.
not EndpointCharacteristics::erroneousEndpoints(sink, _, _, _, _)
select sink, characteristic.toString()