Subsample negative examples to 1% to prevent a huge number of results.

This commit is contained in:
tiferet
2022-12-26 23:24:53 -08:00
parent 5d62dc3d2e
commit 4546dbe51b

View File

@@ -12,11 +12,25 @@ import semmle.code.java.dataflow.TaintTracking
private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
private import experimental.adaptivethreatmodeling.EndpointTypes
/**
 * Gets a data-flow node belonging to a deterministic subsample of the database.
 *
 * Every node that has an underlying expression (`asExpr()`) with location info is
 * ranked by file path and then by the four location integers (presumably start
 * line, start column, end line, end column, following the usual
 * `hasLocationInfo` signature). A node is part of the sample when its rank is a
 * multiple of `ceil(1 / rate)`; for example, a `rate` of 0.01 keeps every 100th
 * ranked node.
 *
 * Nodes without an underlying expression are never ranked and therefore never
 * returned.
 */
bindingset[rate]
DataFlow::Node getSampleFromSampleRate(float rate) {
  exists(int position, int stride |
    // Distance between two consecutive sampled ranks.
    stride = (1 / rate).ceil() and
    position % stride = 0 and
    result =
      rank[position](DataFlow::Node node, string file, int startLine, int startColumn,
        int endLine, int endColumn |
        node.asExpr().getLocation().hasLocationInfo(file, startLine, startColumn, endLine, endColumn)
      |
        node order by file, startLine, startColumn, endLine, endColumn
      )
  )
}
// Selects a subsample of endpoints that some endpoint characteristic marks, with
// at least that characteristic's high-confidence threshold, as belonging to a
// negative endpoint type.
from
  DataFlow::Node sink, EndpointCharacteristics::EndpointCharacteristic characteristic,
  float confidence
where
  characteristic.appliesToEndpoint(sink) and
  confidence >= characteristic.highConfidence() and
  characteristic.hasImplications(any(NegativeType negative), true, confidence) and
  // Keep only the endpoints that fall into the 1% location-ordered sample, so the
  // number of reported negative examples stays manageable.
  sink = getSampleFromSampleRate(0.01)
select sink, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()