From 4546dbe51b94800a9479abd11cb6543d235ff106 Mon Sep 17 00:00:00 2001
From: tiferet
Date: Mon, 26 Dec 2022 23:24:53 -0800
Subject: [PATCH] Subsample negative examples to 1% to prevent huge numbers.

---
Notes (ignored by `git am`; not part of the commit message):

  * Adds the predicate `getSampleFromSampleRate(float rate)`. It ranks every
    `DataFlow::Node` whose `asExpr()` has location info, ordered by
    (path, a, b, c, d) as reported by `hasLocationInfo`, and yields the nodes
    whose rank `r` satisfies `r % (1 / rate).ceil() = 0`.
  * The query's `where` clause now additionally requires
    `sink = getSampleFromSampleRate(0.01)`, so only nodes at ranks divisible
    by ceil(1 / 0.01) are reported.
  * NOTE(review): the ranking ranges over all nodes that have an expression,
    not only over the negative endpoints matched by the `where` clause, so
    the fraction of negative examples kept is only approximately `rate` --
    confirm this is intended.
  * NOTE(review): nodes for which `asExpr()` has no result are never ranked
    and therefore can never be selected -- confirm no negative endpoints are
    of that kind.
  * NOTE(review): the predicate has no guard on `rate`; presumably callers
    pass a value in (0, 1] -- behaviour for `rate = 0` should be verified.

 .../src/ExtractNegativeExamples.ql               | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql
index 55fa1c70e4d..021cad6014e 100644
--- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql
+++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql
@@ -12,11 +12,25 @@ import semmle.code.java.dataflow.TaintTracking
 private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
 private import experimental.adaptivethreatmodeling.EndpointTypes
 
+bindingset[rate]
+DataFlow::Node getSampleFromSampleRate(float rate) {
+  exists(int r |
+    result =
+      rank[r](DataFlow::Node n, string path, int a, int b, int c, int d |
+        n.asExpr().getLocation().hasLocationInfo(path, a, b, c, d)
+      |
+        n order by path, a, b, c, d
+      ) and
+    r % (1 / rate).ceil() = 0
+  )
+}
+
 from
   DataFlow::Node sink, EndpointCharacteristics::EndpointCharacteristic characteristic,
   float confidence
 where
   characteristic.appliesToEndpoint(sink) and
   confidence >= characteristic.highConfidence() and
-  characteristic.hasImplications(any(NegativeType negative), true, confidence)
+  characteristic.hasImplications(any(NegativeType negative), true, confidence) and
+  sink = getSampleFromSampleRate(0.01)
 select sink, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()