Java: Improve sampling strategy

Instead of the "random" sampling used before (which could, in rare circumstances, end up sampling fewer points than we want), we now sample an evenly distributed set of points.
2026-04-24 16:25:15 +02:00 · 2023-05-30 11:22:26 +00:00
parent d4b964c849
commit 73aa790cdd
1 changed files with 2 additions and 2 deletions
--- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql
@@ -18,7 +18,7 @@ private import AutomodelSharedUtil
 */
 bindingset[limit]
 Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
-  exists(int n |
+  exists(int n, int num_endpoints | num_endpoints = count(Endpoint e | c.appliesToEndpoint(e)) |
    result =
      rank[n](Endpoint e, Location loc |
        loc = e.getLocation() and c.appliesToEndpoint(e)
@@ -29,7 +29,7 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
          loc.getEndLine(), loc.getEndColumn()
      ) and
    // we order the endpoints by location, but (to avoid bias) we select the indices semi-randomly
-    n = 1 + (([1 .. limit] * 271) % count(Endpoint e | c.appliesToEndpoint(e)))
+    n = 1 + (([0 .. limit - 1] * (num_endpoints / limit).floor() + 46337) % num_endpoints)
  )
 }