JS: Simplify aggregation of tokens into entity strings

Change the cutoff logic from `count` to `strictcount`, since the cutoff only applies to a non-empty set of results. Use a single `strictconcat` aggregate to combine tokens in order of location, instead of computing a `rank` followed by a `concat`. Strictness introduces a slight change of behaviour: when an entity has no body tokens, the predicate now has no result for that entity, rather than producing an empty feature string.
2025-12-18 09:43:15 +01:00 · 2021-12-03 12:11:10 -08:00
parent 2a3b5fc2b2
commit 0e31439b7e
1 changed files with 9 additions and 12 deletions
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll
@@ -127,22 +127,19 @@ module FunctionBodies {
    // If a function has more than 256 body subtokens, then featurize it as absent. This
    // approximates the behavior of the classifer on non-generic body features where large body
    // features are replaced by the absent token.
-    if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
+    if
      strictcount(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) >
        256
    then result = ""
    else
      result =
-        concat(int i, string rankedToken |
+        strictconcat(DatabaseFeatures::AstNode node, string token, Location l |
-          rankedToken =
+          bodyTokens(entity, node, token) and l = node.getLocation()
            rank[i](DatabaseFeatures::AstNode node, string token, Location l |
              bodyTokens(entity, node, token) and l = node.getLocation()
            |
              token
              order by
                l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
                l.getEndColumn(), token
            )
        |
-          rankedToken, " " order by i
+          token, " "
          order by
            l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
            l.getEndColumn(), token
        )
  }
 }