mirror of
https://github.com/github/codeql.git
synced 2025-12-18 09:43:15 +01:00
JS: Simplify aggregation of tokens into entity strings
Change the cutoff logic from `count` to `strictcount`, since we know it only applies to a non-empty set of results. Use a single `strictconcat` aggregate to combine tokens in order of location, instead of computing a `rank` followed by a `concat`. Strictness introduces a slight change of behaviour because missing tokens will now result in no results from the predicate rather than an empty feature string.
This commit is contained in:
@@ -127,23 +127,20 @@ module FunctionBodies {
|
||||
// If a function has more than 256 body subtokens, then featurize it as absent. This
|
||||
// approximates the behavior of the classifier on non-generic body features where large body
|
||||
// features are replaced by the absent token.
|
||||
if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
|
||||
if
|
||||
strictcount(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) >
|
||||
256
|
||||
then result = ""
|
||||
else
|
||||
result =
|
||||
concat(int i, string rankedToken |
|
||||
rankedToken =
|
||||
rank[i](DatabaseFeatures::AstNode node, string token, Location l |
|
||||
strictconcat(DatabaseFeatures::AstNode node, string token, Location l |
|
||||
bodyTokens(entity, node, token) and l = node.getLocation()
|
||||
|
|
||||
token
|
||||
token, " "
|
||||
order by
|
||||
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
||||
l.getEndColumn(), token
|
||||
)
|
||||
|
|
||||
rankedToken, " " order by i
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user