mirror of
https://github.com/github/codeql.git
synced 2025-12-18 18:10:39 +01:00
JS: Simplify aggregation of tokens into entity strings
Change the cutoff logic from `count` to `strictcount`, since the cutoff only matters when the set of body tokens is non-empty. Use a single `strictconcat` aggregate to combine tokens in order of location, instead of computing a `rank` followed by a `concat`. The strict aggregates introduce a slight change of behaviour: an entity with no body tokens now yields no result from the predicate rather than an empty feature string.
This commit is contained in:
@@ -127,23 +127,20 @@ module FunctionBodies {
|
|||||||
// If a function has more than 256 body subtokens, then featurize it as absent. This
|
// If a function has more than 256 body subtokens, then featurize it as absent. This
|
||||||
// approximates the behavior of the classifier on non-generic body features where large body
|
// approximates the behavior of the classifier on non-generic body features where large body
|
||||||
// features are replaced by the absent token.
|
// features are replaced by the absent token.
|
||||||
if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
|
if
|
||||||
|
strictcount(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) >
|
||||||
|
256
|
||||||
then result = ""
|
then result = ""
|
||||||
else
|
else
|
||||||
result =
|
result =
|
||||||
concat(int i, string rankedToken |
|
strictconcat(DatabaseFeatures::AstNode node, string token, Location l |
|
||||||
rankedToken =
|
|
||||||
rank[i](DatabaseFeatures::AstNode node, string token, Location l |
|
|
||||||
bodyTokens(entity, node, token) and l = node.getLocation()
|
bodyTokens(entity, node, token) and l = node.getLocation()
|
||||||
|
|
|
|
||||||
token
|
token, " "
|
||||||
order by
|
order by
|
||||||
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
||||||
l.getEndColumn(), token
|
l.getEndColumn(), token
|
||||||
)
|
)
|
||||||
|
|
|
||||||
rankedToken, " " order by i
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user