mirror of
https://github.com/github/codeql.git
synced 2026-05-03 12:45:27 +02:00
Merge pull request #7307 from adityasharad/atm/perf-debugging
JS/ATM: Various compilation fixes and performance improvements
This commit is contained in:
@@ -25,9 +25,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
|
||||
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
|
||||
)
|
||||
or
|
||||
exists(getACallBasedTokenFeatureComponent(endpoint, _, featureName)) and
|
||||
result =
|
||||
concat(DataFlow::CallNode call, string component |
|
||||
strictconcat(DataFlow::CallNode call, string component |
|
||||
component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
|
||||
|
|
||||
component, " "
|
||||
@@ -110,12 +109,13 @@ private string getACallBasedTokenFeatureComponent(
|
||||
|
||||
/** This module provides functionality for getting the function body feature associated with a particular entity. */
|
||||
module FunctionBodies {
|
||||
/** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
|
||||
private predicate bodyTokens(
|
||||
DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
|
||||
) {
|
||||
DatabaseFeatures::astNodes(entity, _, _, node, _) and
|
||||
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t))
|
||||
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
|
||||
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
|
||||
exists(DatabaseFeatures::AstNode node |
|
||||
DatabaseFeatures::astNodes(entity, _, _, node, _) and
|
||||
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
|
||||
location = node.getLocation()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -127,23 +127,18 @@ module FunctionBodies {
|
||||
// If a function has more than 256 body subtokens, then featurize it as absent. This
|
||||
// approximates the behavior of the classifier on non-generic body features where large body
|
||||
// features are replaced by the absent token.
|
||||
if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
|
||||
then result = ""
|
||||
else
|
||||
result =
|
||||
concat(int i, string rankedToken |
|
||||
rankedToken =
|
||||
rank[i](DatabaseFeatures::AstNode node, string token, Location l |
|
||||
bodyTokens(entity, node, token) and l = node.getLocation()
|
||||
|
|
||||
token
|
||||
order by
|
||||
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
||||
l.getEndColumn(), token
|
||||
)
|
||||
|
|
||||
rankedToken, " " order by i
|
||||
)
|
||||
//
|
||||
// We count locations instead of tokens because tokens are often not unique.
|
||||
strictcount(Location l | bodyTokens(entity, l, _)) <= 256 and
|
||||
result =
|
||||
strictconcat(string token, Location l |
|
||||
bodyTokens(entity, l, token)
|
||||
|
|
||||
token, " "
|
||||
order by
|
||||
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
|
||||
l.getEndColumn(), token
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -247,11 +242,12 @@ private module AccessPaths {
|
||||
else accessPath = previousAccessPath + " " + paramName
|
||||
)
|
||||
or
|
||||
exists(string callbackName, string index |
|
||||
exists(string callbackName, int index |
|
||||
node =
|
||||
getNamedParameter(previousNode.getASuccessor("param " + index).getMember(callbackName),
|
||||
paramName) and
|
||||
index != "-1" and // ignore receiver
|
||||
getNamedParameter(previousNode
|
||||
.getASuccessor(API::Label::parameter(index))
|
||||
.getMember(callbackName), paramName) and
|
||||
index != -1 and // ignore receiver
|
||||
if includeStructuralInfo = true
|
||||
then
|
||||
accessPath =
|
||||
@@ -280,10 +276,13 @@ private string getASupportedFeatureName() {
|
||||
* `featureValue` for the endpoint `endpoint`.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
featureName = getASupportedFeatureName() and
|
||||
ModelScoring::endpoints(endpoint) and
|
||||
(
|
||||
featureValue = unique(string x | x = getTokenFeature(endpoint, featureName))
|
||||
or
|
||||
not exists(unique(string x | x = getTokenFeature(endpoint, featureName))) and featureValue = ""
|
||||
if strictcount(getTokenFeature(endpoint, featureName)) = 1
|
||||
then featureValue = getTokenFeature(endpoint, featureName)
|
||||
else (
|
||||
// Performance note: this is a Cartesian product between all endpoints and feature names.
|
||||
featureValue = "" and featureName = getASupportedFeatureName()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -92,14 +92,31 @@ abstract class FrameworkLibraryWithMarkerComment extends FrameworkLibrary {
|
||||
|
||||
/**
|
||||
* Gets a regular expression that can be used to identify an instance of
|
||||
* this framework library.
|
||||
* this framework library, with `<VERSION>` as a placeholder for version
|
||||
* numbers.
|
||||
*
|
||||
* The first capture group of this regular expression should match
|
||||
* the version number. Any occurrences of the string `<VERSION>` in
|
||||
* the regular expression are replaced by `versionRegex()` before
|
||||
* matching.
|
||||
* the version number.
|
||||
*
|
||||
* Subclasses should implement this predicate.
|
||||
*
|
||||
* Callers should avoid using this predicate directly,
|
||||
* and instead use `getAMarkerCommentRegexWithoutPlaceholders()`,
|
||||
* which will replace any occurrences of the string `<VERSION>` in
|
||||
* the regular expression with `versionRegex()`.
|
||||
*/
|
||||
abstract string getAMarkerCommentRegex();
|
||||
|
||||
/**
|
||||
* Gets a regular expression that can be used to identify an instance of
|
||||
* this framework library.
|
||||
*
|
||||
* The first capture group of this regular expression is intended to match
|
||||
* the version number.
|
||||
*
* The result is `getAMarkerCommentRegex()` with every occurrence of the
* string `<VERSION>` replaced by `versionRegex()`.
*/
|
||||
final string getAMarkerCommentRegexWithoutPlaceholders() {
|
||||
result = this.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex())
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -182,18 +199,64 @@ class FrameworkLibraryInstanceWithMarkerComment extends FrameworkLibraryInstance
|
||||
override predicate info(FrameworkLibrary fl, string v) { matchMarkerComment(_, this, fl, v) }
|
||||
}
|
||||
|
||||
/**
* A marker comment that indicates a framework library.
*
* A comment belongs to this class when its text matches the union of the
* marker comment regexes of all `FrameworkLibraryWithMarkerComment`s.
* Which framework and version it indicates is determined separately by
* `matchesFramework`.
*/
|
||||
private class MarkerComment extends Comment {
|
||||
MarkerComment() {
|
||||
/*
|
||||
* PERFORMANCE OPTIMISATION:
|
||||
*
|
||||
* Each framework library has a regular expression describing its marker comments.
|
||||
* We want to find the set of marker comments and the framework regexes they match.
|
||||
* In order to perform such regex matching, CodeQL needs to compute the
|
||||
* Cartesian product of possible receiver strings and regexes first,
|
||||
* containing `num_receivers * num_regexes` tuples.
|
||||
*
|
||||
* A straightforward attempt to match marker comments with individual
|
||||
* framework regexes will compute the Cartesian product between
|
||||
* the set of comments and the set of framework regexes.
|
||||
* Total: `num_comments * num_frameworks` tuples.
|
||||
*
|
||||
* Instead, create a single regex that matches *all* frameworks.
|
||||
* This is the regex union of the individual framework regexes
|
||||
* i.e. `(regex_1)|(regex_2)|...|(regex_n)`
|
||||
* This approach will compute the Cartesian product between
|
||||
* the set of comments and the singleton set of this union regex.
|
||||
* Total: `num_comments * 1` tuples.
|
||||
*
|
||||
* To identify the individual frameworks and extract the version number from capture groups,
|
||||
* use the member predicate `matchesFramework` *after* this predicate has been computed.
|
||||
*/
|
||||
|
||||
// Build the union regex: each placeholder-free framework regex is wrapped in
// parentheses and the pieces are joined with "|". The comment text is then
// tested against that single regex.
exists(string unionRegex |
|
||||
unionRegex =
|
||||
concat(FrameworkLibraryWithMarkerComment fl |
|
||||
|
|
||||
"(" + fl.getAMarkerCommentRegexWithoutPlaceholders() + ")", "|"
|
||||
)
|
||||
|
|
||||
this.getText().regexpMatch(unionRegex)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this marker comment indicates an instance of the framework `fl`
|
||||
* with version number `version`.
|
||||
*
* This matches against the individual regex of `fl`, not the union regex
* used by the characteristic predicate, and takes `version` from capture
* group 1.
*/
|
||||
predicate matchesFramework(FrameworkLibraryWithMarkerComment fl, string version) {
|
||||
this.getText().regexpCapture(fl.getAMarkerCommentRegexWithoutPlaceholders(), 1) = version
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if comment `c` in toplevel `tl` matches the marker comment of library
|
||||
* `fl` at `version`.
|
||||
*/
|
||||
cached
|
||||
private predicate matchMarkerComment(
|
||||
Comment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
|
||||
MarkerComment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
|
||||
) {
|
||||
c.getTopLevel() = tl and
|
||||
exists(string r | r = fl.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex()) |
|
||||
version = c.getText().regexpCapture(r, 1)
|
||||
)
|
||||
c.matchesFramework(fl, version)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user