mirror of
https://github.com/github/codeql.git
synced 2026-05-01 03:35:13 +02:00
JS: Performance optimisation for matching framework libraries with their marker comments
The `matchMarkerComment` predicate performs badly on any codebase with a moderately large number of comments, because the current implementation has to first compute the Cartesian product between the set of comments and the set of framework library comment regexes. Instead, match first against a single regex: the union of all framework library comment regexes. This computes a more benign Cartesian product, the same size as the set of comments. See inline comments for more details.
This commit is contained in:
@@ -92,14 +92,26 @@ abstract class FrameworkLibraryWithMarkerComment extends FrameworkLibrary {
|
||||
|
||||
/**
 * Gets a regular expression that can be used to identify an instance of
 * this framework library, with `<VERSION>` as a placeholder for version
 * numbers.
 *
 * The first capture group of this regular expression should match
 * the version number. Any occurrences of the string `<VERSION>` in
 * the regular expression will be replaced by `versionRegex()` before
 * matching.
 */
|
||||
abstract string getAMarkerCommentRegex();
|
||||
|
||||
/**
 * Gets a marker-comment regular expression for this framework library in
 * which every `<VERSION>` placeholder has been substituted by `versionRegex()`.
 *
 * The first capture group of the resulting regular expression is intended to
 * match the version number.
 */
final string getAMarkerCommentRegexWithoutPlaceholders() {
  exists(string pattern | pattern = this.getAMarkerCommentRegex() |
    result = pattern.replaceAll("<VERSION>", versionRegex())
  )
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -182,18 +194,64 @@ class FrameworkLibraryInstanceWithMarkerComment extends FrameworkLibraryInstance
|
||||
/**
 * Holds if some comment in this toplevel matches the marker comment of
 * framework library `fl` at version `v`.
 */
override predicate info(FrameworkLibrary fl, string v) {
  exists(Comment marker | matchMarkerComment(marker, this, fl, v))
}
|
||||
}
|
||||
|
||||
/**
 * A marker comment that indicates a framework library: a comment whose text
 * matches the marker-comment regular expression of at least one
 * `FrameworkLibraryWithMarkerComment`.
 */
private class MarkerComment extends Comment {
  MarkerComment() {
    /*
     * PERFORMANCE OPTIMISATION:
     *
     * Each framework library has a regular expression describing its marker comments.
     * We want to find the set of marker comments and the framework regexes they match.
     * In order to perform such regex matching, CodeQL needs to compute the
     * Cartesian product of possible receiver strings and regexes first,
     * containing `num_receivers * num_regexes` tuples.
     *
     * A straightforward attempt to match marker comments with individual
     * framework regexes will compute the Cartesian product between
     * the set of comments and the set of framework regexes.
     * Total: `num_comments * num_frameworks` tuples.
     *
     * Instead, create a single regex that matches *all* frameworks.
     * This is the regex union of the individual framework regexes,
     * i.e. `(regex_1)|(regex_2)|...|(regex_n)`.
     * This approach will compute the Cartesian product between
     * the set of comments and the singleton set of this union regex.
     * Total: `num_comments * 1` tuples.
     *
     * The union regex is used only as a yes/no filter here. To identify the
     * individual frameworks and extract the version number from capture groups,
     * use the member predicate `matchesFramework` *after* this predicate has
     * been computed.
     */

    exists(string unionRegex |
      unionRegex =
        concat(FrameworkLibraryWithMarkerComment fl |
          |
          "(" + fl.getAMarkerCommentRegexWithoutPlaceholders() + ")", "|"
        )
    |
      this.getText().regexpMatch(unionRegex)
    )
  }

  /**
   * Holds if this marker comment indicates an instance of the framework `fl`
   * with version number `version`.
   *
   * `version` is the first capture group obtained by matching this comment's
   * text against one of the marker-comment regular expressions of `fl`.
   */
  predicate matchesFramework(FrameworkLibraryWithMarkerComment fl, string version) {
    // `regexpCapture` matches its *receiver* against the regex passed as the
    // first argument, so the comment text must be the receiver and the
    // framework regex the argument (not the other way round).
    version = this.getText().regexpCapture(fl.getAMarkerCommentRegexWithoutPlaceholders(), 1)
  }
}
|
||||
|
||||
/**
 * Holds if comment `c` in toplevel `tl` matches the marker comment of library
 * `fl` at `version`.
 *
 * `c` is typed as `MarkerComment`, so only comments already recognised by that
 * class are considered; the per-framework match and the extraction of
 * `version` are delegated to `MarkerComment.matchesFramework`.
 */
cached
private predicate matchMarkerComment(
  MarkerComment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
) {
  c.getTopLevel() = tl and
  c.matchesFramework(fl, version)
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user