Merge pull request #7307 from adityasharad/atm/perf-debugging

JS/ATM: Various compilation fixes and performance improvements
2026-05-03 12:45:27 +02:00 · 2021-12-10 11:00:27 +00:00
parent 657cd89286 271b23ba8f
commit 6e167040f5
2 changed files with 103 additions and 41 deletions
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll
@@ -25,9 +25,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
    result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
  )
  or
-  exists(getACallBasedTokenFeatureComponent(endpoint, _, featureName)) and
  result =
-    concat(DataFlow::CallNode call, string component |
+    strictconcat(DataFlow::CallNode call, string component |
      component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
    |
      component, " "
@@ -110,12 +109,13 @@ private string getACallBasedTokenFeatureComponent(

 /** This module provides functionality for getting the function body feature associated with a particular entity. */
 module FunctionBodies {
-  /** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
-  private predicate bodyTokens(
-    DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
-  ) {
-    DatabaseFeatures::astNodes(entity, _, _, node, _) and
-    token = unique(string t | DatabaseFeatures::nodeAttributes(node, t))
+  /** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
+  private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
+    exists(DatabaseFeatures::AstNode node |
+      DatabaseFeatures::astNodes(entity, _, _, node, _) and
+      token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
+      location = node.getLocation()
+    )
  }

  /**
@@ -127,23 +127,18 @@ module FunctionBodies {
    // If a function has more than 256 body subtokens, then featurize it as absent. This
    // approximates the behavior of the classifer on non-generic body features where large body
    // features are replaced by the absent token.
-    if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
-    then result = ""
-    else
-      result =
-        concat(int i, string rankedToken |
-          rankedToken =
-            rank[i](DatabaseFeatures::AstNode node, string token, Location l |
-              bodyTokens(entity, node, token) and l = node.getLocation()
-            |
-              token
-              order by
-                l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
-                l.getEndColumn(), token
-            )
-        |
-          rankedToken, " " order by i
-        )
+    //
+    // We count locations instead of tokens because tokens are often not unique.
+    strictcount(Location l | bodyTokens(entity, l, _)) <= 256 and
+    result =
+      strictconcat(string token, Location l |
+        bodyTokens(entity, l, token)
+      |
+        token, " "
+        order by
+          l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
+          l.getEndColumn(), token
+      )
  }
 }

@@ -247,11 +242,12 @@ private module AccessPaths {
          else accessPath = previousAccessPath + " " + paramName
        )
        or
-        exists(string callbackName, string index |
+        exists(string callbackName, int index |
          node =
-            getNamedParameter(previousNode.getASuccessor("param " + index).getMember(callbackName),
-              paramName) and
-          index != "-1" and // ignore receiver
+            getNamedParameter(previousNode
+                  .getASuccessor(API::Label::parameter(index))
+                  .getMember(callbackName), paramName) and
+          index != -1 and // ignore receiver
          if includeStructuralInfo = true
          then
            accessPath =
@@ -280,10 +276,13 @@ private string getASupportedFeatureName() {
 * `featureValue` for the endpoint `endpoint`.
 */
 predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
-  featureName = getASupportedFeatureName() and
+  ModelScoring::endpoints(endpoint) and
  (
-    featureValue = unique(string x | x = getTokenFeature(endpoint, featureName))
-    or
-    not exists(unique(string x | x = getTokenFeature(endpoint, featureName))) and featureValue = ""
+    if strictcount(getTokenFeature(endpoint, featureName)) = 1
+    then featureValue = getTokenFeature(endpoint, featureName)
+    else (
+      // Performance note: this is a Cartesian product between all endpoints and feature names.
+      featureValue = "" and featureName = getASupportedFeatureName()
+    )
  )
 }
--- a/javascript/ql/lib/semmle/javascript/dependencies/FrameworkLibraries.qll
+++ b/javascript/ql/lib/semmle/javascript/dependencies/FrameworkLibraries.qll
@@ -92,14 +92,31 @@ abstract class FrameworkLibraryWithMarkerComment extends FrameworkLibrary {

  /**
   * Gets a regular expression that can be used to identify an instance of
-   * this framework library.
+   * this framework library, with `<VERSION>` as a placeholder for version
+   * numbers.
   *
   * The first capture group of this regular expression should match
-   * the version number. Any occurrences of the string `<VERSION>` in
-   * the regular expression are replaced by `versionRegex()` before
-   * matching.
+   * the version number.
+   *
+   * Subclasses should implement this predicate.
+   *
+   * Callers should avoid using this predicate directly,
+   * and instead use `getAMarkerCommentRegexWithoutPlaceholders()`,
+   * which will replace any occurrences of the string `<VERSION>` in
+   * the regular expression with `versionRegex()`.
   */
  abstract string getAMarkerCommentRegex();
+
+  /**
+   * Gets a regular expression that can be used to identify an instance of
+   * this framework library, with every `<VERSION>` placeholder replaced by `versionRegex()`.
+   *
+   * The first capture group of this regular expression is intended to match
+   * the version number.
+   */
+  final string getAMarkerCommentRegexWithoutPlaceholders() {
+    result = this.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex())
+  }
 }

 /**
@@ -182,18 +199,64 @@ class FrameworkLibraryInstanceWithMarkerComment extends FrameworkLibraryInstance
  override predicate info(FrameworkLibrary fl, string v) { matchMarkerComment(_, this, fl, v) }
 }

+/** A marker comment that indicates a framework library. */
+private class MarkerComment extends Comment {
+  MarkerComment() {
+    /*
+     * PERFORMANCE OPTIMISATION:
+     *
+     * Each framework library has a regular expression describing its marker comments.
+     * We want to find the set of marker comments and the framework regexes they match.
+     * In order to perform such regex matching, CodeQL needs to compute the
+     * Cartesian product of possible receiver strings and regexes first,
+     * containing `num_receivers * num_regexes` tuples.
+     *
+     * A straightforward attempt to match marker comments with individual
+     * framework regexes will compute the Cartesian product between
+     * the set of comments and the set of framework regexes.
+     * Total: `num_comments * num_frameworks` tuples.
+     *
+     * Instead, create a single regex that matches *all* frameworks.
+     * This is the regex union of the individual framework regexes
+     * i.e. `(regex_1)|(regex_2)|...|(regex_n)`
+     * This approach will compute the Cartesian product between
+     * the set of comments and the singleton set of this union regex.
+     * Total: `num_comments * 1` tuples.
+     *
+     * To identify individual frameworks and extract the version number from capture groups, use the
+     * member predicate `matchesFramework` *after* this characteristic predicate has been computed.
+     */
+
+    exists(string unionRegex |
+      unionRegex =
+        concat(FrameworkLibraryWithMarkerComment fl |
+          |
+          "(" + fl.getAMarkerCommentRegexWithoutPlaceholders() + ")", "|"
+        )
+    |
+      this.getText().regexpMatch(unionRegex)
+    )
+  }
+
+  /**
+   * Holds if this marker comment matches a marker comment regex of the framework `fl`,
+   * and `version` is the text matched by the first capture group of that regex.
+   */
+  predicate matchesFramework(FrameworkLibraryWithMarkerComment fl, string version) {
+    this.getText().regexpCapture(fl.getAMarkerCommentRegexWithoutPlaceholders(), 1) = version
+  }
+}
+
 /**
 * Holds if comment `c` in toplevel `tl` matches the marker comment of library
 * `fl` at `version`.
 */
 cached
 private predicate matchMarkerComment(
-  Comment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
+  MarkerComment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
 ) {
  c.getTopLevel() = tl and
-  exists(string r | r = fl.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex()) |
-    version = c.getText().regexpCapture(r, 1)
-  )
+  c.matchesFramework(fl, version)
 }

 /**