JS: Format HTMLExtractor

2025-12-21 11:16:30 +01:00 · 2021-07-20 10:34:53 +02:00
parent 8666bc1894
commit 66cec65bfb
1 changed files with 34 additions and 22 deletions
--- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java
@@ -4,6 +4,7 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.List;
+import java.util.function.Supplier;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@@ -24,6 +25,7 @@ import net.htmlparser.jericho.Attributes;
 import net.htmlparser.jericho.Element;
 import net.htmlparser.jericho.HTMLElementName;
 import net.htmlparser.jericho.Segment;
+import net.htmlparser.jericho.Source;

 /** Extractor for handling HTML and XHTML files. */
 public class HTMLExtractor implements IExtractor {
@@ -32,7 +34,7 @@ public class HTMLExtractor implements IExtractor {
  private class JavaScriptHTMLElementHandler implements HtmlPopulator.ElementHandler {
    private final ScopeManager scopeManager;
    private final TextualExtractor textualExtractor;
-    
+
    public JavaScriptHTMLElementHandler(TextualExtractor textualExtractor) {
      this.textualExtractor = textualExtractor;

@@ -169,14 +171,16 @@ public class HTMLExtractor implements IExtractor {
  }

  private boolean isAngularTemplateAttributeName(String name) {
-    return name.startsWith("[") && name.endsWith("]") ||
-        name.startsWith("(") && name.endsWith(")") ||
-        name.startsWith("*ng");
+    return name.startsWith("[") && name.endsWith("]")
+        || name.startsWith("(") && name.endsWith(")")
+        || name.startsWith("*ng");
  }

-  private static final Pattern ANGULAR_FOR_LOOP_DECL = Pattern.compile("^ *let +(\\w+) +of(?: +|(?!\\w))(.*)");
+  private static final Pattern ANGULAR_FOR_LOOP_DECL =
+      Pattern.compile("^ *let +(\\w+) +of(?: +|(?!\\w))(.*)");

-  private static final Pattern VALID_ATTRIBUTE_NAME = Pattern.compile("\\*?\\[?\\(?[\\w:_\\-]+\\]?\\)?");
+  private static final Pattern VALID_ATTRIBUTE_NAME =
+      Pattern.compile("\\*?\\[?\\(?[\\w:_\\-]+\\]?\\)?");

  /** List of HTML attributes whose value is interpreted as JavaScript. */
  private static final Pattern JS_ATTRIBUTE =
@@ -202,15 +206,17 @@ public class HTMLExtractor implements IExtractor {
  public static HTMLExtractor forEmbeddedHtml(ExtractorConfig config) {
    return new HTMLExtractor(config, null, true);
  }
-  
+
  @Override
  public LoCInfo extract(TextualExtractor textualExtractor) throws IOException {
    return extractEx(textualExtractor).snd();
  }

  public Pair<List<Label>, LoCInfo> extractEx(TextualExtractor textualExtractor) {
-    // Angular templates contain attribute names that are not valid HTML/XML, such as [foo], (foo), [(foo)], and *foo.
-    // Allow a large number of errors in attribute names, so the Jericho parser does not give up.
+    // Angular templates contain attribute names that are not valid HTML/XML, such
+    // as [foo], (foo), [(foo)], and *foo.
+    // Allow a large number of errors in attribute names, so the Jericho parser does
+    // not give up.
    Attributes.setDefaultMaxErrorCount(100);
    JavaScriptHTMLElementHandler eltHandler = new JavaScriptHTMLElementHandler(textualExtractor);

@@ -222,7 +228,8 @@ public class HTMLExtractor implements IExtractor {
            textualExtractor.getTrapwriter(),
            locationManager.getFileLabel());

-    // For efficiency, avoid building the source map if not needed (i.e. for plain HTML files).
+    // For efficiency, avoid building the source map if not needed (i.e. for plain
+    // HTML files).
    if (textualExtractor.hasNonTrivialSourceMap()) {
      extractor.setSourceMap(textualExtractor.getSourceMap());
    }
@@ -282,7 +289,8 @@ public class HTMLExtractor implements IExtractor {

  private boolean isTypeScriptTag(Element script) {
    String language = getScriptLanguage(script);
-    if ("ts".equals(language) || "tsx".equals(language) || "typescript".equals(language)) return true;
+    if ("ts".equals(language) || "tsx".equals(language) || "typescript".equals(language))
+      return true;
    String type = getAttributeValueLC(script, "type");
    if (type != null && type.contains("typescript")) return true;
    return false;
@@ -311,14 +319,16 @@ public class HTMLExtractor implements IExtractor {
    // JavaScript AST extraction does not currently support source maps, so just set
    // line/column numbers on the location manager.
    Position pos = textualExtractor.getSourceMap().getStart(offset);
-    LocationManager scriptLocationManager = locationManager.startingAt(pos.getLine(), pos.getColumn());
+    LocationManager scriptLocationManager =
+        locationManager.startingAt(pos.getLine(), pos.getColumn());
    if (isTypeScript) {
      if (isEmbedded) {
        return; // Do not extract files from HTML embedded in other files.
      }
      Path file = textualExtractor.getExtractedFile().toPath();
      FileSnippet snippet =
-          new FileSnippet(file, pos.getLine(), pos.getColumn(), toplevelKind, config.getSourceType());
+          new FileSnippet(
+              file, pos.getLine(), pos.getColumn(), toplevelKind, config.getSourceType());
      VirtualSourceRoot vroot = config.getVirtualSourceRoot();
      // Vue files are special in that they can be imported as modules, and may only
      // contain one <script> tag.
@@ -335,11 +345,12 @@ public class HTMLExtractor implements IExtractor {
        }
        state.getSnippets().put(virtualFile, snippet);
      }
-      Label topLevelLabel = ASTExtractor.makeTopLevelLabel(
-          textualExtractor.getTrapwriter(),
-          scriptLocationManager.getFileLabel(),
-          scriptLocationManager.getStartLine(),
-          scriptLocationManager.getStartColumn());
+      Label topLevelLabel =
+          ASTExtractor.makeTopLevelLabel(
+              textualExtractor.getTrapwriter(),
+              scriptLocationManager.getFileLabel(),
+              scriptLocationManager.getStartLine(),
+              scriptLocationManager.getStartColumn());
      emitTopLevelXmlNodeBinding(parentLabel, topLevelLabel, trapWriter);
      // Note: LoC info is accounted for later, so not added here.
      return;
@@ -377,7 +388,8 @@ public class HTMLExtractor implements IExtractor {
  private static final String EJS_TAG = "<%(?![%<>}])[-=]?(.*?)[_-]?%>"; // <% x %>

  /** Pattern for a template tag whose contents should be parsed as an expression */
-  private static final Pattern TEMPLATE_EXPR_OPENING_TAG = Pattern.compile("^(?:\\{\\{\\{?|<%[-=])"); // {{, {{{, <%=, <%-
+  private static final Pattern TEMPLATE_EXPR_OPENING_TAG =
+      Pattern.compile("^(?:\\{\\{\\{?|<%[-=])"); // {{, {{{, <%=, <%-

  private static final Pattern TEMPLATE_TAGS =
      Pattern.compile(
@@ -432,15 +444,15 @@ public class HTMLExtractor implements IExtractor {
              m.group(bodyGroup),
              m.start(bodyGroup),
              false /* isTypeScript */,
-              lbl);  
+              lbl);
        }
      }
    }
  }

  /**
-   * Returns the index of the first capture group that captured something
-   * (apart from group zero which is the whole match).
+   * Returns the index of the first capture group that captured something (apart from group zero
+   * which is the whole match).
   */
  private static int getNonNullCaptureGroup(Matcher m) {
    for (int i = 1; i <= m.groupCount(); ++i) {