diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
index 5ebd7374a77..42b4aa83c34 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
@@ -549,10 +549,16 @@ public class FileExtractor {
           new TextualExtractor(
               trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
       ParseResultInfo loc = extractor.extract(textualExtractor);
-      int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
-      int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
-      trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
-      trapwriter.addTuple("filetype", fileLabel, fileType.toString());
+      // A skipped file is reported on stderr and gets no numlines/filetype tuples.
+      if (loc.getSkipReason() != null) {
+        System.err.println("Skipping file " + extractedFile + ": " + loc.getSkipReason());
+        System.err.flush();
+      } else {
+        int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
+        int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
+        trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
+        trapwriter.addTuple("filetype", fileLabel, fileType.toString());
+      }
       metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
       metrics.writeTimingsToTrap(trapwriter);
       successful = true;
diff --git a/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java b/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java
index 6a1b14447ce..28b412207d4 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java
@@ -10,6 +10,8 @@ import java.util.List;
 public class ParseResultInfo {
   private int linesOfCode, linesOfComments;
   private List parseErrors;
+  /** Reason extraction was skipped, or {@code null} if it was not skipped. */
+  private String skipReason;
 
   public ParseResultInfo(int linesOfCode, int linesOfComments, List parseErrors) {
     this.linesOfCode = linesOfCode;
@@ -17,6 +19,26 @@ public class ParseResultInfo {
     this.parseErrors = new ArrayList<>(parseErrors);
   }
 
+  /** Creates an empty result: zero line counts, no parse errors and no skip reason. */
+  private ParseResultInfo() {
+    this.linesOfCode = 0;
+    this.linesOfComments = 0;
+    this.parseErrors = new ArrayList<>();
+    this.skipReason = null;
+  }
+
+  /**
+   * Creates a result for a file whose extraction was skipped.
+   *
+   * @param reason why the file was skipped
+   * @return an empty result whose skip reason is {@code reason}
+   */
+  public static final ParseResultInfo skipped(String reason) {
+    ParseResultInfo info = new ParseResultInfo();
+    info.skipReason = reason;
+    return info;
+  }
+
   public void add(ParseResultInfo that) {
     this.linesOfCode += that.linesOfCode;
     this.linesOfComments += that.linesOfComments;
@@ -41,4 +63,12 @@ public class ParseResultInfo {
   public List getParseErrors() {
     return parseErrors;
   }
+
+  /**
+   * If extraction of this file was skipped, gets the reason for skipping it; otherwise returns
+   * {@code null}.
+   */
+  public String getSkipReason() {
+    return skipReason;
+  }
 }
diff --git a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java
index 7c539d70e63..6c9bfd2725c 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java
@@ -38,10 +38,45 @@ public class ScriptExtractor implements IExtractor {
     return extension.equals(".cjs") || (extension.equals(".js") && "commonjs".equals(packageType));
   }
 
+  /**
+   * Heuristically determines whether {@code source} is minified: it is considered minified if
+   * its average line length exceeds 200 characters. Empty source is never minified.
+   */
+  private boolean isMinified(String source) {
+    int length = source.length();
+    if (length == 0) {
+      return false;
+    }
+    int numberOfLineBreaks = 0;
+    for (int i = 0; i < length; i++) {
+      char c = source.charAt(i);
+      if (c == '\n') {
+        numberOfLineBreaks++;
+      } else if (c == '\r') {
+        numberOfLineBreaks++;
+        if (i + 1 < length && source.charAt(i + 1) == '\n') {
+          i++; // skip the next \n in case of \r\n
+        }
+      }
+    }
+    // A final line without a trailing line break still counts as a line; otherwise a two-line
+    // file with no trailing newline would be averaged as if it were a single line.
+    char last = source.charAt(length - 1);
+    int numberOfLines =
+        last == '\n' || last == '\r' ? numberOfLineBreaks : numberOfLineBreaks + 1;
+    return length / numberOfLines > 200;
+  }
+
   @Override
   public ParseResultInfo extract(TextualExtractor textualExtractor) {
     LocationManager locationManager = textualExtractor.getLocationManager();
     String source = textualExtractor.getSource();
+
+    // Minified files are not extracted; only the skip reason is returned.
+    if (isMinified(source)) {
+      return ParseResultInfo.skipped("File appears to be minified.");
+    }
+
     String shebangLine = null, shebangLineTerm = null;
 
     if (source.startsWith("#!")) {