mirror of
https://github.com/github/codeql.git
synced 2026-02-12 05:01:06 +01:00
Merge pull request #20940 from asgerf/js/detect-minified-files
JS: Skip minified file if avg line length > 200
This commit is contained in:
@@ -408,8 +408,10 @@ public class AutoBuild {
|
||||
for (String extension : fileTypes.keySet()) patterns.add("**/*" + extension);
|
||||
|
||||
// exclude files whose name strongly suggests they are minified
|
||||
patterns.add("-**/*.min.js");
|
||||
patterns.add("-**/*-min.js");
|
||||
if (!EnvironmentVariables.allowMinifiedFiles()) {
|
||||
patterns.add("-**/*.min.js");
|
||||
patterns.add("-**/*-min.js");
|
||||
}
|
||||
|
||||
// exclude `node_modules` and `bower_components`
|
||||
patterns.add("-**/node_modules");
|
||||
@@ -1074,6 +1076,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
|
||||
config = config.withSourceType(getSourceType());
|
||||
config = config.withVirtualSourceRoot(virtualSourceRoot);
|
||||
if (defaultEncoding != null) config = config.withDefaultEncoding(defaultEncoding);
|
||||
config = config.withAllowMinified(EnvironmentVariables.allowMinifiedFiles());
|
||||
return config;
|
||||
}
|
||||
|
||||
|
||||
@@ -101,4 +101,12 @@ public class EnvironmentVariables {
|
||||
public static boolean isActionsExtractor() {
|
||||
return Env.systemEnv().getNonEmpty(CODEQL_EXTRACTOR_ACTIONS_WIP_DATABASE_ENV_VAR) != null;
|
||||
}
|
||||
|
||||
public static boolean allowMinifiedFiles() {
|
||||
String env = Env.systemEnv().getNonEmpty("CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES");
|
||||
if (env == null) {
|
||||
return false; // default is to not allow minified files
|
||||
}
|
||||
return Boolean.parseBoolean(env);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,6 +205,9 @@ public class ExtractorConfig {
|
||||
/** Should parse errors be reported as violations instead of aborting extraction? */
|
||||
private boolean tolerateParseErrors;
|
||||
|
||||
/** Should minified files be allowed? */
|
||||
private boolean allowMinified;
|
||||
|
||||
/** How should HTML files be extracted? */
|
||||
private HtmlPopulator.Config htmlHandling;
|
||||
|
||||
@@ -236,6 +239,7 @@ public class ExtractorConfig {
|
||||
this.sourceType = SourceType.AUTO;
|
||||
this.htmlHandling = HtmlPopulator.Config.ELEMENTS;
|
||||
this.tolerateParseErrors = true;
|
||||
this.allowMinified = false;
|
||||
if (experimental) {
|
||||
this.mozExtensions = true;
|
||||
this.jscript = true;
|
||||
@@ -258,6 +262,7 @@ public class ExtractorConfig {
|
||||
this.v8Extensions = that.v8Extensions;
|
||||
this.e4x = that.e4x;
|
||||
this.tolerateParseErrors = that.tolerateParseErrors;
|
||||
this.allowMinified = that.allowMinified;
|
||||
this.fileType = that.fileType;
|
||||
this.sourceType = that.sourceType;
|
||||
this.htmlHandling = that.htmlHandling;
|
||||
@@ -357,6 +362,16 @@ public class ExtractorConfig {
|
||||
return res;
|
||||
}
|
||||
|
||||
public boolean isAllowMinified() {
|
||||
return allowMinified;
|
||||
}
|
||||
|
||||
public ExtractorConfig withAllowMinified(boolean allowMinified) {
|
||||
ExtractorConfig res = new ExtractorConfig(this);
|
||||
res.allowMinified = allowMinified;
|
||||
return res;
|
||||
}
|
||||
|
||||
public boolean hasFileType() {
|
||||
return fileType != null;
|
||||
}
|
||||
@@ -467,6 +482,8 @@ public class ExtractorConfig {
|
||||
+ e4x
|
||||
+ ", tolerateParseErrors="
|
||||
+ tolerateParseErrors
|
||||
+ ", allowMinified="
|
||||
+ allowMinified
|
||||
+ ", htmlHandling="
|
||||
+ htmlHandling
|
||||
+ ", fileType="
|
||||
|
||||
@@ -549,10 +549,15 @@ public class FileExtractor {
|
||||
new TextualExtractor(
|
||||
trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
|
||||
ParseResultInfo loc = extractor.extract(textualExtractor);
|
||||
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
|
||||
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
|
||||
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
|
||||
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
|
||||
if (loc.getSkipReason() != null) {
|
||||
System.err.println("Skipping file " + extractedFile + ": " + loc.getSkipReason());
|
||||
System.err.flush();
|
||||
} else {
|
||||
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
|
||||
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
|
||||
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
|
||||
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
|
||||
}
|
||||
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
|
||||
metrics.writeTimingsToTrap(trapwriter);
|
||||
successful = true;
|
||||
|
||||
@@ -10,6 +10,7 @@ import java.util.List;
|
||||
public class ParseResultInfo {
|
||||
private int linesOfCode, linesOfComments;
|
||||
private List<ParseError> parseErrors;
|
||||
private String skipReason;
|
||||
|
||||
public ParseResultInfo(int linesOfCode, int linesOfComments, List<ParseError> parseErrors) {
|
||||
this.linesOfCode = linesOfCode;
|
||||
@@ -17,6 +18,19 @@ public class ParseResultInfo {
|
||||
this.parseErrors = new ArrayList<>(parseErrors);
|
||||
}
|
||||
|
||||
/**
 * Creates an empty result: zero lines of code and comments, no parse errors, and no skip reason.
 */
private ParseResultInfo() {
  linesOfCode = 0;
  linesOfComments = 0;
  parseErrors = new ArrayList<>();
  skipReason = null;
}
|
||||
|
||||
public static final ParseResultInfo skipped(String reason) {
|
||||
ParseResultInfo info = new ParseResultInfo();
|
||||
info.skipReason = reason;
|
||||
return info;
|
||||
}
|
||||
|
||||
public void add(ParseResultInfo that) {
|
||||
this.linesOfCode += that.linesOfCode;
|
||||
this.linesOfComments += that.linesOfComments;
|
||||
@@ -41,4 +55,11 @@ public class ParseResultInfo {
|
||||
public List<ParseError> getParseErrors() {
|
||||
return parseErrors;
|
||||
}
|
||||
|
||||
/**
|
||||
* If extraction of this file was skipped, gets the reason for skipping it.
|
||||
*/
|
||||
public String getSkipReason() {
|
||||
return skipReason;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,10 +38,34 @@ public class ScriptExtractor implements IExtractor {
|
||||
return extension.equals(".cjs") || (extension.equals(".js") && "commonjs".equals(packageType));
|
||||
}
|
||||
|
||||
private boolean isMinified(String source) {
|
||||
// If the average line length is over 200 characters, consider the file minified.
|
||||
int numberOfLineBreaks = 0;
|
||||
for (int i = 0; i < source.length(); i++) {
|
||||
char c = source.charAt(i);
|
||||
if (c == '\n') {
|
||||
numberOfLineBreaks++;
|
||||
} else if (c == '\r') {
|
||||
numberOfLineBreaks++;
|
||||
if (i + 1 < source.length() && source.charAt(i + 1) == '\n') {
|
||||
i++; // skip the next \n in case of \r\n
|
||||
}
|
||||
}
|
||||
}
|
||||
int averageLineLength =
|
||||
numberOfLineBreaks == 0 ? source.length() : source.length() / numberOfLineBreaks;
|
||||
return averageLineLength > 200;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResultInfo extract(TextualExtractor textualExtractor) {
|
||||
LocationManager locationManager = textualExtractor.getLocationManager();
|
||||
String source = textualExtractor.getSource();
|
||||
|
||||
if (!config.isAllowMinified() && isMinified(source)) {
|
||||
return ParseResultInfo.skipped("File appears to be minified.");
|
||||
}
|
||||
|
||||
String shebangLine = null, shebangLineTerm = null;
|
||||
|
||||
if (source.startsWith("#!")) {
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
category: majorAnalysis
|
||||
---
|
||||
* JavaScript files with an average line length greater than 200 characters are now considered minified and will no longer be analyzed.
|
||||
For use-cases where minified files should be analyzed, the original behavior can be restored by setting the environment variable
|
||||
`CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES=true`.
|
||||
@@ -132,3 +132,103 @@
|
||||
/^(([0-9])|([0-1][0-9])|([2][0-3])):?([0-5][0-9])$/g;
|
||||
/^[\w-\.]+@([\w-]+\.)+[\w-]{2,3}$/g;
|
||||
/(((0[1-9]|[12][0-9]|3[01])([/])(0[13578]|10|12)([/])(\d{4}))|(([0][1-9]|[12][0-9]|30)([/])(0[469]|11)([/])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([/])(02)([/])(\d{4}))|((29)(\.|-|\/)(02)([/])([02468][048]00))|((29)([/])(02)([/])([13579][26]00))|((29)([/])(02)([/])([0-9][0-9][0][48]))|((29)([/])(02)([/])([0-9][0-9][2468][048]))|((29)([/])(02)([/])([0-9][0-9][13579][26])))/g;
|
||||
//
|
||||
// Add some empty lines to lower the average line length so the file is not classified as minified.
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
| jquery-datatables.js:0:0:0:0 | jquery-datatables.js | library |
|
||||
| jquery-jstree.js:0:0:0:0 | jquery-jstree.js | library |
|
||||
| jquery-snippet.js:0:0:0:0 | jquery-snippet.js | library |
|
||||
| json-like.js:0:0:0:0 | json-like.js | generated |
|
||||
| jsx-old.js:0:0:0:0 | jsx-old.js | generated |
|
||||
| jsx.js:0:0:0:0 | jsx.js | generated |
|
||||
| multi-part-bundle.html:0:0:0:0 | multi-part-bundle.html | generated |
|
||||
|
||||
Reference in New Issue
Block a user