Merge pull request #20940 from asgerf/js/detect-minified-files

JS: Skip minified file if avg line length > 200
This commit is contained in:
Asger F
2026-01-19 14:31:09 +01:00
committed by GitHub
9 changed files with 190 additions and 7 deletions

View File

@@ -408,8 +408,10 @@ public class AutoBuild {
for (String extension : fileTypes.keySet()) patterns.add("**/*" + extension);
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
if (!EnvironmentVariables.allowMinifiedFiles()) {
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
}
// exclude `node_modules` and `bower_components`
patterns.add("-**/node_modules");
@@ -1074,6 +1076,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
config = config.withSourceType(getSourceType());
config = config.withVirtualSourceRoot(virtualSourceRoot);
if (defaultEncoding != null) config = config.withDefaultEncoding(defaultEncoding);
config = config.withAllowMinified(EnvironmentVariables.allowMinifiedFiles());
return config;
}

View File

@@ -101,4 +101,12 @@ public class EnvironmentVariables {
/**
 * Reports whether the environment variable named by
 * {@code CODEQL_EXTRACTOR_ACTIONS_WIP_DATABASE_ENV_VAR} yields a value.
 *
 * @return {@code true} if the lookup returns a non-null value
 */
public static boolean isActionsExtractor() {
  String wipDatabase =
      Env.systemEnv().getNonEmpty(CODEQL_EXTRACTOR_ACTIONS_WIP_DATABASE_ENV_VAR);
  return wipDatabase != null;
}
/**
 * Reports whether minified files should be extracted, as controlled by the
 * {@code CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES} environment variable.
 *
 * @return {@code true} only if the variable yields a value that {@link Boolean#parseBoolean}
 *     accepts (i.e. "true", ignoring case); {@code false} otherwise, including when the lookup
 *     returns {@code null}
 */
public static boolean allowMinifiedFiles() {
  String setting =
      Env.systemEnv().getNonEmpty("CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES");
  // A missing value means the default applies: minified files are not allowed.
  return setting != null && Boolean.parseBoolean(setting);
}
}

View File

@@ -205,6 +205,9 @@ public class ExtractorConfig {
/** Should parse errors be reported as violations instead of aborting extraction? */
private boolean tolerateParseErrors;
/** Should minified files be allowed? */
private boolean allowMinified;
/** How should HTML files be extracted? */
private HtmlPopulator.Config htmlHandling;
@@ -236,6 +239,7 @@ public class ExtractorConfig {
this.sourceType = SourceType.AUTO;
this.htmlHandling = HtmlPopulator.Config.ELEMENTS;
this.tolerateParseErrors = true;
this.allowMinified = false;
if (experimental) {
this.mozExtensions = true;
this.jscript = true;
@@ -258,6 +262,7 @@ public class ExtractorConfig {
this.v8Extensions = that.v8Extensions;
this.e4x = that.e4x;
this.tolerateParseErrors = that.tolerateParseErrors;
this.allowMinified = that.allowMinified;
this.fileType = that.fileType;
this.sourceType = that.sourceType;
this.htmlHandling = that.htmlHandling;
@@ -357,6 +362,16 @@ public class ExtractorConfig {
return res;
}
/** Returns the value of the {@code allowMinified} setting of this configuration. */
public boolean isAllowMinified() {
  return this.allowMinified;
}
/**
 * Creates a copy of this configuration with the {@code allowMinified} setting replaced.
 * This configuration itself is not modified by this method.
 *
 * @param allowMinified the value the copy should carry
 * @return the newly created configuration
 */
public ExtractorConfig withAllowMinified(boolean allowMinified) {
  ExtractorConfig copy = new ExtractorConfig(this);
  copy.allowMinified = allowMinified;
  return copy;
}
/** Returns {@code true} if the {@code fileType} field of this configuration is non-null. */
public boolean hasFileType() {
  return this.fileType != null;
}
@@ -467,6 +482,8 @@ public class ExtractorConfig {
+ e4x
+ ", tolerateParseErrors="
+ tolerateParseErrors
+ ", allowMinified="
+ allowMinified
+ ", htmlHandling="
+ htmlHandling
+ ", fileType="

View File

@@ -549,10 +549,15 @@ public class FileExtractor {
new TextualExtractor(
trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
ParseResultInfo loc = extractor.extract(textualExtractor);
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
if (loc.getSkipReason() != null) {
System.err.println("Skipping file " + extractedFile + ": " + loc.getSkipReason());
System.err.flush();
} else {
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
}
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
metrics.writeTimingsToTrap(trapwriter);
successful = true;

View File

@@ -10,6 +10,7 @@ import java.util.List;
public class ParseResultInfo {
private int linesOfCode, linesOfComments;
private List<ParseError> parseErrors;
private String skipReason;
public ParseResultInfo(int linesOfCode, int linesOfComments, List<ParseError> parseErrors) {
this.linesOfCode = linesOfCode;
@@ -17,6 +18,19 @@ public class ParseResultInfo {
this.parseErrors = new ArrayList<>(parseErrors);
}
/**
 * Creates an empty result: zero lines of code and comments, no parse errors, and no skip
 * reason. Used by {@link #skipped(String)}.
 */
private ParseResultInfo() {
  this.linesOfCode = this.linesOfComments = 0;
  this.skipReason = null;
  this.parseErrors = new ArrayList<>();
}
/**
 * Creates a result describing a file whose extraction was skipped.
 *
 * @param reason the text later returned by {@link #getSkipReason()}
 * @return an otherwise empty result (zero line counts, no parse errors) carrying the reason
 */
public static final ParseResultInfo skipped(String reason) {
  ParseResultInfo result = new ParseResultInfo();
  result.skipReason = reason;
  return result;
}
public void add(ParseResultInfo that) {
this.linesOfCode += that.linesOfCode;
this.linesOfComments += that.linesOfComments;
@@ -41,4 +55,11 @@ public class ParseResultInfo {
/** Returns the list of parse errors held by this result (the internal list, not a copy). */
public List<ParseError> getParseErrors() {
  return this.parseErrors;
}
/**
 * Gets the reason extraction of this file was skipped.
 *
 * @return the reason passed to {@link #skipped(String)}, or {@code null} if no skip reason
 *     was recorded
 */
public String getSkipReason() {
  return this.skipReason;
}
}

View File

@@ -38,10 +38,34 @@ public class ScriptExtractor implements IExtractor {
return extension.equals(".cjs") || (extension.equals(".js") && "commonjs".equals(packageType));
}
/**
 * Heuristically decides whether a source file is minified: a file is considered minified if
 * its average line length exceeds 200 characters.
 *
 * <p>Lines are delimited by {@code \n}, {@code \r}, or {@code \r\n} (counted as a single
 * break). A final line without a terminator is counted as a line too, so that short files
 * lacking a trailing newline do not get an inflated average. The average is computed with
 * integer division over the full source length, which includes the terminator characters.
 *
 * @param source the complete text of the file
 * @return {@code true} if the average line length is greater than 200; always {@code false}
 *     for an empty source
 */
private boolean isMinified(String source) {
  final int maxAverageLineLength = 200;
  final int length = source.length();
  if (length == 0) {
    return false;
  }
  int numberOfLines = 0;
  boolean endsWithLineBreak = false;
  for (int i = 0; i < length; i++) {
    char c = source.charAt(i);
    endsWithLineBreak = c == '\n' || c == '\r';
    if (endsWithLineBreak) {
      numberOfLines++;
      if (c == '\r' && i + 1 < length && source.charAt(i + 1) == '\n') {
        i++; // treat \r\n as a single line break
      }
    }
  }
  if (!endsWithLineBreak) {
    numberOfLines++; // the last line has no terminator but is still a line
  }
  return length / numberOfLines > maxAverageLineLength;
}
@Override
public ParseResultInfo extract(TextualExtractor textualExtractor) {
LocationManager locationManager = textualExtractor.getLocationManager();
String source = textualExtractor.getSource();
if (!config.isAllowMinified() && isMinified(source)) {
return ParseResultInfo.skipped("File appears to be minified.");
}
String shebangLine = null, shebangLineTerm = null;
if (source.startsWith("#!")) {

View File

@@ -0,0 +1,6 @@
---
category: majorAnalysis
---
* JavaScript files with an average line length greater than 200 characters are now considered minified and will no longer be analyzed.
For use cases where minified files should be analyzed, the original behavior can be restored by setting the environment variable
`CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES=true`.

View File

@@ -132,3 +132,103 @@
/^(([0-9])|([0-1][0-9])|([2][0-3])):?([0-5][0-9])$/g;
/^[\w-\.]+@([\w-]+\.)+[\w-]{2,3}$/g;
/(((0[1-9]|[12][0-9]|3[01])([/])(0[13578]|10|12)([/])(\d{4}))|(([0][1-9]|[12][0-9]|30)([/])(0[469]|11)([/])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([/])(02)([/])(\d{4}))|((29)(\.|-|\/)(02)([/])([02468][048]00))|((29)([/])(02)([/])([13579][26]00))|((29)([/])(02)([/])([0-9][0-9][0][48]))|((29)([/])(02)([/])([0-9][0-9][2468][048]))|((29)([/])(02)([/])([0-9][0-9][13579][26])))/g;
//
// Add some empty lines to lower the average line length so the file is not classified as minified.
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//

View File

@@ -17,7 +17,6 @@
| jquery-datatables.js:0:0:0:0 | jquery-datatables.js | library |
| jquery-jstree.js:0:0:0:0 | jquery-jstree.js | library |
| jquery-snippet.js:0:0:0:0 | jquery-snippet.js | library |
| json-like.js:0:0:0:0 | json-like.js | generated |
| jsx-old.js:0:0:0:0 | jsx-old.js | generated |
| jsx.js:0:0:0:0 | jsx.js | generated |
| multi-part-bundle.html:0:0:0:0 | multi-part-bundle.html | generated |