Merge pull request #19680 from github/tausbn/javascript-exclude-obviously-generated-files

JavaScript: Don't extract obviously generated files
This commit is contained in:
Taus
2025-06-20 15:52:39 +02:00
committed by GitHub
5 changed files with 104 additions and 4 deletions

View File

@@ -39,6 +39,8 @@ import java.util.stream.Stream;
import com.google.gson.Gson;
import com.google.gson.JsonParseException;
import com.semmle.js.extractor.tsconfig.TsConfigJson;
import com.semmle.js.extractor.tsconfig.CompilerOptions;
import com.semmle.js.dependencies.AsyncFetcher;
import com.semmle.js.dependencies.DependencyResolver;
import com.semmle.js.dependencies.packument.PackageJson;
@@ -745,6 +747,26 @@ public class AutoBuild {
.filter(p -> !isFileTooLarge(p))
.sorted(PATH_ORDERING)
.collect(Collectors.toCollection(() -> new LinkedHashSet<>()));
// gather all output directories specified in tsconfig.json files
final List<Path> outDirs = new ArrayList<>();
for (Path cfg : tsconfigFiles) {
try {
String txt = new WholeIO().read(cfg);
TsConfigJson root = new Gson().fromJson(txt, TsConfigJson.class);
if (root != null && root.getCompilerOptions() != null) {
if (root.getCompilerOptions().getOutDir() == null) {
// no outDir specified, so skip this tsconfig.json
continue;
}
Path odir = cfg.getParent().resolve(root.getCompilerOptions().getOutDir()).toAbsolutePath().normalize();
outDirs.add(odir);
}
} catch (Exception e) {
// ignore malformed tsconfig or missing fields
}
}
// exclude files in output directories as configured in tsconfig.json
filesToExtract.removeIf(f -> outDirs.stream().anyMatch(od -> f.startsWith(od)));
DependencyInstallationResult dependencyInstallationResult = DependencyInstallationResult.empty;
if (!tsconfigFiles.isEmpty()) {
@@ -796,9 +818,19 @@ public class AutoBuild {
*/
private boolean isFileDerivedFromTypeScriptFile(Path path, Set<Path> extractedFiles) {
String name = path.getFileName().toString();
if (!name.endsWith(".js"))
// only skip JS variants when a corresponding TS/TSX file was already extracted
if (!(name.endsWith(".js")
|| name.endsWith(".cjs")
|| name.endsWith(".mjs")
|| name.endsWith(".jsx")
|| name.endsWith(".cjsx")
|| name.endsWith(".mjsx"))) {
return false;
String stem = name.substring(0, name.length() - ".js".length());
}
// strip off extension
int dot = name.lastIndexOf('.');
String stem = dot != -1 ? name.substring(0, dot) : name;
// if a TS/TSX file with same base name was extracted, skip this file
for (String ext : FileType.TYPESCRIPT.getExtensions()) {
if (extractedFiles.contains(path.getParent().resolve(stem + ext))) {
return true;
@@ -1154,7 +1186,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
}
// extract TypeScript projects from 'tsconfig.json'
if (typeScriptMode == TypeScriptMode.FULL
if (typeScriptMode != TypeScriptMode.NONE
&& treatAsTSConfig(file.getFileName().toString())
&& !excludes.contains(file)
&& isFileIncluded(file)) {

View File

@@ -0,0 +1,13 @@
package com.semmle.js.extractor.tsconfig;
/**
 * Holder for the {@code outDir} setting found under {@code compilerOptions} in a
 * {@code tsconfig.json} file.
 *
 * <p>NOTE(review): if instances are populated reflectively by a JSON binder, the field name
 * {@code outDir} is presumably what gets matched against the JSON key; confirm before renaming.
 */
public class CompilerOptions {
  /** The configured output directory, or {@code null} when none has been assigned. */
  private String outDir;

  /**
   * Stores the output directory.
   *
   * @param outDir the directory string to keep; may be {@code null}
   */
  public void setOutDir(String outDir) {
    this.outDir = outDir;
  }

  /**
   * Returns the stored output directory.
   *
   * @return the value last assigned, or {@code null} if nothing has been assigned
   */
  public String getOutDir() {
    return this.outDir;
  }
}

View File

@@ -0,0 +1,13 @@
package com.semmle.js.extractor.tsconfig;
/**
 * Holder for the top-level {@code compilerOptions} object of a {@code tsconfig.json} file.
 *
 * <p>NOTE(review): if instances are populated reflectively by a JSON binder, the field name
 * {@code compilerOptions} is presumably what gets matched against the JSON key; confirm before
 * renaming.
 */
public class TsConfigJson {
  /** The compiler options, or {@code null} when none have been assigned. */
  private CompilerOptions compilerOptions;

  /**
   * Stores the compiler options.
   *
   * @param compilerOptions the options object to keep; may be {@code null}
   */
  public void setCompilerOptions(CompilerOptions compilerOptions) {
    this.compilerOptions = compilerOptions;
  }

  /**
   * Returns the stored compiler options.
   *
   * @return the value last assigned, or {@code null} if nothing has been assigned
   */
  public CompilerOptions getCompilerOptions() {
    return this.compilerOptions;
  }
}

View File

@@ -135,6 +135,7 @@ public class AutoBuildTests {
FileExtractors extractors) {
for (Path f : files) {
actual.add(f.toString());
extractedFiles.add(f);
}
}
@@ -175,7 +176,7 @@ public class AutoBuildTests {
@Test
public void basicTest() throws IOException {
addFile(true, LGTM_SRC, "tst.js");
addFile(false, LGTM_SRC, "tst.js");
addFile(true, LGTM_SRC, "tst.ts");
addFile(true, LGTM_SRC, "tst.html");
addFile(true, LGTM_SRC, "tst.xsjs");
@@ -203,6 +204,43 @@ public class AutoBuildTests {
runTest();
}
@Test
public void skipJsFilesDerivedFromTypeScriptFiles() throws IOException {
  // With TypeScript indexing set to "basic", JavaScript-family files (.js, .cjs, .mjs, .jsx,
  // .cjsx, .mjsx) sharing a base name with a TypeScript source are expected to be skipped.
  envVars.put("LGTM_INDEX_TYPESCRIPT", "basic");

  // TypeScript sources: expected to be extracted.
  String[] typeScriptSources = {"foo.ts", "bar.tsx"};
  for (String source : typeScriptSources) {
    addFile(true, LGTM_SRC, source);
  }

  // JavaScript variants with the same stem as a TypeScript source: expected to be skipped.
  String[] derivedVariants = {"foo.js", "bar.jsx", "foo.cjs", "foo.mjs", "bar.cjsx", "bar.mjsx"};
  for (String variant : derivedVariants) {
    addFile(false, LGTM_SRC, variant);
  }

  // A JavaScript file with no TypeScript counterpart: expected to be extracted.
  addFile(true, LGTM_SRC, "normal.js");

  runTest();
}
@Test
public void skipFilesInTsconfigOutDir() throws IOException {
  // Files located under the outDir named in tsconfig.json are expected to be excluded.
  envVars.put("LGTM_INDEX_TYPESCRIPT", "basic");

  // Register tsconfig.json, then write contents that declare "dist" as the outDir.
  addFile(true, LGTM_SRC, "tsconfig.json");
  String tsconfigContents = "{\"compilerOptions\":{\"outDir\":\"dist\"}}";
  Path tsconfigPath = Paths.get(LGTM_SRC.toString(), "tsconfig.json");
  byte[] tsconfigBytes = tsconfigContents.getBytes(StandardCharsets.UTF_8);
  Files.write(tsconfigPath, tsconfigBytes);

  // Files outside the outDir: expected to be extracted.
  addFile(true, LGTM_SRC, "src", "app.ts");
  addFile(true, LGTM_SRC, "main.js");

  // Files below dist/ (directly and in a subdirectory): expected to be skipped.
  String[][] generatedFiles = {{"dist", "generated.js"}, {"dist", "sub", "x.js"}};
  for (String[] components : generatedFiles) {
    addFile(false, LGTM_SRC, components);
  }

  runTest();
}
@Test
public void includeFile() throws IOException {
envVars.put("LGTM_INDEX_INCLUDE", "tst.js");