From 9d77619afc9364957dc69a1480a97b30f7e5bb2e Mon Sep 17 00:00:00 2001
From: Max Schaefer
Date: Wed, 27 Feb 2019 12:02:01 +0000
Subject: [PATCH] JavaScript: Make file types customisable in AutoBuild.
Every once in a while we encounter projects using some custom file extension for files that we could in principle extract, but since the extractor doesn't know about the extension, the files are skipped.
To handle this, the legacy extractor has a `--file-type` option that one can use to specify a file type to use for all files in that particular extraction. So far, `AutoBuild` has nothing of the sort.
This PR proposes to introduce an environment variable `LGTM_INDEX_FILETYPES` to allow a similar customisation. In the fullness of time, this variable would be set through `lgtm.yml` in the usual way, but for now it is undocumented and for internal use only.
Specifically, `LGTM_INDEX_FILETYPES` is a newline-separated list of ".extension:filetype" pairs, specifying that files with the given `.extension` should be extracted as type `filetype`, where
`filetype` is one of `js`, `html`, `json`, `typescript` or `yaml`.
For example, `.jsm:js` causes all `.jsm` files to be extracted as JavaScript.
This can also be used to override default file types: for example, by specifying `.js:typescript` all JavaScript files will be extracted as TypeScript.
---
.../com/semmle/js/extractor/AutoBuild.java | 96 +++++++++++++++----
.../js/extractor/test/AutoBuildTests.java | 61 ++++++++++--
2 files changed, 135 insertions(+), 22 deletions(-)
diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
index 762df7fe8e5..71bb515ec54 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
@@ -16,8 +16,10 @@ import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -69,6 +71,8 @@ import com.semmle.util.trap.TrapWriter;
* LGTM_INDEX_FILTERS: a newline-separated list of {@link ProjectLayout}-style
* patterns that can be used to refine the list of files to include and exclude
* LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript
+ * LGTM_INDEX_FILETYPES: a newline-separated list of ".extension:filetype" pairs
+ * specifying which {@link FileType} to use for the given extension
* LGTM_INDEX_THREADS: the maximum number of files to extract in parallel
* LGTM_TRAP_CACHE: the path of a directory to use for trap caching
* LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to
@@ -160,6 +164,12 @@ import com.semmle.util.trap.TrapWriter;
*
*
*
+ * The environment variable LGTM_INDEX_FILETYPES may be set to a newline-separated
+ * list of file type specifications of the form .extension:filetype, causing all
+ * files whose name ends in .extension to also be included by default.
+ *
+ *
+ *
* The default exclusion patterns cause the following files to be excluded:
*
*
@@ -174,6 +184,11 @@ import com.semmle.util.trap.TrapWriter;
*
*
*
+ * The file type with which a file is extracted can be customised via the LGTM_INDEX_FILETYPES
+ * environment variable explained above.
+ *
+ *
+ *
* Note that all these customisations only apply to LGTM_SRC. Extraction of
* externs is not customisable.
*
@@ -193,6 +208,7 @@ import com.semmle.util.trap.TrapWriter;
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
+ private final Map fileTypes = new LinkedHashMap<>();
private final Set includes = new LinkedHashSet<>();
private final Set excludes = new LinkedHashSet<>();
private ProjectLayout filters;
@@ -208,6 +224,7 @@ public class AutoBuild {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
+ setupFileTypes();
setupMatchers();
}
@@ -277,6 +294,25 @@ public class AutoBuild {
return trapCache;
}
+ /** Fills {@link #fileTypes} from LGTM_INDEX_FILETYPES; throws {@link UserError} on a malformed entry or unknown file type. */
+ private void setupFileTypes() {
+ for (String spec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
+ spec = spec.trim();
+ if (spec.isEmpty())
+ continue; // blank lines between specifications are allowed
+ String[] fields = spec.split(":");
+ if (fields.length != 2) // e.g. ".jsm=js" or ".jsm:"; report it instead of dropping it silently
+ throw new UserError("Invalid file type specification '" + spec + "'.");
+ String fileType = fields[1].trim();
+ try {
+ fileTypes.put(fields[0].trim(), FileType.valueOf(StringUtil.uc(fileType))); // key is the extension as written
+ } catch (IllegalArgumentException e) {
+ Exceptions.ignore(e, "We construct a better error message.");
+ throw new UserError("Invalid file type '" + fileType + "'.");
+ }
+ }
+ }
+
/**
* Set up include and exclude matchers based on environment variables.
*/
@@ -350,6 +386,10 @@ public class AutoBuild {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");
+ // include any explicitly specified extensions
+ for (String extension : fileTypes.keySet())
+ patterns.add("**/*" + extension);
+
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
@@ -483,26 +523,46 @@ public class AutoBuild {
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
+ // extractor for every file whose extension has no LGTM_INDEX_FILETYPES override
+ FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
+
+ // one extractor per configured extension, each built from a config with that extension's file type set
+ Map customExtractors = new LinkedHashMap<>();
+ for (Map.Entry spec : fileTypes.entrySet()) {
+ String extension = spec.getKey();
+ String fileType = spec.getValue().name();
+ ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
+ customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
+ }
+
+ Set filesToExtract = new LinkedHashSet<>();
+ List tsconfigFiles = new ArrayList<>();
+ findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
+
+ // extract TypeScript projects and files; this pass is always given the default extractor
+ Set extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
+
+ // extract remaining files, i.e. those not already in the set returned by extractTypeScript
+ for (Path f : filesToExtract) {
+ if (extractedFiles.add(f)) { // add() is false for files already recorded in extractedFiles
+ FileExtractor extractor = defaultExtractor;
+ if (!fileTypes.isEmpty()) { // no extension lookup needed when nothing is customised
+ String extension = FileUtil.extension(f);
+ if (customExtractors.containsKey(extension))
+ extractor = customExtractors.get(extension);
+ }
+ extract(extractor, f, null);
+ }
+ }
+ }
+
+ private ExtractorConfig mkExtractorConfig() { // builds a new base config on each call: source type, TypeScript mode, optional default encoding
ExtractorConfig config = new ExtractorConfig(true);
config = config.withSourceType(getSourceType());
config = config.withTypeScriptMode(typeScriptMode);
if (defaultEncoding != null)
config = config.withDefaultEncoding(defaultEncoding);
- FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
-
- Set filesToExtract = new LinkedHashSet<>();
- List tsconfigFiles = new ArrayList<>();
- findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
-
- // extract TypeScript projects and files
- Set extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
-
- // extract remaining files
- for (Path f : filesToExtract) {
- if (extractedFiles.add(f)) {
- extract(extractor, f, null);
- }
- }
+ return config; // extractSource specialises this further with withFileType for custom extensions
}
private Set extractTypeScript(FileExtractor extractor, Set files, List tsconfig) {
@@ -591,7 +651,11 @@ public class AutoBuild {
return FileVisitResult.SKIP_SUBTREE;
// extract files that are supported and pass the include/exclude patterns
- if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
+ boolean supported = extractor.supports(file.toFile());
+ if (!supported && !fileTypes.isEmpty()) {
+ supported = fileTypes.containsKey(FileUtil.extension(file));
+ }
+ if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}
diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
index c2c156ee003..77b3e511960 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
@@ -23,6 +23,7 @@ import org.junit.Test;
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
+import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public class AutoBuildTests {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
- * extracted, its path is added to {@link #expected}.
+ * extracted, its path is added to {@link #expected}. If non-null, parameter
+ * {@code fileType} indicates the file type with which we expect the file to be extracted.
+ */
+ private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
+ Path created = addFile(root, components);
+ String suffix = fileType == null ? "" : ":" + fileType; // "path" or "path:TYPE", as compared against actual
+ if (extracted)
+ expected.add(created + suffix);
+ return created;
+ }
+
+ /**
+ * Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
*/
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
+ return addFile(extracted, null, root, components); // null: expect extraction without an explicit file type
+ }
+
+ /**
+ * Create a file at the specified path under {@code root} and return it.
+ */
+ private Path addFile(Path root, String... components) throws IOException {
Path p = Paths.get(root.toString(), components);
Files.createDirectories(p.getParent());
- Path f = Files.createFile(p);
- if (extracted)
- expected.add(f.toString());
- return f;
+ return Files.createFile(p); // creates an empty file; throws if one already exists at p
}
/**
@@ -96,7 +113,10 @@ public class AutoBuildTests {
new AutoBuild() {
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
- actual.add(file.toString());
+ String extracted = file.toString();
+ if (extractor.getConfig().hasFileType()) // presumably true only for extractors configured via withFileType -- confirm
+ extracted += ":" + extractor.getFileType(file.toFile()); // "path:TYPE", the shape addFile records in expected
+ actual.add(extracted);
}
@Override
@@ -453,4 +473,33 @@ public class AutoBuildTests {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}
+
+ @Test
+ public void customExtensions() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html"); // two newline-separated specifications
+ addFile(true, FileType.JS, LGTM_SRC, "tst.jsm"); // custom extension, expected to be extracted as JS
+ addFile(false, LGTM_SRC, "tstjsm"); // name ends in "jsm" but not ".jsm", so it must not be picked up
+ addFile(true, FileType.HTML, LGTM_SRC, "tst.soy"); // custom extension, expected to be extracted as HTML
+ addFile(true, LGTM_SRC, "tst.html"); // unconfigured extension: still extracted, no explicit file type
+ addFile(true, LGTM_SRC, "tst.js"); // unconfigured extension: still extracted, no explicit file type
+ runTest();
+ }
+
+ @Test
+ public void overrideExtension() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript"); // remap an extension that is supported by default
+ addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js"); // .js is now expected to be extracted as TypeScript
+ runTest();
+ }
+
+ @Test
+ public void invalidFileType() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript"); // "javascript" is not an accepted file type name ("js" is)
+ try {
+ runTest();
+ Assert.fail("expected UserError"); // reached only if runTest() returned without throwing
+ } catch (UserError ue) {
+ Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage()); // message must quote the type as written
+ }
+ }
}