diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
index 762df7fe8e5..71bb515ec54 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
@@ -16,8 +16,10 @@ import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -69,6 +71,8 @@ import com.semmle.util.trap.TrapWriter;
*
LGTM_INDEX_FILTERS: a newline-separated list of {@link ProjectLayout}-style
* patterns that can be used to refine the list of files to include and exclude
* LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript
+ * LGTM_INDEX_FILETYPES: a newline-separated list of ".extension:filetype" pairs
+ * specifying which {@link FileType} to use for the given extension
* LGTM_INDEX_THREADS: the maximum number of files to extract in parallel
* LGTM_TRAP_CACHE: the path of a directory to use for trap caching
* LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to
@@ -160,6 +164,12 @@ import com.semmle.util.trap.TrapWriter;
*
*
*
+ * The environment variable LGTM_INDEX_FILETYPES may be set to a newline-separated
+ * list of file type specifications of the form .extension:filetype, causing all
+ * files whose name ends in .extension to also be included by default.
+ *
+ *
+ *
* The default exclusion patterns cause the following files to be excluded:
*
*
@@ -174,6 +184,11 @@ import com.semmle.util.trap.TrapWriter;
*
*
*
+ * The file type with which a file is extracted can be customised via the LGTM_INDEX_FILETYPES
+ * environment variable explained above.
+ *
+ *
+ *
* Note that all these customisations only apply to LGTM_SRC. Extraction of
* externs is not customisable.
*
@@ -193,6 +208,7 @@ import com.semmle.util.trap.TrapWriter;
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
+ private final Map fileTypes = new LinkedHashMap<>();
private final Set includes = new LinkedHashSet<>();
private final Set excludes = new LinkedHashSet<>();
private ProjectLayout filters;
@@ -208,6 +224,7 @@ public class AutoBuild {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
+ setupFileTypes();
setupMatchers();
}
@@ -277,6 +294,25 @@ public class AutoBuild {
return trapCache;
}
+ private void setupFileTypes() {
+ for (String spec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
+ spec = spec.trim();
+ if (spec.isEmpty())
+ continue;
+ String[] fields = spec.split(":");
+ if (fields.length != 2)
+ continue;
+ String extension = fields[0].trim();
+ String fileType = fields[1].trim();
+ try {
+ fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType)));
+ } catch (IllegalArgumentException e) {
+ Exceptions.ignore(e, "We construct a better error message.");
+ throw new UserError("Invalid file type '" + fileType + "'.");
+ }
+ }
+ }
+
/**
* Set up include and exclude matchers based on environment variables.
*/
@@ -350,6 +386,10 @@ public class AutoBuild {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");
+ // include any explicitly specified extensions
+ for (String extension : fileTypes.keySet())
+ patterns.add("**/*" + extension);
+
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
@@ -483,26 +523,46 @@ public class AutoBuild {
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
+ // default extractor
+ FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
+
+ // custom extractors, one per explicitly specified file extension
+ Map customExtractors = new LinkedHashMap<>();
+ for (Map.Entry spec : fileTypes.entrySet()) {
+ String extension = spec.getKey();
+ String fileType = spec.getValue().name();
+ ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
+ customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
+ }
+
+ Set filesToExtract = new LinkedHashSet<>();
+ List tsconfigFiles = new ArrayList<>();
+ findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
+
+ // extract TypeScript projects and files
+ Set extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
+
+ // extract remaining files
+ for (Path f : filesToExtract) {
+ if (extractedFiles.add(f)) {
+ FileExtractor extractor = defaultExtractor;
+ if (!fileTypes.isEmpty()) {
+ String extension = FileUtil.extension(f);
+ if (customExtractors.containsKey(extension))
+ extractor = customExtractors.get(extension);
+ }
+ extract(extractor, f, null);
+ }
+ }
+ }
+
+ private ExtractorConfig mkExtractorConfig() {
ExtractorConfig config = new ExtractorConfig(true);
config = config.withSourceType(getSourceType());
config = config.withTypeScriptMode(typeScriptMode);
if (defaultEncoding != null)
config = config.withDefaultEncoding(defaultEncoding);
- FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
-
- Set filesToExtract = new LinkedHashSet<>();
- List tsconfigFiles = new ArrayList<>();
- findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
-
- // extract TypeScript projects and files
- Set extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
-
- // extract remaining files
- for (Path f : filesToExtract) {
- if (extractedFiles.add(f)) {
- extract(extractor, f, null);
- }
- }
+ return config;
}
private Set extractTypeScript(FileExtractor extractor, Set files, List tsconfig) {
@@ -591,7 +651,11 @@ public class AutoBuild {
return FileVisitResult.SKIP_SUBTREE;
// extract files that are supported and pass the include/exclude patterns
- if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
+ boolean supported = extractor.supports(file.toFile());
+ if (!supported && !fileTypes.isEmpty()) {
+ supported = fileTypes.containsKey(FileUtil.extension(file));
+ }
+ if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}
diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
index c2c156ee003..77b3e511960 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
@@ -23,6 +23,7 @@ import org.junit.Test;
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
+import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public class AutoBuildTests {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
- * extracted, its path is added to {@link #expected}.
+ * extracted, its path is added to {@link #expected}. If non-null, parameter
+ * {@code fileType} indicates the file type with which we expect the file to be extracted.
+ */
+ private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
+ Path f = addFile(root, components);
+ if (extracted) {
+ expected.add(f + (fileType == null ? "" : ":" + fileType.toString()));
+ }
+ return f;
+ }
+
+ /**
+ * Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
*/
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
+ return addFile(extracted, null, root, components);
+ }
+
+ /**
+ * Create a file at the specified path under {@code root} and return it.
+ */
+ private Path addFile(Path root, String... components) throws IOException {
Path p = Paths.get(root.toString(), components);
Files.createDirectories(p.getParent());
- Path f = Files.createFile(p);
- if (extracted)
- expected.add(f.toString());
- return f;
+ return Files.createFile(p);
}
/**
@@ -96,7 +113,10 @@ public class AutoBuildTests {
new AutoBuild() {
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
- actual.add(file.toString());
+ String extracted = file.toString();
+ if (extractor.getConfig().hasFileType())
+ extracted += ":" + extractor.getFileType(file.toFile());
+ actual.add(extracted);
}
@Override
@@ -453,4 +473,33 @@ public class AutoBuildTests {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}
+
+ @Test
+ public void customExtensions() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
+ addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
+ addFile(false, LGTM_SRC, "tstjsm");
+ addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
+ addFile(true, LGTM_SRC, "tst.html");
+ addFile(true, LGTM_SRC, "tst.js");
+ runTest();
+ }
+
+ @Test
+ public void overrideExtension() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
+ addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
+ runTest();
+ }
+
+ @Test
+ public void invalidFileType() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");
+ try {
+ runTest();
+ Assert.fail("expected UserError");
+ } catch (UserError ue) {
+ Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
+ }
+ }
}