JavaScript: Make file types customisable in AutoBuild.

Every once in a while we encounter projects that use a custom file extension for files we could in principle extract; because the extractor does not know about the extension, those files are skipped.

To handle this, the legacy extractor has a `--file-type` option that one can use to specify a file type to use for all files in that particular extraction. So far, `AutoBuild` has nothing of the sort.

This PR proposes to introduce an environment variable `LGTM_INDEX_FILETYPES` to allow a similar customisation. In the fullness of time, this variable would be set through `lgtm.yml` in the usual way, but for now it is undocumented and for internal use only.

Specifically, `LGTM_INDEX_FILETYPES` is a newline-separated list of ".extension:filetype" pairs, specifying that files with the given `.extension` should be extracted as type `filetype`, where
`filetype` is one of `js`, `html`, `json`, `typescript` or `yaml`.

For example, `.jsm:js` causes all `.jsm` files to be extracted as JavaScript.

This can also be used to override default file types: for example, by specifying `.js:typescript` all JavaScript files will be extracted as TypeScript.
This commit is contained in:
Max Schaefer
2019-02-27 12:02:01 +00:00
parent 2ed37903d8
commit 9d77619afc
2 changed files with 135 additions and 22 deletions

View File

@@ -16,8 +16,10 @@ import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -69,6 +71,8 @@ import com.semmle.util.trap.TrapWriter;
* <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
* patterns that can be used to refine the list of files to include and exclude</li>
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
* specifying which {@link FileType} to use for the given extension</li>
* <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
@@ -160,6 +164,12 @@ import com.semmle.util.trap.TrapWriter;
* </p>
*
* <p>
* The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
* list of file type specifications of the form <code>.extension:filetype</code>, causing all
* files whose name ends in <code>.extension</code> to also be included by default.
* </p>
*
* <p>
* The default exclusion patterns cause the following files to be excluded:
* </p>
* <ul>
@@ -174,6 +184,11 @@ import com.semmle.util.trap.TrapWriter;
* </p>
*
* <p>
* The file type with which a file is extracted can be customised via the <code>LGTM_INDEX_FILETYPES</code>
* environment variable explained above.
* </p>
*
* <p>
* Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
* externs is not customisable.
* </p>
@@ -193,6 +208,7 @@ import com.semmle.util.trap.TrapWriter;
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
private final Set<Path> includes = new LinkedHashSet<>();
private final Set<Path> excludes = new LinkedHashSet<>();
private ProjectLayout filters;
@@ -208,6 +224,7 @@ public class AutoBuild {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
setupFileTypes();
setupMatchers();
}
@@ -277,6 +294,25 @@ public class AutoBuild {
return trapCache;
}
/**
 * Populate {@link #fileTypes} from the <code>LGTM_INDEX_FILETYPES</code> environment variable.
 *
 * <p>
 * Every non-blank line of the variable is expected to look like <code>.extension:filetype</code>.
 * Lines that do not split into exactly two colon-separated fields are skipped without complaint.
 * </p>
 *
 * @throws UserError if a line names a file type that is not a {@link FileType} constant
 */
private void setupFileTypes() {
	String rawSpecs = getEnvVar("LGTM_INDEX_FILETYPES", "");
	for (String line : Main.NEWLINE.split(rawSpecs)) {
		String trimmed = line.trim();
		// blank lines carry no specification at all
		if (trimmed.isEmpty())
			continue;

		// anything other than exactly "<extension>:<filetype>" is ignored
		String[] parts = trimmed.split(":");
		if (parts.length != 2)
			continue;

		String ext = parts[0].trim();
		String typeName = parts[1].trim();

		FileType type;
		try {
			type = FileType.valueOf(StringUtil.uc(typeName));
		} catch (IllegalArgumentException e) {
			Exceptions.ignore(e, "We construct a better error message.");
			throw new UserError("Invalid file type '" + typeName + "'.");
		}
		fileTypes.put(ext, type);
	}
}
/**
* Set up include and exclude matchers based on environment variables.
*/
@@ -350,6 +386,10 @@ public class AutoBuild {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");
// include any explicitly specified extensions
for (String extension : fileTypes.keySet())
patterns.add("**/*" + extension);
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
@@ -483,26 +523,46 @@ public class AutoBuild {
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
	// extractor used for every file that has no explicitly configured file type
	FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);

	// one dedicated extractor per extension with an explicitly specified file type
	Map<String, FileExtractor> customExtractors = new LinkedHashMap<>();
	for (String extension : fileTypes.keySet()) {
		String typeName = fileTypes.get(extension).name();
		ExtractorConfig customConfig = mkExtractorConfig().withFileType(typeName);
		customExtractors.put(extension, new FileExtractor(customConfig, outputConfig, trapCache));
	}

	Set<Path> filesToExtract = new LinkedHashSet<>();
	List<Path> tsconfigFiles = new ArrayList<>();
	findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);

	// TypeScript projects and files are handled first
	Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);

	// then every candidate that has not been extracted yet
	for (Path f : filesToExtract) {
		if (!extractedFiles.add(f))
			continue;

		FileExtractor extractor = defaultExtractor;
		if (!customExtractors.isEmpty()) {
			// the map never stores null, so a null lookup result means "no custom extractor"
			FileExtractor custom = customExtractors.get(FileUtil.extension(f));
			if (custom != null)
				extractor = custom;
		}
		extract(extractor, f, null);
	}
}
/**
 * Create an {@link ExtractorConfig} carrying the source type returned by
 * <code>getSourceType()</code>, the configured <code>typeScriptMode</code> and,
 * if one is set, the <code>defaultEncoding</code>.
 *
 * @return the assembled configuration
 */
private ExtractorConfig mkExtractorConfig() {
ExtractorConfig config = new ExtractorConfig(true);
config = config.withSourceType(getSourceType());
config = config.withTypeScriptMode(typeScriptMode);
// only override the encoding when one was explicitly configured
if (defaultEncoding != null)
config = config.withDefaultEncoding(defaultEncoding);
// NOTE(review): the statements from here down to the end of the loop repeat the
// extraction steps of extractSource() with a single extractor; they look like the
// pre-change body rendered inline by the diff view -- confirm whether they belong here.
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
Set<Path> filesToExtract = new LinkedHashSet<>();
List<Path> tsconfigFiles = new ArrayList<>();
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
// extract TypeScript projects and files
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
// extract remaining files
for (Path f : filesToExtract) {
if (extractedFiles.add(f)) {
extract(extractor, f, null);
}
}
return config;
}
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
@@ -591,7 +651,11 @@ public class AutoBuild {
return FileVisitResult.SKIP_SUBTREE;
// extract files that are supported and pass the include/exclude patterns
if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
boolean supported = extractor.supports(file.toFile());
if (!supported && !fileTypes.isEmpty()) {
supported = fileTypes.containsKey(FileUtil.extension(file));
}
if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}

View File

@@ -23,6 +23,7 @@ import org.junit.Test;
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public class AutoBuildTests {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
* extracted, its path is added to {@link #expected}.
* extracted, its path is added to {@link #expected}. If non-null, parameter
* {@code fileType} indicates the file type with which we expect the file to be extracted.
*/
private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
	Path created = addFile(root, components);
	if (!extracted)
		return created;

	// expectation entries are either "<path>" or "<path>:<file type>"
	String entry = created.toString();
	if (fileType != null)
		entry = entry + ":" + fileType.toString();
	expected.add(entry);
	return created;
}
/**
* Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
*/
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
	// no explicit file type: the expectation is recorded as the bare path
	final FileType defaultType = null;
	return addFile(extracted, defaultType, root, components);
}
/**
* Create a file at the specified path under {@code root} and return it.
*/
private Path addFile(Path root, String... components) throws IOException {
// build the target path from root followed by the given components
Path p = Paths.get(root.toString(), components);
// create the parent directory chain before creating the file itself
Files.createDirectories(p.getParent());
// NOTE(review): the next four lines use `extracted`, which is not a parameter of this
// overload, and are followed by a second return; they look like removed-side diff
// lines rendered inline -- confirm against the actual file.
Path f = Files.createFile(p);
if (extracted)
expected.add(f.toString());
return f;
return Files.createFile(p);
}
/**
@@ -96,7 +113,10 @@ public class AutoBuildTests {
new AutoBuild() {
// Test override: records what would be extracted in `actual`; it does not call the
// superclass implementation.
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
// NOTE(review): this bare-path add is followed by another add of `extracted` below;
// it looks like a removed-side diff line rendered inline -- confirm.
actual.add(file.toString());
String extracted = file.toString();
// append ":<file type>" only when the extractor's config has an explicit file type
if (extractor.getConfig().hasFileType())
extracted += ":" + extractor.getFileType(file.toFile());
actual.add(extracted);
}
@Override
@@ -453,4 +473,33 @@ public class AutoBuildTests {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}
/**
 * Extensions listed in <code>LGTM_INDEX_FILETYPES</code> are expected to be extracted with the
 * requested file type, alongside files with default extensions.
 */
@Test
public void customExtensions() throws IOException {
	final String fileTypeSpecs = ".jsm:js\n.soy:html";
	envVars.put("LGTM_INDEX_FILETYPES", fileTypeSpecs);

	// custom extension mapped to JavaScript
	addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
	// same trailing letters but no dot: not expected to be extracted
	addFile(false, LGTM_SRC, "tstjsm");
	// custom extension mapped to HTML
	addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
	// default extensions, expected without an explicit file type
	addFile(true, LGTM_SRC, "tst.html");
	addFile(true, LGTM_SRC, "tst.js");

	runTest();
}
/**
 * A specification for a default extension is expected to replace the file type
 * normally used for that extension.
 */
@Test
public void overrideExtension() throws IOException {
	final String fileTypeSpecs = ".js:typescript";
	envVars.put("LGTM_INDEX_FILETYPES", fileTypeSpecs);

	// the .js file is expected to be extracted as TypeScript
	addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");

	runTest();
}
/**
 * An unknown file type name is expected to be reported as a {@link UserError}
 * whose message quotes the name as the user wrote it.
 */
@Test
public void invalidFileType() throws IOException {
	envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");

	UserError caught = null;
	try {
		runTest();
	} catch (UserError ue) {
		caught = ue;
	}

	// reaching this point without an exception means the bad type was accepted
	if (caught == null)
		Assert.fail("expected UserError");
	Assert.assertEquals("Invalid file type 'javascript'.", caught.getMessage());
}
}