Merge pull request #998 from xiemaisi/js/autobuild-file-types

JavaScript: Make file types customisable in AutoBuild.
This commit is contained in:
Asger F
2019-02-28 15:26:35 +01:00
committed by GitHub
2 changed files with 152 additions and 22 deletions

View File

@@ -16,8 +16,10 @@ import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -58,6 +60,25 @@ import com.semmle.util.trap.TrapWriter;
* </ul>
*
* <p>
* Additionally, the following environment variables may be set to customise extraction
* (explained in more detail below):
* </p>
*
* <ul>
* <li><code>LGTM_INDEX_INCLUDE</code>: a newline-separated list of paths to include</li>
* <li><code>LGTM_INDEX_EXCLUDE</code>: a newline-separated list of paths to exclude</li>
* <li><code>LGTM_REPOSITORY_FOLDERS_CSV</code>: the path of a CSV file containing file classifications</li>
* <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
* patterns that can be used to refine the list of files to include and exclude</li>
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
* specifying which {@link FileType} to use for the given extension</li>
* <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
* </ul>
*
* <p>
* It extracts the following:
* </p>
*
@@ -143,6 +164,12 @@ import com.semmle.util.trap.TrapWriter;
* </p>
*
* <p>
* The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
* list of file type specifications of the form <code>.extension:filetype</code>, causing all
* files whose name ends in <code>.extension</code> to also be included by default.
* </p>
*
* <p>
* The default exclusion patterns cause the following files to be excluded:
* </p>
* <ul>
@@ -157,6 +184,11 @@ import com.semmle.util.trap.TrapWriter;
* </p>
*
* <p>
* The file type with which a file is extracted can be customised via the
* <code>LGTM_INDEX_FILETYPES</code> environment variable explained above.
* </p>
*
* <p>
* Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
* externs is not customisable.
* </p>
@@ -176,6 +208,7 @@ import com.semmle.util.trap.TrapWriter;
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
private final Set<Path> includes = new LinkedHashSet<>();
private final Set<Path> excludes = new LinkedHashSet<>();
private ProjectLayout filters;
@@ -191,6 +224,7 @@ public class AutoBuild {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
setupFileTypes();
setupMatchers();
}
@@ -260,6 +294,25 @@ public class AutoBuild {
return trapCache;
}
/**
 * Populate {@link #fileTypes} from the <code>LGTM_INDEX_FILETYPES</code> environment variable.
 *
 * <p>
 * Each non-blank entry of the form <code>extension:filetype</code> maps the (trimmed)
 * extension to the {@link FileType} whose name is the (trimmed) file type after it has been
 * passed through <code>StringUtil.uc</code>. Entries that do not consist of exactly two
 * colon-separated fields are skipped without an error.
 * </p>
 *
 * @throws UserError if the file type of an entry does not name a {@link FileType}
 */
private void setupFileTypes() {
  String rawSpecs = getEnvVar("LGTM_INDEX_FILETYPES", "");
  for (String rawSpec : Main.NEWLINE.split(rawSpecs)) {
    String entry = rawSpec.trim();
    if (!entry.isEmpty()) {
      String[] parts = entry.split(":");
      // only well-formed "extension:filetype" pairs are considered
      if (parts.length == 2) {
        String extension = parts[0].trim();
        String fileType = parts[1].trim();
        FileType parsedType;
        try {
          parsedType = FileType.valueOf(StringUtil.uc(fileType));
        } catch (IllegalArgumentException e) {
          Exceptions.ignore(e, "We construct a better error message.");
          throw new UserError("Invalid file type '" + fileType + "'.");
        }
        fileTypes.put(extension, parsedType);
      }
    }
  }
}
/**
* Set up include and exclude matchers based on environment variables.
*/
@@ -333,6 +386,10 @@ public class AutoBuild {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");
// include any explicitly specified extensions
for (String extension : fileTypes.keySet())
patterns.add("**/*" + extension);
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
@@ -466,26 +523,46 @@ public class AutoBuild {
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
  // extractor for every file whose extension has no explicitly configured file type
  FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);

  // one dedicated extractor per configured extension, each forcing the configured file type
  Map<String, FileExtractor> extractorsByExtension = new LinkedHashMap<>();
  for (String extension : fileTypes.keySet()) {
    String typeName = fileTypes.get(extension).name();
    FileExtractor forcedTypeExtractor =
        new FileExtractor(mkExtractorConfig().withFileType(typeName), outputConfig, trapCache);
    extractorsByExtension.put(extension, forcedTypeExtractor);
  }

  // collect candidate files and tsconfig files
  Set<Path> filesToExtract = new LinkedHashSet<>();
  List<Path> tsconfigFiles = new ArrayList<>();
  findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);

  // extract TypeScript projects and files
  Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);

  // extract remaining files
  for (Path file : filesToExtract) {
    // skip anything already contained in the set returned by extractTypeScript
    if (!extractedFiles.add(file))
      continue;

    FileExtractor chosen = defaultExtractor;
    if (!fileTypes.isEmpty())
      chosen = extractorsByExtension.getOrDefault(FileUtil.extension(file), defaultExtractor);
    extract(chosen, file, null);
  }
}
private ExtractorConfig mkExtractorConfig() {
ExtractorConfig config = new ExtractorConfig(true);
config = config.withSourceType(getSourceType());
config = config.withTypeScriptMode(typeScriptMode);
if (defaultEncoding != null)
config = config.withDefaultEncoding(defaultEncoding);
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
Set<Path> filesToExtract = new LinkedHashSet<>();
List<Path> tsconfigFiles = new ArrayList<>();
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
// extract TypeScript projects and files
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
// extract remaining files
for (Path f : filesToExtract) {
if (extractedFiles.add(f)) {
extract(extractor, f, null);
}
}
return config;
}
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
@@ -574,7 +651,11 @@ public class AutoBuild {
return FileVisitResult.SKIP_SUBTREE;
// extract files that are supported and pass the include/exclude patterns
if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
boolean supported = extractor.supports(file.toFile());
if (!supported && !fileTypes.isEmpty()) {
supported = fileTypes.containsKey(FileUtil.extension(file));
}
if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}

View File

@@ -23,6 +23,7 @@ import org.junit.Test;
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public class AutoBuildTests {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
* extracted, its path is added to {@link #expected}.
* extracted, its path is added to {@link #expected}. If non-null, parameter
* {@code fileType} indicates the file type with which we expect the file to be extracted.
*/
private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
  Path created = addFile(root, components);
  // files that should not be extracted leave no trace in the expectations
  if (!extracted)
    return created;

  // expectation entries are "<path>" or, with an explicit file type, "<path>:<fileType>"
  String entry = created.toString();
  if (fileType != null)
    entry += ":" + fileType.toString();
  expected.add(entry);
  return created;
}
/**
 * Convenience overload of {@link #addFile(boolean, FileType, Path, String...)} for files
 * that are not expected to be extracted with any particular file type.
 */
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
  // a null file type means "no explicit file type expected"
  FileType noExplicitType = null;
  return addFile(extracted, noExplicitType, root, components);
}
/**
* Create a file at the specified path under {@code root} and return it.
*/
private Path addFile(Path root, String... components) throws IOException {
Path p = Paths.get(root.toString(), components);
Files.createDirectories(p.getParent());
Path f = Files.createFile(p);
if (extracted)
expected.add(f.toString());
return f;
return Files.createFile(p);
}
/**
@@ -96,7 +113,10 @@ public class AutoBuildTests {
new AutoBuild() {
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
actual.add(file.toString());
String extracted = file.toString();
if (extractor.getConfig().hasFileType())
extracted += ":" + extractor.getFileType(file.toFile());
actual.add(extracted);
}
@Override
@@ -453,4 +473,33 @@ public class AutoBuildTests {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}
/**
 * Registers the extensions <code>.jsm</code> and <code>.soy</code> via
 * <code>LGTM_INDEX_FILETYPES</code> and expects files with those extensions to be
 * extracted as {@link FileType#JS} and {@link FileType#HTML}, respectively, while
 * <code>.html</code> and <code>.js</code> files are still expected without an explicit
 * file type.
 */
@Test
public void customExtensions() throws IOException {
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
// the name lacks the dot, so it does not end in the registered ".jsm" extension
addFile(false, LGTM_SRC, "tstjsm");
addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
// extensions that were not registered are expected with no explicit file type
addFile(true, LGTM_SRC, "tst.html");
addFile(true, LGTM_SRC, "tst.js");
runTest();
}
/**
 * Maps the <code>.js</code> extension to the <code>typescript</code> file type via
 * <code>LGTM_INDEX_FILETYPES</code> and expects a <code>.js</code> file to be extracted
 * as {@link FileType#TYPESCRIPT}.
 */
@Test
public void overrideExtension() throws IOException {
envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
runTest();
}
/**
 * Supplies a file type name that is expected to be rejected and checks that the run fails
 * with a {@link UserError} carrying the message <code>Invalid file type 'javascript'.</code>
 */
@Test
public void invalidFileType() throws IOException {
  envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");

  UserError caught = null;
  try {
    runTest();
  } catch (UserError ue) {
    caught = ue;
  }

  // reaching this point without an exception means the invalid type was accepted
  if (caught == null)
    Assert.fail("expected UserError");
  Assert.assertEquals("Invalid file type 'javascript'.", caught.getMessage());
}
}