mirror of
https://github.com/github/codeql.git
synced 2026-01-08 12:10:22 +01:00
JavaScript: Make file types customisable in AutoBuild.
Every once in a while we encounter projects using some custom file extension for files that we could in principle extract, but since the extractor doesn't know about the extension the files are skipped. To handle this, the legacy extractor has a `--file-type` option that one can use to specify a file type to use for all files in that particular extraction. So far, `AutoBuild` has nothing of the sort. This PR proposes to introduce an environment variable `LGTM_INDEX_FILETYPES` to allow a similar customisation. In the fullness of time, this variable would be set through `lgtm.yml` in the usual way, but for now it is undocumented and for internal use only. Specifically, `LGTM_INDEX_FILETYPES` is a newline-separated list of ".extension:filetype" pairs, specifying that files with the given `.extension` should be extracted as type `filetype`, where `filetype` is one of `js`, `html`, `json`, `typescript` or `yaml`. For example, `.jsm:js` causes all `.jsm` files to be extracted as JavaScript. This can also be used to override default file types: for example, by specifying `.js:typescript` all JavaScript files will be extracted as TypeScript.
This commit is contained in:
@@ -16,8 +16,10 @@ import java.nio.file.SimpleFileVisitor;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
@@ -69,6 +71,8 @@ import com.semmle.util.trap.TrapWriter;
|
||||
* <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
|
||||
* patterns that can be used to refine the list of files to include and exclude</li>
|
||||
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
|
||||
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
|
||||
* specifying which {@link FileType} to use for the given extension</li>
|
||||
* <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
|
||||
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
|
||||
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
|
||||
@@ -160,6 +164,12 @@ import com.semmle.util.trap.TrapWriter;
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
|
||||
* list of file type specifications of the form <code>.extension:filetype</code>, causing all
|
||||
* files whose name ends in <code>.extension</code> to also be included by default.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* The default exclusion patterns cause the following files to be excluded:
|
||||
* </p>
|
||||
* <ul>
|
||||
@@ -174,6 +184,11 @@ import com.semmle.util.trap.TrapWriter;
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* The file type with which a file is extracted can be customised via the <code>LGTM_INDEX_FILETYPES</code>
|
||||
* environment variable explained above.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
|
||||
* externs is not customisable.
|
||||
* </p>
|
||||
@@ -193,6 +208,7 @@ import com.semmle.util.trap.TrapWriter;
|
||||
public class AutoBuild {
|
||||
private final ExtractorOutputConfig outputConfig;
|
||||
private final ITrapCache trapCache;
|
||||
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
|
||||
private final Set<Path> includes = new LinkedHashSet<>();
|
||||
private final Set<Path> excludes = new LinkedHashSet<>();
|
||||
private ProjectLayout filters;
|
||||
@@ -208,6 +224,7 @@ public class AutoBuild {
|
||||
this.trapCache = mkTrapCache();
|
||||
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
|
||||
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
|
||||
setupFileTypes();
|
||||
setupMatchers();
|
||||
}
|
||||
|
||||
@@ -277,6 +294,25 @@ public class AutoBuild {
|
||||
return trapCache;
|
||||
}
|
||||
|
||||
private void setupFileTypes() {
|
||||
for (String spec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
|
||||
spec = spec.trim();
|
||||
if (spec.isEmpty())
|
||||
continue;
|
||||
String[] fields = spec.split(":");
|
||||
if (fields.length != 2)
|
||||
continue;
|
||||
String extension = fields[0].trim();
|
||||
String fileType = fields[1].trim();
|
||||
try {
|
||||
fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType)));
|
||||
} catch (IllegalArgumentException e) {
|
||||
Exceptions.ignore(e, "We construct a better error message.");
|
||||
throw new UserError("Invalid file type '" + fileType + "'.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up include and exclude matchers based on environment variables.
|
||||
*/
|
||||
@@ -350,6 +386,10 @@ public class AutoBuild {
|
||||
patterns.add("**/.eslintrc*");
|
||||
patterns.add("**/package.json");
|
||||
|
||||
// include any explicitly specified extensions
|
||||
for (String extension : fileTypes.keySet())
|
||||
patterns.add("**/*" + extension);
|
||||
|
||||
// exclude files whose name strongly suggests they are minified
|
||||
patterns.add("-**/*.min.js");
|
||||
patterns.add("-**/*-min.js");
|
||||
@@ -483,26 +523,46 @@ public class AutoBuild {
|
||||
* Extract all supported candidate files that pass the filters.
|
||||
*/
|
||||
private void extractSource() throws IOException {
|
||||
// default extractor
|
||||
FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
|
||||
|
||||
// custom extractor for explicitly specified file types
|
||||
Map<String, FileExtractor> customExtractors = new LinkedHashMap<>();
|
||||
for (Map.Entry<String, FileType> spec : fileTypes.entrySet()) {
|
||||
String extension = spec.getKey();
|
||||
String fileType = spec.getValue().name();
|
||||
ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
|
||||
customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
|
||||
}
|
||||
|
||||
Set<Path> filesToExtract = new LinkedHashSet<>();
|
||||
List<Path> tsconfigFiles = new ArrayList<>();
|
||||
findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
|
||||
|
||||
// extract TypeScript projects and files
|
||||
Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
|
||||
|
||||
// extract remaining files
|
||||
for (Path f : filesToExtract) {
|
||||
if (extractedFiles.add(f)) {
|
||||
FileExtractor extractor = defaultExtractor;
|
||||
if (!fileTypes.isEmpty()) {
|
||||
String extension = FileUtil.extension(f);
|
||||
if (customExtractors.containsKey(extension))
|
||||
extractor = customExtractors.get(extension);
|
||||
}
|
||||
extract(extractor, f, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ExtractorConfig mkExtractorConfig() {
|
||||
ExtractorConfig config = new ExtractorConfig(true);
|
||||
config = config.withSourceType(getSourceType());
|
||||
config = config.withTypeScriptMode(typeScriptMode);
|
||||
if (defaultEncoding != null)
|
||||
config = config.withDefaultEncoding(defaultEncoding);
|
||||
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
|
||||
|
||||
Set<Path> filesToExtract = new LinkedHashSet<>();
|
||||
List<Path> tsconfigFiles = new ArrayList<>();
|
||||
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
|
||||
|
||||
// extract TypeScript projects and files
|
||||
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
|
||||
|
||||
// extract remaining files
|
||||
for (Path f : filesToExtract) {
|
||||
if (extractedFiles.add(f)) {
|
||||
extract(extractor, f, null);
|
||||
}
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
|
||||
@@ -591,7 +651,11 @@ public class AutoBuild {
|
||||
return FileVisitResult.SKIP_SUBTREE;
|
||||
|
||||
// extract files that are supported and pass the include/exclude patterns
|
||||
if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
|
||||
boolean supported = extractor.supports(file.toFile());
|
||||
if (!supported && !fileTypes.isEmpty()) {
|
||||
supported = fileTypes.containsKey(FileUtil.extension(file));
|
||||
}
|
||||
if (supported && isFileIncluded(file)) {
|
||||
filesToExtract.add(normalizePath(file));
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ import org.junit.Test;
|
||||
import com.semmle.js.extractor.AutoBuild;
|
||||
import com.semmle.js.extractor.ExtractorState;
|
||||
import com.semmle.js.extractor.FileExtractor;
|
||||
import com.semmle.js.extractor.FileExtractor.FileType;
|
||||
import com.semmle.util.data.StringUtil;
|
||||
import com.semmle.util.exception.UserError;
|
||||
import com.semmle.util.files.FileUtil8;
|
||||
@@ -74,15 +75,31 @@ public class AutoBuildTests {
|
||||
/**
|
||||
* Add a file under {@code root} that we either do or don't expect to be extracted,
|
||||
* depending on the value of {@code extracted}. If the file is expected to be
|
||||
* extracted, its path is added to {@link #expected}.
|
||||
* extracted, its path is added to {@link #expected}. If non-null, parameter
|
||||
* {@code fileType} indicates the file type with which we expect the file to be extracted.
|
||||
*/
|
||||
private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
|
||||
Path f = addFile(root, components);
|
||||
if (extracted) {
|
||||
expected.add(f + (fileType == null ? "" : ":" + fileType.toString()));
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
|
||||
*/
|
||||
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
|
||||
return addFile(extracted, null, root, components);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a file at the specified path under {@code root} and return it.
|
||||
*/
|
||||
private Path addFile(Path root, String... components) throws IOException {
|
||||
Path p = Paths.get(root.toString(), components);
|
||||
Files.createDirectories(p.getParent());
|
||||
Path f = Files.createFile(p);
|
||||
if (extracted)
|
||||
expected.add(f.toString());
|
||||
return f;
|
||||
return Files.createFile(p);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -96,7 +113,10 @@ public class AutoBuildTests {
|
||||
new AutoBuild() {
|
||||
@Override
|
||||
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
|
||||
actual.add(file.toString());
|
||||
String extracted = file.toString();
|
||||
if (extractor.getConfig().hasFileType())
|
||||
extracted += ":" + extractor.getFileType(file.toFile());
|
||||
actual.add(extracted);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -453,4 +473,33 @@ public class AutoBuildTests {
|
||||
addFile(true, LGTM_SRC, "compute_min.js");
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void customExtensions() throws IOException {
|
||||
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
|
||||
addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
|
||||
addFile(false, LGTM_SRC, "tstjsm");
|
||||
addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
|
||||
addFile(true, LGTM_SRC, "tst.html");
|
||||
addFile(true, LGTM_SRC, "tst.js");
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void overrideExtension() throws IOException {
|
||||
envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
|
||||
addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invalidFileType() throws IOException {
|
||||
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");
|
||||
try {
|
||||
runTest();
|
||||
Assert.fail("expected UserError");
|
||||
} catch (UserError ue) {
|
||||
Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user