From 9d77619afc9364957dc69a1480a97b30f7e5bb2e Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Wed, 27 Feb 2019 12:02:01 +0000 Subject: [PATCH] JavaScript: Make file types customisable in AutoBuild. Every once in a while we encounter projects using some custom file extension for files that we could in principle extract, but since the extractor doesn't know about the extension the files are skipped. To handle this, the legacy extractor has a `--file-type` option that one can use to specify a file type to use for all files in that particular extraction. So far, `AutoBuild` has nothing of the sort. This PR proposes to introduce an environment variable `LGTM_INDEX_FILETYPES` to allow a similar customisation. In the fullness of time, this variable would be set through `lgtm.yml` in the usual way, but for now it is undocumented and for internal use only. Specifically, `LGTM_INDEX_FILETYPES` is a newline-separated list of ".extension:filetype" pairs, specifying that files with the given `.extension` should be extracted as type `filetype`, where `filetype` is one of `js`, `html`, `json`, `typescript` or `yaml`. For example, `.jsm:js` causes all `.jsm` files to be extracted as JavaScript. This can also be used to override default file types: for example, by specifying `.js:typescript` all JavaScript files will be extracted as TypeScript. 
--- .../com/semmle/js/extractor/AutoBuild.java | 96 +++++++++++++++---- .../js/extractor/test/AutoBuildTests.java | 61 ++++++++++-- 2 files changed, 135 insertions(+), 22 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 762df7fe8e5..71bb515ec54 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -16,8 +16,10 @@ import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -69,6 +71,8 @@ import com.semmle.util.trap.TrapWriter; *
  • LGTM_INDEX_FILTERS: a newline-separated list of {@link ProjectLayout}-style * patterns that can be used to refine the list of files to include and exclude
  • *
  • LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript
  • + *
  • LGTM_INDEX_FILETYPES: a newline-separated list of ".extension:filetype" pairs + * specifying which {@link FileType} to use for the given extension
  • *
  • LGTM_INDEX_THREADS: the maximum number of files to extract in parallel
  • *
  • LGTM_TRAP_CACHE: the path of a directory to use for trap caching
  • *
  • LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to
  • @@ -160,6 +164,12 @@ import com.semmle.util.trap.TrapWriter; *

    * *

    + * The environment variable LGTM_INDEX_FILETYPES may be set to a newline-separated + * list of file type specifications of the form .extension:filetype, causing all + * files whose name ends in .extension to also be included by default and extracted as type filetype. + *

    + * + *

    * The default exclusion patterns cause the following files to be excluded: *

    *