From 7f8f126338004cfb6c96492446a9d412c463dff1 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 23 May 2019 08:16:07 +0100 Subject: [PATCH] JavaScript: Add support for XML extraction. --- .../com/semmle/js/extractor/AutoBuild.java | 58 ++++++++++++++- .../js/extractor/test/AutoBuildTests.java | 70 ++++++++++++++++--- 2 files changed, 116 insertions(+), 12 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 2cab91b2483..f5a7721984c 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -10,6 +10,7 @@ import com.semmle.js.parser.TypeScriptParser; import com.semmle.ts.extractor.TypeExtractor; import com.semmle.ts.extractor.TypeTable; import com.semmle.util.data.StringUtil; +import com.semmle.util.exception.CatastrophicError; import com.semmle.util.exception.Exceptions; import com.semmle.util.exception.ResourceError; import com.semmle.util.exception.UserError; @@ -23,6 +24,7 @@ import com.semmle.util.trap.TrapWriter; import java.io.File; import java.io.IOException; import java.io.Reader; +import java.lang.ProcessBuilder.Redirect; import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; @@ -68,7 +70,9 @@ import java.util.stream.Stream; * patterns that can be used to refine the list of files to include and exclude *
  • LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript *
  • LGTM_INDEX_FILETYPES: a newline-separated list of ".extension:filetype" pairs - * specifying which {@link FileType} to use for the given extension + * specifying which {@link FileType} to use for the given extension; the additional file + * type XML is also supported + *
  • LGTM_INDEX_XML_MODE: whether to extract XML files; either ALL or DISABLED (the default), matched case-insensitively *
  • LGTM_THREADS: the maximum number of files to extract in parallel *
  • LGTM_TRAP_CACHE: the path of a directory to use for trap caching *
  • LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to @@ -158,6 +162,12 @@ import java.util.stream.Stream; *

    The file type as which a file is extracted can be customised via the * LGTM_INDEX_FILETYPES environment variable explained above. * + *

    If LGTM_INDEX_XML_MODE is set to ALL, then all files with extension + * .xml under LGTM_SRC are extracted as XML (in addition to any files + * whose file type is specified to be XML via LGTM_INDEX_FILETYPES). + * Currently XML extraction does not respect inclusion and exclusion filters, but this is a bug, + * not a feature, and hence will change eventually. + * *

    Note that all these customisations only apply to LGTM_SRC. Extraction of externs * is not customisable. * @@ -178,6 +188,7 @@ public class AutoBuild { private final Map fileTypes = new LinkedHashMap<>(); private final Set includes = new LinkedHashSet<>(); private final Set excludes = new LinkedHashSet<>(); + private final Set xmlExtensions = new LinkedHashSet<>(); private ProjectLayout filters; private final Path LGTM_SRC, SEMMLE_DIST; private final TypeScriptMode typeScriptMode; @@ -193,6 +204,7 @@ public class AutoBuild { getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.FULL); this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING"); setupFileTypes(); + setupXmlMode(); setupMatchers(); } @@ -272,7 +284,14 @@ public class AutoBuild { String extension = fields[0].trim(); String fileType = fields[1].trim(); try { - fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType))); + fileType = StringUtil.uc(fileType); + if ("XML".equals(fileType)) { + if (extension.length() < 2) + throw new UserError("Invalid extension '" + extension + "'."); + xmlExtensions.add(extension.substring(1)); + } else { + fileTypes.put(extension, FileType.valueOf(fileType)); + } } catch (IllegalArgumentException e) { Exceptions.ignore(e, "We construct a better error message."); throw new UserError("Invalid file type '" + fileType + "'."); @@ -280,6 +299,15 @@ public class AutoBuild { } } + private void setupXmlMode() { + String xmlMode = getEnvVar("LGTM_INDEX_XML_MODE", "DISABLED"); + xmlMode = StringUtil.uc(xmlMode.trim()); + if ("ALL".equals(xmlMode)) + xmlExtensions.add("xml"); + else if (!"DISABLED".equals(xmlMode)) + throw new UserError("Invalid XML mode '" + xmlMode + "' (should be either ALL or DISABLED)."); + } + /** Set up include and exclude matchers based on environment variables. 
*/ private void setupMatchers() { setupIncludesAndExcludes(); @@ -402,6 +430,7 @@ public class AutoBuild { try { extractSource(); extractExterns(); + extractXml(); } finally { shutdownThreadPool(); } @@ -733,10 +762,33 @@ public class AutoBuild { System.out.flush(); } + public Set getXmlExtensions() { + return xmlExtensions; + } + + protected void extractXml() throws IOException { + if (xmlExtensions.isEmpty()) + return; + List cmd = new ArrayList<>(); + cmd.add("odasa"); + cmd.add("index"); + cmd.add("--xml"); + cmd.add("--extensions"); + cmd.addAll(xmlExtensions); + ProcessBuilder pb = new ProcessBuilder(cmd); + try { + pb.redirectError(Redirect.INHERIT); + pb.redirectOutput(Redirect.INHERIT); + pb.start().waitFor(); + } catch (InterruptedException e) { + throw new CatastrophicError(e); + } + } + public static void main(String[] args) { try { new AutoBuild().run(); - } catch (IOException | UserError e) { + } catch (IOException | UserError | CatastrophicError e) { System.err.println(e.toString()); System.exit(1); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java index b404c880599..ea79971aedc 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java +++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java @@ -1,19 +1,14 @@ package com.semmle.js.extractor.test; -import com.semmle.js.extractor.AutoBuild; -import com.semmle.js.extractor.ExtractorState; -import com.semmle.js.extractor.FileExtractor; -import com.semmle.js.extractor.FileExtractor.FileType; -import com.semmle.util.data.StringUtil; -import com.semmle.util.exception.UserError; -import com.semmle.util.files.FileUtil8; -import com.semmle.util.process.Env; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import 
java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.DosFileAttributeView; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -21,12 +16,23 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; + import org.junit.After; import org.junit.Assert; import org.junit.Assume; import org.junit.Before; import org.junit.Test; +import com.semmle.js.extractor.AutoBuild; +import com.semmle.js.extractor.ExtractorState; +import com.semmle.js.extractor.FileExtractor; +import com.semmle.js.extractor.FileExtractor.FileType; +import com.semmle.util.data.StringUtil; +import com.semmle.util.exception.UserError; +import com.semmle.util.files.FileUtil; +import com.semmle.util.files.FileUtil8; +import com.semmle.util.process.Env; + public class AutoBuildTests { private Path SEMMLE_DIST, LGTM_SRC; private Set expected; @@ -123,6 +129,20 @@ public class AutoBuildTests { actual.add(f.toString()); } } + + @Override + protected void extractXml() throws IOException { + Files.walkFileTree(LGTM_SRC, new SimpleFileVisitor(){ + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) + throws IOException { + String ext = FileUtil.extension(file); + if (!ext.isEmpty() && getXmlExtensions().contains(ext.substring(1))) + actual.add(file.toString()); + return FileVisitResult.CONTINUE; + } + }); + } }.run(); String expectedString = StringUtil.glue("\n", expected.stream().sorted().toArray()); String actualString = StringUtil.glue("\n", actual.stream().sorted().toArray()); @@ -488,7 +508,7 @@ public class AutoBuildTests { runTest(); Assert.fail("expected UserError"); } catch (UserError ue) { - Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage()); + Assert.assertEquals("Invalid file type 'JAVASCRIPT'.", ue.getMessage()); } } @@ -499,4 +519,36 @@ public class AutoBuildTests { addFile(true, LGTM_SRC, "tst.raml"); 
runTest(); } + + @Test + public void dontIncludeXmlByDefault() throws IOException { + addFile(false, LGTM_SRC, "tst.xml"); + addFile(false, LGTM_SRC, "tst.qhelp"); + runTest(); + } + + @Test + public void includeXml() throws IOException { + envVars.put("LGTM_INDEX_XML_MODE", "all"); + addFile(true, LGTM_SRC, "tst.xml"); + addFile(false, LGTM_SRC, "tst.qhelp"); + runTest(); + } + + @Test + public void qhelpAsXml() throws IOException { + envVars.put("LGTM_INDEX_FILETYPES", ".qhelp:xml"); + addFile(false, LGTM_SRC, "tst.xml"); + addFile(true, LGTM_SRC, "tst.qhelp"); + runTest(); + } + + @Test + public void qhelpAsXmlAndAllXml() throws IOException { + envVars.put("LGTM_INDEX_XML_MODE", "all"); + envVars.put("LGTM_INDEX_FILETYPES", ".qhelp:xml"); + addFile(true, LGTM_SRC, "tst.xml"); + addFile(true, LGTM_SRC, "tst.qhelp"); + runTest(); + } }