Merge pull request #794 from xiemaisi/js/parallel-extraction

Approved by asger-semmle, esben-semmle
This commit is contained in:
semmle-qlci
2019-01-20 00:22:38 +00:00
committed by GitHub
6 changed files with 211 additions and 138 deletions

View File

@@ -19,6 +19,9 @@ import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
@@ -159,22 +162,27 @@ import com.semmle.util.trap.TrapWriter;
* </p>
*
* <p>
* Finally, the environment variables <code>LGTM_TRAP_CACHE</code> and
* <code>LGTM_TRAP_CACHE_BOUND</code> can optionally be used to specify the location and size
* of a trap cache to be used during extraction.
* To customise the actual extraction (as opposed to determining which files to extract),
* the following environment variables are available:
* </p>
* <ul>
* <li><code>LGTM_INDEX_THREADS</code> determines how many threads are used for parallel extraction
* of JavaScript files (TypeScript files cannot currently be extracted in parallel). If left
* unspecified, the extractor uses as many threads as there are cores.</li>
* <li><code>LGTM_TRAP_CACHE</code> and <code>LGTM_TRAP_CACHE_BOUND</code> can be used to specify the
* location and size of a trap cache to be used during extraction.</li>
* </ul>
*/
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
private Set<Path> includes = new LinkedHashSet<>();
private Set<Path> excludes = new LinkedHashSet<>();
private final Set<Path> includes = new LinkedHashSet<>();
private final Set<Path> excludes = new LinkedHashSet<>();
private ProjectLayout filters;
private final Path LGTM_SRC, SEMMLE_DIST;
private final TypeScriptMode typeScriptMode;
private final String defaultEncoding;
private ExtractorState extractorState;
private long timedLogMessageStart = 0;
private ExecutorService threadPool;
public AutoBuild() {
this.LGTM_SRC = toRealPath(getPathFromEnvVar("LGTM_SRC"));
@@ -183,7 +191,6 @@ public class AutoBuild {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
this.extractorState = new ExtractorState();
setupMatchers();
}
@@ -375,8 +382,36 @@ public class AutoBuild {
* Perform extraction.
*/
public void run() throws IOException {
extractExterns();
extractSource();
startThreadPool();
try {
extractSource();
extractExterns();
} finally {
shutdownThreadPool();
}
}
/**
 * Sets up the worker pool used for parallel extraction.
 *
 * The number of threads is read from the <code>LGTM_INDEX_THREADS</code> environment
 * variable, with the number of available processors passed as the default. If the
 * resulting count is one or less, no pool is created and {@link #threadPool} is set
 * to {@code null}.
 */
private void startThreadPool() {
  int coreCount = Runtime.getRuntime().availableProcessors();
  int requestedThreads = Env.systemEnv().getInt("LGTM_INDEX_THREADS", coreCount);

  // Nothing to parallelise with a single worker: run without a pool.
  if (requestedThreads <= 1) {
    System.out.println("Single-threaded extraction.");
    threadPool = null;
    return;
  }

  System.out.println("Parallel extraction with " + requestedThreads + " threads.");
  threadPool = Executors.newFixedThreadPool(requestedThreads);
}
/**
 * Shuts down the extraction thread pool, if one was created, and waits for the
 * extraction jobs that were already submitted to finish.
 *
 * If the waiting thread is interrupted, the wait is abandoned and the thread's
 * interrupt status is restored so that callers can still observe the interruption.
 */
private void shutdownThreadPool() {
  if (threadPool == null)
    return;

  // Stop accepting new jobs; jobs submitted earlier still run to completion.
  threadPool.shutdown();
  try {
    // The timeout is only there because the API demands one.
    threadPool.awaitTermination(365, TimeUnit.DAYS);
  } catch (InterruptedException e) {
    Exceptions.ignore(e, "Awaiting termination is not essential.");
    // Catching InterruptedException clears the interrupt flag; set it again
    // instead of silently dropping the interruption.
    Thread.currentThread().interrupt();
  }
}
/**
@@ -414,12 +449,12 @@ public class AutoBuild {
}
}
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache, extractorState);
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
FileVisitor<? super Path> visitor = new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (".js".equals(FileUtil.extension(file.toString())))
extract(extractor, file);
extract(extractor, file, null);
return super.visitFile(file, attrs);
}
};
@@ -436,10 +471,91 @@ public class AutoBuild {
config = config.withTypeScriptMode(typeScriptMode);
if (defaultEncoding != null)
config = config.withDefaultEncoding(defaultEncoding);
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache, extractorState);
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
Set<Path> filesToExtract = new LinkedHashSet<>();
List<Path> tsconfigFiles = new ArrayList<>();
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
// extract TypeScript projects and files
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
// extract remaining files
for (Path f : filesToExtract) {
if (extractedFiles.add(f)) {
extract(extractor, f, null);
}
}
}
/**
 * Extracts TypeScript projects and stand-alone TypeScript files.
 *
 * Nothing is done unless {@code files} contains a file with a TypeScript extension or
 * {@code tsconfig} is non-empty. Otherwise a fresh {@link ExtractorState} is created, each
 * project in {@code tsconfig} is opened and those of its source files that are also in
 * {@code files} are extracted, the type table is extracted if there was at least one
 * project, and finally any TypeScript files in {@code files} not yet recorded as extracted
 * are extracted as well.
 *
 * The TypeScript parser process is killed when extraction ends, whether it completed
 * normally or was aborted by an exception.
 *
 * @param extractor the extractor to use for each file
 * @param files the files selected for extraction
 * @param tsconfig the project files to open as TypeScript projects
 * @return the files recorded as extracted by {@link #extractTypeScriptFiles}; empty if
 *         no TypeScript extraction took place
 */
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
  Set<Path> extractedFiles = new LinkedHashSet<>();
  if (!hasTypeScriptFiles(files) && tsconfig.isEmpty())
    return extractedFiles;

  ExtractorState extractorState = new ExtractorState();
  TypeScriptParser tsParser = extractorState.getTypeScriptParser();
  verifyTypeScriptInstallation(extractorState);

  try {
    // Extract TypeScript projects
    for (Path projectPath : tsconfig) {
      File projectFile = projectPath.toFile();

      long start = logBeginProcess("Opening project " + projectFile);
      ParsedProject project = tsParser.openProject(projectFile);
      logEndProcess(start, "Done opening project " + projectFile);

      // Extract all files belonging to this project which are also matched
      // by our include/exclude filters.
      List<File> typeScriptFiles = new ArrayList<>();
      for (File sourceFile : project.getSourceFiles()) {
        Path sourcePath = sourceFile.toPath();
        if (!files.contains(normalizePath(sourcePath)))
          continue;
        if (!extractedFiles.contains(sourcePath)) {
          typeScriptFiles.add(sourcePath.toFile());
        }
      }
      extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor, extractorState);
      tsParser.closeProject(projectFile);
    }

    // Extract all the types discovered when extracting the ASTs.
    if (!tsconfig.isEmpty()) {
      TypeTable typeTable = tsParser.getTypeTable();
      extractTypeTable(tsconfig.iterator().next(), typeTable);
    }

    // Extract remaining TypeScript files.
    List<File> remainingTypeScriptFiles = new ArrayList<>();
    for (Path f : files) {
      if (!extractedFiles.contains(f) && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) {
        remainingTypeScriptFiles.add(f.toFile());
      }
    }
    if (!remainingTypeScriptFiles.isEmpty()) {
      extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor, extractorState);
    }
  } finally {
    // The TypeScript compiler instance is no longer needed. Doing this in a finally
    // block keeps the process from outliving an extraction that failed part-way.
    tsParser.killProcess();
  }

  return extractedFiles;
}
/**
 * Checks whether at least one of the given files has a TypeScript file extension.
 *
 * @param filesToExtract the files selected for extraction
 * @return {@code true} if some file's extension maps to {@link FileType#TYPESCRIPT}
 */
private boolean hasTypeScriptFiles(Set<Path> filesToExtract) {
  // Only the extension is inspected. FileType.forFile is deliberately avoided as it
  // involves I/O for file header checks, and files with a bad header have already
  // been excluded.
  return filesToExtract.stream()
      .map(Path::toFile)
      .anyMatch(candidate -> FileType.forFileExtension(candidate) == FileType.TYPESCRIPT);
}
private void findFilesToExtract(FileExtractor extractor,
final Set<Path> filesToExtract, final List<Path> tsconfigFiles)
throws IOException {
Path[] currentRoot = new Path[1];
final Set<Path> filesToExtract = new LinkedHashSet<>();
final List<Path> tsconfigFiles = new ArrayList<>();
FileVisitor<? super Path> visitor = new SimpleFileVisitor<Path>() {
private boolean isFileIncluded(Path file) {
// normalise path for matching
@@ -481,87 +597,23 @@ public class AutoBuild {
currentRoot[0] = root;
Files.walkFileTree(currentRoot[0], visitor);
}
// If there are any .ts files, verify that TypeScript is installed.
TypeScriptParser tsParser = extractorState.getTypeScriptParser();
boolean hasTypeScriptFiles = false;
for (Path file : filesToExtract) {
// Check if there are any files with the TypeScript extension.
// Do not use FileType.forFile as it involves I/O for file header checks,
// and files with a bad header have already been excluded.
if (FileType.forFileExtension(file.toFile()) == FileType.TYPESCRIPT) {
hasTypeScriptFiles = true;
break;
}
}
if (hasTypeScriptFiles || !tsconfigFiles.isEmpty()) {
verifyTypeScriptInstallation();
}
// Extract TypeScript projects
Set<Path> extractedFiles = new LinkedHashSet<>();
for (Path projectPath : tsconfigFiles) {
File projectFile = projectPath.toFile();
logBeginProcess("Opening project " + projectFile);
ParsedProject project = tsParser.openProject(projectFile);
logEndProcess();
// Extract all files belonging to this project which are also matched
// by our include/exclude filters.
List<File> typeScriptFiles = new ArrayList<File>();
for (File sourceFile : project.getSourceFiles()) {
Path sourcePath = sourceFile.toPath();
if (!filesToExtract.contains(normalizePath(sourcePath)))
continue;
if (!extractedFiles.contains(sourcePath)) {
typeScriptFiles.add(sourcePath.toFile());
}
}
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor);
tsParser.closeProject(projectFile);
}
if (!tsconfigFiles.isEmpty()) {
// Extract all the types discovered when extracting the ASTs.
TypeTable typeTable = tsParser.getTypeTable();
extractTypeTable(tsconfigFiles.iterator().next(), typeTable);
}
// Extract remaining TypeScript files.
List<File> remainingTypeScriptFiles = new ArrayList<File>();
for (Path f : filesToExtract) {
if (!extractedFiles.contains(f) && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) {
remainingTypeScriptFiles.add(f.toFile());
}
}
if (!remainingTypeScriptFiles.isEmpty()) {
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor);
}
// The TypeScript compiler instance is no longer needed.
tsParser.killProcess();
// Extract non-TypeScript files
for (Path f : filesToExtract) {
if (extractedFiles.add(f)) {
extract(extractor, f);
}
}
}
/**
* Verifies that Node.js and the TypeScript compiler are installed and can be
* found.
*/
public void verifyTypeScriptInstallation() {
public void verifyTypeScriptInstallation(ExtractorState extractorState) {
extractorState.getTypeScriptParser().verifyInstallation(true);
}
public void extractTypeScriptFiles(List<File> files, Set<Path> extractedFiles, FileExtractor extractor) throws IOException {
public void extractTypeScriptFiles(List<File> files, Set<Path> extractedFiles,
FileExtractor extractor, ExtractorState extractorState) {
extractorState.getTypeScriptParser().prepareFiles(files);
for (File f : files) {
Path path = f.toPath();
extractedFiles.add(path);
extract(extractor, f.toPath());
extract(extractor, f.toPath(), extractorState);
}
}
@@ -596,18 +648,34 @@ public class AutoBuild {
}
/**
* Extract a single file.
* Extract a single file using the given extractor and state.
*
* If the state is {@code null}, the extraction job will be submitted to the {@link #threadPool},
* otherwise extraction will happen on the main thread.
*/
protected void extract(FileExtractor extractor, Path file) throws IOException {
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
  // Only jobs without an extractor state are handed to the pool, and only if a pool exists.
  boolean runOnPool = state == null && threadPool != null;
  if (!runOnPool) {
    // Run synchronously on the calling thread.
    doExtract(extractor, file, state);
    return;
  }
  threadPool.submit(() -> doExtract(extractor, file, state));
}
private void doExtract(FileExtractor extractor, Path file, ExtractorState state) {
File f = file.toFile();
if (!f.exists()) {
warn("Skipping " + file + ", which does not exist.");
return;
}
logBeginProcess("Extracting " + file);
extractor.extract(f);
logEndProcess();
try {
long start = logBeginProcess("Extracting " + file);
extractor.extract(f, state);
logEndProcess(start, "Done extracting " + file);
} catch (Throwable t) {
System.err.println("Exception while extracting " + file + ".");
t.printStackTrace(System.err);
System.exit(1);
}
}
private void warn(String msg) {
@@ -615,16 +683,16 @@ public class AutoBuild {
System.err.flush();
}
private void logBeginProcess(String message) {
System.out.print(message + "...");
System.out.flush();
this.timedLogMessageStart = System.nanoTime();
private long logBeginProcess(String message) {
System.out.println(message);
return System.nanoTime();
}
private void logEndProcess() {
private void logEndProcess(long timedLogMessageStart, String message) {
long end = System.nanoTime();
int milliseconds = (int) ((end - this.timedLogMessageStart) / 1000000);
System.out.println(" done (" + milliseconds + " ms)");
int milliseconds = (int) ((end - timedLogMessageStart) / 1_000_000);
System.out.println(message + " (" + milliseconds + " ms)");
System.out.flush();
}
public static void main(String[] args) {

View File

@@ -395,13 +395,11 @@ public class FileExtractor {
private final ExtractorConfig config;
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
private final ExtractorState state;
public FileExtractor(ExtractorConfig config, ExtractorOutputConfig outputConfig, ITrapCache trapCache, ExtractorState state) {
public FileExtractor(ExtractorConfig config, ExtractorOutputConfig outputConfig, ITrapCache trapCache) {
this.config = config;
this.outputConfig = outputConfig;
this.trapCache = trapCache;
this.state = state;
}
public ExtractorConfig getConfig() {
@@ -412,7 +410,7 @@ public class FileExtractor {
return config.hasFileType() || FileType.forFile(f, config) != null;
}
public void extract(File f) throws IOException {
public void extract(File f, ExtractorState state) throws IOException {
// populate source archive
String source = new WholeIO(config.getDefaultEncoding()).strictread(f);
outputConfig.getSourceArchive().add(f, source);
@@ -424,7 +422,7 @@ public class FileExtractor {
locationManager.emitFileLocation(fileLabel, 0, 0, 0, 0);
// now extract the contents
extractContents(f, fileLabel, source, locationManager);
extractContents(f, fileLabel, source, locationManager, state);
}
@@ -440,7 +438,7 @@ public class FileExtractor {
*
* We only cache the content-dependent part, which makes up the bulk of the TRAP
* file anyway. The location-dependent part is emitted from scratch every time
* by the {@link #extract(File)} method above.
* by the {@link #extract(File, ExtractorState)} method above.
*
* In order to keep labels in the main part independent of the file's location,
* we bump the TRAP label counter to a known value (currently 20000) after the
@@ -451,10 +449,10 @@ public class FileExtractor {
* Also note that we support extraction with TRAP writer factories that are not file-backed;
* obviously, no caching is done in that scenario.
*/
private void extractContents(File f, Label fileLabel, String source, LocationManager locationManager) throws IOException {
private void extractContents(File f, Label fileLabel, String source, LocationManager locationManager,
ExtractorState state) throws IOException {
TrapWriter trapwriter = locationManager.getTrapWriter();
FileType fileType = config.hasFileType() ? FileType.valueOf(config.getFileType())
: FileType.forFile(f, config);
FileType fileType = getFileType(f);
File cacheFile = null, // the cache file for this extraction
resultFile = null; // the final result TRAP file for this extraction
@@ -502,6 +500,11 @@ public class FileExtractor {
}
}
/**
 * Determines the file type to use when extracting {@code f}.
 *
 * If the extractor configuration specifies a file type, that type is used; otherwise
 * the type is whatever {@link FileType#forFile} yields for the file under this
 * configuration.
 *
 * @param f the file whose type is wanted
 * @return the configured file type, or the result of {@code FileType.forFile(f, config)}
 */
public FileType getFileType(File f) {
  if (config.hasFileType()) {
    return FileType.valueOf(config.getFileType());
  }
  return FileType.forFile(f, config);
}
/**
* Bump trap ID counter to separate path-dependent and path-independent parts of the TRAP file.
*

View File

@@ -41,7 +41,7 @@ public class Main {
* such a way that it may produce different tuples for the same file under the same
* {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2019-14-01";
public static final String EXTRACTOR_VERSION = "2019-01-17";
public static final Pattern NEWLINE = Pattern.compile("\n");
@@ -125,7 +125,7 @@ public class Main {
ap.error(P_TRAP_CACHE_BOUND + " should only be specified together with " + P_TRAP_CACHE + ".");
trapCache = new DummyTrapCache();
}
fileExtractor = new FileExtractor(extractorConfig, extractorOutputConfig, trapCache, extractorState);
fileExtractor = new FileExtractor(extractorConfig, extractorOutputConfig, trapCache);
setupMatchers(ap);
@@ -211,7 +211,7 @@ public class Main {
}
long start = verboseLogStartTimer(ap, "Extracting " + f);
try {
fileExtractor.extract(f.getAbsoluteFile());
fileExtractor.extract(f.getAbsoluteFile(), extractorState);
verboseLogEndTimer(ap, start);
} catch (IOException e) {
throw new ResourceError("Extraction of " + f + " failed.", e);

View File

@@ -21,6 +21,7 @@ import org.junit.Before;
import org.junit.Test;
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
@@ -94,16 +95,16 @@ public class AutoBuildTests {
Set<String> actual = new LinkedHashSet<>();
new AutoBuild() {
@Override
protected void extract(FileExtractor extractor, Path file) {
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
actual.add(file.toString());
}
@Override
public void verifyTypeScriptInstallation() {
public void verifyTypeScriptInstallation(ExtractorState state) {
}
@Override
public void extractTypeScriptFiles(List<File> files, Set<Path> extractedFiles, FileExtractor extractor) throws IOException {
public void extractTypeScriptFiles(java.util.List<File> files, java.util.Set<Path> extractedFiles, FileExtractor extractor, ExtractorState extractorState) {
for (File f : files) {
actual.add(f.toString());
}

View File

@@ -175,33 +175,33 @@ public class JSDocParser {
throw new ParseError(message, null);
}
private static class TypeExpressionParser {
private enum Token {
ILLEGAL, // ILLEGAL
DOT, // .
DOT_LT, // .<
REST, // ...
LT, // <
GT, // >
LPAREN, // (
RPAREN, // )
LBRACE, // {
RBRACE, // }
LBRACK, // [
RBRACK, // ]
COMMA, // ,
COLON, // :
STAR, // *
PIPE, // |
QUESTION, // ?
BANG, // !
EQUAL, // =
NAME, // name token
STRING, // string
NUMBER, // number
EOF
};
/**
 * The kinds of token distinguished when reading a type expression.
 *
 * For punctuation tokens, the trailing comment shows the source text the constant
 * stands for.
 * NOTE(review): presumably these are produced by the scanner in TypeExpressionParser;
 * confirm against its scanning code.
 */
private enum Token {
ILLEGAL, // ILLEGAL (no valid token)
DOT, // .
DOT_LT, // .<
REST, // ...
LT, // <
GT, // >
LPAREN, // (
RPAREN, // )
LBRACE, // {
RBRACE, // }
LBRACK, // [
RBRACK, // ]
COMMA, // ,
COLON, // :
STAR, // *
PIPE, // |
QUESTION, // ?
BANG, // !
EQUAL, // =
NAME, // name token
STRING, // string
NUMBER, // number
EOF // end of input
};
private class TypeExpressionParser {
String source;
int length;
int previous, index;
@@ -1161,9 +1161,9 @@ public class JSDocParser {
return expr;
}
}
private static TypeExpressionParser typed = new TypeExpressionParser();
private TypeExpressionParser typed = new TypeExpressionParser();
private static class JSDocTagParser {
private class JSDocTagParser {
int index, lineNumber, lineStart, length;
String source;
boolean recoverable = true, sloppy = false;