JS: Share extraction results via symlinks

This commit is contained in:
Asger Feldthaus
2020-09-17 14:42:56 +01:00
parent c84e43d95b
commit 57a588ceb6
4 changed files with 55 additions and 18 deletions

View File

@@ -1,8 +1,10 @@
package com.semmle.js.dependencies;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
@@ -68,17 +70,51 @@ public class AsyncFetcher {
});
}
/** Result of a tarball extraction */
class ExtractionResult {
/** The directory into which the tarball was extracted. */
Path destDir;
/** Files created by the extraction, relative to <code>destDir</code>. */
List<Path> relativePaths;
ExtractionResult(Path destDir, List<Path> relativePaths) {
this.destDir = destDir;
this.relativePaths = relativePaths;
}
}
private CachedOperation<String, ExtractionResult> tarballExtractions = new CachedOperation<>();
/**
 * Extracts the relevant contents of the given tarball URL into the given folder;
 * the returned future completes when the extraction is done.
 */
public CompletableFuture<Void> installFromTarballUrl(String tarballUrl, Path destDir) {
return CompletableFuture.runAsync(() -> {
return tarballExtractions.get(tarballUrl, () -> {
try {
fetcher.extractFromTarballUrl(tarballUrl, destDir);
List<Path> relativePaths = fetcher.extractFromTarballUrl(tarballUrl, destDir);
return new ExtractionResult(destDir, relativePaths);
} catch (IOException e) {
throw makeError("Could not install package from " + tarballUrl, e);
}
}, executor);
}).thenAccept(extractionResult -> {
if (!extractionResult.destDir.equals(destDir)) {
// We've been asked to extract the same tarball into multiple directories (due to multiple package.json files).
// Symlink files from the original directory instead of extracting again.
// In principle we could symlink the whole directory, but directory symlinks are hard to create in a portable way.
System.out.println("Creating symlink farm from " + destDir + " to " + extractionResult.destDir);
for (Path relativePath : extractionResult.relativePaths) {
Path originalFile = extractionResult.destDir.resolve(relativePath);
Path newFile = destDir.resolve(relativePath);
try {
fetcher.mkdirp(newFile.getParent());
Files.createSymbolicLink(newFile, originalFile);
} catch (IOException e) {
throw makeError("Failed to create symlink " + newFile + " -> " + originalFile, e);
}
}
}
});
}
}

View File

@@ -12,7 +12,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -45,15 +44,11 @@ public class DependencyResolver {
}
}
public DependencyResolver(ExecutorService threadPool, Set<String> packagesInRepo) {
this.fetcher = new AsyncFetcher(threadPool, this::reportError);
public DependencyResolver(AsyncFetcher fetcher, Set<String> packagesInRepo) {
this.fetcher = fetcher;
this.packagesInRepo = packagesInRepo;
}
private void reportError(CompletionException ex) {
System.err.println(ex);
}
private void addConstraint(Constraint constraint) {
synchronized(constraints) {
constraints.add(constraint);
@@ -207,7 +202,7 @@ public class DependencyResolver {
public static void main(String[] args) throws IOException {
ExecutorService executors = Executors.newFixedThreadPool(50);
try {
DependencyResolver resolver = new DependencyResolver(executors, Collections.emptySet());
DependencyResolver resolver = new DependencyResolver(new AsyncFetcher(executors, err -> { System.err.println(err); }), Collections.emptySet());
for (String packageJsonPath : args) {
Path path = Paths.get(packageJsonPath).toAbsolutePath();
PackageJson packageJson = new Gson().fromJson(Files.newBufferedReader(path), PackageJson.class);

View File

@@ -14,6 +14,8 @@ import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import com.google.gson.Gson;
@@ -35,7 +37,7 @@ public class Fetcher {
private Object mkdirpLock = new Object();
/** Creates the given directory and its parent directories. Only one thread is allowed to create directories at once. */
private void mkdirp(Path dir) throws IOException {
public void mkdirp(Path dir) throws IOException {
synchronized (mkdirpLock) {
Files.createDirectories(dir);
}
@@ -88,14 +90,17 @@ public class Fetcher {
/**
* Extracts the package at the given tarball URL into the given directory.
*
* <p>
* Only `package.json` and `.d.ts` files are extracted.
*
* @return paths of the files created by this call, relative to <code>destDir</code>
*/
public void extractFromTarballUrl(String tarballUrl, Path destDir) throws IOException {
public List<Path> extractFromTarballUrl(String tarballUrl, Path destDir) throws IOException {
if (!tarballUrl.startsWith("https://registry.npmjs.org/") || !tarballUrl.endsWith(".tgz")) { // Paranoid check
throw new IOException("Tarball URL has unexpected format: " + tarballUrl);
}
System.out.println("Unpacking " + tarballUrl + " to " + destDir);
List<Path> relativePaths = new ArrayList<>();
try (InputStream rawStream = new URL(tarballUrl).openStream()) {
// Despite having the .tgz extension, the file is not always gzipped, sometimes it's just a raw tar archive,
// regardless of what Accept-Encoding header we send.
@@ -129,6 +134,7 @@ public class Fetcher {
if (!filename.endsWith(".d.ts") && !filename.equals("package.json")) {
continue; // Only extract .d.ts files and package.json
}
relativePaths.add(entryPath);
Path outputFile = destDir.resolve(entryPath);
mkdirp(outputFile.getParent());
try (OutputStream output = new BufferedOutputStream(Files.newOutputStream(outputFile))) {
@@ -136,5 +142,6 @@ public class Fetcher {
}
}
}
return relativePaths;
}
}

View File

@@ -32,10 +32,8 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonPrimitive;
import com.semmle.js.dependencies.AsyncFetcher;
import com.semmle.js.dependencies.DependencyResolver;
import com.semmle.js.dependencies.packument.PackageJson;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
@@ -791,12 +789,13 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
// Use more threads for dependency installation than for extraction, as this is mainly I/O bound and we want
// many concurrent HTTP requests.
ExecutorService installationThreadPool = Executors.newFixedThreadPool(50);
AsyncFetcher fetcher = new AsyncFetcher(installationThreadPool, err -> { System.err.println(err); });
try {
List<CompletableFuture<Void>> futures = new ArrayList<>();
packageJsonFiles.forEach((file, packageJson) -> {
Path virtualFile = virtualSourceRoot.toVirtualFile(file);
Path nodeModulesDir = virtualFile.getParent().resolve("node_modules");
futures.add(new DependencyResolver(installationThreadPool, packagesInRepo.keySet()).installDependencies(packageJson, nodeModulesDir));
futures.add(new DependencyResolver(fetcher, packagesInRepo.keySet()).installDependencies(packageJson, nodeModulesDir));
});
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
} finally {