Merge pull request #4291 from asgerf/js/lean-dependency-installation-plainjava

Approved by erik-krogh
This commit is contained in:
CodeQL CI
2020-10-08 03:09:38 -07:00
committed by GitHub
15 changed files with 901 additions and 150 deletions

View File

@@ -95,11 +95,85 @@ class State {
/** Next response to be delivered. */
public pendingResponse: string = null;
/** Map from `package.json` files to their contents. */
public parsedPackageJson = new Map<string, any>();
/** Map from `package.json` files to the file referenced in its `types` or `typings` field. */
public packageTypings = new Map<string, string | undefined>();
/** Map from file path to the enclosing `package.json` file, if any. Will not traverse outside node_modules. */
public enclosingPackageJson = new Map<string, string | undefined>();
}
// Global extractor state; among other things it holds the package.json caches used by the helpers below.
let state = new State();
// Memory threshold in megabytes, read from the SEMMLE_TYPESCRIPT_MEMORY_THRESHOLD environment variable.
// NOTE(review): the trailing 1000 looks like the default when the variable is unset, and the name suggests
// this controls when the process is reloaded — confirm against getEnvironmentVariable and the use site.
const reloadMemoryThresholdMb = getEnvironmentVariable("SEMMLE_TYPESCRIPT_MEMORY_THRESHOLD", Number, 1000);
/**
 * Returns the parsed contents of the given `package.json` file.
 *
 * The result of the first lookup is memoized in `state.parsedPackageJson`, including
 * the case where the file could not be read or parsed.
 */
function getPackageJson(file: string): any {
  const memo = state.parsedPackageJson;
  if (!memo.has(file)) {
    memo.set(file, getPackageJsonRaw(file));
  }
  return memo.get(file);
}
/**
 * Reads and parses the given `package.json` file without consulting any cache.
 *
 * Returns `undefined` if the file does not exist, cannot be parsed as JSON,
 * or parses to something whose `typeof` is not `'object'`.
 */
function getPackageJsonRaw(file: string): any {
  if (ts.sys.fileExists(file)) {
    try {
      const parsed = JSON.parse(ts.sys.readFile(file));
      return typeof parsed === 'object' ? parsed : undefined;
    } catch (e) {
      // Malformed JSON (or an unreadable file) is treated like a missing file.
      return undefined;
    }
  }
  return undefined;
}
/**
 * Returns the typings file referenced by the given `package.json` file, or `undefined` if there is none.
 *
 * Lookups are memoized in `state.packageTypings`, keyed by the `package.json` path.
 */
function getPackageTypings(file: string): string | undefined {
  const memo = state.packageTypings;
  if (!memo.has(file)) {
    memo.set(file, getPackageTypingsRaw(file));
  }
  return memo.get(file);
}
/**
 * Computes the typings file referenced by the `types` or `typings` field of the given
 * `package.json` file, without consulting the typings cache.
 *
 * The field value is resolved relative to the directory of the `package.json` file:
 * - if it names a directory, `index.d.ts` inside that directory is used;
 * - otherwise, if it does not end in `.ts`, the suffix `.d.ts` is appended.
 *
 * Returns the resolved path, or `undefined` if the field is absent, is not a string,
 * or the resulting file does not exist.
 */
function getPackageTypingsRaw(packageJsonFile: string): string | undefined {
  const manifest = getPackageJson(packageJsonFile);
  if (manifest == null) return undefined;

  const entry = manifest.types || manifest.typings; // "types" and "typings" are aliases
  if (typeof entry !== 'string') return undefined;

  const joined = pathlib.join(pathlib.dirname(packageJsonFile), entry);
  let candidate: string;
  if (ts.sys.directoryExists(joined)) {
    candidate = pathlib.join(joined, 'index.d.ts');
  } else if (joined.endsWith('.ts')) {
    candidate = joined;
  } else {
    candidate = joined + '.d.ts';
  }
  return ts.sys.fileExists(candidate) ? ts.sys.resolvePath(candidate) : undefined;
}
/**
 * Returns the path of the `package.json` file enclosing the given file or directory,
 * or `undefined` if there is none. Will not traverse outside `node_modules`.
 *
 * Lookups are memoized in `state.enclosingPackageJson`. This cache must be kept separate
 * from `state.packageTypings`: both maps are keyed by file paths but hold unrelated values,
 * so sharing one map would let a typings lookup answer an enclosing-package query (or vice versa).
 */
function getEnclosingPackageJson(file: string): string | undefined {
  let cache = state.enclosingPackageJson;
  if (cache.has(file)) return cache.get(file);
  let result = getEnclosingPackageJsonRaw(file);
  cache.set(file, result);
  return result;
}
/**
 * Finds the `package.json` file enclosing the given path by walking up the directory tree.
 *
 * The given path itself is checked first (as if it were a directory). The search gives up
 * once it reaches a directory named `node_modules` that has no `package.json` of its own,
 * or when the path has no shorter parent (the file system root).
 * Parent directories are looked up through the memoizing `getEnclosingPackageJson`.
 */
function getEnclosingPackageJsonRaw(file: string): string | undefined {
  const candidate = pathlib.join(file, 'package.json');
  if (ts.sys.fileExists(candidate)) {
    return candidate;
  }
  const parent = pathlib.dirname(file);
  const atBoundary = pathlib.basename(file) === 'node_modules' || parent.length >= file.length;
  return atBoundary ? undefined : getEnclosingPackageJson(parent);
}
/**
* Debugging method for finding cycles in the TypeScript AST. Should not be used in production.
*
@@ -505,14 +579,18 @@ function handleOpenProjectCommand(command: OpenProjectCommand) {
// inverse mapping, nor a way to enumerate all known module names. So we discover all
// modules on the type roots (usually "node_modules/@types" but this is configurable).
let typeRoots = ts.getEffectiveTypeRoots(config.options, {
directoryExists: (path) => fs.existsSync(path),
directoryExists: (path) => ts.sys.directoryExists(path),
getCurrentDirectory: () => basePath,
});
for (let typeRoot of typeRoots || []) {
if (fs.existsSync(typeRoot) && fs.statSync(typeRoot).isDirectory()) {
if (ts.sys.directoryExists(typeRoot)) {
traverseTypeRoot(typeRoot, "");
}
let virtualTypeRoot = virtualSourceRoot.toVirtualPathIfDirectoryExists(typeRoot);
if (virtualTypeRoot != null) {
traverseTypeRoot(virtualTypeRoot, "");
}
}
for (let sourceFile of program.getSourceFiles()) {
@@ -549,22 +627,25 @@ function handleOpenProjectCommand(command: OpenProjectCommand) {
if (sourceFile == null) {
continue;
}
addModuleBindingFromRelativePath(sourceFile, importPrefix, child);
let importPath = getImportPathFromFileInFolder(importPrefix, child);
addModuleBindingFromImportPath(sourceFile, importPath);
}
}
/**
 * Returns the import path for the file named `baseName` located in `folder`.
 *
 * A file whose stem is `index` is imported through the folder itself;
 * any other file is imported as the folder joined with its stem.
 */
function getImportPathFromFileInFolder(folder: string, baseName: string) {
  const stem = getStem(baseName);
  if (stem === "index") {
    return folder;
  }
  return joinModulePath(folder, stem);
}
/**
* Emits module bindings for a module with relative path `folder/baseName`.
*/
function addModuleBindingFromRelativePath(sourceFile: ts.SourceFile, folder: string, baseName: string) {
function addModuleBindingFromImportPath(sourceFile: ts.SourceFile, importPath: string) {
let symbol = typeChecker.getSymbolAtLocation(sourceFile);
if (symbol == null) return; // Happens if the source file is not a module.
let stem = getStem(baseName);
let importPath = (stem === "index")
? folder
: joinModulePath(folder, stem);
let canonicalSymbol = getEffectiveExportTarget(symbol); // Follow `export = X` declarations.
let symbolId = state.typeTable.getSymbolId(canonicalSymbol);
@@ -576,7 +657,7 @@ function handleOpenProjectCommand(command: OpenProjectCommand) {
// Note: the `globalExports` map is stored on the original symbol, not the target of `export=`.
if (symbol.globalExports != null) {
symbol.globalExports.forEach((global: ts.Symbol) => {
state.typeTable.addGlobalMapping(symbolId, global.name);
state.typeTable.addGlobalMapping(symbolId, global.name);
});
}
}
@@ -605,11 +686,30 @@ function handleOpenProjectCommand(command: OpenProjectCommand) {
let fullPath = sourceFile.fileName;
let index = fullPath.lastIndexOf('/node_modules/');
if (index === -1) return;
let relativePath = fullPath.substring(index + '/node_modules/'.length);
// Ignore node_modules/@types folders here as they are typically handled as type roots.
if (relativePath.startsWith("@types/")) return;
// If the enclosing package has a "typings" field, only add module bindings for that file.
let packageJsonFile = getEnclosingPackageJson(fullPath);
if (packageJsonFile != null) {
let json = getPackageJson(packageJsonFile);
let typings = getPackageTypings(packageJsonFile);
if (json != null && typings != null) {
let name = json.name;
if (typings === fullPath && typeof name === 'string') {
addModuleBindingFromImportPath(sourceFile, name);
} else if (typings != null) {
return; // Typings field prevents access to other files in package.
}
}
}
// Add module bindings relative to package directory.
let { dir, base } = pathlib.parse(relativePath);
addModuleBindingFromRelativePath(sourceFile, dir, base);
addModuleBindingFromImportPath(sourceFile, getImportPathFromFileInFolder(dir, base));
}
/**

View File

@@ -55,4 +55,15 @@ export class VirtualSourceRoot {
}
return null;
}
/**
 * Maps a path under the real source root to the corresponding path in the virtual source root,
 * but only if the mapped path is an existing directory.
 *
 * Returns `null` if `toVirtualPath` yields no path or if the mapped path is not a directory.
 */
public toVirtualPathIfDirectoryExists(path: string) {
  const candidate = this.toVirtualPath(path);
  if (candidate == null || !ts.sys.directoryExists(candidate)) {
    return null;
  }
  return candidate;
}
}

View File

@@ -0,0 +1,122 @@
package com.semmle.js.dependencies;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import java.util.function.Supplier;
import com.semmle.js.dependencies.packument.Packument;
/**
 * Asynchronous I/O operations needed for dependency installation.
 * <p>
 * This is the non-blocking counterpart of {@link Fetcher}: each public method returns a future
 * more or less immediately and schedules the underlying blocking work on the executor service
 * given to the constructor. Requests are cached where it makes sense, so repeated requests for
 * the same key share a single future.
 */
public class AsyncFetcher {
  private final Fetcher fetcher = new Fetcher();
  private final ExecutorService executor;
  private final Consumer<CompletionException> errorReporter;

  /**
   * @param executor thread pool to perform I/O tasks
   * @param errorReporter called once for each error from the underlying I/O tasks
   */
  public AsyncFetcher(ExecutorService executor, Consumer<CompletionException> errorReporter) {
    this.executor = executor;
    this.errorReporter = errorReporter;
  }

  /**
   * Wraps <code>cause</code> in a {@link CompletionException}, reports it, and throws it.
   * <p>
   * This method never returns normally; the return type only exists so that call sites can
   * write <code>throw makeError(...)</code> and satisfy the compiler's flow analysis.
   */
  private CompletionException makeError(String message, Exception cause) {
    CompletionException error = new CompletionException(message, cause);
    // Report here so that each exception is logged at most once, not once per consumer of the future.
    errorReporter.accept(error);
    throw error;
  }

  /** A per-key cache of futures; the first request for a key starts the operation on the executor. */
  private class CachedOperation<K, V> {
    private final Map<K, CompletableFuture<V>> cache = new LinkedHashMap<>();

    /**
     * Returns the future registered for <code>key</code>, starting <code>builder</code> on the
     * executor if no future has been registered for that key yet.
     */
    public synchronized CompletableFuture<V> get(K key, Supplier<V> builder) {
      CompletableFuture<V> existing = cache.get(key);
      if (existing != null) {
        return existing;
      }
      CompletableFuture<V> started = CompletableFuture.supplyAsync(() -> builder.get(), executor);
      cache.put(key, started);
      return started;
    }
  }

  private CachedOperation<String, Packument> packuments = new CachedOperation<>();

  /**
   * Returns a future that completes with the packument for the given package.
   * <p>
   * At most one fetch will be performed.
   */
  public CompletableFuture<Packument> getPackument(String packageName) {
    Supplier<Packument> fetchPackument = () -> {
      try {
        return fetcher.getPackument(packageName);
      } catch (IOException e) {
        throw makeError("Could not fetch packument for " + packageName, e);
      }
    };
    return packuments.get(packageName, fetchPackument);
  }

  /** Result of a tarball extraction */
  private static class ExtractionResult {
    /** The directory into which the tarball was extracted. */
    Path destDir;

    /** Files created by the extraction, relative to <code>destDir</code>. */
    List<Path> relativePaths;

    ExtractionResult(Path destDir, List<Path> relativePaths) {
      this.destDir = destDir;
      this.relativePaths = relativePaths;
    }
  }

  private CachedOperation<String, ExtractionResult> tarballExtractions = new CachedOperation<>();

  /**
   * Extracts the relevant contents of the given tarball URL in the given folder;
   * the returned future completes when done.
   * <p>
   * If the same tarball has already been extracted elsewhere, then symbolic links are added to
   * <code>destDir</code> linking to the already extracted tarball.
   */
  public CompletableFuture<Void> installFromTarballUrl(String tarballUrl, Path destDir) {
    Supplier<ExtractionResult> extract = () -> {
      try {
        return new ExtractionResult(destDir, fetcher.extractFromTarballUrl(tarballUrl, destDir));
      } catch (IOException e) {
        throw makeError("Could not install package from " + tarballUrl, e);
      }
    };
    return tarballExtractions
        .get(tarballUrl, extract)
        .thenAccept(extractionResult -> linkToEarlierExtraction(extractionResult, destDir));
  }

  /**
   * Populates <code>destDir</code> with symlinks to the files of an extraction that went to a
   * different directory. Does nothing if the extraction already targeted <code>destDir</code>.
   */
  private void linkToEarlierExtraction(ExtractionResult extractionResult, Path destDir) {
    if (extractionResult.destDir.equals(destDir)) {
      return; // The files were extracted straight into destDir.
    }
    // We've been asked to extract the same tarball into multiple directories (due to multiple package.json files).
    // Symlink files from the original directory instead of extracting again.
    // In principle we could symlink the whole directory, but directory symlinks are hard to create in a portable way.
    System.out.println("Creating symlink farm from " + destDir + " to " + extractionResult.destDir);
    for (Path relativePath : extractionResult.relativePaths) {
      Path originalFile = extractionResult.destDir.resolve(relativePath);
      Path newFile = destDir.resolve(relativePath);
      try {
        fetcher.mkdirp(newFile.getParent());
        Files.createSymbolicLink(newFile, originalFile);
      } catch (IOException e) {
        throw makeError("Failed to create symlink " + newFile + " -> " + originalFile, e);
      }
    }
  }
}

View File

@@ -0,0 +1,236 @@
package com.semmle.js.dependencies;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.gson.Gson;
import com.semmle.js.dependencies.packument.PackageJson;
import com.semmle.util.data.Pair;
public class DependencyResolver {
private AsyncFetcher fetcher;
private List<Constraint> constraints = new ArrayList<>();
/** Packages we don't try to install because it is part of the same monorepo. */
private Set<String> packagesInRepo;
private static class Constraint {
final PackageJson targetPackage;
final SemVer targetPackageVersion;
final PackageJson demandingPackage;
final int depth;
Constraint(PackageJson targetPackage, SemVer targetPackageVersion, PackageJson demandingPackage, int depth) {
this.targetPackage = targetPackage;
this.targetPackageVersion = targetPackageVersion;
this.demandingPackage = demandingPackage;
this.depth = depth;
}
String getTargetPackageName() {
return targetPackage.getName(); // Must exist as you can't depend on a package without a name
}
}
public DependencyResolver(AsyncFetcher fetcher, Set<String> packagesInRepo) {
this.fetcher = fetcher;
this.packagesInRepo = packagesInRepo;
}
private void addConstraint(Constraint constraint) {
synchronized(constraints) {
constraints.add(constraint);
}
}
// Matches either a version ("2.1.x" / "3.0", etc..), or a version constraint operator ("<", "||", "~", etc...).
private static final Pattern semVerToken = Pattern.compile("\\d+(?:\\.[\\dx]+)+(?:-[\\w.-]*)?|[~^<>=|&-]+");
/**
* Returns the first version number mentioned in the given constraints, excluding upper bounds such as `&lt; 2.0.0`,
* or `null` if no such version number was found.
* <p>
* To help ensure deterministic version resolution, we prefer the version mentioned in the constraint, rather than
* the latest version satisfying the constraint (as the latter can change in time).
*/
public static SemVer getPreferredVersionFromVersionSpec(String versionSpec) {
versionSpec = versionSpec.trim();
boolean isFirst = true;
Matcher m = semVerToken.matcher(versionSpec);
while (m.find()) {
if (isFirst && m.start() != 0) {
return null; // Not a version range
}
isFirst = false;
String text = m.group();
if (text.equals("<")) {
// Skip next token to ignore upper bound constraints like `< 2.0.0`.
if (!m.find()) break;
}
if (text.charAt(0) >= '0' && text.charAt(0) <= '9') {
SemVer semVer = SemVer.tryParse(text.replace("x", "0"));
if (semVer != null) {
return semVer;
}
}
}
return null;
}
/**
* Given a set of available versions, pick the oldest version no older than <code>preferredVersion</code>.
*/
private Pair<SemVer, PackageJson> getTargetVersion(Map<String, PackageJson> versions, SemVer preferredVersion) {
PackageJson result = versions.get(preferredVersion.toString());
if (result != null) return Pair.make(preferredVersion, result);
SemVer bestVersion = null;
for (Map.Entry<String, PackageJson> entry : versions.entrySet()) {
SemVer version = SemVer.tryParse(entry.getKey());
if (version == null) continue; // Could not parse version
if (version.compareTo(preferredVersion) < 0) continue; // Version is older than preferred version, ignore
if (bestVersion != null && bestVersion.compareTo(version) < 0) continue; // We already found an older version
bestVersion = version;
result = entry.getValue();
}
return Pair.make(bestVersion, result);
}
/**
* Fetches all packages and builds up the constraint system needed for resolving.
*/
private CompletableFuture<Void> fetchRelevantPackages(PackageJson pack, int depth) {
List<CompletableFuture<Void>> futures = new ArrayList<>();
List<Map<String, String>> dependencyMaps = depth == 0
? Arrays.asList(pack.getDependencies(), pack.getPeerDependencies(), pack.getDevDependencies())
: Arrays.asList(pack.getDependencies()); // for transitive dependencies, only follow explicit dependencies
for (Map<String, String> dependencies : dependencyMaps) {
if (dependencies == null) continue;
dependencies.forEach((targetName, targetVersions) -> {
if (packagesInRepo.contains(targetName)) {
return;
}
SemVer preferredVersion = getPreferredVersionFromVersionSpec(targetVersions);
System.out.println("Prefer " + preferredVersion + " from " + targetVersions);
if (preferredVersion == null) return;
futures.add(fetcher.getPackument(targetName).exceptionally(ex -> null).thenCompose(targetPackument -> {
if (targetPackument == null) {
return CompletableFuture.completedFuture(null);
}
Map<String, PackageJson> versions = targetPackument.getVersions();
if (versions == null) return CompletableFuture.completedFuture(null);
// Pick the matching version
Pair<SemVer, PackageJson> targetVersionAndPackage = getTargetVersion(versions, preferredVersion);
SemVer targetVersion = targetVersionAndPackage.fst();
PackageJson targetPackage = targetVersionAndPackage.snd();
if (targetPackage == null) return CompletableFuture.completedFuture(null);
if (targetName.startsWith("@types/")) {
// Deeply install dependencies in `@types`
addConstraint(new Constraint(targetPackage, targetVersion, pack, depth));
return fetchRelevantPackages(targetPackage, depth + 1);
} else if (dependencies != pack.getDevDependencies() && (targetPackage.getTypes() != null || targetPackage.getTypings() != null)) {
// If a non-dev dependency contains its own typings, do a shallow install of that package
addConstraint(new Constraint(targetPackage, targetVersion, pack, depth));
}
return CompletableFuture.completedFuture(null);
}));
});
}
return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]));
}
/**
* Resolves the dependencies of the given package in a deterministic way.
*/
private CompletableFuture<Map<String, PackageJson>> resolvePackages(PackageJson rootPackage) {
return fetchRelevantPackages(rootPackage, 0).thenApply(void_ -> {
// Compute the minimum depth from which each dependency is requested.
Map<String, Integer> packageDepth = new LinkedHashMap<>();
for (Constraint constraint : constraints) {
Integer currentDepth = packageDepth.get(constraint.getTargetPackageName());
if (currentDepth == null || currentDepth > constraint.depth) {
packageDepth.put(constraint.getTargetPackageName(), constraint.depth);
}
}
// We use a greedy solver: sort the constraints and then satisfy them eagerly in that order.
constraints.sort((c1, c2) -> {
int cmp;
cmp = Integer.compare(packageDepth.get(c1.getTargetPackageName()), packageDepth.get(c2.getTargetPackageName()));
if (cmp != 0) return cmp;
cmp = c1.getTargetPackageName().compareTo(c2.getTargetPackageName());
if (cmp != 0) return cmp;
// Pick the most recent version, so reverse-sort by package version.
cmp = -c1.targetPackageVersion.compareTo(c2.targetPackageVersion);
if (cmp != 0) return cmp;
return 0;
});
Map<String, PackageJson> selectedPackages = new LinkedHashMap<>();
for (Constraint constraint : constraints) {
if (selectedPackages.containsKey(constraint.getTargetPackageName())) {
// Too bad, we already picked a version for this package. Ignore the constraint.
continue;
}
if (constraint.demandingPackage != rootPackage) {
PackageJson selectedDemander = selectedPackages.get(constraint.demandingPackage.getName());
if (selectedDemander != null && selectedDemander != constraint.demandingPackage) {
// The constraint comes from a package version we already decided not to install (a different version was picked).
// There is no need to try to satisfy this constraint, so ignore it.
continue;
}
}
System.out.println("Picked " + constraint.getTargetPackageName() + "@" + constraint.targetPackageVersion);
selectedPackages.put(constraint.getTargetPackageName(), constraint.targetPackage);
}
return selectedPackages;
});
}
public CompletableFuture<Void> installDependencies(PackageJson rootPackage, Path nodeModulesDir) {
return resolvePackages(rootPackage).thenCompose(resolvedPackages -> {
List<CompletableFuture<Void>> futures = new ArrayList<>();
resolvedPackages.forEach((name, targetPackage) -> {
Path destinationDir = nodeModulesDir.resolve(Fetcher.toSafePath(name));
futures.add(fetcher.installFromTarballUrl(targetPackage.getDist().getTarball(), destinationDir));
});
return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]));
});
}
/** Entry point which installs dependencies from a given `package.json`, used for testing and benchmarking. */
public static void main(String[] args) throws IOException {
ExecutorService executors = Executors.newFixedThreadPool(50);
try {
DependencyResolver resolver = new DependencyResolver(new AsyncFetcher(executors, err -> { System.err.println(err); }), Collections.emptySet());
for (String packageJsonPath : args) {
Path path = Paths.get(packageJsonPath).toAbsolutePath();
PackageJson packageJson = new Gson().fromJson(Files.newBufferedReader(path), PackageJson.class);
resolver.installDependencies(packageJson, path.getParent().resolve("node_modules")).join();
}
System.out.println("Done");
} finally {
executors.shutdown();
}
}
}

View File

@@ -0,0 +1,149 @@
package com.semmle.js.dependencies;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import com.google.gson.Gson;
import com.semmle.js.dependencies.packument.Packument;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
/**
 * Synchronous I/O operations needed for dependency installation.
 * <p>
 * By design, the methods in this class are thread-safe, synchronous (blocking), and do not cache anything.
 * <p>
 * Should only be used through the {@link AsyncFetcher} class, which wraps this class with scheduling and caching.
 */
public class Fetcher {
  private final Object mkdirpLock = new Object();

  /** Creates the given directory and its parent directories. Only one thread is allowed to create directories at once. */
  public void mkdirp(Path dir) throws IOException {
    synchronized (mkdirpLock) {
      Files.createDirectories(dir);
    }
  }

  private static final Pattern validPackageName = Pattern.compile("(?:@[\\w.-]+\\/)?\\w[\\w.-]*");

  private boolean isValidPackageName(String name) {
    return validPackageName.matcher(name).matches();
  }

  /**
   * Converts the given string to a normalized relative path that does not escape its base directory.
   *
   * @return the normalized path, or <code>null</code> if the string is <code>null</code>, is not a
   *     valid path on this platform, is absolute, or starts with <code>..</code> after normalization
   */
  public static Path toSafePath(String string) {
    if (string == null) return null;
    Path path;
    try {
      path = Paths.get(string).normalize();
    } catch (java.nio.file.InvalidPathException e) {
      // Names containing characters the platform cannot represent are unsafe by definition.
      return null;
    }
    if (path.startsWith("..") || path.isAbsolute()) {
      return null;
    }
    return path;
  }

  /**
   * Submits a GET request to the given URL and returns an input stream with the response.
   * <p>
   * The response is transparently decompressed if the server reports a <code>gzip</code> content encoding.
   */
  private InputStream fetch(String url) throws IOException {
    URLConnection connection = new URL(url).openConnection();
    connection.setRequestProperty("Accept-Encoding", "gzip, identity, *");
    connection.setDoInput(true);
    connection.connect();
    InputStream input = connection.getInputStream();
    String encoding = connection.getContentEncoding();
    if ("gzip".equals(encoding)) {
      return new GzipCompressorInputStream(new BufferedInputStream(input));
    } else {
      return input;
    }
  }

  /**
   * Fetches the packument for the given package (containing all versions of the package.json).
   *
   * @throws IOException if the package name contains unexpected characters or the request fails
   */
  public Packument getPackument(String packageName) throws IOException {
    if (!isValidPackageName(packageName)) {
      throw new IOException("Package name contains unexpected characters:" + packageName);
    }
    System.out.println("Fetching package metadata for " + packageName);
    // JSON is UTF-8; decode explicitly rather than relying on the platform default charset.
    try (Reader reader = new BufferedReader(new InputStreamReader(
        fetch("https://registry.npmjs.org/" + packageName), java.nio.charset.StandardCharsets.UTF_8))) {
      return new Gson().fromJson(reader, Packument.class);
    }
  }

  /**
   * Extracts the package at the given tarball URL into the given directory.
   * <p>
   * Only `package.json` and `.d.ts` files are extracted.
   *
   * @return paths of the files created by this call, relative to <code>destDir</code>
   * @throws IOException if the URL has an unexpected format, or downloading or unpacking fails
   */
  public List<Path> extractFromTarballUrl(String tarballUrl, Path destDir) throws IOException {
    if (!tarballUrl.startsWith("https://registry.npmjs.org/") || !tarballUrl.endsWith(".tgz")) { // Paranoid check
      throw new IOException("Tarball URL has unexpected format: " + tarballUrl);
    }
    System.out.println("Unpacking " + tarballUrl + " to " + destDir);
    List<Path> relativePaths = new ArrayList<>();
    try (InputStream rawStream = new URL(tarballUrl).openStream()) {
      // Despite having the .tgz extension, the file is not always gzipped, sometimes it's just a raw tar archive,
      // regardless of what Accept-Encoding header we send.
      // Sniff the header to detect which is the case.
      // Note that the compression format has nothing to do with the Accept-Encoding/Content-Encoding headers,
      // so we can't reuse the code from fetch().
      PushbackInputStream pushback = new PushbackInputStream(rawStream, 2);
      int byte1 = pushback.read();
      int byte2 = byte1 == -1 ? -1 : pushback.read();
      // Only push back bytes that were actually read; unread(-1) would inject a bogus 0xFF byte.
      if (byte2 != -1) pushback.unread(byte2);
      if (byte1 != -1) pushback.unread(byte1);
      InputStream decompressedStream = (byte1 == 31 && byte2 == 139)
          ? new GzipCompressorInputStream(new BufferedInputStream(pushback))
          : pushback;
      TarArchiveInputStream stream = new TarArchiveInputStream(new BufferedInputStream(decompressedStream));
      TarArchiveEntry tarEntry;
      while ((tarEntry = stream.getNextTarEntry()) != null) {
        if (!stream.canReadEntryData(tarEntry)) {
          continue;
        }
        if (tarEntry.isDirectory()) {
          continue; // We create directories on demand.
        }
        Path entryPath = toSafePath(tarEntry.getName());
        if (entryPath == null) continue;

        // Strip off the leading folder name.
        // The entire package is inside a folder, but the name of that folder is unspecified and its name varies.
        if (entryPath.getNameCount() < 2) continue;
        entryPath = entryPath.subpath(1, entryPath.getNameCount());

        String filename = entryPath.getFileName().toString();
        if (!filename.endsWith(".d.ts") && !filename.equals("package.json")) {
          continue; // Only extract .d.ts files and package.json
        }
        relativePaths.add(entryPath);
        Path outputFile = destDir.resolve(entryPath);
        mkdirp(outputFile.getParent());
        try (OutputStream output = new BufferedOutputStream(Files.newOutputStream(outputFile))) {
          IOUtils.copy(stream, output);
        }
      }
    }
    return relativePaths;
  }
}

View File

@@ -0,0 +1,106 @@
package com.semmle.js.dependencies;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A semantic version number of the form <code>major.minor.patch[-preRelease]</code>.
 * <p>
 * The pre-release tag is stored without its leading hyphen and is the empty string for a
 * regular release. Build metadata (<code>+...</code>) is accepted by the parser but discarded.
 * <p>
 * Instances are ordered by semantic-version precedence: a pre-release sorts before the release
 * with the same numbers, and numeric pre-release identifiers are compared numerically.
 */
public class SemVer implements Comparable<SemVer> {
  private final int major, minor, patch;
  private final String preRelease;

  /**
   * @param preRelease the pre-release tag without its leading hyphen, e.g. <code>beta.1</code>;
   *     <code>null</code> is treated as the empty string (no pre-release tag)
   */
  public SemVer(int major, int minor, int patch, String preRelease) {
    this.major = major;
    this.minor = minor;
    this.patch = patch;
    if (preRelease == null) {
      preRelease = "";
    }
    this.preRelease = preRelease;
  }

  public int getMajor() {
    return major;
  }

  public int getMinor() {
    return minor;
  }

  public int getPatch() {
    return patch;
  }

  /** Returns the pre-release tag without its leading hyphen, or the empty string if there is none. */
  public String getPreRelease() {
    return preRelease;
  }

  // Group 4 captures the pre-release tag WITHOUT the hyphen, so that toString() can add it back exactly once.
  private static final Pattern pattern = Pattern.compile("(\\d+)(?:\\.(\\d+)(?:\\.(\\d+))?)?(?:-([0-9A-Za-z.-]*))?(\\+.*)?");

  /**
   * Parses the given version string; missing minor or patch numbers default to 0.
   *
   * @return the parsed version, or <code>null</code> if the string is <code>null</code>, is not a
   *     version number, or contains a number too large for an <code>int</code>
   */
  public static SemVer tryParse(String str) {
    if (str == null) {
      return null;
    }
    Matcher m = pattern.matcher(str);
    if (!m.matches()) {
      return null;
    }
    try {
      int major = Integer.parseInt(m.group(1));
      int minor = m.group(2) == null ? 0 : Integer.parseInt(m.group(2));
      int patch = m.group(3) == null ? 0 : Integer.parseInt(m.group(3));
      String preRelease = m.group(4);
      return new SemVer(major, minor, patch, preRelease);
    } catch (NumberFormatException e) {
      return null; // A component overflowed int; treat as unparseable rather than throwing.
    }
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + major;
    result = prime * result + minor;
    result = prime * result + patch;
    result = prime * result + preRelease.hashCode();
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (obj == null)
      return false;
    if (getClass() != obj.getClass())
      return false;
    SemVer other = (SemVer) obj;
    return major == other.major
        && minor == other.minor
        && patch == other.patch
        && preRelease.equals(other.preRelease);
  }

  /** Returns true if the identifier is non-empty and consists only of ASCII digits. */
  private static boolean isNumeric(String identifier) {
    if (identifier.isEmpty()) return false;
    for (int i = 0; i < identifier.length(); i++) {
      char c = identifier.charAt(i);
      if (c < '0' || c > '9') return false;
    }
    return true;
  }

  /** Compares two digit strings by numeric value without risking integer overflow. */
  private static int compareNumeric(String x, String y) {
    int i = 0, j = 0;
    while (i < x.length() - 1 && x.charAt(i) == '0') i++;
    while (j < y.length() - 1 && y.charAt(j) == '0') j++;
    String p = x.substring(i), q = y.substring(j);
    if (p.length() != q.length()) {
      return Integer.compare(p.length(), q.length());
    }
    return p.compareTo(q);
  }

  /** Compares two dot-separated pre-release identifiers: numeric ones sort numerically and before alphanumeric ones. */
  private static int compareIdentifiers(String x, String y) {
    boolean xNumeric = isNumeric(x), yNumeric = isNumeric(y);
    if (xNumeric && yNumeric) {
      return compareNumeric(x, y);
    }
    if (xNumeric != yNumeric) {
      return xNumeric ? -1 : 1;
    }
    return x.compareTo(y);
  }

  /** Compares two pre-release tags; the empty tag (a regular release) sorts after any non-empty tag. */
  private static int comparePreRelease(String a, String b) {
    if (a.equals(b)) return 0;
    if (a.isEmpty()) return 1;
    if (b.isEmpty()) return -1;
    String[] xs = a.split("\\.", -1);
    String[] ys = b.split("\\.", -1);
    int common = Math.min(xs.length, ys.length);
    for (int i = 0; i < common; i++) {
      int cmp = compareIdentifiers(xs[i], ys[i]);
      if (cmp != 0) return cmp;
    }
    if (xs.length != ys.length) {
      // With equal leading identifiers, the tag with more identifiers has higher precedence.
      return Integer.compare(xs.length, ys.length);
    }
    // Only reachable for tags differing in leading zeros; tie-break to stay consistent with equals().
    return a.compareTo(b);
  }

  @Override
  public int compareTo(SemVer other) {
    if (major != other.major) {
      return Integer.compare(major, other.major);
    }
    if (minor != other.minor) {
      return Integer.compare(minor, other.minor);
    }
    if (patch != other.patch) {
      return Integer.compare(patch, other.patch);
    }
    return comparePreRelease(preRelease, other.preRelease);
  }

  @Override
  public String toString() {
    return major + "." + minor + "." + patch + (preRelease.isEmpty() ? "" : "-" + preRelease);
  }
}

View File

@@ -0,0 +1,95 @@
package com.semmle.js.dependencies.packument;
import java.util.Map;
/**
 * Plain data holder for the parts of a <code>package.json</code> file that are relevant for
 * dependency installation.
 * <p>
 * The field names match the corresponding JSON property names and must not be renamed.
 */
public class PackageJson {
  /** The <code>dist</code> object of a package version; only the tarball URL is retained. */
  public static class Dist {
    private String tarball;

    public String getTarball() {
      return this.tarball;
    }
  }

  // Identity
  private String name;
  private String version;

  // Dependency maps, from package name to version range
  private Map<String, String> dependencies;
  private Map<String, String> devDependencies;
  private Map<String, String> peerDependencies;

  // Entry points; "types" and "typings" are kept as separate properties
  private String types;
  private String typings;
  private String main;

  // Distribution info
  private Dist dist;

  public String getName() {
    return this.name;
  }

  public void setName(String value) {
    name = value;
  }

  public String getVersion() {
    return this.version;
  }

  public void setVersion(String value) {
    version = value;
  }

  public Map<String, String> getDependencies() {
    return this.dependencies;
  }

  public void setDependencies(Map<String, String> value) {
    dependencies = value;
  }

  public Map<String, String> getDevDependencies() {
    return this.devDependencies;
  }

  public void setDevDependencies(Map<String, String> value) {
    devDependencies = value;
  }

  public Map<String, String> getPeerDependencies() {
    return this.peerDependencies;
  }

  public void setPeerDependencies(Map<String, String> value) {
    peerDependencies = value;
  }

  public String getTypes() {
    return this.types;
  }

  public void setTypes(String value) {
    types = value;
  }

  public String getTypings() {
    return this.typings;
  }

  public void setTypings(String value) {
    typings = value;
  }

  public String getMain() {
    return this.main;
  }

  public void setMain(String value) {
    main = value;
  }

  public Dist getDist() {
    return this.dist;
  }

  public void setDist(Dist value) {
    dist = value;
  }
}

View File

@@ -0,0 +1,30 @@
package com.semmle.js.dependencies.packument;
import java.util.Map;
/**
 * A package metadata object, informally known as a "packument".
 * <p>
 * Holds the package name together with a map of its versions, each described by a
 * {@link PackageJson}. The field names match the corresponding JSON property names.
 *
 * see https://github.com/npm/registry/blob/master/docs/REGISTRY-API.md#getpackage
 * see https://github.com/npm/registry/blob/master/docs/responses/package-metadata.md
 */
public class Packument {
  /** Name of the package. */
  private String name;

  /** The versions of the package, keyed by version string. */
  private Map<String, PackageJson> versions;

  public Map<String, PackageJson> getVersions() {
    return this.versions;
  }

  public void setVersions(Map<String, PackageJson> value) {
    versions = value;
  }

  public String getName() {
    return this.name;
  }

  public void setName(String value) {
    name = value;
  }
}

View File

@@ -1,11 +1,8 @@
package com.semmle.js.extractor;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.lang.ProcessBuilder.Redirect;
import java.net.URI;
import java.net.URISyntaxException;
@@ -35,19 +32,18 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;
import com.semmle.js.dependencies.AsyncFetcher;
import com.semmle.js.dependencies.DependencyResolver;
import com.semmle.js.dependencies.packument.PackageJson;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.js.extractor.trapcache.DefaultTrapCache;
import com.semmle.js.extractor.trapcache.DummyTrapCache;
import com.semmle.js.extractor.trapcache.ITrapCache;
import com.semmle.js.parser.ParsedProject;
import com.semmle.js.parser.TypeScriptParser;
import com.semmle.ts.extractor.TypeExtractor;
import com.semmle.ts.extractor.TypeScriptParser;
import com.semmle.ts.extractor.TypeTable;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
@@ -213,11 +209,10 @@ public class AutoBuild {
private volatile boolean seenCode = false;
private volatile boolean seenFiles = false;
private boolean installDependencies = false;
private int installDependenciesTimeout;
private final VirtualSourceRoot virtualSourceRoot;
private ExtractorState state;
/** The default timeout when running <code>yarn</code>, in milliseconds. */
/** The default timeout when installing dependencies, in milliseconds. */
public static final int INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT = 10 * 60 * 1000; // 10 minutes
public AutoBuild() {
@@ -229,10 +224,6 @@ public class AutoBuild {
getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.FULL);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
this.installDependencies = Boolean.valueOf(getEnvVar("LGTM_INDEX_TYPESCRIPT_INSTALL_DEPS"));
this.installDependenciesTimeout =
Env.systemEnv()
.getInt(
"LGTM_INDEX_TYPESCRIPT_INSTALL_DEPS_TIMEOUT", INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT);
this.virtualSourceRoot = makeVirtualSourceRoot();
setupFileTypes();
setupXmlMode();
@@ -690,28 +681,6 @@ public class AutoBuild {
return false;
}
/**
 * Returns true if yarn is installed, otherwise prints a warning and returns false.
 *
 * <p>Runs <code>yarn -v</code> and waits at most <code>installDependenciesTimeout</code>
 * milliseconds for it to finish. On success the first line of its standard output is
 * printed as the detected version.
 *
 * <p>The child process is forcibly terminated if it is still running when this method
 * returns (timeout or interruption), so a hanging <code>yarn</code> is not left behind.
 */
private boolean verifyYarnInstallation() {
  ProcessBuilder pb = new ProcessBuilder(Arrays.asList("yarn", "-v"));
  Process process = null;
  try {
    process = pb.start();
    boolean completed = process.waitFor(this.installDependenciesTimeout, TimeUnit.MILLISECONDS);
    if (!completed) {
      System.err.println("Yarn could not be launched. Timeout during 'yarn -v'.");
      return false;
    }
    // Close the reader (and with it the process's stdout pipe) once the version line is read.
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(process.getInputStream()))) {
      String version = reader.readLine();
      System.out.println("Found yarn version: " + version);
    }
    return true;
  } catch (IOException | InterruptedException ex) {
    System.err.println(
        "Yarn not found. Please put 'yarn' on the PATH for automatic dependency installation.");
    Exceptions.ignore(ex, "Continue without dependency installation");
    return false;
  } finally {
    // Do not leak a child process that timed out or was abandoned due to interruption.
    if (process != null && process.isAlive()) {
      process.destroyForcibly();
    }
  }
}
/**
* Returns an existing file named <code>dir/stem.ext</code> where <code>.ext</code> is any
* of the given extensions, or <code>null</code> if no such file exists.
@@ -736,17 +705,6 @@ public class AutoBuild {
return tryResolveWithExtensions(dir, stem, FileType.JS.getExtensions());
}
/**
 * Looks up the member <code>name</code> of <code>obj</code> and returns its value when that
 * member is a JSON string primitive; returns <code>null</code> in every other case
 * (member absent, or not a string).
 */
private String getChildAsString(JsonObject obj, String name) {
  JsonElement member = obj.get(name);
  if (!(member instanceof JsonPrimitive)) {
    return null;
  }
  JsonPrimitive primitive = (JsonPrimitive) member;
  return primitive.isString() ? primitive.getAsString() : null;
}
/**
* Gets a relative path from <code>from</code> to <code>to</code> provided
* the latter is contained in the former. Otherwise returns <code>null</code>.
@@ -769,11 +727,8 @@ public class AutoBuild {
* <p>
* Downloaded packages are installed under <tt>SCRATCH_DIR</tt>, in a mirrored directory hierarchy
* we call the "virtual source root".
* Each <tt>package.json</tt> file is rewritten and copied to the virtual source root,
* where <tt>yarn install</tt> is invoked.
* <p>
* Packages that exists within the repo are stripped from the dependencies
* before installation, so they are not downloaded. Since they are part of the main source tree,
* Packages that exist within the repo are not downloaded. Since they are part of the main source tree,
* these packages are not mirrored under the virtual source root.
* Instead, an explicit package location mapping is passed to the TypeScript parser wrapper.
* <p>
@@ -784,23 +739,20 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
final Path sourceRoot = LGTM_SRC;
// Read all package.json files and index them by name.
Map<Path, JsonObject> packageJsonFiles = new LinkedHashMap<>();
Map<Path, PackageJson> packageJsonFiles = new LinkedHashMap<>();
Map<String, Path> packagesInRepo = new LinkedHashMap<>();
Map<String, Path> packageMainFile = new LinkedHashMap<>();
for (Path file : filesToExtract) {
if (file.getFileName().toString().equals("package.json")) {
try {
String text = new WholeIO().read(file);
JsonElement json = new JsonParser().parse(text);
if (!(json instanceof JsonObject)) continue;
JsonObject jsonObject = (JsonObject) json;
PackageJson packageJson = new Gson().fromJson(new WholeIO().read(file), PackageJson.class);
file = file.toAbsolutePath();
if (tryRelativize(sourceRoot, file) == null) {
continue; // Ignore package.json files outside the source root.
}
packageJsonFiles.put(file, jsonObject);
packageJsonFiles.put(file, packageJson);
String name = getChildAsString(jsonObject, "name");
String name = packageJson.getName();
if (name != null) {
packagesInRepo.put(name, file);
}
@@ -812,45 +764,12 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
}
}
// Process all package.json files now that we know the names of all local packages.
// - remove dependencies on local packages
// - guess the main file for each package
// Note that we ignore optional dependencies during installation, so "optionalDependencies"
// is ignored here as well.
final List<String> dependencyFields =
Arrays.asList("dependencies", "devDependencies", "peerDependencies");
// Guess the main file for each package.
packageJsonFiles.forEach(
(path, packageJson) -> {
(path, packageJson) -> {
Path relativePath = sourceRoot.relativize(path);
for (String dependencyField : dependencyFields) {
JsonElement dependencyElm = packageJson.get(dependencyField);
if (!(dependencyElm instanceof JsonObject)) continue;
JsonObject dependencyObj = (JsonObject) dependencyElm;
List<String> propsToRemove = new ArrayList<>();
for (String packageName : dependencyObj.keySet()) {
if (packagesInRepo.containsKey(packageName)) {
// Remove dependency on local package
propsToRemove.add(packageName);
} else {
// Remove file dependency on a package that doesn't exist in the checkout.
String dependency = getChildAsString(dependencyObj, packageName);
if (dependency != null && (dependency.startsWith("file:") || dependency.startsWith("./") || dependency.startsWith("../"))) {
if (dependency.startsWith("file:")) {
dependency = dependency.substring("file:".length());
}
Path resolvedPackage = path.getParent().resolve(dependency + "/package.json");
if (!Files.exists(resolvedPackage)) {
propsToRemove.add(packageName);
}
}
}
}
for (String prop : propsToRemove) {
dependencyObj.remove(prop);
}
}
// For named packages, find the main file.
String name = getChildAsString(packageJson, "name");
String name = packageJson.getName();
if (name != null) {
Path entryPoint = guessPackageMainFile(path, packageJson, FileType.TYPESCRIPT.getExtensions());
if (entryPoint == null) {
@@ -866,45 +785,25 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
}
});
// Write the new package.json files to disk
for (Path file : packageJsonFiles.keySet()) {
Path virtualFile = virtualSourceRoot.toVirtualFile(file);
if (installDependencies) {
// Use more threads for dependency installation than for extraction, as this is mainly I/O bound and we want
// many concurrent HTTP requests.
ExecutorService installationThreadPool = Executors.newFixedThreadPool(50);
AsyncFetcher fetcher = new AsyncFetcher(installationThreadPool, err -> { System.err.println(err); });
try {
Files.createDirectories(virtualFile.getParent());
try (Writer writer = Files.newBufferedWriter(virtualFile)) {
new Gson().toJson(packageJsonFiles.get(file), writer);
}
} catch (IOException e) {
throw new ResourceError("Could not rewrite package.json file: " + virtualFile, e);
}
}
// Install dependencies
if (this.installDependencies && verifyYarnInstallation()) {
for (Path file : packageJsonFiles.keySet()) {
Path virtualFile = virtualSourceRoot.toVirtualFile(file);
System.out.println("Installing dependencies from " + virtualFile);
ProcessBuilder pb =
new ProcessBuilder(
Arrays.asList(
"yarn",
"install",
"--non-interactive",
"--ignore-scripts",
"--ignore-platform",
"--ignore-engines",
"--ignore-optional",
"--no-default-rc",
"--no-bin-links",
"--pure-lockfile"));
pb.directory(virtualFile.getParent().toFile());
pb.redirectOutput(Redirect.INHERIT);
pb.redirectError(Redirect.INHERIT);
List<CompletableFuture<Void>> futures = new ArrayList<>();
packageJsonFiles.forEach((file, packageJson) -> {
Path virtualFile = virtualSourceRoot.toVirtualFile(file);
Path nodeModulesDir = virtualFile.getParent().resolve("node_modules");
futures.add(new DependencyResolver(fetcher, packagesInRepo.keySet()).installDependencies(packageJson, nodeModulesDir));
});
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
} finally {
installationThreadPool.shutdown();
try {
pb.start().waitFor(this.installDependenciesTimeout, TimeUnit.MILLISECONDS);
} catch (IOException | InterruptedException ex) {
throw new ResourceError("Could not install dependencies from " + file, ex);
installationThreadPool.awaitTermination(1, TimeUnit.HOURS);
} catch (InterruptedException e) {
Exceptions.ignore(e, "Awaiting termination is not essential.");
}
}
}
@@ -917,7 +816,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
* given package - that is, the file you get when importing the package by name
* without any path suffix.
*/
private Path guessPackageMainFile(Path packageJsonFile, JsonObject packageJson, Iterable<String> extensions) {
private Path guessPackageMainFile(Path packageJsonFile, PackageJson packageJson, Iterable<String> extensions) {
Path packageDir = packageJsonFile.getParent();
// Try <package_dir>/index.ts.
@@ -929,7 +828,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
// Get the "main" property from the package.json
// This usually refers to the compiled output, such as `./out/foo.js` but may hint as to
// the name of main file ("foo" in this case).
String mainStr = getChildAsString(packageJson, "main");
String mainStr = packageJson.getMain();
// Look for source files `./src` if it exists
Path sourceDir = packageDir.resolve("src");

View File

@@ -6,7 +6,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.Optional;
import com.semmle.js.parser.TypeScriptParser;
import com.semmle.ts.extractor.TypeScriptParser;
/**
* Contains the state to be shared between extractions of different files.

View File

@@ -19,8 +19,8 @@ import com.semmle.js.extractor.trapcache.DefaultTrapCache;
import com.semmle.js.extractor.trapcache.DummyTrapCache;
import com.semmle.js.extractor.trapcache.ITrapCache;
import com.semmle.js.parser.ParsedProject;
import com.semmle.js.parser.TypeScriptParser;
import com.semmle.ts.extractor.TypeExtractor;
import com.semmle.ts.extractor.TypeScriptParser;
import com.semmle.ts.extractor.TypeTable;
import com.semmle.util.data.StringUtil;
import com.semmle.util.data.UnitParser;

View File

@@ -5,6 +5,7 @@ import java.io.File;
import com.semmle.js.extractor.ExtractorConfig.ECMAVersion;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import com.semmle.js.parser.JSParser.Result;
import com.semmle.ts.extractor.TypeScriptParser;
import com.semmle.js.parser.ParseError;
public class TypeScriptExtractor implements IExtractor {

View File

@@ -1,4 +1,4 @@
package com.semmle.js.parser;
package com.semmle.ts.extractor;
import java.util.ArrayList;
import java.util.Collections;
@@ -113,6 +113,7 @@ import com.semmle.js.ast.jsx.JSXOpeningElement;
import com.semmle.js.ast.jsx.JSXSpreadAttribute;
import com.semmle.js.ast.jsx.JSXThisExpr;
import com.semmle.js.parser.JSParser.Result;
import com.semmle.js.parser.ParseError;
import com.semmle.ts.ast.ArrayTypeExpr;
import com.semmle.ts.ast.ConditionalTypeExpr;
import com.semmle.ts.ast.DecoratorList;

View File

@@ -1,4 +1,4 @@
package com.semmle.js.parser;
package com.semmle.ts.extractor;
import java.io.BufferedReader;
import java.io.BufferedWriter;
@@ -32,8 +32,9 @@ import com.semmle.js.extractor.DependencyInstallationResult;
import com.semmle.js.extractor.EnvironmentVariables;
import com.semmle.js.extractor.ExtractionMetrics;
import com.semmle.js.extractor.VirtualSourceRoot;
import com.semmle.js.parser.JSParser;
import com.semmle.js.parser.ParsedProject;
import com.semmle.js.parser.JSParser.Result;
import com.semmle.ts.extractor.TypeTable;
import com.semmle.util.data.StringUtil;
import com.semmle.util.data.UnitParser;
import com.semmle.util.exception.CatastrophicError;

View File

@@ -1,4 +1,4 @@
package com.semmle.js.parser;
package com.semmle.ts.extractor;
import java.util.LinkedHashMap;
import java.util.Map;