External class extraction prototype

This commit is contained in:
Chris Smowton
2021-09-30 12:27:48 +01:00
committed by Ian Lynagh
parent e9b249855b
commit 2cc003ff0e
51 changed files with 11119 additions and 41 deletions

View File

@@ -0,0 +1,534 @@
package com.semmle.extractor.java;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import com.github.codeql.Logger;
import com.github.codeql.Severity;
import static com.github.codeql.ClassNamesKt.getIrClassBinaryPath;
import org.jetbrains.kotlin.ir.declarations.IrClass;
import com.semmle.util.concurrent.LockDirectory;
import com.semmle.util.concurrent.LockDirectory.LockingMode;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.NestedError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.extraction.PopulationSpecFile;
import com.semmle.util.extraction.SpecFileEntry;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.WholeIO;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
import com.semmle.util.trap.dependencies.TrapDependencies;
import com.semmle.util.trap.dependencies.TrapSet;
import com.semmle.util.trap.pathtransformers.PathTransformer;
public class OdasaOutput {
// Output locations. Exactly one of the two configurations below is in effect:
// either these two folders are set (and specFile is null) ...
private final File trapFolder;
private final File sourceArchiveFolder;
// ... or this one is set (and both folders above are null)
private final PopulationSpecFile specFile;
// Per-source-file state; all four are (re)initialised by setCurrentSourceFile.
private File currentSourceFile;
private TrapSet trapsCreated;
// NOTE(review): only ever assigned null within this class (see setCurrentSourceFile).
private TrapDependencies trapDependenciesForSource;
// Output paths for currentSourceFile as computed by entryFor(); several callers treat null as "do not populate".
private SpecFileEntry currentSpecFileEntry;
// should origin tracking be used?
private final boolean trackClassOrigins;
private final Logger log;
/**
 * DEBUG only: place all output beneath a single root directory, with TRAP files in
 * {@code <outputRoot>/trap} and the source archive in {@code <outputRoot>/src_archive}.
 * No spec file is used and class-origin tracking is switched off.
 *
 * @param outputRoot directory under which the two output folders are resolved
 * @param log logger used for diagnostics
 */
OdasaOutput(File outputRoot, Logger log) {
    this.log = log;
    this.trackClassOrigins = false;
    this.specFile = null;
    this.sourceArchiveFolder = new File(outputRoot, "src_archive");
    this.trapFolder = new File(outputRoot, "trap");
}
/**
 * Configure the output locations from the process environment.
 *
 * If a TRAP folder variable is set, a source archive variable must be set too, and the two
 * folders are used directly. Otherwise the layout (spec) file variable must be set, and output
 * locations are looked up per source file from that spec file.
 *
 * @param trackClassOrigins whether class origin tracking should be used
 * @param log logger used for diagnostics
 * @throws ResourceError if the environment does not describe a usable configuration
 */
public OdasaOutput(boolean trackClassOrigins, Logger log) {
    String trapDir = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_TRAP_DIR", Var.TRAP_FOLDER.name());
    if (trapDir == null) {
        // No explicit folders: fall back to the layout file.
        String layoutPath = Env.systemEnv().get(Var.ODASA_JAVA_LAYOUT);
        if (layoutPath == null) {
            throw new ResourceError("Neither " + Var.TRAP_FOLDER + " nor " + Var.ODASA_JAVA_LAYOUT + " was set");
        }
        this.trapFolder = null;
        this.sourceArchiveFolder = null;
        this.specFile = new PopulationSpecFile(new File(layoutPath));
    } else {
        String archiveDir = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR", Var.SOURCE_ARCHIVE.name());
        if (archiveDir == null) {
            throw new ResourceError(Var.TRAP_FOLDER + " was set to '" + trapDir + "', but "
                    + Var.SOURCE_ARCHIVE + " was not set");
        }
        this.trapFolder = new File(trapDir);
        this.sourceArchiveFolder = new File(archiveDir);
        this.specFile = null;
    }
    this.log = log;
    this.trackClassOrigins = trackClassOrigins;
}
/**
* @return the fixed TRAP folder, or {@code null} when this instance was configured from a
* spec file rather than from explicit folders.
*/
public File getTrapFolder() {
return trapFolder;
}
/** @return whether class origin tracking was requested when this instance was constructed. */
public boolean getTrackClassOrigins() {
return trackClassOrigins;
}
/**
 * Select the source file being processed from now on. The choice stays in force until the
 * next call and determines the spec entry (and hence the trap and source archive
 * directories) used by the other methods of this class.
 *
 * As part of the switch a fresh {@link TrapSet} is started, seeded with the database path
 * of the file, and the per-source dependency record is cleared.
 *
 * @param f the current source file
 */
public void setCurrentSourceFile(File f) {
    currentSourceFile = f;
    currentSpecFileEntry = entryFor();
    TrapSet freshSet = new TrapSet();
    trapsCreated = freshSet;
    String databasePath = PathTransformer.std().fileAsDatabaseString(f);
    freshSet.addSource(databasePath);
    trapDependenciesForSource = null;
}
/**
 * Work out the output paths for the current source file.
 *
 * With a spec file the answer is whatever the spec file reports for the file (which may be
 * null if it shouldn't be included); without one, an entry is built from the fixed trap and
 * source archive folders.
 */
private SpecFileEntry entryFor() {
    if (specFile == null) {
        String databasePath = PathTransformer.std().fileAsDatabaseString(currentSourceFile);
        return new SpecFileEntry(trapFolder, sourceArchiveFolder, Arrays.asList(databasePath));
    }
    return specFile.getEntryFor(currentSourceFile);
}
/*
* Trap sets and dependencies.
*/
/** Save the trap set accumulated for the current source file to its {@code .set} file. */
private void writeTrapSet() {
    File setFile = trapSetFor(currentSourceFile);
    trapsCreated.save(setFile.toPath());
}
/**
 * Location of the {@code .set} file for {@code file}: the file's database path with
 * {@code ".set"} appended, placed beneath the current entry's trap folder.
 */
private File trapSetFor(File file) {
    String setPath = PathTransformer.std().fileAsDatabaseString(file) + ".set";
    return FileUtil.appendAbsolutePath(currentSpecFileEntry.getTrapFolder(), setPath);
}
/**
* Record that the current source file depends on the members trap file of {@code sym}.
*
* NOTE(review): within this class {@code trapDependenciesForSource} is only ever assigned
* null (in setCurrentSourceFile), so as written this call dereferences null -- confirm where
* the per-source dependency record is meant to be created.
*
* @param sym the class depended upon
*/
public void addDependency(IrClass sym) {
String path = trapFilePathForClass(sym);
trapDependenciesForSource.addDependency(path);
}
/*
* Source archive.
*/
/**
 * Store {@code contents} as the source archive copy of the current source file. Nothing is
 * written when there is no spec entry for the current file or the entry has no source
 * archive path. Missing parent directories are created first.
 *
 * @param contents the text of the current source file
 */
public void writeCurrentSourceFileToSourceArchive(String contents) {
    if (currentSpecFileEntry == null)
        return;
    if (currentSpecFileEntry.getSourceArchivePath() == null)
        return;
    File archiveCopy = sourceArchiveFileFor(currentSourceFile);
    archiveCopy.getParentFile().mkdirs();
    new WholeIO().write(archiveCopy, contents);
}
/**
 * Copy {@code srcFile} into the source archive belonging to the current source file's spec
 * entry, creating missing parent directories first. Nothing is written when there is no
 * spec entry for the current file or the entry has no source archive path.
 *
 * @param srcFile the file whose contents should be archived
 */
public void writeFileToSourceArchive(File srcFile) {
    // Same guard as writeCurrentSourceFileToSourceArchive: without it, sourceArchiveFileFor
    // dereferences a null currentSpecFileEntry for files that are not being populated.
    if (currentSpecFileEntry == null || currentSpecFileEntry.getSourceArchivePath() == null)
        return;
    File target = sourceArchiveFileFor(srcFile);
    target.getParentFile().mkdirs();
    String contents = new WholeIO().strictread(srcFile);
    new WholeIO().write(target, contents);
}
/**
 * Location of the archived copy of {@code file}: its database path placed beneath the
 * current entry's source archive path.
 */
private File sourceArchiveFileFor(File file) {
    String databasePath = PathTransformer.std().fileAsDatabaseString(file);
    return FileUtil.appendAbsolutePath(currentSpecFileEntry.getSourceArchivePath(), databasePath);
}
/*
* Trap file names and paths.
*/
// Path prefixes (relative to the trap folder) under which trap files for classes,
// jar files and modules respectively are placed.
private static final String CLASSES_DIR = "classes";
private static final String JARS_DIR = "jars";
private static final String MODULES_DIR = "modules";
/** The trap file for the current source file, or null when there is no spec entry for it. */
private File getTrapFileForCurrentSourceFile() {
    return currentSpecFileEntry == null ? null : trapFileFor(currentSourceFile);
}
/**
 * The trap file for a jar: {@code jars/<database path of jar>.trap.gz} beneath the current
 * entry's trap folder. Returns null when the absolute path does not end in {@code .jar}.
 */
private File getTrapFileForJarFile(File jarFile) {
    boolean looksLikeJar = jarFile.getAbsolutePath().endsWith(".jar");
    if (looksLikeJar) {
        String relativeTrap = JARS_DIR + "/" + PathTransformer.std().fileAsDatabaseString(jarFile) + ".trap.gz";
        return FileUtil.appendAbsolutePath(currentSpecFileEntry.getTrapFolder(), relativeTrap);
    }
    return null;
}
/** The trap file for a module: {@code modules/<moduleName>.trap.gz} beneath the current entry's trap folder. */
private File getTrapFileForModule(String moduleName) {
    String relativeTrap = MODULES_DIR + "/" + moduleName + ".trap.gz";
    return FileUtil.appendAbsolutePath(currentSpecFileEntry.getTrapFolder(), relativeTrap);
}
/** The trap file for {@code file}: its database path plus {@code ".trap.gz"}, beneath the current entry's trap folder. */
private File trapFileFor(File file) {
    String relativeTrap = PathTransformer.std().fileAsDatabaseString(file) + ".trap.gz";
    return FileUtil.appendAbsolutePath(currentSpecFileEntry.getTrapFolder(), relativeTrap);
}
/** The members trap file for {@code sym}, or null when there is no spec entry for the current source file. */
private File getTrapFileForClassFile(IrClass sym) {
    return currentSpecFileEntry == null ? null : trapFileForClass(sym);
}
/** Resolve the relative members trap path of {@code sym} against the current entry's trap folder. */
private File trapFileForClass(IrClass sym) {
    String relativeTrap = trapFilePathForClass(sym);
    return FileUtil.fileRelativeTo(currentSpecFileEntry.getTrapFolder(), relativeTrap);
}
// Cache from class id to the relative members trap path computed for it.
private final Map<String, String> memberTrapPaths = new LinkedHashMap<String, String>();
// Matches a literal '.' character (compiled with the LITERAL flag, so not "any character").
private static final Pattern dots = Pattern.compile(".", Pattern.LITERAL);
/**
 * Relative path of the members trap file for {@code sym}:
 * {@code classes/<class id with '.' turned into '/'>.members.trap.gz}.
 * Results are remembered per class id.
 */
private String trapFilePathForClass(IrClass sym) {
    String classId = getIrClassBinaryPath(sym);
    // TODO: Reinstate this?
    //if (getTrackClassOrigins())
    //    classId += "-" + StringDigestor.digest(sym.getSourceFileId());
    String cached = memberTrapPaths.get(classId);
    if (cached != null)
        return cached;
    String slashed = dots.matcher(classId).replaceAll("/");
    String computed = CLASSES_DIR + "/" + slashed + ".members" + ".trap.gz";
    memberTrapPaths.put(classId, computed);
    return computed;
}
/*
* Deletion of existing trap files.
*/
/**
 * Remove the members trap file of {@code sym} together with its sibling {@code .dep} and
 * {@code .metadata} files. Does nothing at all when the trap file itself is absent.
 * Deletion results are not checked.
 */
private void deleteTrapFileAndDependencies(IrClass sym) {
    File trap = trapFileForClass(sym);
    if (!trap.exists())
        return;
    trap.delete();
    File folder = trap.getParentFile();
    File depFile = new File(folder, trap.getName().replace(".trap.gz", ".dep"));
    if (depFile.exists()) {
        depFile.delete();
    }
    File metadataFile = new File(folder, trap.getName().replace(".trap.gz", ".metadata"));
    if (metadataFile.exists()) {
        metadataFile.delete();
    }
}
/*
* Trap writers.
*/
/**
 * Obtain a {@link TrapFileManager} for emitting facts about the current source file.
 *
 * @return the manager, or <code>null</code> if the source file should not be populated
 */
private TrapFileManager getTrapWriterForCurrentSourceFile() {
    File sourceTrap = getTrapFileForCurrentSourceFile();
    return sourceTrap == null ? null : trapWriter(sourceTrap, null);
}
/**
 * Obtain a {@link TrapFileManager} for writing the members of a class, or
 * <code>null</code> if the class shouldn't be populated.
 *
 * When a trap file for the class already exists its recorded version is compared with the
 * version of {@code sym}: a strictly newer class causes the old trap file (and its
 * companions) to be deleted and rewritten, an older or equal one yields <code>null</code>.
 *
 * @param sym
 *            The class's symbol, including, in particular, its fully qualified
 *            binary class name.
 */
private TrapFileManager getMembersWriterForClass(IrClass sym) {
    File trap = getTrapFileForClassFile(sym);
    if (trap == null) {
        return null;
    }
    TrapClassVersion currVersion = TrapClassVersion.fromSymbol(sym);
    if (!trap.exists()) {
        log.info("Writing trap file for: " + sym.getName() + " " + currVersion + " " + trap);
        return trapWriter(trap, sym);
    }
    // Only re-write an existing trap file if we encountered a newer version of the same class.
    TrapClassVersion trapVersion = readVersionInfo(trap);
    if (!currVersion.isValid()) {
        log.warn("Not rewriting trap file for: " + sym + " " + trapVersion + " " + currVersion + " " + trap);
    } else if (currVersion.newerThan(trapVersion)) {
        log.info("Rewriting trap file for: " + sym + " " + trapVersion + " " + currVersion + " " + trap);
        deleteTrapFileAndDependencies(sym);
    } else {
        return null;
    }
    return trapWriter(trap, sym);
}
/**
 * Prepare to write {@code trapFile}: check that it is a compressed trap file, work out its
 * path relative to the current entry's trap folder, create its parent directories, and hand
 * over to {@link #concurrentWriter}.
 *
 * @throws CatastrophicError if the file name does not end in {@code .trap.gz}
 */
private TrapFileManager trapWriter(File trapFile, IrClass sym) {
    boolean compressed = trapFile.getName().endsWith(".trap.gz");
    if (compressed) {
        String relative = FileUtil.relativePath(trapFile, currentSpecFileEntry.getTrapFolder());
        trapFile.getParentFile().mkdirs();
        return concurrentWriter(trapFile, relative, log, sym);
    }
    throw new CatastrophicError("OdasaOutput only supports writing to compressed trap files");
}
/** Create a manager for {@code trapFile}, or return null if that file already exists. */
private TrapFileManager concurrentWriter(File trapFile, String relative, Logger log, IrClass sym) {
    return trapFile.exists() ? null : new TrapFileManager(trapFile, relative, true, log, sym);
}
/**
 * Handle for one trap file being produced. It collects the trap files this one depends on
 * and, when closed, saves those dependencies to a {@code .dep} file and version information
 * to a {@code .metadata} file next to the trap file.
 */
public class TrapFileManager implements AutoCloseable {
    private TrapDependencies trapDependenciesForClass;
    private File trapFile;
    private IrClass sym;

    // The concurrentCreation and log arguments are accepted but not used.
    private TrapFileManager(File trapFile, String relative, boolean concurrentCreation, Logger log, IrClass sym) {
        this.sym = sym;
        this.trapFile = trapFile;
        this.trapDependenciesForClass = new TrapDependencies(relative);
    }

    /** @return the trap file this manager was created for. */
    public File getFile() {
        return trapFile;
    }

    /** Record a dependency of this trap file on the members trap file of {@code dep}. */
    public void addDependency(IrClass dep) {
        String dependencyPath = trapFilePathForClass(dep);
        trapDependenciesForClass.addDependency(dependencyPath);
    }

    /**
     * Save the collected dependencies, then write the metadata file. A failure to write the
     * metadata is logged as a warning rather than propagated.
     */
    public void close() {
        writeTrapDependencies(trapDependenciesForClass);
        // Record major/minor version information for extracted class files.
        // This is subsequently used to determine whether to re-extract (a newer version of) the same class.
        String metadataPath = trapFile.getAbsolutePath().replace(".trap.gz", ".metadata");
        File metadataFile = new File(metadataPath);
        TrapClassVersion version = TrapClassVersion.fromSymbol(sym);
        Map<String, String> properties = new LinkedHashMap<>();
        properties.put(MAJOR_VERSION, String.valueOf(version.getMajorVersion()));
        properties.put(MINOR_VERSION, String.valueOf(version.getMinorVersion()));
        properties.put(LAST_MODIFIED, String.valueOf(version.getLastModified()));
        try {
            FileUtil.writePropertiesCSV(metadataFile, properties);
        } catch (IOException e) {
            log.warn("Could not save trap metadata file: " + metadataFile.getAbsolutePath(), e);
        }
    }

    // Saves the dependencies to <trap folder>/<relative trap path with .trap.gz replaced by .dep>.
    private void writeTrapDependencies(TrapDependencies trapDependencies) {
        String depFileName = trapDependencies.trapFile().replace(".trap.gz", ".dep");
        trapDependencies.save(
                currentSpecFileEntry.getTrapFolder().toPath().resolve(depFileName));
    }
}
/*
* Trap file locking.
*/
/**
* Create the locker for the trap file of the current source file. The locker is constructed
* with a null class symbol, which makes it resolve the current source file's trap file.
*
* <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
* only one source file {@link TrapLocker} may be open at any time, and the lock must be obtained
* <b>before</b> any <b>class</b> file lock.
*
* Trap file extensions (and paths) ensure that source and class file locks are distinct.
*
* @return a {@link TrapLocker} for the currently processed source file, which must have been
* previously set by a call to {@link OdasaOutput#setCurrentSourceFile(File)}.
*/
public TrapLocker getTrapLockerForCurrentSourceFile() {
return new TrapLocker((IrClass)null);
}
/**
* Create the locker for the trap file of a jar file. If the path of {@code jarFile} does not
* end in {@code .jar} the locker has no trap file, and its
* {@link TrapLocker#getTrapFileManager()} returns null without locking anything.
*
* <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
* only one jar file {@link TrapLocker} may be open at any time, and the lock must be obtained
* <b>after</b> any <b>source</b> file lock. Only one jar or class file lock may be open at any time.
*
* Trap file extensions (and paths) ensure that source and jar file locks are distinct.
*
* @param jarFile the jar file whose trap file should be guarded
* @return a {@link TrapLocker} for the trap file corresponding to the given jar file.
*/
public TrapLocker getTrapLockerForJarFile(File jarFile) {
return new TrapLocker(jarFile);
}
/**
* Create the locker for the trap file of a module.
*
* <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
* only one module {@link TrapLocker} may be open at any time, and the lock must be obtained
* <b>after</b> any <b>source</b> file lock. Only one jar or class file or module lock may be open at any time.
*
* Trap file extensions (and paths) ensure that source and module file locks are distinct.
*
* @param moduleName the module name, used verbatim as the trap file's base name
* @return a {@link TrapLocker} for the trap file corresponding to the given module.
*/
public TrapLocker getTrapLockerForModule(String moduleName) {
return new TrapLocker(moduleName);
}
/**
* Create the locker for the members trap file of a class.
*
* <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
* only one class file {@link TrapLocker} may be open at any time, and the lock must be obtained
* <b>after</b> any <b>source</b> file lock. Only one jar or class file lock may be open at any time.
*
* Trap file extensions (and paths) ensure that source and class file locks are distinct.
*
* @param sym the class whose members trap file should be guarded
* @return a {@link TrapLocker} for the trap file corresponding to the given class symbol.
*/
public TrapLocker getTrapLockerForClassFile(IrClass sym) {
return new TrapLocker(sym);
}
/**
 * Guards a single trap file with an exclusive lock taken through the extractor's
 * {@link LockDirectory}. The lock is acquired by {@link #getTrapFileManager()} and given back
 * by {@link #close()}, so instances are meant to be used in a try-with-resources statement.
 */
public class TrapLocker implements AutoCloseable {
    // Class whose members trap file is guarded; null for source file, jar and module lockers.
    private final IrClass sym;
    // Trap file being guarded, or null when there is nothing to write (no lock is taken then).
    private final File trapFile;
    // True for jar and module lockers; not read anywhere in this class.
    private final boolean isNonSourceTrapFile;

    // A null symbol selects the trap file of the current source file.
    private TrapLocker(IrClass sym) {
        this.sym = sym;
        if (sym == null) {
            trapFile = getTrapFileForCurrentSourceFile();
        } else {
            trapFile = getTrapFileForClassFile(sym);
        }
        isNonSourceTrapFile = false;
    }

    private TrapLocker(File jarFile) {
        sym = null;
        trapFile = getTrapFileForJarFile(jarFile);
        isNonSourceTrapFile = true;
    }

    private TrapLocker(String moduleName) {
        sym = null;
        trapFile = getTrapFileForModule(moduleName);
        isNonSourceTrapFile = true;
    }

    /**
     * Lock the guarded trap file (blocking until the lock is available) and return a manager
     * for writing it.
     *
     * @return the manager, or null if there is no trap file to write or it does not need to
     *         be (re)written
     */
    public TrapFileManager getTrapFileManager() {
        if (trapFile == null) {
            return null;
        }
        lockTrapFile(trapFile);
        if (sym == null) {
            // Source file, jar and module lockers have no class symbol. Write the file that
            // was just locked; the class-version path would look up a class trap file instead
            // and dereference the null symbol.
            return trapWriter(trapFile, null);
        }
        return getMembersWriterForClass(sym);
    }

    /** Release the lock, if a trap file is being guarded. Unlock errors are logged, not thrown. */
    @Override
    public void close() {
        if (trapFile != null) {
            try {
                unlockTrapFile(trapFile);
            } catch (NestedError e) {
                log.warn("Error unlocking trap file " + trapFile.getAbsolutePath(), e);
            }
        }
    }

    private LockDirectory getExtractorLockDir() {
        return LockDirectory.instance(currentSpecFileEntry.getTrapFolder(), log);
    }

    private void lockTrapFile(File trapFile) {
        getExtractorLockDir().blockingLock(LockingMode.Exclusive, trapFile, "Java extractor lock");
    }

    private void unlockTrapFile(File trapFile) {
        boolean success = getExtractorLockDir().maybeUnlock(LockingMode.Exclusive, trapFile);
        if (!success) {
            log.warn("Trap file was not locked: " + trapFile);
        }
    }
}
/*
* Class version tracking.
*/
// Keys of the entries stored in a trap file's .metadata file; written by
// TrapFileManager.close() and read back by readVersionInfo().
private static final String MAJOR_VERSION = "majorVersion";
private static final String MINOR_VERSION = "minorVersion";
private static final String LAST_MODIFIED = "lastModified";
/**
 * Version stamp of a class (major version, minor version, last-modified time), used to
 * decide whether an already written trap file should be replaced.
 */
private static class TrapClassVersion {
    private final int majorVersion;
    private final int minorVersion;
    private final long lastModified;

    private TrapClassVersion(int majorVersion, int minorVersion, long lastModified) {
        this.majorVersion = majorVersion;
        this.minorVersion = minorVersion;
        this.lastModified = lastModified;
    }

    // Prototype: every symbol currently gets the same fixed version.
    private static TrapClassVersion fromSymbol(IrClass sym) {
        return new TrapClassVersion(100, 101, 102);
    }

    public int getMajorVersion() {
        return majorVersion;
    }

    public int getMinorVersion() {
        return minorVersion;
    }

    public long getLastModified() {
        return lastModified;
    }

    /** Whether this version should replace {@code tcv}. */
    private boolean newerThan(TrapClassVersion tcv) {
        // Classes being compiled from source have major version 0 but should take precedence
        // over any classes with the same qualified name loaded from the classpath
        // in previous or subsequent extractor invocations.
        if (tcv.majorVersion == 0)
            return false;
        if (majorVersion == 0)
            return true;
        // Otherwise the first differing component decides, in the order
        // majorVersion, minorVersion, lastModified.
        if (majorVersion != tcv.majorVersion)
            return majorVersion > tcv.majorVersion;
        if (minorVersion != tcv.minorVersion)
            return minorVersion > tcv.minorVersion;
        return lastModified > tcv.lastModified;
    }

    /** A version is valid when neither the major nor the minor component is negative. */
    private boolean isValid() {
        return !(majorVersion < 0 || minorVersion < 0);
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(majorVersion).append(".").append(minorVersion).append("-").append(lastModified);
        return text.toString();
    }
}
/**
 * Read the version recorded in the {@code .metadata} file that sits next to {@code trap}.
 * Components that cannot be obtained stay at 0: if the file is missing a warning is logged
 * and all three are 0; if a value fails to parse a warning is logged and that component and
 * the ones after it keep their default.
 */
private TrapClassVersion readVersionInfo(File trap) {
    File metadataFile = new File(trap.getAbsolutePath().replace(".trap.gz", ".metadata"));
    int major = 0;
    int minor = 0;
    long modified = 0;
    if (!metadataFile.exists()) {
        log.warn("Trap metadata file does not exist: " + metadataFile.getAbsolutePath());
    } else {
        Map<String,String> metadata = FileUtil.readPropertiesCSV(metadataFile);
        try {
            major = Integer.parseInt(metadata.get(MAJOR_VERSION));
            minor = Integer.parseInt(metadata.get(MINOR_VERSION));
            modified = Long.parseLong(metadata.get(LAST_MODIFIED));
        } catch (NumberFormatException e) {
            log.warn("Invalid class file version for " + trap.getAbsolutePath(), e);
        }
    }
    return new TrapClassVersion(major, minor, modified);
}
}

View File

@@ -0,0 +1,246 @@
package com.semmle.util.array;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import com.semmle.util.basic.ObjectUtil;
/**
* Convenience methods for manipulating arrays.
*/
public class ArrayUtil
{
/**
* A number slightly smaller than the maximum length of an array on most VMs
* ({@code Integer.MAX_VALUE - 8}). This matches the constant in ArrayList.
*/
public static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;
/**
 * An ordering over primitive {@code int} values.
 */
public interface IntComparator
{
    /**
     * Order {@code a} relative to {@code b}.
     *
     * @return a negative value if {@code a} is 'less' than {@code b}, zero if they are
     *         equal, otherwise a positive value.
     */
    int compare (int a, int b);
}
/**
 * Locate {@code value} in {@code array}.
 *
 * @return the index of the first element equal to {@code value}, or -1 if there is none.
 */
public static int findFirst(boolean[] array, boolean value)
{
    int position = 0;
    for (boolean element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array}.
 *
 * @return the index of the first element equal to {@code value}, or -1 if there is none.
 */
public static int findFirst(byte[] array, byte value)
{
    int position = 0;
    for (byte element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array}.
 *
 * @return the index of the first element equal to {@code value}, or -1 if there is none.
 */
public static int findFirst(char[] array, char value)
{
    int position = 0;
    for (char element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array}. Elements are compared with {@code ==}, so a NaN
 * {@code value} is never found.
 *
 * @return the index of the first element equal to {@code value}, or -1 if there is none.
 */
public static int findFirst(double[] array, double value)
{
    int position = 0;
    for (double element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array}. Elements are compared with {@code ==}, so a NaN
 * {@code value} is never found.
 *
 * @return the index of the first element equal to {@code value}, or -1 if there is none.
 */
public static int findFirst(float[] array, float value)
{
    int position = 0;
    for (float element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array}.
 *
 * @return the index of the first element equal to {@code value}, or -1 if there is none.
 */
public static int findFirst(int[] array, int value)
{
    int position = 0;
    for (int element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array} using {@link ObjectUtil#equals(Object, Object)},
 * i.e. a null {@code value} matches a null element, and a non-null {@code value} matches
 * any element for which {@code value.equals(element)} is true.
 *
 * @return the index of the first matching element, or -1 if there is none.
 * @see #findFirstSame(Object[], Object)
 */
public static <T> int findFirst(T[] array, T value)
{
    int position = 0;
    for (T element : array) {
        if (ObjectUtil.equals(value, element)) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Locate {@code value} in {@code array} by reference: an element matches only when
 * {@code value == element}.
 *
 * @return the index of the first matching element, or -1 if there is none.
 * @see #findFirst(Object[], Object)
 */
public static <T> int findFirstSame(T[] array, T value)
{
    int position = 0;
    for (T element : array) {
        if (element == value) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Test whether {@code element} occurs anywhere in {@code array}.
 *
 * @return true if some element of {@code array} equals {@code element}.
 */
public static boolean contains (int element, int ... array)
{
    int position = findFirst(array, element);
    return position >= 0;
}
/**
 * Test whether {@code element} occurs anywhere in {@code array}, with equality as defined
 * by {@link #findFirst(Object[], Object)}.
 *
 * @return true if some element of {@code array} equals {@code element}.
 */
@SafeVarargs
public static <T> boolean contains (T element, T ... array)
{
    int position = findFirst(array, element);
    return position >= 0;
}
/**
 * Build a copy of {@code array} that is one element longer, with {@code element} in the
 * final slot. The given array is left untouched.
 *
 * @return the newly allocated, extended array.
 */
public static <T> T[] append (T[] array, T element)
{
    int oldLength = array.length;
    T[] extended = Arrays.copyOf(array, oldLength + 1);
    extended[oldLength] = element;
    return extended;
}
/**
 * Join several byte arrays end to end into one freshly allocated array.
 *
 * @param arrays The arrays to join; may be null (in which case the result will be null).
 *            Null elements are treated as empty arrays.
 * @return null if {@code arrays} is null, otherwise a new array holding the contents of
 *         every non-null array in {@code arrays}, in order.
 */
public static byte[] concatenate (byte[] ... arrays)
{
    if (arrays == null) {
        return null;
    }

    // First pass: size of the result
    int total = 0;
    for (int i = 0; i < arrays.length; i++) {
        if (arrays[i] != null) {
            total += arrays[i].length;
        }
    }

    // Second pass: copy each non-null part behind the previous one
    byte[] joined = new byte[total];
    int writeAt = 0;
    for (int i = 0; i < arrays.length; i++) {
        byte[] part = arrays[i];
        if (part == null) {
            continue;
        }
        System.arraycopy(part, 0, joined, writeAt, part.length);
        writeAt += part.length;
    }
    return joined;
}
/**
* Trivial short-hand for building an array (returns {@code elements} unchanged).
* The returned array is the varargs array itself, not a copy.
*/
public static <T> T[] toArray (T ... elements)
{
return elements;
}
/**
 * Exchange the elements at two positions of an array, in place.
 *
 * @param array The array to modify; must be non-null.
 * @param index1 Position of the first element; must be in-bounds.
 * @param index2 Position of the second element; must be in-bounds.
 * @return The given {@code array}.
 */
public static int[] swap (int[] array, int index1, int index2)
{
    int first = array[index1];
    int second = array[index2];
    array[index1] = second;
    array[index2] = first;
    return array;
}
/**
 * Collect the elements of an array into a fresh Set. The set is a {@link LinkedHashSet},
 * so it iterates in order of first occurrence in the array.
 *
 * @param <T>
 *            the class of the objects in the array
 * @param array
 *            the array containing the elements
 * @return a new Set containing all the elements in the array.
 */
@SafeVarargs
public static <T> Set<T> asSet (T ... array)
{
    Set<T> collected = new LinkedHashSet<>();
    for (T element : array) {
        collected.add(element);
    }
    return collected;
}
}

View File

@@ -0,0 +1,73 @@
package com.semmle.util.basic;
/**
 * Small null-tolerant helpers for comparing and defaulting object references.
 */
public class ObjectUtil {
    /** Query if {@code object1} and {@code object2} are the same reference, or both null. */
    public static boolean isSame (Object object1, Object object2)
    {
        return object1 == object2; // Identity comparison is intentional
    }

    /**
     * Null-safe equality: true if both arguments are null, or if {@code object1} is non-null
     * and {@code object1.equals(object2)} holds.
     */
    public static boolean equals (Object object1, Object object2)
    {
        if (object1 != null) {
            return object1.equals(object2);
        }
        return object2 == null;
    }

    /**
     * Query whether {@code object} is equal (in the sense of {@link #equals(Object, Object)})
     * to at least one element of {@code objects}, stopping at the first match. A null or
     * empty {@code objects} yields false.
     */
    public static boolean equalsAny (Object object, Object ... objects)
    {
        if (objects != null) {
            for (Object candidate : objects) {
                if (equals(object, candidate)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Null-tolerant variant of {@code object1.compareTo(object2)}: when {@code object1} is
     * null the result is 0 if {@code object2} is null as well and 1 otherwise (so null sorts
     * as 'greater' than non-null). A non-null {@code object1} is asked to compare itself.
     */
    public static <T1, T2 extends T1> int compareTo (Comparable<T1> object1, T2 object2)
    {
        if (object1 != null) {
            return object1.compareTo(object2);
        }
        return object2 == null ? 0 : 1;
    }

    /**
     * Substitute {@code replacement} for a null {@code value}; a non-null {@code value} is
     * returned as is.
     */
    public static <T> T replaceNull (T value, T replacement)
    {
        if (value != null) {
            return value;
        }
        return replacement;
    }

    /** Return the first non-null element of {@code values}, or null if there is none. */
    @SafeVarargs
    public static <T> T nullCoalesce(T... values) {
        for (int i = 0; i < values.length; i++) {
            if (values[i] != null) {
                return values[i];
            }
        }
        return null;
    }
}

View File

@@ -0,0 +1,395 @@
package com.semmle.util.concurrent;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.util.LinkedHashMap;
import java.util.Map;
import com.semmle.util.data.StringDigestor;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.WholeIO;
import com.github.codeql.Logger;
import com.github.codeql.Severity;
/**
* Helper class to simplify handling of file-system-based inter-process
* locking and mutual exclusion.
*
* Both files and directories can be locked; locks are provided in the
* usual flavours of "shared" and "exclusive", plus a no-op variety to
* help unify code -- see the {@link LockingMode} enum.
*
* Note that each locked file requires one file descriptor to be held open.
* It is vital for clients to avoid creating too many locks, and to release
* locks when possible.
*
* The locks obtained by this class are VM-wide, and cannot be used to
* ensure mutual exclusion between threads of the same VM. Rather, they
* can enforce mutual exclusion between separate VMs trying to acquire
* locks for the same paths.
*/
public class LockDirectory {
private final Logger logger;
private final File lockDir;
/**
 * The locking modes understood by this class.
 */
public enum LockingMode {
    /**
     * Shared mode: A shared lock can be taken any number of times, but only
     * if no exclusive lock is in place.
     */
    Shared(true),
    /**
     * Exclusive mode: can only be taken if no other lock is in place; it
     * prevents all other locks.
     */
    Exclusive(false),
    /**
     * A dummy mode: Lock and unlock operations are no-ops.
     */
    None(true),
    ;

    private final boolean shared;

    LockingMode(boolean isShared) {
        shared = isShared;
    }

    /** @return the 'shared' flag of this mode: true for Shared and None, false for Exclusive. */
    public boolean isShared() {
        return shared;
    }
}
/**
* An internal representation of a locked path. Contains some immutable state: The canonical
* path being locked, and the (derived) lock and status files. When the {@link #lock(LockDirectory.LockingMode, String)}
* method is called, a file descriptor to the lock file is opened; {@link #unlock(LockDirectory.LockingMode)} must be
* called to release it when the lock is no longer required.
*
* This class is not thread-safe, but it is expected that its clients ({@link LockDirectory})
* enforce thread-safe access to instances.
*/
private class LockFile {
private final String lockedPath;
private final File lockFile;
private final File statusFile;
private LockingMode mode = null;
private RandomAccessFile lockStream = null;
private FileChannel lockChannel = null;
private FileLock lock = null;
public LockFile(File f) {
try {
lockedPath = f.getCanonicalPath();
} catch (IOException e) {
throw new ResourceError("Failed to canonicalise path for locking: " + f, e);
}
String sha = StringDigestor.digest(lockedPath);
lockFile = new File(lockDir, sha);
statusFile = new File(lockDir, sha + ".log");
}
/**
* Get the (canonical) path associated with this lock file -- this is the
* path that is being locked.
*
* @return the canonical path computed when this object was constructed.
*/
public String getLockedPath() {
return lockedPath;
}
/**
 * Acquire a lock with the given mode. If this method returns normally,
 * the lock has been acquired -- an exception is thrown otherwise. This
 * method does not block.
 *
 * If no exception is thrown, a file descriptor is kept open until
 * {@link #unlock(LockDirectory.LockingMode)} is called. If an exception is thrown, the
 * descriptor opened by this call is closed again and no lock is held.
 * @param mode The desired locking mode. If {@link LockingMode#None}, this
 *            operation is a no-op (and does not in fact open a file descriptor).
 * @param message A message to be recorded alongside the lock file. This
 *            is included in the exception message of other processes using this
 *            infrastructure when the lock acquisition fails.
 * @throws CatastrophicError if a lock has already been obtained and not released.
 * @throws ResourceError if an exception occurs while obtaining the lock, including
 *            if it cannot be acquired because another process holds it.
 */
public void lock(LockingMode mode, String message) {
    if (mode == LockingMode.None) return;
    if (lock != null)
        throw new CatastrophicError("Trying to re-lock existing lock for " + lockedPath);
    this.mode = mode;
    boolean acquired = false;
    try {
        lockStream = new RandomAccessFile(lockFile, "rw");
        lockChannel = lockStream.getChannel();
        tryLock(mode);
        new WholeIO().strictwrite(statusFile, mode + " lock acquired for " + lockedPath + ": " + message);
        acquired = true;
    } catch (IOException e) {
        throw new ResourceError("Failed to obtain lock for " + lockedPath + " at " + lockFile, e);
    } finally {
        if (!acquired) {
            // Don't leak the descriptor when acquisition fails. Closing the stream also
            // closes its channel, which releases any lock obtained through it.
            if (lockStream != null) {
                try {
                    lockStream.close();
                } catch (IOException ignored) {
                    // Best effort: the original failure is already propagating.
                }
            }
            lock = null;
            lockChannel = null;
            lockStream = null;
        }
    }
}
/**
 * Acquire a lock with the given mode. If this method returns normally,
 * the lock has been acquired -- an exception is thrown otherwise. This
 * method blocks indefinitely while waiting to acquire the lock, retrying
 * every 500 milliseconds.
 *
 * If no exception is thrown, a file descriptor is kept open until
 * {@link #unlock(LockDirectory.LockingMode)} is called. If an exception is thrown, the
 * descriptor opened by this call is closed again and no lock is held.
 * @param mode The desired locking mode. If {@link LockingMode#None}, this
 *            operation is a no-op (and does not in fact open a file descriptor).
 * @param message A message to be recorded alongside the lock file. This
 *            is included in the exception message of other processes using this
 *            infrastructure when the lock acquisition fails.
 * @throws CatastrophicError if a lock has already been obtained and not released.
 * @throws ResourceError if an exception occurs while obtaining the lock.
 */
public void blockingLock(LockingMode mode, String message) {
    if (mode == LockingMode.None) return;
    if (lock != null)
        throw new CatastrophicError("Trying to re-lock existing lock for " + lockedPath);
    this.mode = mode;
    boolean acquired = false;
    try {
        lockStream = new RandomAccessFile(lockFile, "rw");
        lockChannel = lockStream.getChannel();
        lock = lockChannel.tryLock(0, Long.MAX_VALUE, mode.isShared());
        while (lock == null) {
            ThreadUtil.sleep(500, true);
            lock = lockChannel.tryLock(0, Long.MAX_VALUE, mode.isShared());
        }
        new WholeIO().strictwrite(statusFile, mode + " lock acquired for " + lockedPath + ": " + message);
        acquired = true;
    } catch (IOException e) {
        throw new ResourceError("Failed to obtain lock for " + lockedPath + " at " + lockFile, e);
    } finally {
        if (!acquired) {
            // Don't leak the descriptor when acquisition fails. Closing the stream also
            // closes its channel, which releases any lock obtained through it.
            if (lockStream != null) {
                try {
                    lockStream.close();
                } catch (IOException ignored) {
                    // Best effort: the original failure is already propagating.
                }
            }
            lock = null;
            lockChannel = null;
            lockStream = null;
        }
    }
}
/**
 * Internal helper: make a single, non-blocking attempt to take a lock of the requested
 * kind on the already opened {@link #lockChannel}, storing it in {@link #lock}.
 * @param mode The desired lock mode -- exclusive or shared.
 * @throws IOException if acquisition of the lock fails for reasons other than
 *            an incompatible lock already being held by another process.
 * @throws ResourceError if the lock is already held by another process. The exception
 *            message includes the status string, if it can be determined.
 */
private void tryLock(LockingMode mode) throws IOException {
    lock = lockChannel.tryLock(0, Long.MAX_VALUE, mode.isShared());
    if (lock != null) {
        return;
    }
    String status = new WholeIO().read(statusFile);
    String existing = status == null ? "" : "\nExisting lock message: " + status;
    throw new ResourceError("Failed to acquire " + mode + " lock for " + lockedPath + "." + existing);
}
/**
 * Give up this lock, closing the file descriptor that {@link #lock(LockDirectory.LockingMode, String)}
 * opened.
 * @param mode The mode that was passed to {@link #lock(LockDirectory.LockingMode, String)}. Passing
 *             {@link LockingMode#None} turns the call into a no-op.
 * @throws CatastrophicError if {@code mode} differs from the mode the lock was taken with.
 * @throws ResourceError if releasing the lock or clearing up temporary files fails.
 */
public void unlock(LockingMode mode) {
    if (mode != LockingMode.None) {
        boolean sameMode = mode == this.mode;
        if (!sameMode) {
            String complaint = "Attempting to unlock " + lockedPath + " with incompatible mode: " +
                    this.mode + " lock was obtained, but " + mode + " lock is being released.";
            throw new CatastrophicError(complaint);
        }
        release(mode);
    }
}
/**
 * Release the held lock (if any), close the lock file's stream and channel, and try to
 * delete the status and lock files. Whatever happens, this object is returned to its
 * unlocked state afterwards.
 *
 * @param mode The mode the lock was acquired with; decides whether a failure to delete
 *             the status file is an error (exclusive) or tolerated (shared).
 * @throws ResourceError if the status file of an exclusive lock cannot be deleted, or if
 *             releasing the lock fails.
 */
private void release(LockingMode mode) {
    try {
        if (lock != null) {
            try {
                // On Windows, the lockChannel/lockStream prevents the lockFile from being
                // deleted. The statusFile should only be written after the lock is held,
                // so deleting it before releasing the lock is not expected to fail if the
                // lock is exclusive.
                // Deleting the lock file may fail, if another process just acquires it
                // after we release it.
                try {
                    if (statusFile.exists() && !statusFile.delete()) {
                        if (!mode.isShared()) throw new ResourceError("Could not clear status file " + statusFile);
                    }
                } finally {
                    try {
                        lock.release();
                    } finally {
                        // Close the descriptors even when releasing the lock itself failed.
                        FileUtil.close(lockStream);
                        FileUtil.close(lockChannel);
                    }
                    // The logger is optional (instance(File) passes null), so it must be
                    // checked before use; a failed delete here is harmless either way.
                    if (!lockFile.delete() && logger != null)
                        logger.error("Could not clear lock file " + lockFile + " (it might have been locked by another process).");
                }
            } catch (IOException e) {
                throw new ResourceError("Couldn't release lock for " + lockedPath, e);
            }
        }
    } finally {
        // Reset the *field*: the parameter of the same name shadows it, so the
        // qualification is required for the recorded mode to actually be cleared.
        this.mode = null;
        lockStream = null;
        lockChannel = null;
        lock = null;
    }
}
}
/**
 * Registry of all {@link LockDirectory} objects created so far, keyed by the canonical
 * lock directory. It is consulted and filled by the synchronized static
 * {@code instance(...)} factory methods, which create at most one entry per directory.
 */
private static final Map<File, LockDirectory> instances = new LinkedHashMap<File, LockDirectory>();
/**
 * Obtain the {@link LockDirectory} instance for a given lock directory. The directory
 * in question will be created if it doesn't exist.
 *
 * This is a convenience overload that delegates to {@link #instance(File, Logger)}
 * with a <code>null</code> logger.
 * @param lockDir A directory -- must be writable, and will be created if it doesn't
 * already exist.
 * @return The {@link LockDirectory} instance responsible for the specified lock directory.
 * @throws ResourceError if the directory cannot be created, exists as a non-directory
 * or cannot be canonicalised.
 */
public static synchronized LockDirectory instance(File lockDir) {
return instance(lockDir, null);
}
/**
 * See {@link #instance(File)}.
 * Use this method only if log output should be directed to a custom {@link Logger}.
 *
 * Note that the logger is only used when a new instance has to be created; if an instance
 * for the (canonicalised) directory is already registered, that instance is returned as is.
 */
public static synchronized LockDirectory instance(File lockDir, Logger logger) {
    // The directory has to exist before it can be canonicalised, so create it first.
    try {
        FileUtil.mkdirs(lockDir);
    } catch(ResourceError e) {
        throw new ResourceError("Couldn't ensure lock directory " + lockDir + " exists.", e);
    }

    final File canonicalDir;
    try {
        canonicalDir = lockDir.getCanonicalFile();
    } catch (IOException e) {
        throw new ResourceError("Couldn't canonicalise requested lock directory " + lockDir, e);
    }

    // Reuse the registered instance when there is one; otherwise create and register it.
    LockDirectory known = instances.get(canonicalDir);
    if (known != null)
        return known;
    LockDirectory created = new LockDirectory(canonicalDir, logger);
    instances.put(canonicalDir, created);
    return created;
}
/**
* A map from canonical locked paths to the associated {@link LockFile} instances.
*/
private final Map<String, LockFile> locks = new LinkedHashMap<String, LockFile>();
/**
 * Create a new instance of {@link LockDirectory}, holding all locks in the
 * specified lock directory. The constructor only records its arguments; it is
 * private, and instances are handed out by the static {@code instance(...)} methods.
 * @param lockDir A writable directory in which locks will be stored.
 * @param logger The {@link Logger} to use, if non-null.
 */
private LockDirectory(File lockDir, Logger logger) {
this.lockDir = lockDir;
this.logger = logger;
}
/**
 * Take a lock of the requested kind on the path denoted by {@code f}, without waiting.
 * The file should exist, and its path should be canonicalisable.
 *
 * A successful call keeps one file descriptor open until the path is unlocked again.
 * @param mode The desired locking mode. {@link LockingMode#None} makes this a no-op;
 *             any other value selects between a shared and an exclusive lock.
 * @param f The path that should be locked -- does not need to be writable, and will not
 *             be opened.
 * @param message A message describing the purpose of the lock acquisition. This is
 *             potentially displayed when other processes fail to acquire the lock for the
 *             given path.
 * @throws CatastrophicError if an attempt is made to lock an already locked path.
 */
public synchronized void lock(LockingMode mode, File f, String message) {
    if (mode != LockingMode.None) {
        LockFile candidate = new LockFile(f);
        String lockedPath = candidate.getLockedPath();
        // Each path may be locked at most once through this directory.
        if (locks.containsKey(lockedPath))
            throw new CatastrophicError("Trying to lock already locked path " + lockedPath + ".");
        candidate.lock(mode, message);
        locks.put(lockedPath, candidate);
    }
}
/**
 * Take a lock of the requested kind on the path denoted by {@code f}, waiting for as long
 * as it takes for the lock to become available. The file should exist, and its path should
 * be canonicalisable. Processes waiting in this manner are not served in any particular
 * order.
 *
 * Because this method is synchronized, the monitor of this {@link LockDirectory} is held
 * for the whole duration of the wait.
 *
 * A successful call keeps one file descriptor open until the path is unlocked again.
 * @param mode The desired locking mode. {@link LockingMode#None} makes this a no-op;
 *             any other value selects between a shared and an exclusive lock.
 * @param f The path that should be locked -- does not need to be writable, and will not
 *             be opened.
 * @param message A message describing the purpose of the lock acquisition. This is
 *             potentially displayed when other processes fail to acquire the lock for the
 *             given path.
 * @throws CatastrophicError if an attempt is made to lock an already locked path.
 */
public synchronized void blockingLock(LockingMode mode, File f, String message) {
    if (mode != LockingMode.None) {
        LockFile candidate = new LockFile(f);
        String lockedPath = candidate.getLockedPath();
        // Each path may be locked at most once through this directory.
        if (locks.containsKey(lockedPath))
            throw new CatastrophicError("Trying to lock already locked path " + lockedPath + ".");
        candidate.blockingLock(mode, message);
        locks.put(lockedPath, candidate);
    }
}
/**
 * Release the lock held on a particular path, closing the file descriptor that belongs to
 * it and freeing related resources.
 *
 * @param mode the mode of the lock. {@link LockingMode#None} makes this a no-op; any other value
 *             is expected to match the mode passed to the corresponding
 *             {@link #lock(LockingMode, File, String)} call.
 * @param f The path which should be unlocked. As with {@link #lock(LockingMode, File, String)}, it is
 *             expected to exist and be canonicalisable. It also must be currently locked.
 * @throws CatastrophicError on API contract violation: The path isn't currently locked, or the
 *             mode doesn't correspond to the mode specified when it was locked.
 * @throws ResourceError if something goes wrong while releasing resources.
 */
public synchronized void unlock(LockingMode mode, File f) {
    boolean released = maybeUnlock(mode, f);
    if (released)
        return;
    // Nothing was registered for this path: the caller broke the lock/unlock pairing.
    throw new CatastrophicError("Trying to unlock " + new LockFile(f).getLockedPath() + ", but it is not locked.");
}
/**
 * Release a lock that may or may not be held on a particular path.
 *
 * If a lock is registered for the path, its file descriptor is closed and related resources
 * are freed. In contrast to {@link #unlock(LockingMode, File)}, an unlocked path is not an
 * error here -- <code>false</code> is returned instead -- which makes this method the better
 * fit for post-exception cleanup.
 * @param mode the mode of the lock. {@link LockingMode#None} makes this a no-op; any other value
 *             is expected to match the mode passed to the corresponding
 *             {@link #lock(LockingMode, File, String)} call.
 * @param f The path which should be unlocked. As with {@link #lock(LockingMode, File, String)}, it is
 *             expected to exist and be canonicalisable.
 * @return <code>true</code> if <code>mode == LockingMode.None</code>, or the unlock operation completed
 *             successfully; <code>false</code> if the path <code>f</code> isn't currently locked.
 * @throws ResourceError if something goes wrong while releasing resources.
 */
public synchronized boolean maybeUnlock(LockingMode mode, File f) {
    if (mode == LockingMode.None)
        return true;
    // A throw-away LockFile is the way to obtain the canonical path used as map key.
    String lockedPath = new LockFile(f).getLockedPath();
    // Unregister first, so the entry is gone even if releasing the lock throws.
    LockFile held = locks.remove(lockedPath);
    if (held == null)
        return false;
    held.unlock(mode);
    return true;
}
/**
 * Get the directory in which this {@link LockDirectory} stores its locks.
 * @return the lock directory that was passed to the constructor.
 */
public File getDir(){ return lockDir; }
}

View File

@@ -0,0 +1,43 @@
package com.semmle.util.concurrent;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.Exceptions;
/**
 * Thread-related helper methods.
 */
public enum ThreadUtil
{
    /** The one and only instance of {@link ThreadUtil}. */
    SINGLETON;

    /**
     * Pause the current thread for {@code millis} milliseconds.
     * <p>
     * This wraps {@link Thread#sleep(long)} but never lets an {@link InterruptedException}
     * escape. If the sleep is interrupted, the outcome depends on {@code allowInterrupt}:
     * when it is true the interruption is accepted and false is returned, otherwise a
     * {@link CatastrophicError} is thrown.
     * </p>
     *
     * @return true if the full sleep of {@code millis} milliseconds completed undisturbed,
     *         false if it was cut short by an (accepted) interruption.
     */
    public static boolean sleep(long millis, boolean allowInterrupt)
    {
        try {
            Thread.sleep(millis);
            return true;
        }
        catch (InterruptedException interruption) {
            if (!allowInterrupt) {
                throw new CatastrophicError("Interrupted", interruption);
            }
            Exceptions.ignore(interruption, "explicitly permitted interruption");
            return false;
        }
    }
}

View File

@@ -0,0 +1,19 @@
package com.semmle.util.data;
/**
 * A mutable holder for a single primitive int. It exists as a dedicated
 * class so that the value never has to be boxed.
 */
public class IntRef {
    private int value;

    /** Create a reference that initially holds {@code value}. */
    public IntRef(int value) {
        this.value = value;
    }

    /** Read the current value. */
    public int get() {
        return this.value;
    }

    /** Overwrite the current value with {@code value}. */
    public void set(int value) {
        this.value = value;
    }

    /** Increase the current value by one. */
    public void inc() {
        this.value += 1;
    }

    /** Increase the current value by {@code val}. */
    public void add(int val) {
        this.value = this.value + val;
    }
}

View File

@@ -0,0 +1,62 @@
package com.semmle.util.data;
/**
 * An immutable, ordered pair of values.
 * <p>
 * Equality is structural: <code>(x,y) = (x', y')</code> holds iff <code>x=x'</code>
 * and <code>y=y'</code>.
 * </p>
 *
 * @param <X> the type of the first component of the pair
 * @param <Y> the type of the second component of the pair
 */
public class Pair<X,Y> extends Tuple2<X, Y>
{
    private static final long serialVersionUID = -2871892357006076659L;

    /**
     * Construct a pair from its two components.
     * @param x the first component of the pair
     * @param y the second component of the pair
     */
    public Pair(X x, Y y) {
        super(x, y);
    }

    /**
     * Factory equivalent of the constructor; it exists so that callers can rely on
     * type inference instead of spelling out the type arguments.
     * @param x the first component of the pair
     * @param y the second component of the pair
     */
    public static <X,Y> Pair<X,Y> make(X x, Y y) {
        Pair<X,Y> pair = new Pair<X,Y>(x, y);
        return pair;
    }

    /**
     * Accessor for the first component.
     * @return the first component of the pair
     */
    public X fst() {
        return this.value0();
    }

    /**
     * Accessor for the second component.
     * @return the second component of the pair
     */
    public Y snd() {
        return this.value1();
    }
}

View File

@@ -0,0 +1,173 @@
package com.semmle.util.data;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import com.semmle.util.exception.CatastrophicError;
/**
 * Encapsulate the creation of message digests from strings.
 *
 * <p>
 * This class acts as a (partial) output stream, until the <code>getDigest()</code> method is
 * called. After this the write methods throw a {@link CatastrophicError}; the instance can only
 * be used to repeatedly call {@link #getDigest()}, or be made writable again with {@link #reset()}.
 *
 * <p>
 * UTF-8 is used internally as the {@link Charset} for this class when converting Strings to bytes.
 * The conversion is done by a hand-written encoder in this class rather than by
 * {@link String#getBytes(Charset)}.
 */
public class StringDigestor {
private static final Charset UTF8 = Charset.forName("UTF-8");
// Text that is digested in place of a null argument to write(Object).
private static final String NULL_STRING = "<null>";
// Number of chars (UTF-16 code units) encoded per call to writeUTF8 by writeNoLength.
private static final int CHUNK_SIZE = 32;
private MessageDigest digest;
// Null while data is still being accepted; holds the final digest once getDigest() was called.
private byte[] digestBytes;
// Scratch buffer shared by writeUTF8 and writeBinaryInt.
private final byte[] buf = new byte[CHUNK_SIZE * 3]; // A Java char becomes at most 3 bytes of UTF-8
/**
 * Create a StringDigestor using SHA-1, ready to accept data
 */
public StringDigestor() {
this("SHA-1");
}
/**
 * Create a StringDigestor using the named algorithm, ready to accept data.
 * @param digestAlgorithm the algorithm to use in the internal {@link MessageDigest}.
 * @throws CatastrophicError if no provider for the algorithm can be found.
 */
public StringDigestor(String digestAlgorithm) {
try {
digest = MessageDigest.getInstance(digestAlgorithm);
} catch (NoSuchAlgorithmException e) {
throw new CatastrophicError("StringDigestor failed to find the required digest algorithm: " + digestAlgorithm, e);
}
}
/**
 * Make a finished digestor writable again, discarding the previous digest.
 * @throws CatastrophicError if {@link #getDigest()} has not been called since construction
 * or since the last reset.
 */
public void reset() {
if (digestBytes == null) throw new CatastrophicError("API violation: Digestor is not finished.");
digest.reset();
digestBytes = null;
}
/**
 * Write an object into this digestor. This converts the object to a
 * string using toString() (null is represented by the text <code>&lt;null&gt;</code>),
 * writes the length, and then writes the string itself. The length is the
 * string's {@link String#length()}, i.e. a count of chars rather than of
 * encoded bytes, written with {@link #writeBinaryInt(int)}.
 * @return this digestor, to allow chaining.
 */
public StringDigestor write(Object toAppend) {
String str;
if (toAppend == null) {
str = NULL_STRING;
} else {
str = toAppend.toString();
}
writeBinaryInt(str.length());
writeNoLength(str);
return this;
}
/**
 * Write the given string without prefixing it by its length.
 * Unlike {@link #write(Object)} there is no null handling here: toString() is
 * called on the argument directly.
 * @return this digestor, to allow chaining.
 */
public StringDigestor writeNoLength(Object toAppend) {
String s = toAppend.toString();
int len = s.length();
int i = 0;
// Encode in chunks that fit the scratch buffer. writeUTF8 reports where the next
// chunk has to start, which can be one char short of the requested end.
while(i + CHUNK_SIZE < len) {
i = writeUTF8(s, i, i + CHUNK_SIZE);
}
// Final chunk. The returned index is not used here, so a surrogate in the very last
// position of the string contributes no bytes.
// NOTE(review): confirm that dropping such a trailing surrogate is intended.
writeUTF8(s, i, len);
return this;
}
/**
 * Encode the chars of <code>s</code> in the range [begin, end) as UTF-8 and feed the
 * bytes to the digest.
 * @return the index at which encoding of the next chunk should start: <code>end</code>,
 * or <code>end - 1</code> if the last char of the range is a surrogate, which cannot be
 * encoded without looking at the char after it.
 * @throws CatastrophicError if the digest has already been computed.
 */
private int writeUTF8(String s, int begin, int end) {
if (digestBytes != null) throw new CatastrophicError("API violation: Digestor is finished.");
byte[] buf = this.buf;
int len = 0;
for(int i = begin; i < end; ++i) {
int c = s.charAt(i);
if (c <= 0x7f) {
// One byte.
buf[len++] = (byte)c;
} else if (c <= 0x7ff) {
// Two bytes.
buf[len] = (byte)(0xc0 | (c >> 6));
buf[len+1] = (byte)(0x80 | (c & 0x3f));
len += 2;
} else if (c < 0xd800 || c > 0xdfff) {
// Three bytes: any remaining char that is not a surrogate.
buf[len] = (byte)(0xe0 | (c >> 12));
buf[len+1] = (byte)(0x80 | ((c >> 6) & 0x3f));
buf[len+2] = (byte)(0x80 | (c & 0x3f));
len += 3;
} else if (i + 1 < end) {
// A surrogate with a following char inside this chunk.
int c2 = s.charAt(i + 1);
if (c > 0xdbff || c2 < 0xdc00 || c2 > 0xdfff) {
// Invalid UTF-16
// (a low surrogate first, or a high surrogate without a low one after it):
// nothing is written for c, and c2 is looked at by the next iteration.
} else {
// Valid pair: combine into one code point, emit four bytes, consume both chars.
c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00);
buf[len] = (byte)(0xf0 | (c >> 18));
buf[len+1] = (byte)(0x80 | ((c >> 12) & 0x3f));
buf[len+2] = (byte)(0x80 | ((c >> 6) & 0x3f));
buf[len+3] = (byte)(0x80 | (c & 0x3f));
len += 4;
++i;
}
} else {
// A surrogate in the last position of the chunk: leave it for the next chunk.
--end;
break;
}
}
digest.update(buf, 0, len);
return end;
}
/**
 * Write an array of raw bytes to the digestor. This appends the contents
 * of the array to the accumulated data used for the digest.
 * @return this digestor, to allow chaining.
 * @throws CatastrophicError if the digest has already been computed.
 */
public StringDigestor writeBytes(byte[] data) {
if (digestBytes != null) throw new CatastrophicError("API violation: Digestor is finished.");
digest.update(data);
return this;
}
/**
 * Return the hex-encoded digest as a {@link String}.
 *
 * Get the digest from the data previously appended using <code>write(Object)</code>.
 * The digest is computed on the first call and remembered, so repeated calls return
 * the same value. After this is called, the instance's {@link #write(Object)} and
 * {@link #writeBytes(byte[])} methods may no longer be used (until {@link #reset()}).
 */
public String getDigest() {
if (digestBytes == null) {
digestBytes = digest.digest();
}
return StringUtil.toHex(digestBytes);
}
/**
 * Compute the SHA-1 digest of <code>o.toString()</code>, without a length prefix,
 * using a fresh digestor.
 */
public static String digest(Object o) {
StringDigestor digestor = new StringDigestor();
digestor.writeNoLength(o);
return digestor.getDigest();
}
/**
 * Compute a git-style SHA for the given string: the digest of the text
 * <code>blob</code>, a space, the UTF-8 byte length of the content, a NUL char,
 * and the content itself.
 */
public static String gitBlobSha(String content) {
// NOTE(review): the byte length comes from String.getBytes while the hashed bytes come from
// this class's own encoder; presumably these only disagree for unpaired surrogates -- confirm.
byte[] bytes = content.getBytes(UTF8);
return digest("blob " + bytes.length + "\0" + content);
}
/**
 * Convert an int to a byte[4] using its little-endian 32bit representation, and append the
 * resulting bytes to the accumulated data used for the digest.
 * @return this digestor, to allow chaining.
 * @throws CatastrophicError if the digest has already been computed.
 */
public StringDigestor writeBinaryInt(int i) {
if (digestBytes != null) throw new CatastrophicError("API violation: Digestor is finished.");
byte[] buf = this.buf;
// Least significant byte first.
buf[0] = (byte)(i & 0xff);
buf[1] = (byte)((i >>> 8) & 0xff);
buf[2] = (byte)((i >>> 16) & 0xff);
buf[3] = (byte)((i >>> 24) & 0xff);
digest.update(buf, 0, 4);
return this;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,106 @@
package com.semmle.util.data;
import java.io.Serializable;
/**
 * A tuple holding exactly one typed element.
 * <p>
 * This class extends {@link TupleN} and is in turn the super-class of {@link Tuple2},
 * {@link Tuple3}, and any later extensions along the same lines.
 * </p>
 */
public class Tuple1 <Type0> extends TupleN
{
    /**
     * Variant of {@link Tuple1} whose element type is required to be {@link Serializable}.
     */
    public static class SerializableTuple1<T0 extends Serializable>
            extends Tuple1<T0> implements Serializable {
        private static final long serialVersionUID = -7989122667707773448L;

        public SerializableTuple1() {
        }

        public SerializableTuple1(T0 t0) {
            super(t0);
        }
    }

    private static final long serialVersionUID = -4317563803154647477L;

    /** The one value held by this tuple. */
    protected Type0 _value0;

    /** Create a {@link Tuple1} whose value is null. */
    public Tuple1 () {}

    /** Create a {@link Tuple1} holding the given value. */
    public Tuple1 (Type0 value0)
    {
        this._value0 = value0;
    }

    /** Factory counterpart of {@link #Tuple1(Object)}. */
    public static <Type0> Tuple1<Type0> make(Type0 value0)
    {
        Tuple1<Type0> tuple = new Tuple1<Type0>(value0);
        return tuple;
    }

    /**
     * Typed accessor for the value held by this {@link Tuple1}.
     */
    public final Type0 value0 ()
    {
        return this._value0;
    }

    /** Untyped accessor hook: this level answers every index with its single value. */
    @Override
    protected Object value_ (int n)
    {
        return this._value0;
    }

    /**
     * The number of elements in this {@link Tuple1}, which is one.
     * <p>
     * Sub-classes shall override this method to increase its value accordingly.
     * </p>
     */
    @Override
    public int size ()
    {
        return 1;
    }

    /**
     * The plain string form of the contained value; a null value yields the empty string.
     * <p>
     * Sub-classes shall implement a comma-separated concatenation.
     * </p>
     */
    @Override
    public String toPlainString ()
    {
        if (_value0 == null) {
            return "";
        }
        return _value0.toString();
    }

    @Override
    public int hashCode ()
    {
        int ownHash = 0;
        if (_value0 != null) {
            ownHash = _value0.hashCode();
        }
        return 31 * super.hashCode() + ownHash;
    }

    @Override
    public boolean equals (Object obj)
    {
        if (obj == this) {
            return true;
        }
        // The super-class check establishes that obj is non-null and of the same class.
        if (!super.equals(obj)) {
            return false;
        }
        Tuple1<?> other = (Tuple1<?>) obj;
        return equal(other._value0, _value0);
    }
}

View File

@@ -0,0 +1,93 @@
package com.semmle.util.data;
import java.io.Serializable;
/**
 * Tuple of two typed elements.
 * <p>
 * Note that this is an extension of {@link Tuple1} and a super-class of {@link Tuple3} (and any
 * subsequent additions).
 * </p>
 */
public class Tuple2 <Type0, Type1> extends Tuple1<Type0>
{
    /**
     * Serializable variant of {@link Tuple2}.
     */
    public static class SerializableTuple2<T0 extends Serializable, T1 extends Serializable>
            extends Tuple2<T0, T1> implements Serializable {
        private static final long serialVersionUID = 1624467154864321244L;

        public SerializableTuple2() {
        }

        public SerializableTuple2(T0 t0, T1 t1) {
            super(t0, t1);
        }
    }

    private static final long serialVersionUID = -400406676673562583L;

    /** The additional element contained by this {@link Tuple2}. */
    protected Type1 _value1;

    /** Construct a new {@link Tuple2} with null values. */
    public Tuple2 () {}

    /** Construct a new {@link Tuple2} with the given values. */
    public Tuple2 (Type0 value0, Type1 value1)
    {
        super(value0);
        _value1 = value1;
    }

    /** Construct a new {@link Tuple2} with the given values. */
    public static <Type1, Type2> Tuple2<Type1, Type2> make(Type1 value0, Type2 value1)
    {
        return new Tuple2<Type1,Type2>(value0, value1);
    }

    /**
     * Get the second value in this {@link Tuple2}.
     */
    public final Type1 value1 ()
    {
        return _value1;
    }

    /**
     * Untyped, index-based access. Indices are zero-based, matching the field and accessor
     * names: index 1 is the second element, which lives at this level; every other index is
     * handed to the super-class.
     */
    @Override
    protected Object value_ (int n)
    {
        return n == 1 ? _value1 : super.value_(n);
    }

    /** Return the number of elements in this {@link Tuple2}, which is two. */
    @Override
    public int size ()
    {
        return 2;
    }

    /**
     * Return the plain string representations of both values, separated by a comma and a
     * space (where null is represented by the empty string).
     */
    @Override
    public String toPlainString ()
    {
        return super.toPlainString() + ", " + (_value1 == null ? "" : _value1.toString());
    }

    @Override
    public int hashCode ()
    {
        final int prime = 31;
        int result = super.hashCode();
        result = prime * result + ((_value1 == null) ? 0 : _value1.hashCode());
        return result;
    }

    @Override
    public boolean equals (Object obj)
    {
        // super.equals guarantees obj is non-null and of the same class, so the cast is safe.
        return obj == this || (super.equals(obj) && equal(((Tuple2<?,?>)obj)._value1, _value1));
    }
}

View File

@@ -0,0 +1,85 @@
package com.semmle.util.data;
import java.io.Serializable;
/**
 * Untyped base-class for the generic {@link Tuple1}, {@link Tuple2}, ... <i>etc.</i>
 * <p>
 * This class also functions as a zero-element tuple.
 * </p>
 */
public class TupleN implements Serializable
{
    private static final long serialVersionUID = -1799116497122427806L;

    /**
     * Get the n'th value contained by this {@link TupleN}.
     *
     * @param n The zero-based index of the value to be returned.
     * @return The n'th value, or null if n is out of range, i.e. negative or not smaller
     *         than {@link #size()}.
     */
    public final Object value (int n)
    {
        // Valid zero-based indices are 0 .. size()-1, so size() itself is already out of range.
        return n < 0 || n >= size() ? null : value_(n);
    }

    /** Internal method for obtaining the n'th value (n is guaranteed to be in-range). */
    protected Object value_ (int n)
    {
        return null;
    }

    /**
     * Get the number of values contained by this {@link TupleN}.
     */
    public int size ()
    {
        return 0;
    }

    /**
     * Return a plain string representation of the contained value (where null is represented by the
     * empty string).
     * <p>
     * Sub-classes shall implement a comma-separated concatenation.
     * </p>
     */
    public String toPlainString ()
    {
        return "";
    }

    /**
     * Get a parenthesized, comma-separated string representing the values contained by this
     * {@link TupleN}. Null values are represented by an empty string.
     */
    @Override
    public final String toString ()
    {
        return "(" + toPlainString() + ")";
    }

    /** A zero-element tuple has nothing to hash; sub-classes mix in their own values. */
    @Override
    public int hashCode ()
    {
        return 0;
    }

    /**
     * Two tuples can only be equal if they are of exactly the same class; sub-classes
     * additionally compare their values.
     */
    @Override
    public boolean equals (Object obj)
    {
        return obj == this || (obj != null && obj.getClass().equals(getClass()));
    }

    /**
     * Convenience method implementing objects.equals(object, object), which is not available due to a
     * java version restriction.
     */
    protected static boolean equal(Object obj1, Object obj2)
    {
        if (obj1 == null) {
            return obj2 == null;
        }
        return obj1.equals(obj2);
    }
}

View File

@@ -0,0 +1,117 @@
package com.semmle.util.exception;
import java.util.Arrays;
/**
 * This is a standard Semmle unchecked exception.
 * Usage of this should follow the guidelines described in docs/semmle-unchecked-exceptions.md
 */
public class CatastrophicError extends NestedError {
    private static final long serialVersionUID = 4132771414092814913L;

    public CatastrophicError(String message) {
        super(message);
    }

    public CatastrophicError(Throwable throwable) {
        super(throwable);
    }

    public CatastrophicError(String message, Throwable throwable) {
        super(message,throwable);
    }

    /**
     * Utility method for throwing a {@link CatastrophicError} with the given {@code message} if the given
     * {@code condition} is true.
     */
    public static void throwIf(boolean condition, String message)
    {
        if (condition) {
            throw new CatastrophicError(message);
        }
    }

    /**
     * Utility method for throwing a {@link CatastrophicError} if the given {@code object} is null.
     * <p>
     * See {@link #throwIfAnyNull(Object...)} which may be more convenient for checking multiple
     * arguments.
     * </p>
     */
    public static void throwIfNull(Object object)
    {
        if (object == null) {
            throw new CatastrophicError("null object");
        }
    }

    /**
     * Utility method for throwing a {@link CatastrophicError} with the given {@code message} if the given
     * {@code object} is null.
     * <p>
     * See {@link #throwIfAnyNull(Object...)} which may be more convenient for checking multiple
     * arguments.
     * </p>
     */
    public static void throwIfNull (Object object, String message)
    {
        if (object == null) {
            throw new CatastrophicError(message);
        }
    }

    /**
     * Throw a {@link CatastrophicError} if any of the given {@code objects} is null.
     * <p>
     * If a {@link CatastrophicError} is thrown, its message will indicate <i>all</i> null arguments by
     * (zero-based) index. If the argument array itself is null, no indices can be given and the
     * message is just {@code null argument(s)}.
     * </p>
     * <p>
     * See {@link #throwIfNull(Object, String)} which may be a fraction more efficient if there's only
     * one argument, and allows an 'optional' message parameter.
     * </p>
     */
    public static void throwIfAnyNull (Object ... objects)
    {
        // A null array (as opposed to an array containing nulls) is a null argument as well.
        if (objects == null) {
            throw new CatastrophicError("null argument(s)");
        }

        // Collect the indices of all null arguments as a comma-separated list.
        StringBuilder nullIndices = new StringBuilder();
        int nullCount = 0;
        for (int argNum = 0; argNum < objects.length; ++argNum) {
            if (objects[argNum] == null) {
                if (nullCount > 0) {
                    nullIndices.append(", ");
                }
                nullIndices.append(argNum);
                ++nullCount;
            }
        }

        if (nullCount > 0) {
            throw new CatastrophicError("null argument" + (nullCount > 1 ? "s: " : ": ") + nullIndices);
        }
    }

    /**
     * Convenience method for use in constructors that assign a parameter to a
     * field, assuming the former to be non-null.
     *
     * @param t A non-null value of type {@code T}.
     * @return {@code t}
     * @throws CatastrophicError if {@code t} is null.
     * @see #throwIfNull(Object)
     */
    public static <T> T nonNull(T t) {
        throwIfNull(t);
        return t;
    }
}

View File

@@ -0,0 +1,120 @@
package com.semmle.util.exception;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Small helpers for rendering and handling exceptions. The rendering is meant for
 * debug output, not for text that is shown to users.
 */
public class Exceptions {
    /**
     * Render the stack trace of {@code t} into a String, in the format that
     * {@link Throwable#printStackTrace()} produces.
     */
    public static String printStackTrace(Throwable t)
    {
        StringWriter buffer = new StringWriter();
        PrintWriter sink = new PrintWriter(buffer);
        t.printStackTrace(sink);
        return buffer.toString();
    }

    /**
     * Render an exception with everything there is to know about it: type, message,
     * stack trace, and the chain of causes. The result spans multiple lines.
     */
    public static String print(Throwable t) {
        return printDetailed(t, true);
    }

    /**
     * Render an exception on a single line, which is only somewhat readable.
     * In most situations <code>print</code> is the better choice.
     */
    public static String printShort(Throwable t) {
        return printDetailed(t, false);
    }

    /**
     * Deliberately do nothing with an exception. Calling this (with a sensible message)
     * documents why the exception does not need to be acted upon.
     */
    public static void ignore(Throwable e, String message) {
    }

    /**
     * Render an exception and its chain of causes, one after the other.
     * @param multiline if <code>true</code>, produce multiple lines of output; otherwise
     *            everything is put on one line
     */
    private static String printDetailed(Throwable t, boolean multiline) {
        final String causeSeparator = multiline ? "\n\n ... caused by:\n\n" : ", caused by: ";
        StringBuilder out = new StringBuilder();
        Throwable current = t;
        while (current != null) {
            printOneException(current, multiline, out);
            Throwable next = current.getCause();
            // Guard against an exception that reports itself as its own cause.
            if (next == current) {
                next = null;
            }
            if (next != null) {
                out.append(causeSeparator);
            }
            current = next;
        }
        return out.toString();
    }

    /** Append the description and the stack frames of a single exception to {@code sb}. */
    private static void printOneException(Throwable t, boolean multiline, StringBuilder sb) {
        String description = t.toString();
        if (!multiline) {
            description = description.replace('\n', ' ').replace('\r', ' ');
        }
        sb.append(description);

        StackTraceElement[] frames = t.getStackTrace();
        for (int i = 0; i < frames.length; i++) {
            if (multiline) {
                sb.append("\n");
            } else if (i == 0) {
                sb.append(" - [");
            } else {
                sb.append(", ");
            }
            sb.append(frames[i].toString());
        }

        // The closing bracket is emitted in single-line mode even if there were no frames.
        if (!multiline) {
            sb.append("]");
        }
    }

    /** A stand-in replacement for `assert` that throws a {@link CatastrophicError} and isn't compiled out. */
    public static void assertion(boolean cond, String message) {
        if (cond) {
            return;
        }
        throw new CatastrophicError(message);
    }

    /**
     * View the given {@link Throwable} as a {@link RuntimeException}: it is returned as is
     * if it already is one, and wrapped in a new {@link RuntimeException} otherwise.
     */
    public static RuntimeException asUnchecked(Throwable t) {
        return t instanceof RuntimeException ? (RuntimeException) t : new RuntimeException(t);
    }

    /**
     * Throw an arbitrary {@link Throwable} without declaring it: runtime exceptions and
     * {@link Error}s are thrown unchanged, anything else is wrapped in a runtime exception.
     * In contrast to {@link #asUnchecked}, sub-classes of {@link Error} are preserved.
     * This method never returns normally.
     */
    public static <T> T rethrowUnchecked(Throwable t) {
        if (t instanceof Error) {
            throw (Error) t;
        }
        if (t instanceof RuntimeException) {
            throw (RuntimeException) t;
        }
        throw new RuntimeException(t);
    }
}

View File

@@ -0,0 +1,26 @@
package com.semmle.util.exception;
/**
 * Unchecked counterpart of the (checked) Java {@link InterruptedException},
 * for places where the checked exception cannot be thrown, e.g. in visitors.
 */
public class InterruptedError extends RuntimeException {
    private static final long serialVersionUID = 9163340147606765395L;

    /** Create an error without message or cause. */
    public InterruptedError() {
        super();
    }

    /** Create an error with the given message. */
    public InterruptedError(String message) {
        super(message);
    }

    /** Create an error with the given cause. */
    public InterruptedError(Throwable cause) {
        super(cause);
    }

    /** Create an error with the given message and cause. */
    public InterruptedError(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,47 @@
package com.semmle.util.exception;
/**
 * Base class of the unchecked exceptions in this package. When constructed with both a
 * message and a cause, a short description of the root cause is appended to the message.
 */
public abstract class NestedError extends RuntimeException {
    private static final long serialVersionUID = -3145876396931008989L;

    public NestedError(String message) {
        super(message);
    }

    public NestedError(Throwable throwable) {
        super(throwable);
    }

    public NestedError(String message, Throwable throwable) {
        super(buildMessage(message, throwable), throwable);
    }

    /**
     * Combine {@code message} with a banner naming the root cause of {@code throwable}: its
     * simple class name and, if it has one, an abbreviated version of its message.
     * <p>
     * Subclasses should not need to call this directly -- just call the
     * two-argument super constructor.
     * </p>
     *
     * @param message The message of the new exception; may be null, in which case the
     *            banner alone becomes the message.
     * @param throwable The cause of the new exception; if null, {@code message} is returned as is.
     */
    private static String buildMessage(String message, Throwable throwable) {
        if (throwable == null)
            return message;

        // Walk down to the root cause, guarding against a self-referential cause.
        while (throwable.getCause() != null && throwable.getCause() != throwable)
            throwable = throwable.getCause();

        String banner = "eventual cause: " + throwable.getClass().getSimpleName();
        String rootmsg = throwable.getMessage();
        if (rootmsg != null) {
            // Only the first line of the root message is shown, and long lines are cut short.
            int p = rootmsg.indexOf('\n');
            if (p >= 0)
                rootmsg = rootmsg.substring(0, p) + "...";
            if (rootmsg.length() > 100)
                rootmsg = rootmsg.substring(0, 80) + "...";
            banner += " \"" + rootmsg + "\"";
        }

        // Without a message of our own, the banner is all there is to report.
        if (message == null)
            return "(" + banner + ")";
        // Avoid repeating the banner when wrapping an exception that already carries it.
        if (message.contains(banner))
            return message;
        return message + "\n(" + banner + ")";
    }
}

View File

@@ -0,0 +1,30 @@
package com.semmle.util.exception;
/**
 * One of the standard Semmle unchecked exceptions.
 * Its usage should follow the guidelines described in docs/semmle-unchecked-exceptions.md
 */
public class ResourceError extends NestedError {
    private static final long serialVersionUID = 4132771414092814913L;

    public ResourceError(String message) {
        super(message);
    }

    @Deprecated // A ResourceError may be presented to the user, so should always have a message
    public ResourceError(Throwable throwable) {
        super(throwable);
    }

    public ResourceError(String message, Throwable throwable) {
        super(message,throwable);
    }

    /**
     * Returns just the message when there is one, since it is expected to be meaningful
     * on its own; otherwise falls back to the default representation.
     */
    @Override
    public String toString() {
        String ownMessage = getMessage();
        if (ownMessage == null) {
            return super.toString();
        }
        return ownMessage;
    }
}

View File

@@ -0,0 +1,46 @@
package com.semmle.util.exception;
/**
 * One of the standard Semmle unchecked exceptions.
 * Its usage should follow the guidelines described in docs/semmle-unchecked-exceptions.md
 */
public class UserError extends NestedError {
    private static final long serialVersionUID = 4132771414092814913L;

    private final boolean reportAsInfoMessage;

    /** A user-visible error that is reported as an error (not as information only). */
    public UserError(String message) {
        super(message);
        this.reportAsInfoMessage = false;
    }

    /**
     * A user-visible error
     *
     * @param message The message to display
     * @param reportAsInfoMessage If <code>true</code>, report as information only - not an error
     */
    public UserError(String message, boolean reportAsInfoMessage) {
        super(message);
        this.reportAsInfoMessage = reportAsInfoMessage;
    }

    /** A user-visible error with an underlying cause; it is reported as an error. */
    public UserError(String message, Throwable throwable) {
        super(message,throwable);
        this.reportAsInfoMessage = false;
    }

    /**
     * Whether the message should be reported without interpreting it as a fatal error.
     */
    public boolean reportAsInfoMessage() {
        return this.reportAsInfoMessage;
    }

    /**
     * Returns just the message when there is one, since it is expected to be meaningful
     * on its own; otherwise falls back to the default representation.
     */
    @Override
    public String toString() {
        String ownMessage = getMessage();
        if (ownMessage == null) {
            return super.toString();
        }
        return ownMessage;
    }
}

View File

@@ -0,0 +1,893 @@
package com.semmle.util.expansion;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.process.Builder;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
import com.semmle.util.process.LeakPrevention;
/**
 * An environment for performing variable expansions.
 *
 * <p>
 * The environment is defined by a set of variable definitions, which are
 * name/value pairs of strings. Once this has been populated (via the
 * {@link #defineVar(String, String)} and {@link #defineVars(Map)} methods),
 * arbitrary strings can be expanded.
 * </p>
 *
 * <p>
 * Two modes of expansion are supported:
 * </p>
 * <ul>
 * <li>String mode ({@link #strExpand(String)}): The result is intended to be a
 * single string.</li>
 * <li>List mode ({@link #listExpand(String)}): The result will be interpreted
 * as a command line, and hence is a list of strings.</li>
 * </ul>
 *
 * <p>
 * Variables are referenced by <code>${name}</code> to trigger a string-mode
 * expansion, and by <code>${=name}</code> to trigger a list-mode expansion.
 * This makes {@code $} a meta-character, and so it has to be escaped; the
 * escape sequence for it is <code>${}</code>.
 * </p>
 *
 * <p>
 * In list mode, strings are split in a platform-independent way similar (but
 * not identical) to normal shell argument splitting. Runs of white-space
 * separate arguments, and double-quotes can be used to protect whitespace from
 * splitting. The escape character is backslash. All of these metacharacters
 * have no special meaning in string mode.
 * </p>
 *
 * <p>
 * The {@code define*} and {@link #doNotExpand(String...)} methods of this
 * class are not thread-safe; they mutate instance state in an unsynchronized
 * way. By contrast, the expansion methods ({@link #strExpand(String)},
 * {@link #strExpandVar(String)}, {@link #listExpand(String)},
 * {@link #listExpandVar(String)} and {@link #varLookup(String)})
 * are thread safe relative to each
 * other. This means that it's fine to construct an expansion environment once,
 * and then use it from multiple threads concurrently, as long as no new variables
 * are defined. In addition, {@link #validate(String)} is safe to call once an
 * {@link ExpansionEnvironment} is fully initialised, even concurrently.
 * </p>
 *
 * <p>
 * Upon encountering any error (malformed variable expansion, malformed quoted
 * string (in list mode), reference to unknown variable, cyclic variable
 * definitions), the {@link #strExpand(String)} and {@link #listExpand(String)}
 * methods will throw {@link UserError} with a suitable message.
 * </p>
 *
 * <p>
 * As an advanced feature, command substitutions can be supported. They take the
 * form of <code>$(cmd arg1 arg2)</code> for string-mode expansion, and
 * <code>$(=cmd arg1
 * arg2)</code> for list-mode. The contents of the <code>$(..)</code> operator
 * undergo normal splitting, and are then run as a new process with the given
 * list of arguments. The working directory is unspecified, and it is an error
 * to depend upon it. A non-zero exit code, or a non-empty {@code stderr} stream
 * of the command, will result in a {@link UserError} indicating that something
 * went wrong; otherwise, the {@code stdout} output is collected and substituted
 * (possibly undergoing splitting, in the second form).
 * </p>
 */
public class ExpansionEnvironment {

    /**
     * A source for variable definitions to be used in an expansion environment.
     */
    public static interface VariableSource {
        /**
         * A callback which is expected to add all variables in the source to
         * the given environment.
         *
         * @param env
         *            The environment that should be filled in.
         */
        public void fillIn(ExpansionEnvironment env);
    }

    /** Raw (unexpanded) variable definitions, in definition order. */
    private final Map<String, String> vars = new LinkedHashMap<String, String>();

    /** Names of variables whose references are emitted literally rather than expanded. */
    private final Set<String> unexpandedVars = new LinkedHashSet<String>();

    /** Whether <code>$(..)</code> and <code>$(=..)</code> are recognised as command substitutions. */
    private final boolean commandSubstitutions;

    /**
     * Construct an empty {@link ExpansionEnvironment}.
     *
     * @param commandSubstitutions
     *            whether command substitutions should be recognised when
     *            expanding strings.
     */
    public ExpansionEnvironment(boolean commandSubstitutions) {
        this.commandSubstitutions = commandSubstitutions;
    }

    /**
     * This is the old default constructor, which always enables command substitutions.
     * <b>Doing so is a security risk</b> whenever the string you expand may come
     * from an untrusted source, so you should only do that when you explicitly want
     * to do it and have decided that it is safe. (And then use the constructor that
     * has an explicit argument to say so!)
     */
    @Deprecated
    public ExpansionEnvironment() {
        this(true);
    }

    /**
     * Construct an environment based on an existing map.
     *
     * @param commandSubstitutions
     *            whether command substitutions should be recognised.
     * @param vars
     *            initial variable definitions; they are copied into this
     *            environment.
     */
    public ExpansionEnvironment(boolean commandSubstitutions, Map<String, String> vars) {
        this(commandSubstitutions);
        this.vars.putAll(vars);
    }

    /**
     * Construct a copy of an existing {@link ExpansionEnvironment}.
     *
     * @param other
     *            the environment whose command-substitution setting, variable
     *            definitions and unexpanded-variable names are copied.
     */
    public ExpansionEnvironment(ExpansionEnvironment other) {
        this(other.commandSubstitutions);
        this.vars.putAll(other.vars);
        this.unexpandedVars.addAll(other.unexpandedVars);
    }

    /**
     * Add a set of variable definitions to this environment.
     *
     * @param vars
     *            A mapping from variable names to variable values. Recursive
     *            variable references are allowed, but cycles are an error.
     */
    public void defineVars(Map<String, String> vars) {
        this.vars.putAll(vars);
    }

    /**
     * Add the specified variable definition to this environment.
     *
     * @param name
     *            A variable name.
     * @param value
     *            The value that the variable should expand to. References to
     *            other variables or expansions are allowed, but cycles are an
     *            error.
     */
    public void defineVar(String name, String value) {
        this.vars.put(name, value);
    }

    /**
     * Try to load a file as a Java properties file and add all of its key/value
     * pairs as variable definitions.
     *
     * @param vars
     *            A {@link File} that will be loaded as a Java properties file,
     *            if it exists. May be <code>null</code> or a file whose
     *            existence has not been checked.
     * @throws ResourceError
     *             if the file exists but can't be read, or exists as a
     *             directory, or reading it fails.
     */
    public void defineVarsFromFile(File vars) {
        if (vars == null || !vars.exists())
            return;
        if (vars.isDirectory())
            throw new ResourceError(vars
                    + " is a directory, cannot load variables from it.");
        Properties properties = FileUtil.loadProperties(vars);
        for (String key : properties.stringPropertyNames())
            defineVar(key, properties.getProperty(key));
    }

    /**
     * Add a variable definition of {@code env.foo=bar} for each system
     * environment variable {@code foo=bar}. Typically it is desirable to allow
     * the environment to override previously specified variables, so this
     * should be called once all other variables have been defined.
     *
     * <p>
     * The values of variables taken from the environment are escaped to prevent
     * recursive expansion; in particular, this prevents accidental command
     * execution if a command substitution is encountered in the environment.
     * </p>
     *
     * @param environment
     *            the process environment to read variables from.
     */
    public void defineVarsFromEnvironment(Env environment) {
        String extraVars = environment.get(Var.ODASA_EXTRA_VARIABLES);
        if (extraVars != null)
            defineVarsFromFile(new File(extraVars));
        for (Entry<String, String> entry : environment.getenv().entrySet())
            // Escape '$' so that environment values are never re-interpreted as expansions.
            defineVar("env." + entry.getKey(), escape(entry.getValue()));
        environment.addEnvironmentToNewEnv(this);
    }

    /**
     * Indicate that references to the given set of variable names should not be
     * expanded. This means that they need not be defined, and the output will
     * contain the literal variable expansion sequences.
     *
     * @param vars
     *            A list of variable names.
     */
    public void doNotExpand(String... vars) {
        Collections.addAll(unexpandedVars, vars);
    }

    /**
     * Supply a "default value" for a variable, meaning that the variable will
     * be set to the given default value if it hasn't already been defined. No
     * change is made to this environment if a definition exists.
     *
     * @param var A variable name.
     * @param defaultValue The default value for the named variable.
     */
    public void setDefault(String var, String defaultValue) {
        if (!vars.containsKey(var))
            vars.put(var, defaultValue);
    }

    /**
     * Expand the given string in "string mode", resolving variable references
     * and command substitutions.
     *
     * @param s the string to expand.
     * @return the fully expanded string.
     * @throws UserError if parsing or expansion fails; the error names the
     *             string being expanded and nests the original failure.
     */
    public String strExpand(String s) {
        try {
            return new Expander().new ExpansionParser(s).parseAsString().expandAsString();
        } catch (UserError e) {
            throw new UserError("Failed to expand '" + s + "'.", e);
        }
    }

    /**
     * Expand the given string in "list mode", resolving variable references and
     * command substitutions.
     *
     * @param s the string to expand.
     * @return the list of arguments resulting from the expansion.
     * @throws UserError if parsing or expansion fails; the error names the
     *             string being expanded and nests the original failure.
     */
    public List<String> listExpand(String s) {
        try {
            return new Expander().new ExpansionParser(s).parseAsList().expandAsList();
        } catch (UserError e) {
            throw new UserError("Failed to expand '" + s
                    + "' as an argument list.", e);
        }
    }

    /**
     * Expand the given variable fully in "string mode", resolving variable
     * references and command substitutions. The entire string is interpreted as
     * the name of the initial variable.
     */
    public String strExpandVar(String varName) {
        return new Expander().new Variable(varName).expandAsString();
    }

    /**
     * Expand the given variable fully in "list mode", resolving variable
     * references and command substitutions. The entire string is interpreted as
     * the name of the initial variable.
     */
    public List<String> listExpandVar(String varName) {
        return new Expander().new SplitVariable(varName).expandAsList();
    }

    /**
     * Validate the given string for expansion. This verifies the absence of
     * parse errors, and the fact that all directly referenced variables are
     * defined by this environment.
     *
     * <p>
     * Expansion using {@link #strExpand(String)} or {@link #listExpand(String)}
     * may still not succeed, if there are semantic errors (like circular
     * variable definitions) or a command substitution introduces a reference to
     * an undefined variable.
     * </p>
     *
     * @param str
     *            A string that should be validated.
     * @throws UserError
     *             if validation fails, with a suitable error message.
     */
    public void validate(String str) {
        new Expander().new ExpansionParser(str).parseAsList().validate();
    }

    /**
     * Look up the (raw) value of a given variable, without performing expansion
     * on it.
     *
     * @param name
     *            The variable name.
     * @return The value that this variable is mapped to.
     * @throws UserError
     *             if the variable is not defined.
     */
    public synchronized String varLookup(String name) {
        String value = vars.get(name);
        if (value == null) {
            // List the known names (sorted) to help the user spot a typo.
            ArrayList<String> available = new ArrayList<String>(vars.keySet());
            Collections.sort(available);
            throw new UserError("Attempting to expand unknown variable: "
                    + name + ", available variables are: " + available);
        }
        return value;
    }

    /**
     * Check whether this environment defines a variable of the given name, without
     * performing expansion on it -- such full expansion may still fail.
     *
     * @param name The variable name.
     * @return <code>true</code> if this environment contains a direct definition
     */
    public boolean definesVar(String name) {
        return vars.containsKey(name);
    }

    /**
     * Splits an input string into delimiter tokens and the literal text between
     * them, and remembers where each token starts so that error messages can
     * report positions.
     */
    private static class ExpansionTokeniser {
        /**
         * The delimiters which should be returned as their own tokens. Order of
         * alternatives matters! The recognised tokens are, in order:
         *
         * <ul>
         * <li>{@code \\}</li>
         * <li>{@code \"}</li>
         * <li>{@code "}</li>
         * <li><code>${}</code></li>
         * <li><code>${=</code></li>
         * <li><code>${</code></li>
         * <li><code>$(=</code></li>
         * <li><code>$(</code></li>
         * <li><code>$</code></li>
         * <li><code>}</code></li>
         * <li><code>)</code></li>
         * <li>Runs of whitespace.</li>
         * </ul>
         *
         * <p>
         * By defining the alternatives in this order, longer matches will be
         * preferred, so that checking for escape sequences is easy. Note that
         * in the regular expression source, a literal {@code \} must undergo
         * two levels of escaping: Java strings and regular expression
         * metacharacters; it thus becomes {@code \\\\}.
         * </p>
         */
        private static final Pattern delims = Pattern
                .compile("\\\\\\\\|\\\\\"|\"|\\$\\{\\}|\\$\\{=|\\$\\{|"
                        + "\\$\\(=|\\$\\(|\\$|\\}|\\)|\\s+");

        /** All tokens of the input, delimiters and literal runs interleaved. */
        private final List<String> tokens = new ArrayList<String>();

        /** Zero-based start offset of each token within the input. */
        private final int[] positions;

        /** Index of the token that the next call to {@link #nextToken()} returns. */
        private int nextToken = 0;

        /**
         * Tokenise the given string.
         *
         * @param str the string to split into tokens.
         */
        public ExpansionTokeniser(String str) {
            Matcher matcher = delims.matcher(str);
            StringBuffer tmp = new StringBuffer();
            while (matcher.find()) {
                // Replacing the delimiter with "" leaves exactly the literal
                // text preceding it in tmp.
                matcher.appendReplacement(tmp, "");
                if (tmp.length() > 0) {
                    tokens.add(tmp.toString());
                    tmp = new StringBuffer();
                }
                tokens.add(matcher.group());
            }
            matcher.appendTail(tmp);
            if (tmp.length() > 0)
                tokens.add(tmp.toString());

            positions = new int[tokens.size()];
            int pos = 0;
            for (int i = 0; i < tokens.size(); i++) {
                positions[i] = pos;
                pos += tokens.get(i).length();
            }
        }

        /** Whether there are tokens left to consume. */
        public boolean hasMoreTokens() {
            return nextToken < tokens.size();
        }

        /** Consume and return the next token. */
        public String nextToken() {
            return tokens.get(nextToken++);
        }

        /** Whether the given token consists entirely of a single delimiter. */
        public boolean isDelimiter(String token) {
            return delims.matcher(token).matches();
        }

        /**
         * The one-based position of the most recently consumed token. At least
         * one token must have been consumed before this is called.
         */
        public int pos() {
            return positions[nextToken - 1] + 1;
        }
    }

    /**
     * A wrapper around the various expansion classes, holding some expansion
     * state to detect things like circular variable definitions.
     */
    private class Expander {
        /** Names of the variables whose expansion has started but not yet finished. */
        private final Set<String> expansionsInProgress = new LinkedHashSet<String>();

        /**
         * A string expansion. This can be a literal string, a variable reference or
         * a command substitution; the latter two can optionally be "split". Each
         * expansion can be interpreted to yield a single string or a list of
         * strings (typically as program arguments).
         */
        abstract class Expansion {
            public abstract String expandAsString();
            public abstract List<String> expandAsList();
            public abstract void validate();
        }

        /**
         * A sequence of words, each of which is a sequence of expansions that
         * are concatenated without separators.
         */
        class Sentence extends Expansion {
            private final List<List<Expansion>> words = new ArrayList<List<Expansion>>();

            public Sentence(List<List<Expansion>> words) {
                this.words.addAll(words);
            }

            @Override
            public void validate() {
                for (List<Expansion> expansions : words)
                    for (Expansion expansion : expansions)
                        expansion.validate();
            }

            /** Concatenate the string expansions of all parts of a word. */
            private String expandWord(List<Expansion> word) {
                StringBuilder result = new StringBuilder();
                for (Expansion e : word)
                    result.append(e.expandAsString());
                return result.toString();
            }

            /** Expand every word and join the words with single spaces. */
            @Override
            public String expandAsString() {
                StringBuilder result = new StringBuilder();
                for (List<Expansion> word : words) {
                    if (result.length() > 0)
                        result.append(' ');
                    result.append(expandWord(word));
                }
                return result.toString();
            }

            /**
             * Expand every word as a list; the lists produced by adjacent parts
             * of one word are glued together at their boundaries.
             */
            @Override
            public List<String> expandAsList() {
                List<String> result = new ArrayList<String>();
                for (List<Expansion> word : words) {
                    List<List<String>> segments = new ArrayList<List<String>>();
                    for (Expansion e : word) {
                        segments.add(e.expandAsList());
                    }
                    result.addAll(glue(segments));
                }
                return result;
            }

            /**
             * This is a non-quadratic implementation of the following Haskell code:
             *
             * <pre>
             * <code>
             * glue :: [[String]] -&gt; [String]
             * glue = foldr join []
             *   where join [] xs = xs
             *         join xs [] = xs
             *         join xs ys = init xs ++ [last xs ++ head ys] ++ tail ys
             * </code>
             * </pre>
             */
            private List<String> glue(List<List<String>> segments) {
                String trailingWord = null;
                List<String> result = new ArrayList<String>();
                for (List<String> segment : segments)
                    trailingWord = glue_join_accum(result, segment, trailingWord);
                if (trailingWord != null)
                    result.add(trailingWord);
                return result;
            }

            /**
             * Fold one segment into the accumulated result. The last string of
             * the segment is not added to {@code result} but returned, so that
             * it can be joined with the first string of the next segment.
             */
            private String glue_join_accum(List<String> result,
                    List<String> segment, String trailingWord) {
                int n = segment.size();
                switch (n) {
                case 0:
                    return trailingWord;
                case 1:
                    return combine(trailingWord, segment.get(0));
                default:
                    result.add(combine(trailingWord, segment.get(0)));
                    result.addAll(segment.subList(1, n - 1));
                    return segment.get(n - 1);
                }
            }

            /** Concatenate two strings, treating a <code>null</code> first operand as absent. */
            private String combine(String a, String b) {
                if (a == null)
                    return b;
                return a + b;
            }
        }

        /** A fixed piece of text that expands to itself. */
        class Literal extends Expansion {
            private final String value;

            public Literal(String value) {
                this.value = value;
            }

            @Override
            public void validate() {
                // Always valid.
            }

            @Override
            public String expandAsString() {
                return value;
            }

            @Override
            public List<String> expandAsList() {
                return Collections.singletonList(value);
            }
        }

        /** A double-quoted string: always expands to exactly one list element. */
        class QuotedString extends Sentence {
            public QuotedString(List<Expansion> content) {
                super(Collections.singletonList(content));
            }

            @Override
            public List<String> expandAsList() {
                return Collections.singletonList(this.expandAsString());
            }
        }

        /** A string-mode variable reference, <code>${name}</code>. */
        class Variable extends Expansion {
            protected final String name;

            public Variable(String name) {
                this.name = name;
            }

            @Override
            public void validate() {
                varLookup(name); // Will throw if variable is undefined.
            }

            /** Record that expansion of the variable has begun; a repeat means a cycle. */
            protected void startExpanding(String name) {
                if (!expansionsInProgress.add(name))
                    throw new UserError("Circular expansion of variable " + name);
            }

            /** Record that expansion of the variable has finished. */
            protected void doneWith(String name) {
                if (!expansionsInProgress.remove(name))
                    throw new CatastrophicError("Not currently expanding " + name);
            }

            /** The literal reference that is emitted when this variable must not be expanded. */
            protected String ref() {
                return "${" + name + "}";
            }

            @Override
            public final String expandAsString() {
                if (unexpandedVars.contains(name))
                    return ref();
                startExpanding(name);
                String result = expandAsStringImpl();
                doneWith(name);
                return result;
            }

            public String expandAsStringImpl() {
                // Not calling ExpansionEnvironment.strExpand(), since
                // we must run in the same enclosing instance of Expander.
                return new ExpansionParser(varLookup(name)).parseAsString().expandAsString();
            }

            @Override
            public final List<String> expandAsList() {
                if (unexpandedVars.contains(name))
                    return Collections.singletonList(ref());
                startExpanding(name);
                List<String> result = expandAsListImpl();
                doneWith(name);
                return result;
            }

            public List<String> expandAsListImpl() {
                return Collections.singletonList(expandAsStringImpl());
            }
        }

        /** A list-mode variable reference, <code>${=name}</code>. */
        class SplitVariable extends Variable {
            public SplitVariable(String name) {
                super(name);
            }

            @Override
            protected String ref() {
                return "${=" + name + "}";
            }

            @Override
            public String expandAsStringImpl() {
                return StringUtil.glue(" ", expandAsListImpl());
            }

            @Override
            public List<String> expandAsListImpl() {
                // Not calling ExpansionEnvironment.listExpand(): that would create
                // a fresh Expander with an empty expansionsInProgress set, so a
                // cyclic definition such as a=${=a} would recurse until the stack
                // overflows instead of being reported as a circular expansion.
                return new ExpansionParser(varLookup(name)).parseAsList().expandAsList();
            }
        }

        /** A string-mode command substitution, <code>$(cmd args)</code>. */
        class Command extends Expansion {
            private final Sentence argv;

            public Command(List<List<Expansion>> args) {
                this.argv = new Sentence(args);
            }

            @Override
            public void validate() {
                argv.validate();
            }

            /**
             * Expand the argument list, run it as a process and return what it
             * wrote to its standard output.
             *
             * @throws UserError if the command cannot be run, exits with a
             *             non-zero code or writes anything to its standard error.
             */
            protected String run() {
                List<String> args = argv.expandAsList();
                ByteArrayOutputStream result = new ByteArrayOutputStream();
                ByteArrayOutputStream err = new ByteArrayOutputStream();
                Builder builder = new Builder(args, result, err);
                builder.setLeakPrevention(LeakPrevention.ALL);
                try {
                    int exitCode = builder.execute();
                    if (exitCode != 0)
                        throw new UserError("Exit code " + exitCode
                                + " from command "
                                + builder.toString());
                    if (err.size() > 0)
                        throw new UserError("Command \""
                                + builder.toString()
                                + "\" produced output on stderr: " + err.toString());
                } catch (RuntimeException e) {
                    // NOTE(review): presumably UserError is itself a RuntimeException,
                    // in which case the two errors thrown above are also caught here
                    // and nested inside this one -- confirm against NestedError.
                    throw new UserError("Could not execute command "
                            + builder.toString(), e);
                }
                return result.toString();
            }

            @Override
            public String expandAsString() {
                return run();
            }

            @Override
            public List<String> expandAsList() {
                return Collections.singletonList(expandAsString());
            }
        }

        /** A list-mode command substitution, <code>$(=cmd args)</code>. */
        class SplitCommand extends Command {
            public SplitCommand(List<List<Expansion>> argv) {
                super(argv);
            }

            @Override
            public String expandAsString() {
                return StringUtil.glue(" ", expandAsList());
            }

            @Override
            public List<String> expandAsList() {
                // The command output is split into words, but expansions inside
                // it are not interpreted.
                return new ExpansionParser(run()).splitAsString().expandAsList();
            }
        }

        /** A recursive-descent parser turning a token stream into {@link Expansion}s. */
        private class ExpansionParser {
            private final ExpansionTokeniser tokens;

            public ExpansionParser(String str) {
                tokens = new ExpansionTokeniser(str);
            }

            /** Parse the whole input as a single word (string mode). */
            public Sentence parseAsString() {
                List<List<Expansion>> words = new ArrayList<List<Expansion>>();
                words.add(parseTerminatedString(null));
                return new Sentence(words);
            }

            /** Parse the whole input as a whitespace-separated argument list (list mode). */
            public Sentence parseAsList() {
                return new Sentence(parseTerminatedList(null, false));
            }

            /** Split the whole input into arguments without interpreting any expansions. */
            public Sentence splitAsString() {
                return new Sentence(parseTerminatedList(null, true));
            }

            /**
             * Parse tokens as one word until the terminator token is consumed,
             * or until the end of input if the terminator is <code>null</code>.
             */
            private List<Expansion> parseTerminatedString(String terminator) {
                List<Expansion> result = new ArrayList<Expansion>();
                while (tokens.hasMoreTokens()) {
                    String next = tokens.nextToken();
                    if (next.equals(terminator)) {
                        return result;
                    } else if (next.equals("\\\"")) {
                        result.add(new Literal("\""));
                    } else if (next.equals("\\\\")) {
                        result.add(new Literal("\\"));
                    } else if (!tryParseExpansion(result, next)) {
                        result.add(new Literal(next));
                    }
                }
                if (terminator != null)
                    throw new UserError(
                            "Premature end of input while looking for matching '"
                                    + terminator + "'.");
                return result;
            }

            /**
             * Parse tokens as a list of words until the terminator token is
             * consumed, or until the end of input if the terminator is
             * <code>null</code>.
             *
             * @param noExpansions
             *            if <code>true</code>, expansion tokens are treated as
             *            literal text.
             */
            private List<List<Expansion>> parseTerminatedList(String terminator,
                    boolean noExpansions) {
                List<List<Expansion>> result = new ArrayList<List<Expansion>>();
                List<Expansion> accum = new ArrayList<Expansion>();
                // Set after a closing quote: the next token must be whitespace
                // (or the terminator).
                boolean mustSeeSpace = false;
                while (tokens.hasMoreTokens()) {
                    String next = tokens.nextToken();
                    if (next.equals(terminator)) {
                        if (accum.size() > 0)
                            result.add(accum);
                        return result;
                    } else if (mustSeeSpace
                            && !Character.isWhitespace(next.charAt(0))) {
                        throw new UserError("The quoted string ending at "
                                + tokens.pos()
                                + " must be surrounded by whitespace.");
                    } else if (next.length() > 0
                            && Character.isWhitespace(next.charAt(0))) {
                        // Whitespace ends the current word, if any.
                        mustSeeSpace = false;
                        if (accum.size() > 0) {
                            result.add(accum);
                            accum = new ArrayList<Expansion>();
                        }
                    } else if (next.equals("\"")) {
                        if (!accum.isEmpty())
                            throw new UserError(
                                    "At position "
                                            + tokens.pos()
                                            + ", the quote should "
                                            + "either be preceded by a space (if it is intended to start an argument) "
                                            + "or escaped as \\\".");
                        accum.add(new QuotedString(parseTerminatedString("\"")));
                        result.add(accum);
                        accum = new ArrayList<Expansion>();
                        mustSeeSpace = true;
                    } else if (next.equals("\\\"")) {
                        // An escaped quote means a literal quote.
                        accum.add(new Literal("\""));
                    } else if (next.equals("\\\\")) {
                        // An escaped backslash means a literal backslash.
                        accum.add(new Literal("\\"));
                    } else if (noExpansions || !tryParseExpansion(accum, next)) {
                        accum.add(new Literal(next));
                    }
                }
                if (terminator != null)
                    throw new UserError(
                            "Premature end of expansion while looking for '"
                                    + terminator + "'.");
                if (accum.size() > 0)
                    result.add(accum);
                return result;
            }

            /**
             * If the given token starts an expansion, parse the expansion, add
             * it to {@code result} and return <code>true</code>; otherwise
             * return <code>false</code> and leave {@code result} untouched.
             * The command tokens only count as expansions when command
             * substitutions are enabled.
             */
            private boolean tryParseExpansion(List<Expansion> result,
                    String curToken) {
                if (curToken.equals("${}")) {
                    result.add(new Literal("$"));
                } else if (curToken.equals("$(=") && commandSubstitutions) {
                    result.add(new SplitCommand(parseTerminatedList(")", false)));
                } else if (curToken.equals("$(") && commandSubstitutions) {
                    result.add(new Command(parseTerminatedList(")", false)));
                } else if (curToken.equals("${=")) {
                    result.add(new SplitVariable(parseVarName()));
                } else if (curToken.equals("${")) {
                    result.add(new Variable(parseVarName()));
                } else if (curToken.equals("$")) {
                    throw new UserError(
                            "Malformed expansion: A standalone '$' character should be escaped as '${}'.");
                } else {
                    return false;
                }
                return true;
            }

            /**
             * Parse a variable name followed by the closing <code>}</code>; the
             * opening token has already been consumed.
             */
            protected String parseVarName() {
                if (!tokens.hasMoreTokens())
                    throw new UserError(
                            "Malformed variable substitution: stray '${' at " + tokens.pos());
                String name = tokens.nextToken();
                if (tokens.isDelimiter(name))
                    throw new UserError(
                            "Malformed variable substitution: Unexpected '" + name
                                    + "' at " + tokens.pos());
                if (!tokens.hasMoreTokens())
                    throw new UserError(
                            "Malformed variable substitution for '" + name +
                            "': Missing '}' at " + tokens.pos());
                String next = tokens.nextToken();
                if (!next.equals("}"))
                    throw new UserError(
                            "Malformed variable substitution: Expecting '}' at "
                                    + tokens.pos() + ", found '" + next + "'.");
                return name;
            }
        }
    }

    /**
     * Resolve a path. Any variables in the path will be expanded. If
     * the path is an absolute path after expansion, it is returned as is.
     * Otherwise, it is combined with the given base path.
     */
    public File expandPath(File base, String path) {
        String expanded = strExpand(path);
        if (FileUtil.isAbsolute(expanded)) {
            return new File(expanded);
        } else {
            return FileUtil.fileRelativeTo(base, expanded);
        }
    }

    /**
     * Escape a string so that any '$'s inside it will be interpreted literally, rather than
     * as parts of variable references.
     */
    public static String escape(String base) {
        return base.replace("$", "${}");
    }

    /**
     * Escape {@code argument} as an argument, so that any {@code $}, {@code \} or {@code "} is interpreted literally.
     *
     * @param argument - the String to escape.
     * @return the escaped String.
     */
    public static String escapeArgument(String argument) {
        // Plain literal replacement; backslashes must be doubled before quotes
        // are escaped, so that the backslashes added for quotes are not doubled.
        return escape(argument).replace("\\", "\\\\").replace("\"", "\\\"");
    }
}

View File

@@ -0,0 +1,100 @@
package com.semmle.util.extraction;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.process.Env;
import com.semmle.util.trap.pathtransformers.PathTransformer;
/**
 * A file listing patterns of source files and which ODASA project
 * each should be populated to (if any).
 */
public class PopulationSpecFile {

    private static final String TRAP_FOLDER_KEY = "TRAP_FOLDER=";
    private static final String ODASA_DB_KEY = "ODASA_DB=";
    private static final String SOURCE_ARCHIVE_KEY = "SOURCE_ARCHIVE=";
    private static final String BUILD_ERROR_DIR_KEY = "BUILD_ERROR_DIR=";

    /** The entries read from the file, in file order. */
    private final List<SpecFileEntry> specs = new ArrayList<SpecFileEntry>();

    /**
     * Read a specification file. Lines are trimmed; blank lines and lines
     * starting with <code>@</code> are skipped, a line starting with
     * <code>#</code> begins a new section, <code>KEY=value</code> lines set the
     * locations for the current section, and every other line is a path
     * pattern (an exclusion if it starts with <code>-</code>). A section only
     * yields an entry if it set <code>ODASA_DB</code>.
     *
     * @param specFile the file to read
     * @throws ResourceError if reading the file fails with an I/O error
     */
    public PopulationSpecFile(File specFile) {
        FileReader fileReader = null;
        BufferedReader reader = null;
        try {
            fileReader = new FileReader(specFile);
            reader = new BufferedReader(fileReader);

            File dbPath = null;
            File trapFolder = null;
            File sourceArchivePath = null;
            List<String> patterns = new ArrayList<String>();

            for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
                String line = raw.trim();
                if (line.isEmpty() || line.startsWith("@")) {
                    continue;
                }

                if (line.startsWith("#")) {
                    // Section boundary: emit the finished section, then start afresh.
                    if (dbPath != null) {
                        specs.add(new SpecFileEntry(trapFolder, sourceArchivePath, patterns));
                    }
                    // NOTE(review): trapFolder is not cleared here, so it carries over
                    // into the next section unless that section sets it again --
                    // confirm whether this is intended.
                    dbPath = null;
                    sourceArchivePath = null;
                    patterns = new ArrayList<String>();
                } else if (line.startsWith(TRAP_FOLDER_KEY)) {
                    trapFolder = new File(line.substring(TRAP_FOLDER_KEY.length()));
                } else if (line.startsWith(ODASA_DB_KEY)) {
                    dbPath = new File(line.substring(ODASA_DB_KEY.length()));
                } else if (line.startsWith(SOURCE_ARCHIVE_KEY)) {
                    sourceArchivePath = new File(line.substring(SOURCE_ARCHIVE_KEY.length()));
                } else if (line.startsWith(BUILD_ERROR_DIR_KEY)) {
                    // Accept and ignore for backwards compatibility
                } else if (line.startsWith("-")) {
                    File excluded = new File(line.substring(1).trim());
                    patterns.add("-" + normalisePathAndCase(excluded) + "/");
                } else {
                    File included = new File(line);
                    patterns.add(normalisePathAndCase(included) + "/");
                }
            }

            // The last section has no trailing '#' line to flush it.
            if (dbPath != null) {
                specs.add(new SpecFileEntry(trapFolder, sourceArchivePath, patterns));
            }
        } catch (IOException e) {
            throw new ResourceError("I/O error while reading specification file at " + specFile, e);
        } finally {
            FileUtil.close(reader);
            FileUtil.close(fileReader);
        }
    }

    /**
     * Get the entry for a file, or <code>null</code> if there is no matching entry
     */
    public SpecFileEntry getEntryFor(File f) {
        String normalised = normalisePathAndCase(f);
        for (SpecFileEntry candidate : specs) {
            if (candidate.matches(normalised)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Normalises the path like {@link PathTransformer#fileAsDatabaseString(File)}, and, in
     * addition, converts it to all-lowercase if we're on a case-insensitive
     * filesystem.
     * @param file the file to normalise
     * @return a normalised path that is lowercased if the file system is case-insensitive.
     */
    private static String normalisePathAndCase(File file) {
        String normalised = PathTransformer.std().fileAsDatabaseString(file);
        if (Env.getOS().isFileSystemCaseSensitive()) {
            return normalised;
        }
        return normalised.toLowerCase();
    }
}

View File

@@ -0,0 +1,48 @@
package com.semmle.util.extraction;
import java.io.File;
import java.util.List;
import com.semmle.util.data.StringUtil;
/**
 * One entry of a population specification: a TRAP folder and a source archive
 * location, together with the path patterns that select files for them.
 */
public class SpecFileEntry {

    private final File trapFolder;
    private final File sourceArchivePath;
    /** Path prefixes; a leading <code>-</code> marks an exclusion. */
    private final List<String> patterns;

    /**
     * @param trapFolder the TRAP folder for this entry
     * @param sourceArchivePath the source archive location for this entry
     * @param patterns the inclusion and exclusion prefixes, in order; the list
     *            is stored as given, not copied
     */
    public SpecFileEntry(File trapFolder, File sourceArchivePath, List<String> patterns) {
        this.trapFolder = trapFolder;
        this.sourceArchivePath = sourceArchivePath;
        this.patterns = patterns;
    }

    /**
     * Decide whether a path is selected by this entry. Patterns are applied in
     * order and the last one whose prefix matches the path wins; a path that no
     * pattern matches is not selected.
     *
     * @param path the path to test
     * @return <code>true</code> if the path is selected
     */
    public boolean matches(String path) {
        boolean included = false;
        for (String pattern : patterns) {
            boolean exclusion = pattern.startsWith("-");
            String prefix = exclusion ? pattern.substring(1) : pattern;
            if (path.startsWith(prefix)) {
                included = !exclusion;
            }
        }
        return included;
    }

    public File getTrapFolder() {
        return trapFolder;
    }

    public File getSourceArchivePath() {
        return sourceArchivePath;
    }

    /**
     * Render the two locations as <code>KEY=value</code> lines followed by the
     * patterns, one per line.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("TRAP_FOLDER=").append(trapFolder).append("\n");
        text.append("SOURCE_ARCHIVE=").append(sourceArchivePath).append("\n");
        text.append(StringUtil.glue("\n", patterns));
        return text.toString();
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,160 @@
package com.semmle.util.files;
import java.util.regex.Pattern;
import com.semmle.util.data.StringUtil;
/**
* Utility class to match a string to a pattern, which can either be
* an ant-like include/exclude pattern (with wildcards), or a rsync-like
* pattern.
* <p>
* In ant-like mode:
* <ul>
* <li>'**' matches zero or more characters (most notably including '/').
* <li>'*' matches zero or more characters except for '/'.
* <li>'?' matches any character (other than '/').
* </ul>
* <p>
* In rsync-like mode:
* <ul>
* <li>A pattern is matched only at the root if it starts with '/', and otherwise
* it is matched against each level of the directory tree.
* <li>'**', '*' and '?' have the same meaning as for ant.
* <li>Other rsync features (like [:..:] groups and backslash-escapes) are not supported.
* </ul>
*/
public class PathMatcher {
public enum Mode {
Ant, Rsync;
}
private final Mode mode;
private final Pattern pattern;
private final String originalPattern;
/**
* Create a {@link PathMatcher}.
*
* @param pattern An ant-like pattern
*/
public PathMatcher(String pattern) {
this(Mode.Ant, pattern);
}
/** Create a {@link PathMatcher}.
*
* @param mode The {@link Mode} to use
* @param pattern A pattern, interpreted as ant-like or rsync-like depending on
* the value of {@code mode}
*/
public PathMatcher(Mode mode, String pattern) {
this.mode = mode;
this.originalPattern = pattern;
StringBuilder b = new StringBuilder();
toRegex(b, pattern);
this.pattern = Pattern.compile(b.toString());
}
/** Create a {@link PathMatcher}.
*
* @param patterns Several ant-like patterns
*/
public PathMatcher(Iterable<String> patterns) {
this(Mode.Ant, patterns);
}
/** Create a {@link PathMatcher}.
*
* @param mode The {@link Mode} to use.
* @param patterns Several patterns, interpreted as ant-like or rsync-like depending
* on the value of {@code mode}.
*/
public PathMatcher(Mode mode, Iterable<String> patterns) {
this.mode = mode;
this.originalPattern = patterns.toString();
StringBuilder b = new StringBuilder();
for (String pattern : patterns) {
if (b.length() > 0)
b.append('|');
toRegex(b, pattern);
}
this.pattern = Pattern.compile(b.toString());
}
/**
* Translate a single path pattern into regular-expression text and append it to {@code b}.
* An empty pattern appends nothing at all.
*
* The pattern is first normalised (backslashes become forward slashes, runs of slashes
* collapse to one) and escaped; the wildcards are then rewritten as follows:
* <ul>
* <li>{@code **}{@code /} becomes {@code (?:^|.*}{@code /)}</li>
* <li>{@code **} becomes {@code .*}</li>
* <li>{@code *} becomes {@code [^/]*}</li>
* <li>{@code ?} becomes {@code [^/]}</li>
* </ul>
* Every other character is copied through unchanged.
*
* @param b The builder that receives the regular-expression text.
* @param pattern The path pattern to translate.
*/
private void toRegex(StringBuilder b, String pattern) {
if (pattern.length() == 0) return;
//normalize pattern path separators
pattern = pattern.replace('\\', '/');
//replace double slashes
pattern = pattern.replaceAll("//+", "/");
// escape -- presumably everything that is special in a regex except '*' and '?',
// which are translated by the loop below (TODO confirm against StringUtil)
pattern = StringUtil.escapeStringLiteralForRegexp(pattern, "*?");
// for ant, ending with '/' is shorthand for "/**"
if (mode == Mode.Ant && pattern.endsWith("/")) pattern = pattern + "**";
// replace "**/" with "(?:^|.*/)"
// replace "**" with ".*"
// replace "*" with "[^/]*"
// replace "?" with "[^/]"
int i = 0;
// In rsync-mode, a leading slash is an 'anchor' -- the pattern is only matched
// when rooted at the start of the path. This is the default behaviour for ant-like
// patterns.
if (mode == Mode.Rsync) {
if (pattern.charAt(0) == '/') {
// The slash is just anchoring, and may actually be missing
// in the case of a relative path.
b.append("/?");
// The anchoring slash has been handled; start the translation loop after it.
i++;
} else {
// Non-anchored rsync pattern: the pattern can match at any level in the tree.
b.append("(.*/)?");
}
}
// Translate the remainder one token at a time. The longest wildcard ("**/") is tested
// first so that it is not mistaken for "**" or for two single "*" wildcards.
while (i < pattern.length()) {
char c = pattern.charAt(i);
if (c == '*' && i < pattern.length() - 2 && pattern.charAt(i+1) == '*' && pattern.charAt(i+2) == '/') {
b.append("(?:^|.*/)");
i += 3;
}
else if (c == '*' && i < pattern.length() - 1 && pattern.charAt(i+1) == '*') {
b.append(".*");
i += 2;
}
else if(c == '*') {
b.append("[^/]*");
i++;
}
else if(c == '?') {
b.append("[^/]");
i++;
}
else {
// Not a wildcard: copy the (already escaped) character through.
b.append(c);
i++;
}
}
}
/**
 * Test whether a path is accepted by this matcher.
 *
 * The path is normalised first: every '\' is replaced with '/', and one trailing '/'
 * (if present) is dropped. The whole normalised path must then match the compiled pattern.
 *
 * @param path The path to match.
 * @return true if the entire normalised path matches.
 */
public boolean matches(String path) {
    String normalised = path.replace('\\', '/');
    if (normalised.endsWith("/")) {
        normalised = normalised.substring(0, normalised.length() - 1);
    }
    return pattern.matcher(normalised).matches();
}
/** Describe this matcher by its original pattern text followed by the compiled regular expression. */
@Override
public String toString() {
    StringBuilder description = new StringBuilder("Matches ");
    description.append(originalPattern);
    description.append(" [").append(pattern).append("]");
    return description.toString();
}
}

View File

@@ -0,0 +1,103 @@
package com.semmle.util.io;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import com.semmle.util.files.FileUtil;
/**
* A custom buffered line reader akin to {@link BufferedReader}, except that it preserves
* line terminators (and so its {@code readLine()} equivalent is called
* {@link #readLineAndTerminator()}). This class wraps a {@link Reader} but is not itself
* a {@link Reader}: the only operations it offers are {@link #readLineAndTerminator()}
* and {@link #close()}.
*/
public class BufferedLineReader implements Closeable {
// Fixed-size buffer; a line longer than this is returned in several pieces.
private final char[] buffer = new char[8192];
// Unread data occupies buffer[nextChar .. nChars-1].
private int nextChar = 0, nChars = 0;
// The underlying source of characters.
private final Reader in;
/**
* Create a line reader on top of the given reader.
*
* @param in The reader to pull characters from; it is closed by {@link #close()}.
*/
public BufferedLineReader(Reader in) {
this.in = in;
}
/**
* Read the string up to and including the next CRLF or LF terminator. This method
* may return a non-terminated string at EOF, or if a line is too long to fit in the
* internal buffer. Calls will block until enough data has been read to fill the
* buffer or find a line terminator.
* @return The next line (or buffer-full) of text, or {@code null} once EOF has been
* reached and no buffered characters remain.
* @throws IOException if the underlying stream throws.
*/
public String readLineAndTerminator() throws IOException {
int terminator = findNextLineTerminator();
// -1 means: EOF reached and nothing left in the buffer.
if (terminator == -1)
return null;
// The result covers nextChar..terminator inclusive, so the terminator is kept.
String result = new String(buffer, nextChar, terminator - nextChar + 1);
nextChar = terminator + 1;
return result;
}
/**
* Get the index of the last character that should be included in the next line.
* Usually, this is the LF in a LF or CRLF line terminator, but it might be the
* end of the buffer (if it is full, and no newlines are present), or it may be
* -1 (but only if EOF has been reached, and the buffer is currently empty).
* The first character of the line is pointed to by {@link #nextChar}, which
* may be modified by this method if the buffer is refilled.
*/
private int findNextLineTerminator() throws IOException {
// Number of unread characters already scanned without finding a terminator;
// expressed relative to nextChar so it stays valid when fill() compacts the buffer.
int alreadyChecked = 0;
do {
for (int i = nextChar + alreadyChecked; i < nChars; i++) {
if (buffer[i] == '\r' && i+1 < nChars && buffer[i+1] == '\n')
return i+1; // CRLF
else if (buffer[i] == '\n')
return i; // LF
}
// We didn't find a full newline in the existing buffer: Try to fill.
// This must be computed before fill(), which may reset nextChar and nChars.
alreadyChecked = nChars - nextChar;
int newlyRead = fill();
// Nothing more could be read (buffer full or EOF): hand back whatever is buffered.
// With an empty buffer nChars is 0, so this yields -1.
if (newlyRead <= 0)
return nChars - 1;
} while (true);
}
/**
* Block until at least one character from the underlying stream is read,
* or EOF is reached. Unread characters are first moved to the front of the
* buffer to make room.
*
* @return the number of characters read, 0 if the buffer is already full of
* unread characters, or the negative value returned by the underlying
* reader at EOF.
*/
private int fill() throws IOException {
if (nextChar >= nChars) {
// No unread characters.
nextChar = 0;
nChars = 0;
} else if (nextChar > 0) {
// Some unread characters.
System.arraycopy(buffer, nextChar, buffer, 0, nChars - nextChar);
nChars = nChars - nextChar;
nextChar = 0;
}
// Is the buffer full?
if (nChars == buffer.length)
return 0;
int read;
// Retry for as long as the underlying reader reports that it read zero characters.
do {
read = in.read(buffer, nChars, buffer.length - nChars);
} while (read == 0);
if (read > 0) {
nChars += read;
}
return read;
}
/** Close the underlying reader via {@code FileUtil.close}. */
@Override
public void close() {
FileUtil.close(in);
}
}

View File

@@ -0,0 +1,34 @@
package com.semmle.util.io;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.files.FileUtil;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * A thread that pumps everything it can read from an input stream into an output
 * stream. Once copying stops -- because the input ran out or an I/O error
 * occurred -- both streams are closed.
 */
public class RawStreamMuncher extends Thread {
    private final InputStream source;
    private final OutputStream sink;

    /**
     * @param in The stream to read from.
     * @param out The stream to write to.
     */
    public RawStreamMuncher(InputStream in, OutputStream out) {
        this.source = in;
        this.sink = out;
    }

    /** Copy until the input is exhausted, then close both streams. */
    @Override
    public void run() {
        try {
            pump();
        } finally {
            FileUtil.close(source);
            FileUtil.close(sink);
        }
    }

    /** Copy the source to the sink, deliberately ignoring any I/O failure. */
    private void pump() {
        try {
            StreamUtil.copy(source, sink);
        } catch (IOException e) {
            Exceptions.ignore(e, "When the process exits, a harmless IOException will occur here");
        }
    }
}

View File

@@ -0,0 +1,49 @@
package com.semmle.util.io;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.BufferedLineReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
/**
 * A thread that forwards text from an input stream to an output stream one line
 * (including its terminator) at a time, flushing after every line. When no output
 * stream is supplied the input is still drained, but discarded.
 */
public class StreamMuncher extends Thread {
    private final InputStream is;
    private PrintStream output;
    private BufferedLineReader reader;

    /**
     * @param is The stream to read lines from.
     * @param output The stream to forward lines to, or {@code null} to discard them.
     */
    public StreamMuncher(InputStream is, OutputStream output) {
        this.is = is;
        this.output = (output == null) ? null : new PrintStream(output);
    }

    /** Forward lines until the input ends, then close the readers on the input side. */
    @Override
    public void run() {
        InputStreamReader decoder = null;
        try {
            decoder = new InputStreamReader(is);
            reader = new BufferedLineReader(decoder);
            for (String line = reader.readLineAndTerminator();
                    line != null;
                    line = reader.readLineAndTerminator()) {
                if (output == null) {
                    // Nothing to forward to: just keep draining the input.
                    continue;
                }
                output.print(line);
                output.flush();
            }
        } catch (IOException e) {
            Exceptions.ignore(e, "When the process exits, a harmless IOException will occur here");
        } finally {
            FileUtil.close(reader);
            FileUtil.close(decoder);
        }
    }
}

View File

@@ -0,0 +1,201 @@
package com.semmle.util.io;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import com.semmle.util.exception.CatastrophicError;
/**
* Utility methods concerning {@link InputStream}s and {@link OutputStream}s.
*/
public class StreamUtil
{
/**
 * Transfer every byte that an {@link InputStream} can supply into an {@link OutputStream}.
 *
 * @param inputStream The stream to drain; reading stops as soon as
 *            {@link InputStream#read(byte[])} no longer returns a positive count.
 * @param outputStream The stream that receives the bytes.
 * @return The number of bytes copied.
 * @throws IOException from {@link InputStream#read(byte[])} or
 *             {@link OutputStream#write(byte[], int, int)}
 * @throws CatastrophicError if either of the streams is {@code null}
 */
public static long copy(InputStream inputStream, OutputStream outputStream) throws IOException
{
    nullCheck(inputStream, outputStream);
    byte[] chunk = new byte[1024];
    long copied = 0;
    for (int count = inputStream.read(chunk); count > 0; count = inputStream.read(chunk)) {
        outputStream.write(chunk, 0, count);
        copied += count;
    }
    return copied;
}
/**
 * Transfer every character that a {@link Reader} can supply into a {@link Writer}.
 *
 * @param reader The reader to drain; reading stops as soon as
 *            {@link Reader#read(char[])} no longer returns a positive count.
 * @param writer The writer that receives the characters.
 * @return The number of characters copied.
 * @throws IOException from {@link Reader#read(char[])} or
 *             {@link Writer#write(char[], int, int)}
 * @throws CatastrophicError if either argument is {@code null}
 */
public static long copy(Reader reader, Writer writer) throws IOException
{
    nullCheck(reader, writer);
    char[] chunk = new char[1024];
    long copied = 0;
    for (int count = reader.read(chunk); count > 0; count = reader.read(chunk)) {
        writer.write(chunk, 0, count);
        copied += count;
    }
    return copied;
}
/**
 * Copy at most {@code length} bytes from an {@link InputStream}, into an {@link OutputStream}.
 * <p>
 * Copying stops as soon as {@code length} bytes have been transferred, or as soon as a
 * read yields no data (end of stream, or a read that returns zero bytes). A
 * {@code length} of zero or less copies nothing and does not touch the input stream.
 * </p>
 *
 * @param inputStream The InputStream from which to read, until {@code length} bytes have
 *            been read or {@link InputStream#read(byte[], int, int)} operation returns
 *            indicating that the input stream has reached its end.
 * @param outputStream The OutputStream to which all bytes read from {@code inputStream} should be
 *            written.
 * @param length The maximum number of bytes to copy
 * @return The number of bytes copied.
 * @throws IOException from {@link InputStream#read(byte[], int, int)} or
 *             {@link OutputStream#write(byte[], int, int)}
 * @throws CatastrophicError if either of the streams is {@code null}
 */
public static long limitedCopy(InputStream inputStream, OutputStream outputStream, long length) throws IOException
{
    nullCheck(inputStream, outputStream);
    // Copy byte data
    long total = 0;
    byte[] bytes = new byte[1024];
    // Checking the remaining budget first avoids a zero-length read once the budget is
    // used up, and keeps a negative length from reaching read() as a negative count.
    while (length > 0) {
        int read = inputStream.read(bytes, 0, (int) Math.min(bytes.length, length));
        if (read <= 0)
            break;
        outputStream.write(bytes, 0, read);
        length -= read;
        total += read;
    }
    return total;
}
/**
* Shared argument check for the copy methods: hands both arguments to
* {@code CatastrophicError.throwIfAnyNull}, which (as its name suggests) is
* expected to throw when either of them is {@code null}.
*/
private static void nullCheck(Object input, Object output) {
CatastrophicError.throwIfAnyNull(input, output);
}
/**
 * Discard {@code n} bytes from an input stream. A non-positive {@code n} skips nothing
 * and reports success.
 *
 * @param stream the InputStream
 * @param n the number of bytes to be skipped.
 * @return false if the end-of-file was reached before successfully skipping n bytes
 * @throws IOException if the stream throws while skipping or reading
 */
public static boolean skip(InputStream stream, long n) throws IOException {
    if (n <= 0) {
        return true;
    }
    // Skip all but the last byte; the last one is consumed with read() below so that
    // hitting end-of-file can be told apart from a successful skip.
    long remaining = n - 1;
    while (remaining > 0) {
        long advanced = stream.skip(remaining);
        if (advanced == 0) {
            // skip() made no progress: consume a single byte instead, which also detects EOF.
            if (stream.read() == -1) {
                return false;
            }
            advanced = 1;
        }
        remaining -= advanced;
    }
    return stream.read() != -1;
}
/**
 * Read up to {@code n} bytes from the input stream and return them in a fresh array.
 * If the stream ends before {@code n} bytes were read, only the bytes actually read
 * are returned.
 *
 * @param stream the InputStream
 * @param n the number of bytes to read
 * @return the read bytes
 * @throws IOException if an IOException occurs when accessing the stream
 * @throws IllegalArgumentException if n is negative
 */
public static byte[] readN(InputStream stream, int n) throws IOException {
    if (n >= 0) {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        limitedCopy(stream, collected, n);
        return collected.toByteArray();
    }
    throw new IllegalArgumentException("n must be positive");
}
/**
 * Reads bytes from the input stream into the given buffer, repeating the read until
 * {@code length} bytes have arrived or the stream yields no more data. If the end of
 * the stream is reached before enough bytes are read it reads as much as it can.
 *
 * @param stream the InputStream
 * @param buf the buffer to read into
 * @param offset the index in {@code buf} at which the first byte is stored
 * @param length the number of bytes to read
 * @return the total number of read bytes
 * @throws IOException if an IOException occurs when accessing the stream
 * @throws IllegalArgumentException if {@code length} is negative
 */
public static int read(InputStream stream, byte[] buf, int offset, int length) throws IOException {
    if (length < 0) throw new IllegalArgumentException("length must be positive");
    // Copy byte data
    int total = 0;
    int read;
    while ((read = stream.read(buf, offset, length)) > 0) {
        // Advance the write position: a stream may deliver the data in several pieces,
        // and each piece must land after the previous one rather than on top of it.
        offset += read;
        length -= read;
        total += read;
    }
    return total;
}
/**
 * Wrap an input stream in a {@link BufferedReader} that decodes it as UTF-8.
 *
 * @param inputStream the stream of UTF-8 encoded bytes
 * @return a buffered reader over the decoded characters
 */
public static BufferedReader newUTF8BufferedReader(InputStream inputStream) {
    InputStreamReader decoder = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
    return new BufferedReader(decoder);
}
/**
 * Wrap an output stream in a {@link BufferedWriter} that encodes characters as UTF-8.
 *
 * @param outputStream the stream that receives the encoded bytes
 * @return a buffered writer on top of the stream
 */
public static BufferedWriter newUTF8BufferedWriter(OutputStream outputStream) {
    OutputStreamWriter encoder = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8);
    return new BufferedWriter(encoder);
}
}

View File

@@ -0,0 +1,548 @@
package com.semmle.util.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.regex.Pattern;
import com.semmle.util.array.ArrayUtil;
import com.semmle.util.data.IntRef;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
/**
* A class that allows bulk operations on entire files,
* reading or writing them as {@link String} values.
*
* This is intended to address the woeful inadequacy of
* the Java standard libraries in this area.
*/
public class WholeIO {
/**
* The {@link IOException} most recently recorded by a failing operation on this
* instance; it is what {@link #getLastException()} returns.
*/
private IOException e;
/**
* Precompiled regular expression {@link Pattern} matching a CRLF ("\r\n") line ending.
*/
private final static Pattern rpLineEndingCRLF = Pattern.compile("\r\n");
/**
* The default encoding to use for writing, and for reading if no
* encoding can be detected.
*/
private final String defaultEncoding;
/**
* Construct a new {@link WholeIO} instance using ODASA's default
* charset ({@code "UTF-8"}) for all input and output (unless a
* different encoding is detected for a file being read).
* Equivalent to {@code new WholeIO("UTF-8")}.
*/
public WholeIO() {
this("UTF-8");
}
/**
* Construct a new {@link WholeIO} instance using the specified
* encoding for all input and output (unless a different encoding
* is detected for a file being read).
*
* The name is stored as given and is not checked here.
*
* @param encoding The encoding name, e.g. {@code "UTF-8"}.
*/
public WholeIO(String encoding) {
defaultEncoding = encoding;
}
/**
 * Read a whole file and return its content with leading and trailing whitespace
 * removed. Any previously recorded exception is cleared first; on failure the method
 * returns <code>null</code> and the cause is available from getLastException().
 *
 * <b>Warning:</b> because the content is trimmed, positions in the returned string do
 * not correspond to positions in the file. Use 'read' instead if file locations matter.
 *
 * @param file The file to read
 * @return The <b>trimmed</b> contents of the file, or <code>null</code> on error.
 */
public String readAndTrim(File file) {
    e = null;
    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        String raw = readString(input);
        if (raw == null) {
            return null;
        }
        return raw.trim();
    } catch (IOException failure) {
        this.e = failure;
        return null;
    } finally {
        FileUtil.close(input);
    }
}
/**
* Open the given filename for writing and dump the given
* {@link String} into it, replacing rather than appending to any
* existing content. Returns <code>false</code>
* on error, in which case you can check the {@link #getLastException()}
* method for the exception that occurred. Tries to create any
* enclosing directories that do not exist.
*
* @param filename The name of the file to write to
* @param contents the string to write out
* @return the success state
*/
public boolean write(String filename, String contents) {
return write(new File(filename), contents);
}
/**
* Open the given file for writing and dump the given
* {@link String} into it, replacing rather than appending to any
* existing content. Returns <code>false</code>
* on error, in which case you can check the {@link #getLastException()}
* method for the exception that occurred. Tries to create any
* enclosing directories that do not exist.
*
* @param file The file to write to
* @param contents the string to write out
* @return the success state
*/
public boolean write(File file, String contents) {
return write(file, contents, false);
}
/**
* Open the given path for writing and dump the given
* {@link String} into it, replacing rather than appending to any
* existing content. Returns <code>false</code>
* on error, in which case you can check the {@link #getLastException()}
* method for the exception that occurred. Tries to create any
* enclosing directories that do not exist.
*
* @param path The path to write to
* @param contents the string to write out
* @return the success state
*/
public boolean write(Path path, String contents) {
return write(path, contents, false);
}
/**
* Open the given file for writing and dump the given
* {@link String} into it, replacing rather than appending to any
* existing content. Throws {@link ResourceError}
* if we fail.
*
* @param file The file to write to
* @param contents the string to write out
* @throws ResourceError if the file could not be written
*/
public void strictwrite(File file, String contents) {
strictwrite(file, contents, false);
}
/**
* Open the given path for writing and dump the given
* {@link String} into it, replacing rather than appending to any
* existing content. Throws {@link ResourceError}
* if we fail.
*
* @param path The path to write to
* @param contents the string to write out
* @throws ResourceError if the path could not be written
*/
public void strictwrite(Path path, String contents) {
strictwrite(path, contents, false);
}
/**
 * Variant of {@link #write(File,String)} that can also append to an existing file.
 * Missing parent directories are created on a best-effort basis before the file is
 * opened; the text is encoded with this instance's default encoding.
 *
 * @param file the file to write to
 * @param contents the string to write out
 * @param append whether or not to append to any existing file
 * @return the success state; on failure the exception is recorded for getLastException()
 */
public boolean write(File file, String contents, boolean append) {
    File parent = file.getParentFile();
    if (parent != null) {
        parent.mkdirs();
    }
    FileOutputStream fos = null;
    try {
        fos = new FileOutputStream(file, append);
        Charset charset = Charset.forName(defaultEncoding);
        Writer writer = new OutputStreamWriter(fos, charset);
        writer.append(contents);
        writer.close();
        return true;
    } catch (IOException failure) {
        this.e = failure;
        return false;
    } finally {
        // Also covers the paths on which the writer was never closed.
        FileUtil.close(fos);
    }
}
/**
 * Variant of {@link #write(Path,String)} that can also append to an existing file.
 * Missing parent directories are created first; the file itself is created if it does
 * not exist and is otherwise either appended to or truncated, depending on {@code append}.
 *
 * @param path the path to write to
 * @param contents the string to write out
 * @param append whether or not to append to any existing file
 * @return the success state; on failure the exception is recorded for getLastException()
 */
public boolean write(Path path, String contents, boolean append) {
    Path parent = path.getParent();
    OpenOption existingContent = append ? StandardOpenOption.APPEND : StandardOpenOption.TRUNCATE_EXISTING;
    try {
        if (parent != null) {
            Files.createDirectories(parent);
        }
        try (Writer writer = Files.newBufferedWriter(path, Charset.forName(defaultEncoding),
                StandardOpenOption.CREATE, StandardOpenOption.WRITE, existingContent)) {
            writer.append(contents);
        }
        return true;
    } catch (IOException failure) {
        this.e = failure;
        return false;
    }
}
/**
 * Variant of {@link #strictwrite(File,String)} that can also append to an existing file.
 *
 * @param file the file to write to
 * @param contents the string to write out
 * @param append whether or not to append to any existing file
 * @throws ResourceError if the write fails; the recorded exception is attached as the cause
 */
public void strictwrite(File file, String contents, boolean append) {
    boolean written = write(file, contents, append);
    if (written) {
        return;
    }
    throw new ResourceError("Failed to write file " + file, getLastException());
}
/**
 * Variant of {@link #strictwrite(Path,String)} that can also append to an existing file.
 *
 * @param path the path to write to
 * @param contents the string to write out
 * @param append whether or not to append to any existing file
 * @throws ResourceError if the write fails; the recorded exception is attached as the cause
 */
public void strictwrite(Path path, String contents, boolean append) {
    boolean written = write(path, contents, append);
    if (written) {
        return;
    }
    throw new ResourceError("Failed to write path " + path, getLastException());
}
/**
* Get the exception most recently recorded by a failing read or write
* operation on this instance, if any.
*
* Not every operation clears a previously recorded exception before it
* starts, so after a successful call this may still return the exception of
* an earlier failing call. Check the return value of the operation itself
* first, and consult this method only when it reported a failure.
*
* @return The last caught exception, or <code>null</code> if N/A.
*/
public IOException getLastException() {
return e;
}
/**
 * Read the entire content of the given {@link File} as a {@link String}, detecting the
 * character set from a byte-order mark where one is present.
 *
 * Any previously recorded exception is cleared first, so that after this call
 * getLastException() only reports a failure of this read.
 *
 * @param file The file to read.
 * @return The contents of the file, or <code>null</code> on error.
 */
public String read(File file) {
    // Forget failures of earlier operations, as readAndTrim does.
    e = null;
    InputStream is = null;
    try {
        is = new FileInputStream(file);
        return readString(is);
    }
    catch (IOException e) {
        this.e = e;
        return null;
    }
    finally {
        FileUtil.close(is);
    }
}
/**
 * Read the entire content of the given {@link Path} as a {@link String}, detecting the
 * character set from a byte-order mark where one is present.
 *
 * Any previously recorded exception is cleared first, so that after this call
 * getLastException() only reports a failure of this read.
 *
 * @param path The path to read.
 * @return The contents of the path, or <code>null</code> on error.
 */
public String read(Path path) {
    // Forget failures of earlier operations, as readAndTrim does.
    e = null;
    InputStream is = null;
    try {
        is = Files.newInputStream(path);
        return readString(is);
    }
    catch (IOException e) {
        this.e = e;
        return null;
    }
    finally {
        FileUtil.close(is);
    }
}
/**
 * Read the contents of the given {@link File} as text (CRLF line endings are normalised
 * to "\n" in the output).
 *
 * @param file The file to read.
 * @return The text contents of the file, if possible, or null if the file cannot be read.
 */
public String readText(File file) {
    String result = read(file);
    // Reuse the precompiled CRLF pattern instead of recompiling the regex on every call.
    return result != null ? rpLineEndingCRLF.matcher(result).replaceAll("\n") : null;
}
/**
 * Read the contents of the given {@link Path} as text (CRLF line endings are normalised
 * to "\n" in the output).
 *
 * @param path The path to read.
 * @return The text contents of the path, if possible, or null if the file cannot be read.
 */
public String readText(Path path) {
    String result = read(path);
    // Reuse the precompiled CRLF pattern instead of recompiling the regex on every call.
    return result != null ? rpLineEndingCRLF.matcher(result).replaceAll("\n") : null;
}
/**
 * Read the whole of the given {@link File} as a string, failing loudly instead of
 * returning null.
 *
 * @param f The file to read.
 * @return The contents of the file -- never null.
 * @throws ResourceError if the file cannot be read; the recorded exception is attached as the cause
 */
public String strictread(File f) {
    String content = read(f);
    if (content != null) {
        return content;
    }
    throw new ResourceError("Failed to read file " + f, getLastException());
}
/**
 * Read the whole of the given {@link Path} as a string, failing loudly instead of
 * returning null.
 *
 * @param f The path to read.
 * @return The contents of the path -- never null.
 * @throws ResourceError if the path cannot be read; the recorded exception is attached as the cause
 */
public String strictread(Path f) {
    String content = read(f);
    if (content != null) {
        return content;
    }
    throw new ResourceError("Failed to read path " + f, getLastException());
}
/**
* Read the contents of the given {@link File} as text (CRLF line endings are normalised to "\n" in the output).
* The file is read with {@link #strictread(File)} and the precompiled CRLF pattern is
* then applied to the result.
*
* @param file The file to read.
* @return The text contents of the file, if possible.
* @throws ResourceError If the file cannot be read.
*/
public String strictreadText(File file) {
return rpLineEndingCRLF.matcher(strictread(file)).replaceAll("\n");
}
/**
* Read the contents of the given {@link Path} as text (CRLF line endings are normalised to "\n" in the output).
* The path is read with {@link #strictread(Path)} and the precompiled CRLF pattern is
* then applied to the result.
*
* @param path The path to read.
* @return The text contents of the path, if possible.
* @throws ResourceError If the path cannot be read.
*/
public String strictreadText(Path path) {
return rpLineEndingCRLF.matcher(strictread(path)).replaceAll("\n");
}
/**
 * Drain an {@link InputStream} and decode its bytes as a {@link String}. The character
 * set is taken from a leading byte-order mark where one is recognised (the mark itself
 * is not part of the result); otherwise this instance's default encoding is used.
 * Returns <code>null</code> on error, in which case getLastException() gives the cause.
 *
 * @param stream the stream to read from
 * @return The contents of the stream, or <code>null</code> on error.
 */
public String readString(InputStream stream) {
    IntRef byteCount = new IntRef(0);
    byte[] raw = readBinary(stream, byteCount);
    if (raw == null) {
        return null;
    }
    IntRef contentStart = new IntRef(0);
    String charsetName = determineCharset(raw, byteCount.get(), contentStart);
    try {
        // Decode only the bytes after the byte-order mark (if any) and before the unused tail.
        int from = contentStart.get();
        int count = byteCount.get() - contentStart.get();
        return new String(raw, from, count, charsetName);
    } catch (UnsupportedEncodingException unsupported) {
        this.e = unsupported;
        return null;
    }
}
/**
 * Drain an {@link InputStream} and decode it as a {@link String}, detecting the
 * character set as {@link #readString(InputStream)} does, but failing loudly instead
 * of returning null.
 *
 * @param stream the stream to read from
 * @return the contents of the input stream -- never null
 * @throws ResourceError if the stream cannot be read or decoded
 */
public String strictReadString(InputStream stream) {
    String content = readString(stream);
    if (content != null) {
        return content;
    }
    throw new ResourceError("Could not read from stream", getLastException());
}
/**
 * Drain an {@link InputStream} and return exactly the bytes that were read. Working
 * on raw bytes avoids the restrictions regarding invalid code points that could
 * prevent reading the content as a String.
 *
 * This method returns <code>null</code> on error, in which case you can
 * check {@link #getLastException()} for the exception that occurred.
 *
 * @param stream the stream to read from
 * @return The binary contents of the stream, or <code>null</code> on error.
 */
public byte[] readBinary(InputStream stream) {
    IntRef byteCount = new IntRef(0);
    byte[] oversized = readBinary(stream, byteCount);
    if (oversized == null) {
        return null;
    }
    // The working buffer is usually larger than the data; cut it down to size.
    return Arrays.copyOf(oversized, byteCount.get());
}
/**
 * Drain an {@link InputStream} and return its bytes, failing loudly instead of
 * returning null.
 *
 * @param stream the stream to read from
 * @return The binary contents of the stream -- always non-null.
 * @throws ResourceError if an exception occurs during IO.
 */
public byte[] strictReadBinary(InputStream stream) {
    byte[] result = readBinary(stream);
    if (result != null) {
        return result;
    }
    throw new ResourceError("Couldn't read from stream", this.e);
}
/**
 * Read all bytes of a {@link File}, failing loudly instead of returning null.
 *
 * @param file the file to read
 * @return the file's contents as a byte[] -- always non-null.
 * @throws ResourceError if the file cannot be opened or an exception occurs during IO.
 */
public byte[] strictReadBinary(File file) {
    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        byte[] contents = readBinary(input);
        if (contents != null) {
            return contents;
        }
        // The cause is the exception recorded while draining the stream.
        throw new ResourceError("Couldn't read from file " + file + ".", this.e);
    } catch (FileNotFoundException notFound) {
        throw new ResourceError("Couldn't read from file " + file + ".", notFound);
    } finally {
        FileUtil.close(input);
    }
}
/**
 * Read all bytes of a {@link Path}, failing loudly instead of returning null.
 *
 * @param path the path to read
 * @return the file's contents as a byte[] -- always non-null.
 * @throws ResourceError if the path cannot be opened or an exception occurs during IO.
 */
public byte[] strictReadBinary(Path path) {
    InputStream input = null;
    try {
        input = Files.newInputStream(path);
        byte[] contents = readBinary(input);
        if (contents != null) {
            return contents;
        }
        // The cause is the exception recorded while draining the stream.
        throw new ResourceError("Couldn't read from path " + path + ".", this.e);
    } catch (IOException openFailure) {
        throw new ResourceError("Couldn't read from path " + path + ".", openFailure);
    } finally {
        FileUtil.close(input);
    }
}
/**
 * Read all bytes of a {@link Path}. A failure to open the path surfaces as an
 * {@link IOException}, whereas a failure while draining the opened stream is
 * reported as a {@link ResourceError}.
 *
 * @param path the path to read
 * @return the file's contents as a byte[] -- always non-null.
 * @throws IOException if the path cannot be opened
 * @throws ResourceError if reading from the opened stream fails
 */
public byte[] readBinary(Path path) throws IOException {
    InputStream input = null;
    try {
        input = Files.newInputStream(path);
        byte[] contents = readBinary(input);
        if (contents != null) {
            return contents;
        }
        throw new ResourceError("Couldn't read from path " + path + ".", this.e);
    } finally {
        FileUtil.close(input);
    }
}
/**
 * Drain a stream into a growing byte array.
 *
 * The returned array is a working buffer and is generally longer than the data; the
 * number of valid bytes at its start is stored in {@code offsetHolder}.
 *
 * @param stream the stream to read from
 * @param offsetHolder receives the number of bytes read
 * @return the working buffer, or null if an {@link IOException} occurred (in which case
 *         the exception is recorded and {@code offsetHolder} is left untouched)
 */
private byte[] readBinary(InputStream stream, IntRef offsetHolder) {
    try {
        byte[] buffer = new byte[16384];
        int filled = 0;
        while (true) {
            int count = stream.read(buffer, filled, buffer.length - filled);
            if (count <= 0) {
                break;
            }
            filled += count;
            // Grow as soon as the buffer is full, so that the next read has room to work with.
            if (filled == buffer.length) {
                buffer = safeArrayDouble(buffer);
            }
        }
        offsetHolder.set(filled);
        return buffer;
    } catch (IOException failure) {
        this.e = failure;
        return null;
    }
}
/**
 * Grow an array to twice its length, clamped to the range from 16 up to
 * {@code ArrayUtil.MAX_ARRAY_LENGTH}. The existing content is preserved.
 *
 * @param array The array to enlarge
 * @return a new array that is longer than array
 * @throws ResourceError if the array already has the maximum length
 */
private byte[] safeArrayDouble(byte[] array) {
    if (array.length >= ArrayUtil.MAX_ARRAY_LENGTH) {
        throw new ResourceError("Cannot stream into array as it exceed the maximum array size");
    }
    // Double in long arithmetic so that the intermediate value cannot overflow an int.
    long doubled = Math.max(16L, array.length * 2L);
    long capped = Math.min(doubled, ArrayUtil.MAX_ARRAY_LENGTH);
    return Arrays.copyOf(array, (int) capped);
}
/**
 * Pick the character set for a byte buffer by looking for a byte-order mark at its start.
 * Recognised marks are UTF-8 (EF BB BF), UTF-16BE (FE FF) and UTF-16LE (FF FE). When
 * none is found, this instance's default encoding is returned.
 *
 * @param bom the buffer whose first bytes are inspected
 * @param length the number of valid bytes in {@code bom}
 * @param start receives the length of the recognised mark, i.e. the index of the first
 *        content byte (0 when there is no mark)
 * @return the name of the character set to decode with
 */
private String determineCharset(byte[] bom, int length, IntRef start) {
    start.set(0);
    if (length < 2) {
        return defaultEncoding;
    }
    if (length >= 3 && byteToInt(bom[0]) == 0xEF && byteToInt(bom[1]) == 0xBB && byteToInt(bom[2]) == 0xBF) {
        start.set(3);
        return "UTF-8";
    }
    if (byteToInt(bom[0]) == 0xFE && byteToInt(bom[1]) == 0xFF) {
        start.set(2);
        return "UTF-16BE";
    }
    if (byteToInt(bom[0]) == 0xFF && byteToInt(bom[1]) == 0xFE) {
        start.set(2);
        return "UTF-16LE";
    }
    return defaultEncoding;
}
/**
* Interpret a byte as an unsigned value.
*
* @param b the byte
* @return the value of {@code b} in the range 0 to 255
*/
private static int byteToInt(byte b) {
// Masking discards the sign extension that widening a negative byte to int introduces.
return b & 0xFF;
}
}

View File

@@ -0,0 +1,207 @@
package com.semmle.util.io.csv;
/**
Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* A very simple CSV parser released under a commercial-friendly license.
* This just implements splitting a single line into fields.
*
* @author Glen Smith
* @author Rainer Pruy
*
*/
public class CSVParser {
/** The character that separates fields. */
private final char separator;
/** The character that opens and closes a quoted field. */
private final char quotechar;
/** The character that, inside quotes, escapes a following quote or escape character. */
private final char escape;
/** If true, characters outside quotes are dropped. */
private final boolean strictQuotes;
/**
* Accumulates the text of the field currently being parsed. It is non-empty between
* calls only when a multi-line parse stopped inside a quoted field.
*/
private StringBuilder buf = new StringBuilder(INITIAL_READ_SIZE);
/** The default separator to use if none is supplied to the constructor. */
public static final char DEFAULT_SEPARATOR = ',';
/** Initial capacity of the field buffer. */
private static final int INITIAL_READ_SIZE = 128;
/**
* The default quote character to use if none is supplied to the
* constructor.
*/
public static final char DEFAULT_QUOTE_CHARACTER = '"';
/**
* The default escape character to use if none is supplied to the
* constructor.
*/
public static final char DEFAULT_ESCAPE_CHARACTER = '"';
/**
* The default strict quote behavior to use if none is supplied to the
* constructor
*/
public static final boolean DEFAULT_STRICT_QUOTES = false;
/**
* Constructs a CSVParser with the supplied separator, quote and escape characters.
* Allows setting the "strict quotes" flag
* @param separator
* the delimiter to use for separating entries
* @param quotechar
* the character to use for quoted elements
* @param escape
* the character to use for escaping a separator or quote
* @param strictQuotes
* if true, characters outside the quotes are ignored
*/
CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) {
this.separator = separator;
this.quotechar = quotechar;
this.escape = escape;
this.strictQuotes = strictQuotes;
}
/**
* Report whether the field buffer still holds text from an earlier call, which
* is the case after a multi-line parse stopped inside a quoted field.
*
* @return true if something was left over from last call(s)
*/
public boolean isPending() {
return buf.length() != 0;
}
/**
* Parse one line of a record that may span several lines. If the line ends inside
* a quoted field, the partial field (with a newline re-appended) is kept pending and
* continued by the next call instead of being returned.
*
* @param nextLine the line to parse, or null to flush any pending text
* @return the fields completed on this line; for a null line, the pending text as a
* single element, or null if nothing is pending
* @throws IOException declared for symmetry with {@link #parseLine(String)}
*/
public String[] parseLineMulti(String nextLine) throws IOException {
return parseLine(nextLine, true);
}
/**
* Parse a single, self-contained line. Any text left pending by an earlier
* multi-line parse is discarded first.
*
* @param nextLine the line to parse
* @return the fields of the line, or null if nextLine is null
* @throws IOException if the line ends inside a quoted field
*/
public String[] parseLine(String nextLine) throws IOException {
return parseLine(nextLine, false);
}
/**
* Parses an incoming String and returns an array of elements.
*
* @param nextLine
* the string to parse
* @param multi
* whether a quoted field may continue on the next line; if false, text
* pending from an earlier call is discarded and an unterminated quoted
* field is an error
* @return the tokenized list of elements. If nextLine is null, the pending text
* as a single element, or null if nothing is pending.
* @throws IOException if the line ends inside a quoted field and multi is false
*/
private String[] parseLine(String nextLine, boolean multi) throws IOException {
// A single-line parse never continues an earlier line: drop any leftovers.
if (!multi && isPending()) {
clear();
}
// A null line means end of input: flush whatever is still pending.
if (nextLine == null) {
if (isPending()) {
String s = buf.toString();
clear();
return new String[] {s};
} else {
return null;
}
}
List<String>tokensOnThisLine = new ArrayList<String>();
// A non-empty buffer at this point is treated as the continuation of a quoted
// field that was begun on an earlier line.
boolean inQuotes = isPending();
for (int i = 0; i < nextLine.length(); i++) {
char c = nextLine.charAt(i);
if (c == this.escape && isNextCharacterEscapable(nextLine, inQuotes, i)) {
// Escape sequence: keep the escaped character and skip over it.
buf.append(nextLine.charAt(i+1));
i++;
} else if (c == quotechar) {
if( isNextCharacterEscapedQuote(nextLine, inQuotes, i) ){
// Doubled quote inside quotes: keep one quote and skip the other.
buf.append(nextLine.charAt(i+1));
i++;
}else{
inQuotes = !inQuotes;
// the tricky case of an embedded quote in the middle: a,bc"d"ef,g
if (!strictQuotes) {
if(i>2 //not on the beginning of the line
&& nextLine.charAt(i-1) != this.separator //not at the beginning of an escape sequence
&& nextLine.length()>(i+1) &&
nextLine.charAt(i+1) != this.separator //not at the end of an escape sequence
){
buf.append(c);
}
}
}
} else if (c == separator && !inQuotes) {
tokensOnThisLine.add(buf.toString());
clear(); // start work on next token
} else {
// Ordinary character; in strict mode only characters inside quotes are kept.
if (!strictQuotes || inQuotes)
buf.append(c);
}
}
// line is done - check status
if (inQuotes) {
if (multi) {
// continuing a quoted section, re-append newline
buf.append('\n');
// this partial content is not to be added to field list yet
} else {
throw new IOException("Un-terminated quoted field at end of CSV line");
}
} else {
tokensOnThisLine.add(buf.toString());
clear();
}
return tokensOnThisLine.toArray(new String[tokensOnThisLine.size()]);
}
/**
* precondition: the current character is a quote or an escape
* @param nextLine the current line
* @param inQuotes true if the current context is quoted
* @param i current index in line
* @return true if the following character is a quote
*/
private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {
    // A doubled quote only counts as an escaped quote inside a quoted section.
    if (!inQuotes) {
        return false;
    }
    final int next = i + 1;
    // There must be a following character, and it must be the quote character.
    return next < nextLine.length() && nextLine.charAt(next) == quotechar;
}
/**
* precondition: the current character is an escape
* @param nextLine the current line
* @param inQuotes true if the current context is quoted
* @param i current index in line
* @return true if the following character is a quote or an escape character
*/
protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {
    // Escape sequences are only recognised inside a quoted section.
    if (!inQuotes) {
        return false;
    }
    final int next = i + 1;
    if (next >= nextLine.length()) {
        // The escape character is the last character of the line.
        return false;
    }
    // Only the quote character and the escape character itself can be escaped.
    final char following = nextLine.charAt(next);
    return following == quotechar || following == this.escape;
}
/**
* Reset the buffer used for storing the current field's value
*/
private void clear() {
// Truncate in place so the same buffer object is reused for the next field.
buf.setLength(0);
}
}

View File

@@ -0,0 +1,192 @@
package com.semmle.util.io.csv;
/**
Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/**
* A very simple CSV reader released under a commercial-friendly license.
*
* @author Glen Smith
*
*/
public class CSVReader implements Closeable {
    /** Number of leading physical lines skipped when the caller does not ask for any. */
    private static final int DEFAULT_SKIP_LINES = 0;

    private final BufferedReader br;
    private final CSVParser parser;
    private final int skipLines;

    /** Becomes false once the underlying reader has reported end of input. */
    private boolean hasNext = true;
    /** Whether the initial {@link #skipLines} lines have already been consumed. */
    private boolean linesSkipped;
    /** One-based number of the physical line read most recently. */
    private int curline = 0;
    /** One-based number of the physical line on which the most recent logical line began. */
    private int startLine = 0;

    /**
     * Creates a reader that uses the parser's default separator, quote
     * character, escape character and quoting mode, and skips no lines.
     *
     * @param reader
     *            the source of CSV text
     */
    public CSVReader(Reader reader) {
        this(reader,
                CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER,
                CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES,
                CSVParser.DEFAULT_STRICT_QUOTES);
    }

    /**
     * Creates a reader with every parsing option given explicitly.
     *
     * @param reader
     *            the source of CSV text (wrapped in a {@link BufferedReader})
     * @param separator
     *            the delimiter between fields
     * @param quotechar
     *            the character that encloses quoted fields
     * @param escape
     *            the character that escapes a quote or escape character
     * @param line
     *            the number of physical lines to skip before the first record
     * @param strictQuotes
     *            whether characters outside quotes are ignored
     */
    private CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) {
        this.skipLines = line;
        this.parser = new CSVParser(separator, quotechar, escape, strictQuotes);
        this.br = new BufferedReader(reader);
    }

    /**
     * Reads every remaining record.
     *
     * @return one String[] per logical line, in input order
     * @throws IOException
     *             if reading or parsing fails
     */
    public List<String[]> readAll() throws IOException {
        final List<String[]> rows = new ArrayList<String[]>();
        while (hasNext) {
            final String[] row = readNext();
            if (row == null) {
                continue;
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Reads the next logical line, which may span several physical lines when
     * a quoted field contains line breaks.
     *
     * @return the fields of the record, or null if no line could be read
     * @throws IOException
     *             if reading or parsing fails
     */
    public String[] readNext() throws IOException {
        String[] fields = null;
        boolean atFirstPhysicalLine = true;
        while (true) {
            final String physicalLine = getNextLine();
            if (atFirstPhysicalLine) {
                // Remember where this logical line began.
                startLine = curline;
                atFirstPhysicalLine = false;
            }
            if (!hasNext) {
                // NOTE(review): field text still pending in the parser is dropped here
                // rather than returned or reported -- should this throw instead?
                return fields;
            }
            final String[] parsed = parser.parseLineMulti(physicalLine);
            if (parsed.length > 0) {
                fields = (fields == null) ? parsed : concat(fields, parsed);
            }
            if (!parser.isPending()) {
                return fields;
            }
        }
    }

    /** Returns a new array holding the elements of head followed by those of tail. */
    private static String[] concat(String[] head, String[] tail) {
        final String[] joined = new String[head.length + tail.length];
        System.arraycopy(head, 0, joined, 0, head.length);
        System.arraycopy(tail, 0, joined, head.length, tail.length);
        return joined;
    }

    /**
     * Reads one physical line, first consuming the lines to skip on the very
     * first call.
     *
     * @return the line without its terminator, or null at end of input
     * @throws IOException
     *             if the underlying read fails
     */
    private String getNextLine() throws IOException {
        if (!linesSkipped) {
            for (int skipped = 0; skipped < skipLines; skipped++) {
                br.readLine();
                ++curline;
            }
            linesSkipped = true;
        }
        final String line = br.readLine();
        if (line == null) {
            hasNext = false;
            return null;
        }
        ++curline;
        return hasNext ? line : null;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the close fails
     */
    @Override
    public void close() throws IOException {
        br.close();
    }

    /**
     * Returns the one-based physical line number at which the most recently
     * read logical line started, or zero before the first read.
     */
    public int getStartLine() {
        return startLine;
    }
}

View File

@@ -0,0 +1,226 @@
package com.semmle.util.io.csv;
/**
Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
/**
* A very simple CSV writer released under a commercial-friendly license.
*
* @author Glen Smith
*
*/
public class CSVWriter implements Closeable {
    private static final int INITIAL_STRING_SIZE = 128;

    /** The quote constant to use when you wish to suppress all quoting. */
    public static final char NO_QUOTE_CHARACTER = '\u0000';
    /** The escape constant to use when you wish to suppress all escaping. */
    private static final char NO_ESCAPE_CHARACTER = '\u0000';
    /** Default line terminator: always a single line feed, independent of the platform. */
    private static final String DEFAULT_LINE_END = "\n";

    private final Writer rawWriter;
    private final char separator;
    private final char quotechar;
    private final char escapechar;
    private final String lineEnd;

    /** Per-column eager-quoting switches; columns beyond its length are always quoted. */
    private boolean[] eagerQuotingFlags = {};

    /**
     * Constructs CSVWriter using the parser's default separator, quote and
     * escape characters.
     *
     * @param writer
     *            the writer to an underlying CSV source.
     */
    public CSVWriter(Writer writer) {
        this(writer,
                CSVParser.DEFAULT_SEPARATOR,
                CSVParser.DEFAULT_QUOTE_CHARACTER,
                CSVParser.DEFAULT_ESCAPE_CHARACTER
        );
    }

    /**
     * Constructs CSVWriter with supplied separator, quote char and escape char,
     * terminating each record with a line feed.
     *
     * @param writer
     *            the writer to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param escapechar
     *            the character to use for escaping quotechars or escapechars
     */
    public CSVWriter(Writer writer, char separator, char quotechar, char escapechar) {
        this(writer, separator, quotechar, escapechar, DEFAULT_LINE_END);
    }

    /**
     * Constructs CSVWriter with supplied separator, quote char, escape char and line ending.
     *
     * @param writer
     *            the writer to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param escapechar
     *            the character to use for escaping quotechars or escapechars
     * @param lineEnd
     *            the line terminator to append after each record
     */
    private CSVWriter(Writer writer, char separator, char quotechar, char escapechar, String lineEnd) {
        this.rawWriter = writer;
        this.separator = separator;
        this.quotechar = quotechar;
        this.escapechar = escapechar;
        this.lineEnd = lineEnd;
    }

    /**
     * Call with an array of booleans, corresponding to columns, where columns that have
     * <code>false</code> will not be quoted unless they contain special characters.
     * <p>
     * If there are more columns to print than have been configured here, any additional
     * columns will be treated as if <code>true</code> was passed.
     */
    public void setEagerQuotingColumns(boolean... flags) {
        eagerQuotingFlags = flags;
    }

    /**
     * Writes every record of the list, in order.
     *
     * @param allLines
     *            a List of String[], with each String[] representing a line of
     *            the file.
     * @throws IOException if the underlying writer fails
     */
    public void writeAll(List<String[]> allLines) throws IOException {
        for (String[] line : allLines) {
            writeNext(line);
        }
    }

    /**
     * Writes one record followed by the line terminator. A null array writes
     * nothing at all; a null element is written as an empty, unquoted field.
     *
     * @param nextLine
     *            a string array with each element as a separate field.
     * @throws IOException if the underlying writer fails
     */
    public void writeNext(String... nextLine) throws IOException {
        if (nextLine == null)
            return;
        StringBuilder sb = new StringBuilder(INITIAL_STRING_SIZE);
        for (int i = 0; i < nextLine.length; i++) {
            if (i != 0) {
                sb.append(separator);
            }
            String nextElement = nextLine[i];
            if (nextElement == null)
                continue;
            boolean hasSpecials = stringContainsSpecialCharacters(nextElement);
            // Quote when the column is eagerly quoted (explicitly, or by being beyond
            // the configured flags) or when the content would be ambiguous unquoted.
            boolean quote = hasSpecials
                    || i >= eagerQuotingFlags.length
                    || eagerQuotingFlags[i]
                    || stringContainsSomewhatSpecialCharacter(nextElement);
            if (quote) {
                if (quotechar != NO_QUOTE_CHARACTER)
                    sb.append(quotechar);
                // Only fields containing quote/escape characters need escaping.
                sb.append(hasSpecials ? processLine(nextElement) : nextElement);
                if (quotechar != NO_QUOTE_CHARACTER)
                    sb.append(quotechar);
            } else {
                sb.append(nextElement);
            }
        }
        sb.append(lineEnd);
        rawWriter.write(sb.toString());
    }

    /**
     * Return true if there are characters that need to be escaped in addition to
     * being quoted.
     */
    private boolean stringContainsSpecialCharacters(String line) {
        return line.indexOf(quotechar) != -1 || line.indexOf(escapechar) != -1;
    }

    /**
     * Return true if there are characters that should not appear in a completely
     * unquoted field. Line breaks are included: an unquoted field containing one
     * would be read back as the end of the record.
     */
    private boolean stringContainsSomewhatSpecialCharacter(String s) {
        return s.indexOf('"') != -1
                || s.indexOf('\'') != -1
                || s.indexOf('\t') != -1
                || s.indexOf('\n') != -1
                || s.indexOf('\r') != -1
                || s.indexOf(separator) != -1;
    }

    /**
     * Returns a copy of the field in which every quote character and escape
     * character is preceded by the escape character. When escaping is
     * suppressed the text is copied unchanged.
     *
     * @param nextElement the raw field text
     * @return the escaped field text
     */
    protected StringBuilder processLine(String nextElement)
    {
        StringBuilder sb = new StringBuilder(INITIAL_STRING_SIZE);
        final boolean escaping = escapechar != NO_ESCAPE_CHARACTER;
        for (int j = 0; j < nextElement.length(); j++) {
            char nextChar = nextElement.charAt(j);
            if (escaping && (nextChar == quotechar || nextChar == escapechar)) {
                sb.append(escapechar);
            }
            sb.append(nextChar);
        }
        return sb;
    }

    /**
     * Flushes the underlying writer.
     *
     * @throws IOException if bad things happen
     */
    public void flush() throws IOException {
        rawWriter.flush();
    }

    /**
     * Closes the underlying writer.
     *
     * @throws IOException if bad things happen
     */
    @Override
    public void close() throws IOException {
        rawWriter.close();
    }
}

View File

@@ -0,0 +1,101 @@
package com.semmle.util.logging;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Stack;
import com.semmle.util.exception.CatastrophicError;
/**
* A class to wrap around accesses to {@link System#out} and
* {@link System#err}, so that tools can behave consistently when
* run in-process or out-of-process.
*/
public class Streams {
    /** Current standard output; starts as {@link System#out} and is inherited by child threads. */
    private static final InheritableThreadLocal<PrintStream> out =
            new InheritableThreadLocal<PrintStream>() {
                @Override
                protected PrintStream initialValue() {
                    return System.out;
                }
            };

    /** Current standard error; starts as {@link System#err} and is inherited by child threads. */
    private static final InheritableThreadLocal<PrintStream> err =
            new InheritableThreadLocal<PrintStream>() {
                @Override
                protected PrintStream initialValue() {
                    return System.err;
                }
            };

    /** Current standard input; starts as {@link System#in} and is inherited by child threads. */
    private static final InheritableThreadLocal<InputStream> in =
            new InheritableThreadLocal<InputStream>() {
                @Override
                protected InputStream initialValue() {
                    return System.in;
                }
            };

    /** The three streams that were current when a context was pushed. */
    private static class SavedContext {
        public PrintStream out, err;
        public InputStream in;
    }

    /** Per-thread stack of saved stream triples (not inherited by child threads). */
    private static final ThreadLocal<Stack<SavedContext>> contexts =
            new ThreadLocal<Stack<SavedContext>>() {
                @Override
                protected Stack<SavedContext> initialValue() {
                    return new Stack<SavedContext>();
                }
            };

    /** Returns the output stream currently in effect for the calling thread. */
    public static PrintStream out() {
        return out.get();
    }

    /** Returns the error stream currently in effect for the calling thread. */
    public static PrintStream err() {
        return err.get();
    }

    /** Returns the input stream currently in effect for the calling thread. */
    public static InputStream in() {
        return in.get();
    }

    /**
     * Saves the calling thread's current streams and installs the given ones
     * until the matching {@link #popContext()}.
     */
    public static void pushContext(OutputStream stdout, OutputStream stderr, InputStream stdin) {
        // Snapshot what is in effect now so that popContext() can restore it.
        final SavedContext snapshot = new SavedContext();
        snapshot.out = out.get();
        snapshot.err = err.get();
        snapshot.in = in.get();
        final Stack<SavedContext> saved = contexts.get();
        saved.push(snapshot);

        out.set(asPrintStream(stdout));
        err.set(asPrintStream(stderr));
        in.set(stdin);
    }

    /** Returns the stream itself if it already is a PrintStream, otherwise wraps it in one. */
    private static PrintStream asPrintStream(OutputStream stdout) {
        if (stdout instanceof PrintStream) {
            return (PrintStream) stdout;
        }
        return new PrintStream(stdout);
    }

    /**
     * Flushes the current output and error streams and restores the streams
     * saved by the most recent {@link #pushContext}.
     *
     * @throws CatastrophicError if there is no saved context on this thread
     */
    public static void popContext() {
        final Stack<SavedContext> saved = contexts.get();
        // The flush happens before the emptiness check, so it runs even on misuse.
        out.get().flush();
        err.get().flush();
        if (saved.isEmpty()) {
            throw new CatastrophicError("Popping logging context without preceding push.");
        }
        final SavedContext previous = saved.pop();
        out.set(previous.out);
        err.set(previous.err);
        in.set(previous.in);
    }
}

View File

@@ -0,0 +1,398 @@
package com.semmle.util.process;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Timer;
import java.util.TimerTask;
import com.github.codeql.Logger;
import com.github.codeql.Severity;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.exception.InterruptedError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.RawStreamMuncher;
/**
* A builder for an external process. This class wraps {@link ProcessBuilder},
* adding support for spawning threads to manage the input and output streams of
* the created process.
*/
public abstract class AbstractProcessBuilder {
    /**
     * Logger used for command tracing and failure reports. It starts out as
     * <code>null</code>; until it is assigned, debug messages are dropped and
     * error messages go to {@link System#err}.
     */
    public static Logger logger = null;

    // timeout for the muncher threads in seconds
    protected static final long MUNCH_TIMEOUT = 20;

    private final ProcessBuilder builder;
    private boolean logFailure = true;
    private InputStream in;
    private LeakPrevention leakPrevention;
    // Set by kill(); checked at the start of execute() in case kill() came first.
    private volatile boolean interrupted = false;
    // The thread currently blocked in execute(), or null when none is.
    private volatile Thread threadToInterrupt = null;
    // Whether the most recent execute(timeout) was interrupted by its timer.
    private volatile boolean hitTimeout = false;
    // Lower-cased variable name -> spelling actually used in the builder's environment.
    private final Map<String, String> canonicalEnvVarNames = new LinkedHashMap<>();
    private RawStreamMuncher inMuncher;

    /**
     * @param args the command line; neither the list nor any element may be null
     * @param cwd the working directory, or null to leave the builder's default
     * @param env extra environment variables to add, or null for none
     */
    public AbstractProcessBuilder (List<String> args, File cwd, Map<String, String> env)
    {
        // Sanity checks
        CatastrophicError.throwIfNull(args);
        for (int i = 0; i < args.size(); ++i)
            CatastrophicError.throwIfNull(args.get(i));

        leakPrevention = LeakPrevention.NONE;
        builder = new ProcessBuilder(new ArrayList<>(args));
        if (cwd != null) {
            builder.directory(cwd);
        }

        // Make sure that values that have been explicitly removed from Env.systemEnv()
        // -- such as the variables representing command-line arguments --
        // are not taken over by the new ProcessBuilder.
        Map<String, String> keepThese = Env.systemEnv().getenv();
        for (Iterator<String> it = builder.environment().keySet().iterator(); it.hasNext();) {
            String name = it.next();
            if (!keepThese.containsKey(name))
                it.remove();
        }

        if (env != null) {
            addEnvironment(env);
        }
    }

    /**
     * Sets the policy used to clean up the command line before it is shown in
     * log and exception messages.
     */
    public void setLeakPrevention(LeakPrevention leakPrevention) {
        CatastrophicError.throwIfNull(leakPrevention);
        this.leakPrevention = leakPrevention;
    }

    /**
     * See {@link ProcessBuilder#redirectErrorStream(boolean)}.
     */
    public void setRedirectErrorStream(boolean redirectErrorStream) {
        this.builder.redirectErrorStream(redirectErrorStream);
    }

    /** Returns whether the builder's environment defines the (canonicalised) variable. */
    public final boolean hasEnvVar(String name) {
        return builder.environment().containsKey(getCanonicalVarName(name));
    }

    /**
     * Add the specified key/value pair to the environment of the builder,
     * overriding any previous environment entry of that name. This method
     * provides additional logic to handle systems where environment
     * variable names are case-insensitive, ensuring the last-added value
     * for a name ends up in the final environment regardless of case.
     * @param name The name of the environment variable. Whether case matters
     * is OS-dependent.
     * @param value The value for the environment variable.
     */
    public final void addEnvVar(String name, String value) {
        builder.environment().put(getCanonicalVarName(name), value);
    }

    /**
     * Prepend a specified set of arguments to this process builder's command line.
     * This only makes sense before the builder is started.
     */
    public void prependArgs(List<String> args) {
        builder.command().addAll(0, args);
    }

    /**
     * Compute a canonical environment variable name relative to this process
     * builder.
     *
     * The need for this method arises on platforms where the environment is
     * case-insensitive -- any inspection of it in such a situation needs to
     * canonicalise the variable name to have well-defined behaviour. This is
     * builder-specific, because it depends on its existing environment. For
     * example, if it already defines a variable called <code>Path</code>, and the
     * environment is case-insensitive, then setting a variable called
     * <code>PATH</code> should overwrite this, and checking whether a variable
     * called <code>PATH</code> is already defined should return <code>true</code>.
     */
    public String getCanonicalVarName(String name) {
        if (!Env.getOS().isEnvironmentCaseSensitive()) {
            // We need to canonicalise the variable name to work around Java API limitations.
            // The lookup table is built lazily from the builder's current environment.
            if (canonicalEnvVarNames.isEmpty())
                for (String var : builder.environment().keySet())
                    canonicalEnvVarNames.put(StringUtil.lc(var), var);
            String canonical = canonicalEnvVarNames.get(StringUtil.lc(name));
            if (canonical == null)
                // First spelling seen for this name becomes the canonical one.
                canonicalEnvVarNames.put(StringUtil.lc(name), name);
            else
                name = canonical;
        }
        return name;
    }

    /**
     * Get a snapshot of this builder's environment, using canonical variable names
     * (as per {@link #getCanonicalVarName(String)}) as keys. Modifications to this
     * map do not propagate back to the builder; use
     * {@link #addEnvVar(String, String)} or {@link #addEnvironment(Map)} to extend
     * its environment.
     */
    public Map<String, String> getCanonicalCurrentEnv() {
        Map<String, String> result = new LinkedHashMap<>();
        for (Entry<String, String> e : builder.environment().entrySet())
            result.put(getCanonicalVarName(e.getKey()), e.getValue());
        return result;
    }

    /**
     * Specify an input stream of data that will be piped to the process's
     * standard input.
     *
     * CAUTION: if this stream is the current process' standard in and no
     * input is ever received, then we will leak an uninterruptible thread
     * waiting for some input. This will terminate only when the standard in
     * is closed, i.e. when the current process terminates.
     */
    public final void setIn(InputStream in) {
        this.in = in;
    }

    /**
     * Set the environment of this builder to the given map. Any
     * existing environment entries (either from the current process
     * environment or from previous calls to {@link #addEnvVar(String, String)},
     * {@link #addEnvironment(Map)} or {@link #setEnvironment(Map)})
     * are discarded.
     * @param env The environment to use.
     */
    public final void setEnvironment(Map<String, String> env) {
        builder.environment().clear();
        canonicalEnvVarNames.clear();
        addEnvironment(env);
    }

    /**
     * Add the specified set of environment variables to the environment for
     * the builder. This leaves existing variable definitions in place, but
     * can override them.
     * @param env The environment to merge into the current environment.
     */
    public final void addEnvironment(Map<String, String> env) {
        for (Entry<String, String> entry : env.entrySet())
            addEnvVar(entry.getKey(), entry.getValue());
    }

    /** Runs the process without a timeout; see {@link #execute(long)}. */
    public final int execute() {
        return execute(0);
    }

    /**
     * Set the flag indicating that a non-zero exit code may be expected. This
     * will suppress the log of failed commands.
     */
    public final void expectFailure() {
        logFailure = false;
    }

    /**
     * Starts the process, wires up its streams and blocks until it exits.
     *
     * @param timeout delay in milliseconds after which the waiting thread is
     *            interrupted (as passed to {@link Timer#schedule(TimerTask, long)});
     *            zero disables the timeout
     * @return the exit code of the process
     * @throws ResourceError if starting the process fails with an IOException
     * @throws InterruptedError if the wait is interrupted, whether by the
     *             timeout or by {@link #kill()}
     */
    public final int execute(long timeout) {
        Process process = null;
        boolean processStopped = true;
        Timer timer = null;
        // Forget a timeout recorded by an earlier run of this builder, so that
        // processTimedOut() describes this execution only.
        hitTimeout = false;
        try {
            synchronized (this) {
                // Handle the case where we called kill() too early to use
                // Thread.interrupt()
                if (interrupted)
                    throw new InterruptedException();
                threadToInterrupt = Thread.currentThread();
            }
            processStopped = false;
            String directory;
            if (builder.directory() == null) {
                directory = "current directory ('" + System.getProperty("user.dir") + "')";
            } else {
                directory = "'" + builder.directory().toString() + "'";
            }
            logDebug("Running command: '" + toString() + "' in " + directory);
            process = builder.start();
            setupInputHandling(process.getOutputStream());
            setupOutputHandling(process.getInputStream(),
                    process.getErrorStream());
            if (timeout != 0) {
                // create the timer's thread as a "daemon" thread, so it does not
                // prevent the jvm from terminating
                timer = new Timer(true);
                final Thread current = Thread.currentThread();
                timer.schedule(new TimerTask() {
                    @Override
                    public void run() {
                        hitTimeout = true;
                        current.interrupt();
                    }
                }, timeout);
            }
            int result = process.waitFor();
            processStopped = true;
            if (result != 0 && logFailure)
                logError("Spawned process exited abnormally (code " + result
                        + "; tried to run: " + getBuilderCommand() + ")");
            return result;
        } catch (IOException e) {
            throw new ResourceError(
                    "IOException while executing process with args: "
                            + getBuilderCommand(), e);
        } catch (InterruptedException e) {
            throw new InterruptedError(
                    "InterruptedException while executing process with args: "
                            + getBuilderCommand(), e);
        } finally {
            // cancel the timer
            if (timer != null) {
                timer.cancel();
            }
            // clear the interrupted flag of the current thread
            // in case it was set earlier (ie by the Timer or a call to kill())
            synchronized (this) {
                threadToInterrupt = null;
                Thread.interrupted();
            }
            // get rid of the process, in case it is still running.
            if (process != null && !processStopped) {
                killProcess(process);
            }
            try {
                cleanupInputHandling();
                cleanupOutputHandling();
            } finally {
                if (process != null) {
                    FileUtil.close(process.getErrorStream());
                    FileUtil.close(process.getInputStream());
                    FileUtil.close(process.getOutputStream());
                }
            }
        }
    }

    /**
     * Provides the implementation of actually stopping the child
     * process. Provided as an extension point so that this can
     * be customised for later Java versions or for other reasons.
     */
    protected void killProcess(Process process) {
        process.destroy();
    }

    /**
     * Setup handling of the process input stream (stdin). Without a configured
     * input the process's stdin is closed immediately; otherwise a muncher
     * thread is started to copy the input across.
     *
     * @param outputStream OutputStream connected to the process's standard input.
     */
    protected void setupInputHandling(OutputStream outputStream) {
        if (in == null) {
            FileUtil.close(outputStream);
            return;
        }
        inMuncher = new RawStreamMuncher(in, outputStream);
        inMuncher.start();
    }

    /**
     * Setup handling of the process' output streams (stdout and stderr).
     *
     * @param stdout
     *            InputStream connected to the process' standard output stream.
     * @param stderr
     *            InputStream connected to the process' standard error stream.
     */
    protected abstract void setupOutputHandling(InputStream stdout, InputStream stderr);

    /**
     * Cleanup resources related to output handling. The method is always called, either after the process
     * has exited normally, or after an abnormal termination due to an exception. As a result cleanupOutputHandling()
     * might be called, without a previous call to setupOutputHandling. The implementation of this method should
     * handle this case.
     */
    protected abstract void cleanupOutputHandling();

    private void cleanupInputHandling() {
        if (inMuncher != null && inMuncher.isAlive()) {
            // There's no real need to wait for the muncher to terminate -- on the contrary,
            // if it's still alive it will typically be waiting for a closing action that
            // will only happen after execute() returns anyway.
            // The best we can do is try to interrupt it.
            inMuncher.interrupt();
        }
    }

    /**
     * Waits for a muncher thread to finish; if it is still alive afterwards it
     * is interrupted and an error is logged.
     *
     * @param which label of the stream used in the error message (e.g. "output")
     * @param muncher the thread to wait for
     * @param timeout maximum wait in milliseconds, as for {@link Thread#join(long)}
     */
    protected void waitForMuncher(String which, Thread muncher, long timeout) {
        // wait for termination of the muncher until a deadline is reached
        try {
            muncher.join(timeout);
        } catch (InterruptedException e) {
            Exceptions.ignore(e,"Further interruption attempts are ineffective --"
                    + " we're already waiting for termination.");
        }
        // if muncher is still alive, report an error
        if (muncher.isAlive()) {
            muncher.interrupt();
            // Report the wait that was actually applied, not the default constant.
            logError(String.format("Standard %s stream hasn't closed %s seconds after termination of subprocess '%s'.", which, timeout / 1000, this));
        }
    }

    /**
     * Requests termination: interrupts the thread blocked in
     * {@link #execute(long)}, if any, and makes later calls to it fail
     * immediately.
     */
    public final void kill() {
        synchronized (this) {
            interrupted = true;
            if (threadToInterrupt != null)
                threadToInterrupt.interrupt();
        }
    }

    /** Returns whether the timer of the most recent {@link #execute(long)} call fired. */
    public boolean processTimedOut() {
        return hitTimeout;
    }

    @Override
    public String toString() {
        return commandLineToString(getBuilderCommand());
    }

    /** Returns the command line after the leak-prevention policy has cleaned it up. */
    private List<String> getBuilderCommand() {
        return leakPrevention.cleanUpArguments(builder.command());
    }

    /**
     * Renders a command line for display: arguments are separated by spaces,
     * embedded double quotes are backslash-escaped, and empty arguments or
     * arguments containing a space are wrapped in double quotes.
     */
    private static String commandLineToString(List<String> commandLine) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String s : commandLine) {
            boolean tricky = s.isEmpty() || s.contains(" ") ;
            if (!first)
                sb.append(" ");
            first = false;
            if (tricky)
                sb.append("\"");
            sb.append(s.replace("\"", "\\\""));
            if (tricky)
                sb.append("\"");
        }
        return sb.toString();
    }

    /** Logs a debug message if a logger has been installed. */
    private static void logDebug(String message) {
        Logger log = logger;
        if (log != null)
            log.debug(message);
    }

    /** Logs an error message, falling back to standard error when no logger is installed. */
    private static void logError(String message) {
        Logger log = logger;
        if (log != null)
            log.error(message);
        else
            System.err.println(message);
    }
}

View File

@@ -0,0 +1,81 @@
package com.semmle.util.process;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import com.semmle.util.io.StreamMuncher;
import com.semmle.util.logging.Streams;
public class Builder extends AbstractProcessBuilder {
    /** Destination for the process's standard output. */
    private final OutputStream out;
    /** Destination for the process's standard error. */
    private final OutputStream err;

    protected StreamMuncher errMuncher;
    protected StreamMuncher outMuncher;

    /** Builds a command from varargs, run in cwd with no extra environment. */
    public Builder(OutputStream out, OutputStream err, File cwd, String... args) {
        this(out, err, cwd, null, args);
    }

    /** Builds a command from varargs, run in cwd with the given extra environment. */
    public Builder(OutputStream out, OutputStream err, File cwd,
            Map<String, String> env, String... args) {
        this(Arrays.asList(args), out, err, env, cwd);
    }

    /** Builds a command with no explicit working directory and no extra environment. */
    public Builder(List<String> args, OutputStream out, OutputStream err) {
        this(args, out, err, null, null);
    }

    /** Builds a command run in cwd with no extra environment. */
    public Builder(List<String> args, OutputStream out, OutputStream err,
            File cwd) {
        this(args, out, err, null, cwd);
    }

    /** Builds a command with the given extra environment and no explicit working directory. */
    public Builder(List<String> args, OutputStream out, OutputStream err,
            Map<String, String> env) {
        this(args, out, err, env, null);
    }

    /**
     * Builds a command with every option given.
     *
     * @param args the command line
     * @param out where the process's standard output is copied
     * @param err where the process's standard error is copied
     * @param env extra environment variables, or null
     * @param cwd working directory, or null
     */
    public Builder(List<String> args, OutputStream out, OutputStream err,
            Map<String, String> env, File cwd) {
        super(args, cwd, env);
        this.err = err;
        this.out = out;
    }

    /**
     * Convenience method that runs the given command line with no explicit
     * working directory or extra environment and blocks until it completes.
     * Output goes to {@link Streams#out()} and errors to {@link Streams#err()}.
     *
     * @return the exit code of the command
     */
    public static int run(List<String> commandLine) {
        final Builder runner = new Builder(commandLine, Streams.out(), Streams.err());
        return runner.execute();
    }

    @Override
    protected void cleanupOutputHandling() {
        // MUNCH_TIMEOUT is in seconds; waitForMuncher expects milliseconds.
        final long joinTimeoutMillis = 1000 * MUNCH_TIMEOUT;
        // Either muncher is still null if setupOutputHandling was never reached.
        if (outMuncher != null) {
            waitForMuncher("output", outMuncher, joinTimeoutMillis);
        }
        if (errMuncher != null) {
            waitForMuncher("error", errMuncher, joinTimeoutMillis);
        }
    }

    @Override
    protected void setupOutputHandling(InputStream stdout, InputStream stderr) {
        // Start the error muncher first, then the output muncher.
        final StreamMuncher errorCopier = new StreamMuncher(stderr, err);
        errMuncher = errorCopier;
        errorCopier.start();

        final StreamMuncher outputCopier = new StreamMuncher(stdout, out);
        outMuncher = outputCopier;
        outputCopier.start();
    }
}

View File

@@ -0,0 +1,725 @@
package com.semmle.util.process;
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.expansion.ExpansionEnvironment;
/**
* Helper methods for finding out environment properties like the OS type.
*/
public class Env {
/**
* Enum for commonly used environment variables.
*
* <p>
* The intention is that the name of the enum constant is the same as the environment
* variable itself. This means that the <code>toString</code> method does the right thing,
* as does calling {@link Enum#name() }.
* </p>
*
* <p>
* Should you wish to rename an environment variable (which you're unlikely to, due to the
* fact that there are many non-Java consumers), you can do a rename refactoring to make the
* Java consumers do the right thing.
* </p>
*
* <p>
* Constants without a comment of their own are undocumented in this file; consult
* their consumers before relying on a particular meaning.
* </p>
*/
public enum Var {
/*
* Core toolchain variables
*/
/**
* The location of the toolchain.
*
* Deprecated in favour of {@link Var#SEMMLE_DIST}, {@link Var#SEMMLE_HOME}, and
* {@link Var#SEMMLE_DATA}.
*/
@Deprecated
ODASA_HOME,
/**
* The location of the user's configuration files, including project configurations,
* dashboard configurations, team insight configurations, licenses etc.
*/
SEMMLE_HOME,
/**
* The location of the user's data, including snapshots, built dashboards, team
* insight data, etc.
*/
SEMMLE_DATA,
/**
* The location of any caches used by the toolchain, including compilation caches, trap caches, etc.
*/
SEMMLE_CACHE,
/**
* The location of the toolchain files, including the odasa jar, our queries etc.
*/
SEMMLE_DIST,
/**
* If running from a git tree, the root of the tree.
*/
SEMMLE_GIT_ROOT,
/**
* The root from which relative paths in a DOOD file are resolved.
*/
SEMMLE_QUERY_ROOT,
/**
* The directory where lock files are kept.
*/
SEMMLE_LOCK_DIR,
/**
* The directory which will be checked for licenses.
*/
SEMMLE_LICENSE_DIR,
/**
* The location where our queries are kept.
*/
ODASA_QUERIES,
/**
* The location of the 'tools' directory
*/
ODASA_TOOLS,
/**
* Whether we are running in 'prototyping mode'.
*/
ODASA_PROTOTYPE_MODE,
/**
* The location of the default compilation cache, as a space-separated list of URIs.
*
* Multiple entries are tried in sequence.
*/
SEMMLE_COMPILATION_CACHE,
/**
* Override the versions used in compilation caching.
*
* This is useful for testing without modifying the version manually.
*/
SEMMLE_OVERRIDE_OPTIMISER_VERSION,
/**
* If set, do not use compilation caching.
*/
SEMMLE_NO_COMPILATION_CACHING,
/**
* If set, use this as the size of compilation caches, in bytes. If set to 'INFINITY', no
* limit will be placed on the size.
*/
SEMMLE_COMPILATION_CACHE_SIZE,
/*
* Other toolchain variables
*/
SEMMLE_JAVA_HOME,
ODASA_JAVA_HOME,
ODASA_TRACER_CONFIGURATION,
/**
* The Java tracer agent to propagate to JVM processes.
*/
SEMMLE_JAVA_TOOL_OPTIONS,
/**
* Whether to run jar-based subprocesses in-process instead.
*/
ODASA_IN_PROCESS,
/**
* The executable to use for importing trap files.
*/
SEMMLE_TRAP_IMPORTER,
SEMMLE_PRESERVE_SYMLINKS,
SEMMLE_PATH_TRANSFORMER,
/*
* Environment variables for password for credential stores.
* Either is accepted to allow a single entry point in the code
* while documenting as appropriate for the audience.
*/
SEMMLE_CREDENTIALS_PASSWORD,
LGTM_CREDENTIALS_PASSWORD,
/*
* Internal config variables
*/
/**
* Extra arguments to pass to JVMs launched by Semmle tools.
*/
SEMMLE_JAVA_ARGS,
/**
* A list of log levels to set, of the form:
* "foo.bar=TRACE,bar.baz=DEBUG"
*/
SEMMLE_LOG_LEVELS,
/**
* The default heap size for commands that accept a ram parameter.
*/
SEMMLE_DEFAULT_HEAP_SIZE,
SEMMLE_MAX_RAM_MB,
/**
* Whether to disable asynchronous logging in the query server (otherwise it may drop messages).
*/
SEMMLE_SYNCHRONOUS_LOGGING,
/**
* Whether or not to use memory mapping
*/
SEMMLE_MEMORY_MAPPING,
SEMMLE_METRICS_DIR,
/**
* Whether we are running in our own unit tests.
*/
SEMMLE_UNIT_TEST_MODE,
/**
* Whether to include the source QL in a QLO.
*/
SEMMLE_DEBUG_QL_IN_QLO,
/**
* Whether to enable extra assertions
*/
ODASA_ASSERTIONS,
/**
* A file containing extra variables for ExpansionEnvironments.
*/
ODASA_EXTRA_VARIABLES,
ODASA_TUNE_GC,
/**
* Whether to run PI in hosted mode.
*
* NOTE(review): the constant's name suggests a debug switch rather than a
* hosted-mode switch; this description may be stale -- confirm with consumers.
*/
SEMMLE_ODASA_DEBUG,
/**
* The python executable to use for Qltest.
*/
SEMMLE_PYTHON,
/**
* The platform we are running on; one of "linux", "osx" and "win".
*/
SEMMLE_PLATFORM,
/**
* Location of platform specific tools, currently only used in universal LGTM distributions
*/
SEMMLE_PLATFORM_TOOLS,
/**
* PATH to use to look up tooling required by macOS Relocator scripts.
*/
CODEQL_TOOL_PATH,
/**
* This can override the heuristics for BDD factory resetting. Most useful for measurements
* and debugging.
*/
CODEQL_BDD_RESET_FRACTION,
/**
* How many TRAPLinker errors to report.
*/
SEMMLE_MAX_TRAP_ERRORS,
/**
* How many tuples to accumulate in memory before pushing to disk.
*/
SEMMLE_MAX_TRAP_INMEMORY_TUPLES,
/**
* How many files to merge at each merge step.
*/
SEMMLE_MAX_TRAP_MERGE,
/*
* Variables used by extractors.
*/
/**
* Whether the C++ extractor should copy executables before
* running them (works around System Integrity Protection
* on OS X 10.11+).
*/
SEMMLE_COPY_EXECUTABLES,
/**
* When SEMMLE_COPY_EXECUTABLES is in operation, where to
* create the directory to copy the executables to.
*/
SEMMLE_COPY_EXECUTABLES_SUPER_ROOT,
/**
* When SEMMLE_COPY_EXECUTABLES is in operation, the
* directory we are copying executables to.
*/
SEMMLE_COPY_EXECUTABLES_ROOT,
/**
* The executable which should be used as an implicit runner on Windows.
*/
SEMMLE_WINDOWS_RUNNER_BINARY,
/**
* Verbosity level for the Java interceptor.
*/
SEMMLE_INTERCEPT_VERBOSITY,
/**
* Verbosity level for the Java extractor.
*/
ODASA_JAVAC_VERBOSE,
/**
* Whether to use class origin tracking for the Java extractor.
*/
ODASA_JAVA_CLASS_ORIGIN_TRACKING,
ODASA_JAVAC_CORRECT_EXCEPTIONS,
ODASA_JAVAC_EXTRA_CLASSPATH,
ODASA_NO_ECLIPSE_BUILD,
/*
* Variables set during snapshot builds
*/
/**
* The location of the project being built.
*/
ODASA_PROJECT,
/**
* The location of the snapshot being built.
*/
ODASA_SNAPSHOT,
ODASA_SNAPSHOT_NAME,
ODASA_SRC,
ODASA_DB,
ODASA_BUILD_ERROR_DIR,
TRAP_FOLDER,
SOURCE_ARCHIVE,
ODASA_OUTPUT,
ODASA_SUBPROJECT_THREADS,
/*
* Layout variables
*/
ODASA_JAVA_LAYOUT,
ODASA_CPP_LAYOUT,
ODASA_CSHARP_LAYOUT,
ODASA_PYTHON_LAYOUT,
ODASA_JAVASCRIPT_LAYOUT,
/*
* External variables
*/
JAVA_HOME,
PATH,
LINUX_VARIANT,
/*
* If set, use this proxy for HTTP requests.
* (Upper- and lower-case spellings are separate constants; presumably
* both are consulted -- confirm with the consumer.)
*/
HTTP_PROXY,
http_proxy,
/*
* If set, use this proxy for HTTPS requests.
* (Upper- and lower-case spellings are separate constants.)
*/
HTTPS_PROXY,
https_proxy,
/*
* If set, ignore the proxy variables above and do not use any proxies for requests.
* (Upper- and lower-case spellings are separate constants.)
*/
NO_PROXY,
no_proxy,
/*
* Variables set by the codeql-action. All variables will
* be unset if the CLI is not in the context of the
* codeql-action.
*/
/**
* Either {@code actions} or {@code runner}.
*/
CODEQL_ACTION_RUN_MODE,
/**
* Semantic version of the codeql-action.
*/
CODEQL_ACTION_VERSION,
/*
* Tracer variables
*/
/**
* Colon-separated list of enabled tracing languages
*/
CODEQL_TRACER_LANGUAGES,
/**
* Path to the build-tracer log file
*/
CODEQL_TRACER_LOG,
/**
* Prefix to a language-specific root directory.
*
* NOTE(review): the trailing underscore suggests a language name is appended
* to form the real variable name -- confirm with the tracer code.
*/
CODEQL_TRACER_ROOT_,
;
}
/** Default heap size (MB) used by {@link #defaultRamMb()} when the JVM is definitely 32-bit. */
private static final int DEFAULT_RAM_MB_32 = 1024;
/** Default heap size (MB) used by {@link #defaultRamMb()} in every other case. */
private static final int DEFAULT_RAM_MB = 4096;
/** The shared instance returned by {@link #systemEnv()}; built from the process environment. */
private static final Env instance = new Env();
/**
* Stack of environment snapshots. Every entry pushed by this class is an unmodifiable map;
* the top entry is the environment currently in effect (see {@link #getenv()}).
*/
private final Stack<Map<String, String>> envVarContexts;
/**
* Get the shared {@link Env} instance that was initialised from the process environment.
*/
public static synchronized Env systemEnv() {
return instance;
}
/**
 * Build an {@link Env} that holds no variables at all. Meant for tests that
 * must not be influenced by the environment of the machine they run on.
 */
public static Env emptyEnv() {
    Env blank = new Env();
    // The constructor seeded the stack with a copy of the process
    // environment; throw that away and start from an empty context.
    blank.envVarContexts.clear();
    Map<String, String> noVariables = Collections.unmodifiableMap(makeContext());
    blank.envVarContexts.push(noVariables);
    return blank;
}
/**
 * Create an empty, mutable map suitable for holding environment variables on
 * the current OS: case-insensitive keys on Windows, insertion-ordered
 * case-sensitive keys everywhere else.
 */
private static Map<String, String> makeContext() {
    if (getOS() != OS.WINDOWS)
        return new LinkedHashMap<>();

    // We want to compare in the same way Windows does, which means
    // upper-casing. For example, '_' needs to come after 'Z', but
    // would come before 'z'.
    Comparator<String> windowsOrder = Comparator.comparing(name -> name.toUpperCase(Locale.ENGLISH));
    return new TreeMap<>(windowsOrder);
}
/**
 * Create an environment whose single initial context is a read-only copy of
 * the process environment, or an empty context if that cannot be read.
 */
public Env() {
    envVarContexts = new Stack<>();
    Map<String, String> snapshot = makeContext();
    try {
        Map<String, String> processEnvironment = System.getenv();
        snapshot.putAll(processEnvironment);
    } catch (SecurityException denied) {
        Exceptions.ignore(denied, "Treat an inaccessible environment variable as not existing");
    }
    envVarContexts.push(Collections.unmodifiableMap(snapshot));
}
/**
 * Remove the named variables from the current (top-most) context.
 *
 * <p>
 * The top context is replaced by a fresh read-only copy without the given
 * names. The copy is created with {@link #makeContext()} so that it keeps the
 * platform's key semantics: on Windows the map stays case-insensitive, both
 * for the removals performed here and for every later lookup. (Copying into a
 * plain {@link LinkedHashMap} would silently make the context case-sensitive.)
 * </p>
 *
 * @param names the variable names to unset; nothing happens if it is empty
 */
public synchronized void unsetAll(Collection<String> names) {
    if (names.isEmpty())
        return;
    Map<String, String> remaining = makeContext();
    remaining.putAll(envVarContexts.pop());
    for (String name : names)
        remaining.remove(name);
    envVarContexts.push(Collections.unmodifiableMap(remaining));
}
/**
 * Get the environment currently in effect, i.e. the top of the context stack.
 */
public synchronized Map<String, String> getenv() {
    Map<String, String> current = envVarContexts.peek();
    return current;
}
/**
 * Look up the variable named by <code>var</code> in the current context.
 * Returns <code>null</code> when it is not set. WARNING: some systems do not
 * distinguish an empty variable from an unset one, so do not rely on the
 * difference.
 */
public synchronized String get(Var var) {
    String variableName = var.name();
    return get(variableName);
}
/**
 * Look up <code>envVarName</code> in the current context. Returns
 * <code>null</code> when it is not set. WARNING: some systems do not
 * distinguish an empty variable from an unset one, so do not rely on the
 * difference.
 */
public synchronized String get(String envVarName) {
    Map<String, String> current = getenv();
    return current.get(envVarName);
}
/**
 * Like {@link #get(Var)}, but an empty value is reported as <code>null</code>
 * too, so the result is either <code>null</code> or a non-empty string.
 */
public synchronized String getNonEmpty(Var var) {
    String variableName = var.name();
    return getNonEmpty(variableName);
}
/**
 * Look up <code>envVarName</code> in the current context, substituting the
 * empty string when it is not set. Never returns <code>null</code>.
 */
public synchronized String getPossiblyEmpty(String envVarName) {
    String value = getenv().get(envVarName);
    if (value == null)
        return "";
    return value;
}
/**
 * Like {@link #get(String)}, but an empty value is reported as
 * <code>null</code> too, so the result is either <code>null</code> or a
 * non-empty string.
 */
public synchronized String getNonEmpty(String envVarName) {
    String value = get(envVarName);
    if (value != null && !value.isEmpty())
        return value;
    return null;
}
/**
 * Scan <code>envVarNames</code> in order and return the value of the first
 * variable that is set to a non-empty value. Returns <code>null</code> when
 * none of them is.
 */
public synchronized String getFirstNonEmpty(String... envVarNames) {
    String found = null;
    for (int i = 0; found == null && i < envVarNames.length; i++)
        found = getNonEmpty(envVarNames[i]);
    return found;
}
/**
 * Scan <code>envVars</code> in order and return the value of the first
 * variable that is set to a non-empty value. Returns <code>null</code> when
 * none of them is.
 */
public synchronized String getFirstNonEmpty(Var... envVars) {
    // Translate the enum constants to their names and defer to the
    // String-based overload.
    String[] names = new String[envVars.length];
    int slot = 0;
    for (Var envVar : envVars)
        names[slot++] = envVar.name();
    return getFirstNonEmpty(names);
}
/**
 * Interpret the given variable as a boolean via
 * {@link Boolean#parseBoolean(String)}; an unset variable yields
 * <code>false</code>.
 */
public boolean getBoolean(Var var) {
    String variableName = var.name();
    return getBoolean(variableName);
}
/**
 * Interpret the named variable as a boolean via
 * {@link Boolean#parseBoolean(String)}; an unset variable yields
 * <code>false</code>.
 */
public boolean getBoolean(String envVarName) {
    final boolean whenUnset = false;
    return getBoolean(envVarName, whenUnset);
}
/**
 * Interpret the given variable as a boolean via
 * {@link Boolean#parseBoolean(String)}; an unset variable yields
 * <code>def</code>.
 */
public boolean getBoolean(Var var, boolean def) {
    String variableName = var.name();
    return getBoolean(variableName, def);
}
/**
 * Interpret the named variable as a boolean via
 * {@link Boolean#parseBoolean(String)}; an unset variable yields
 * <code>def</code>.
 */
public boolean getBoolean(String envVarName, boolean def) {
    String raw = get(envVarName);
    if (raw == null)
        return def;
    return Boolean.parseBoolean(raw);
}
/**
 * Parse the given variable as an <code>int</code>. When it is unset or is not
 * a valid integer, <code>defaultValue</code> is returned instead.
 */
public int getInt(Var var, int defaultValue) {
    String variableName = var.name();
    return getInt(variableName, defaultValue);
}
/**
 * Parse the named variable as an <code>int</code>. When it is unset or is not
 * a valid integer, <code>defaultValue</code> is returned instead.
 */
public int getInt(String envVarName, int defaultValue) {
    String raw = get(envVarName);
    if (raw != null) {
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException malformed) {
            Exceptions.ignore(malformed, "We'll just use the default value.");
        }
    }
    // Reached both for an unset variable and for an unparseable one.
    return defaultValue;
}
/**
 * Open a nested environment context. The new context starts as a copy of the
 * current one and is then overlaid with <code>addedValues</code>, which win
 * over existing values for the same variable.
 */
public synchronized void pushEnvironmentContext(Map<String, String> addedValues) {
    Map<String, String> overlay = makeContext();
    // Order matters: inherited values first, overrides second.
    overlay.putAll(envVarContexts.peek());
    overlay.putAll(addedValues);
    Map<String, String> frozen = Collections.unmodifiableMap(overlay);
    envVarContexts.push(frozen);
}
/**
* Leave a context for environment variables that was created with
* <code>pushEnvironmentContext</code>. This simply pops the top of the
* context stack; it does not check that a matching push happened.
*/
public synchronized void popEnvironmentContext() {
envVarContexts.pop();
}
/**
 * Copy the current context into the environment of <code>builder</code> so
 * that a child process started from it sees the custom variables. Nothing is
 * copied while the context stack holds only a single entry.
 */
public synchronized void addEnvironmentToNewProcess(ProcessBuilder builder) {
    boolean hasNestedContext = envVarContexts.size() > 1;
    if (!hasNestedContext)
        return;
    builder.environment().putAll(envVarContexts.peek());
}
/**
 * Define the variables of the current context in <code>env</code>. Nothing is
 * defined while the context stack holds only a single entry.
 */
public synchronized void addEnvironmentToNewEnv(ExpansionEnvironment env) {
    boolean hasNestedContext = envVarContexts.size() > 1;
    if (!hasNestedContext)
        return;
    env.defineVars(envVarContexts.peek());
}
/**
 * Get the JVM's <code>os.name</code> system property. The text has no
 * guaranteed format and is meant for display to users. The result may be
 * <code>null</code> if the JVM does not define the property.
 */
public static String getOSName() {
    final String property = "os.name";
    return System.getProperty(property);
}
/**
 * Classify the running operating system on a best-effort basis, based on
 * {@link #getOSName()}. Cygwin is not detected separately: a program running
 * under Cygwin is reported as Windows.
 */
public static OS getOS() {
    String osName = getOSName();
    if (osName == null)
        return OS.UNKNOWN;
    if (osName.contains("Windows"))
        return OS.WINDOWS;
    if (osName.contains("Mac OS X"))
        return OS.MACOS;
    if (osName.contains("Linux"))
        return OS.LINUX;
    // Guess that we are probably some Unix flavour
    return OS.UNKNOWN_UNIX;
}
/**
 * Kinds of operating systems. A notable absence is Cygwin: this just
 * gets reported as Windows.
 *
 * <p>
 * Each constant records whether file names and environment variable names
 * are treated as case sensitive on that system.
 * </p>
 */
public static enum OS {
    WINDOWS(false, false), LINUX(true, true), MACOS(false, true), UNKNOWN_UNIX(true, true), UNKNOWN(true, true),;

    private final boolean fileSystemCaseSensitive;
    private final boolean envVarsCaseSensitive;

    private OS(boolean fileSystemCaseSensitive, boolean envVarsCaseSensitive) {
        this.fileSystemCaseSensitive = fileSystemCaseSensitive;
        this.envVarsCaseSensitive = envVarsCaseSensitive;
    }

    /**
     * Get an OS value from the short display name. Acceptable
     * inputs (case insensitive) are: Windows, Linux, MacOS or
     * Mac OS.
     *
     * @param name the display name to look up; may be <code>null</code>
     * @return the matching constant
     * @throws IllegalArgumentException if the given name does not
     *         correspond to an OS (including <code>null</code>)
     */
    public static OS fromDisplayName(String name) {
        if (name != null) {
            // Upper-case with a fixed locale: with the default locale, a Turkish
            // system would turn 'i' into a dotted capital and "windows"/"linux"
            // would never match.
            name = name.toUpperCase(Locale.ENGLISH);
            if ("WINDOWS".equals(name))
                return WINDOWS;
            if ("LINUX".equals(name))
                return LINUX;
            // Accept both "MacOS" and "Mac OS".
            if ("MACOS".equals(name.replace(" ", "")))
                return MACOS;
        }
        throw new IllegalArgumentException("No OS type found with name " + name);
    }

    /** Whether file names are case sensitive on this OS. */
    public boolean isFileSystemCaseSensitive() {
        return fileSystemCaseSensitive;
    }

    /** Whether environment variable names are case sensitive on this OS. */
    public boolean isEnvironmentCaseSensitive() {
        return envVarsCaseSensitive;
    }

    /** The short name of this operating system, in the style of {@link Var#SEMMLE_PLATFORM}. */
    public String getShortName() {
        switch (this) {
        case WINDOWS:
            return "win";
        case LINUX:
            return "linux";
        case MACOS:
            return "osx";
        default:
            return "unknown";
        }
    }
}
/**
 * JVM bitness as far as it could be established. {@link #UNDETERMINED}
 * answers <code>false</code> to both queries.
 */
public static enum Architecture {
    X86(true, false), X64(false, true), UNDETERMINED(false, false);

    private final boolean thirtyTwoBit;
    private final boolean sixtyFourBit;

    private Architecture(boolean thirtyTwoBit, boolean sixtyFourBit) {
        this.thirtyTwoBit = thirtyTwoBit;
        this.sixtyFourBit = sixtyFourBit;
    }

    /** Is this definitely a 32-bit architecture? */
    public boolean is32Bit() {
        return thirtyTwoBit;
    }

    /** Is this definitely a 64-bit architecture? */
    public boolean is64Bit() {
        return sixtyFourBit;
    }
}
/**
 * Best-effort detection of whether this JVM is 32-bit or 64-bit; there is no
 * documented, portable way to find out. Three hints are consulted in order:
 * the <code>sun.arch.data.model</code> property, the maximum heap size, and
 * the <code>os.arch</code> property.
 *
 * @return the detected architecture, or {@link Architecture#UNDETERMINED} if
 *         no hint was conclusive
 */
public Architecture tryDetermineJvmArchitecture() {
    String dataModel = System.getProperty("sun.arch.data.model");
    if ("32".equals(dataModel))
        return Architecture.X86;
    if ("64".equals(dataModel))
        return Architecture.X64;

    // Look at the max heap value - if >= 4G we *must* be in 64-bit
    long heapLimit = Runtime.getRuntime().maxMemory();
    boolean heapIsBounded = heapLimit < Long.MAX_VALUE;
    if (heapIsBounded && heapLimit >= 4096L << 20)
        return Architecture.X64;

    // Try to get the OS arch - it *appears* to give JVM bitness
    String arch = System.getProperty("os.arch");
    if ("x86".equals(arch) || "i386".equals(arch))
        return Architecture.X86;
    if ("x86_64".equals(arch) || "amd64".equals(arch))
        return Architecture.X64;

    return Architecture.UNDETERMINED;
}
/**
 * Get the default amount of RAM, in MB, to give to new JVMs. The value of
 * {@link Var#SEMMLE_DEFAULT_HEAP_SIZE} is used when it is set to a valid
 * integer. Otherwise the fallback depends on the detected architecture: a
 * smaller value for a definitely-32-bit JVM, so that it stays representable,
 * and a larger one in all other cases.
 */
public int defaultRamMb() {
    int builtInDefault = tryDetermineJvmArchitecture().is32Bit() ? DEFAULT_RAM_MB_32 : DEFAULT_RAM_MB;
    return getInt(Var.SEMMLE_DEFAULT_HEAP_SIZE, builtInDefault);
}
}

View File

@@ -0,0 +1,95 @@
package com.semmle.util.process;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
public abstract class LeakPrevention {
public abstract List<String> cleanUpArguments(List<String> args);
/**
* What to put in place of any suppressed arguments.
*/
static final String REPLACEMENT_STRING = "*****";
/**
* Hides all arguments. Will only show the command name.
* e.g. "foo bar baz" is changed to "foo"
*/
public static final LeakPrevention ALL = new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
return args.isEmpty() ? args : Collections.singletonList(args.get(0));
}
};
/**
* Does not hide any arguments.
*/
public static final LeakPrevention NONE = new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
return args;
}
};
/**
* Hides the arguments at the given indexes.
*/
public static LeakPrevention suppressedArguments(int... args) {
if (args.length == 0)
return NONE;
final BitSet suppressed = new BitSet();
for (int index : args) {
suppressed.set(index);
}
return new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
List<String> result = new ArrayList<>(args.size());
int index = 0;
for (String arg : args) {
if (suppressed.get(index))
result.add(REPLACEMENT_STRING);
else
result.add(arg);
index++;
}
return result;
}
};
}
/**
* Hides the given string from any arguments that it appears in.
* The substring will be replaced while leaving the rest of the
* argument unmodified.
* <p>
* There are some potential pitfalls to be aware of when using this
* method.
* <ul>
* <li>This only suppresses exact textual matches. If the argument that
* appears is only derived from the secret instead of being an exact
* copy then it will not be suppressed.
* <li>If the secret value appears elsewhere in a known string, then it
* could leak the contents of the secret because the viewer knows what
* should have been there in the known case.
* </ul>
*/
public static LeakPrevention suppressSubstring(final String substringToSuppress) {
return new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
List<String> result = new ArrayList<>(args.size());
for (String arg : args) {
result.add(arg.replace(substringToSuppress, REPLACEMENT_STRING));
}
return result;
}
};
}
}

View File

@@ -0,0 +1,529 @@
package com.semmle.util.projectstructure;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.UserError;
import com.semmle.util.io.WholeIO;
/**
* A project-layout file optionally begins with an '@'
* followed by the name the project should be renamed to.
* Optionally, it can then be followed by a list of
* include/exclude patterns (see below) which are kept
* as untransformed paths. This is followed by one or
* more clauses. Each clause has the following form:
*
* #virtual-path
* path/to/include
* another/path/to/include
* -/path/to/include/except/this
*
* i.e. one or more paths (to include) and zero or more paths
* prefixed by minus-signs (to exclude).
*/
public class ProjectLayout
{
/** First character of the line that gives the name the project should be renamed to. */
public static final char PROJECT_NAME_PREFIX = '@';
/** The project name read from the layout, or <code>null</code> if the layout did not specify one. */
private String project;
/**
* Map from virtual path prefixes (following the '#' in the project-layout)
* to the sequence of patterns that fall into that section. Declared as a
* {@link LinkedHashMap} since iteration order matters -- we process blocks in
* the same order as they occur in the project-layout.
*/
private final LinkedHashMap<String, Section> sections = new LinkedHashMap<String, Section>();
/**
* A file name, or similar string, to use in error messages so that the
* user knows what to fix. May be <code>null</code>, in which case error
* messages carry no location.
*/
private String source;
/**
* Load a project-layout file. The file's text is read with
* {@link WholeIO#strictread(File)} and split into lines; the file's path is
* used as the source name in error messages.
*
* @param file the project-layout to load
*/
public ProjectLayout(File file) {
this(StringUtil.lines(new WholeIO().strictread(file)), file.toString());
}
/**
* Construct a project-layout object from an array of strings, each
* corresponding to one line of the project-layout. This constructor
* is for testing. For other uses see {@link ProjectLayout#ProjectLayout(File)}.
* No source name is recorded, so error messages carry no file or line information.
*
* @param lines the lines of the project-layout
*/
public ProjectLayout(String... lines) {
this(lines, null);
}
/**
 * Parse the lines of a project-layout. Patterns that appear before the first
 * '#' heading go into an initial section with the empty virtual path.
 *
 * @param lines the lines of the project-layout
 * @param source name to quote in error messages, or <code>null</code>
 */
private ProjectLayout(String[] lines, String source) {
    this.source = source;
    String currentVirtual = "";
    Section currentSection = new Section("");
    sections.put("", currentSection);
    for (int i = 0; i < lines.length; i++) {
        int lineNumber = i + 1; // 1-based, for error messages
        String text = lines[i].trim();
        if (text.isEmpty())
            continue;
        char marker = text.charAt(0);
        if (marker == PROJECT_NAME_PREFIX) {
            // '@name': the project name, allowed once
            if (project != null)
                throw error("Only one project name is allowed", source, lineNumber);
            project = tail(text);
        } else if (marker == '#') {
            // '#virtual': start a new section
            currentVirtual = tail(text);
            if (sections.containsKey(currentVirtual))
                throw error("Duplicate virtual path prefix " + currentVirtual, source, lineNumber);
            currentSection = new Section(currentVirtual);
            sections.put(currentVirtual, currentSection);
        } else if (marker == '-') {
            // '-path': exclude pattern in the current section
            currentSection.add(new Rewrite(tail(text), source, lineNumber));
        } else {
            // anything else: include pattern in the current section
            currentSection.add(new Rewrite(text, currentVirtual, source, lineNumber));
        }
    }
}
/**
 * Strip the one-character marker from the front of a layout line and trim
 * surrounding whitespace from what remains.
 */
private static String tail(String line) {
    String withoutMarker = line.substring(1);
    return withoutMarker.trim();
}
/**
 * Get the project name given by the project-layout. Only call this when the
 * layout is known to contain a project name; otherwise a {@link UserError}
 * is thrown.
 *
 * @return the project name -- guaranteed not <code>null</code>.
 * @throws UserError if the project-layout file did not specify a
 *         project name.
 */
public String projectName() {
    if (project != null)
        return project;
    throw error("No project name is defined", source);
}
/**
 * Get the project name given by the project-layout, falling back to
 * <code>defaultName</code> when the layout contains no renaming
 * specification.
 *
 * @param defaultName the name to use if the project-layout doesn't
 *        specify a target project name.
 * @return the specified name or default value.
 */
public String projectName(String defaultName) {
    if (project != null)
        return project;
    return defaultName;
}
/**
 * @return a fresh list of the section headings (aka virtual paths), in the
 *         order the sections are stored
 */
public List<String> sections() {
    return new ArrayList<String>(sections.keySet());
}
/**
 * Report whether a section of this project-layout has no include/exclude
 * patterns at all.
 *
 * @param section the name of the section
 * @return <code>true</code> if the section is empty
 * @throws CatastrophicError if there is no section with that name
 */
public boolean sectionIsEmpty(String section) {
    Section found = sections.get(section);
    if (found == null)
        throw new CatastrophicError("Section does not exist: " + section);
    return found.isEmpty();
}
/**
 * Rename a section in this project-layout. The section is removed under its
 * old name and stored again under the new one.
 *
 * @param oldName the old name of the section
 * @param newName the new name
 * @throws CatastrophicError if there is no section called <code>oldName</code>
 */
public void renameSection(String oldName, String newName) {
    Section moved = sections.remove(oldName);
    if (moved == null)
        throw new CatastrophicError("Section does not exist: " + oldName);
    moved.rename(newName);
    sections.put(newName, moved);
}
/**
 * Return a project-layout file for just one of the sections in this
 * project-layout. This is done by copying all the rules from the
 * section, and changing the section heading (beginning with '#')
 * to a project name (beginning with '@').
 *
 * @param sectionName the section to create a project-layout from
 * @return the text of the newly created project-layout
 * @throws CatastrophicError if there is no section called <code>sectionName</code>
 */
public String subLayout(String sectionName) {
    Section section = sections.get(sectionName);
    if (section == null)
        // Report the requested name; 'section' itself is null on this path.
        throw new CatastrophicError("Section does not exist: " + sectionName);
    return section.toLayout();
}
/**
 * Map a path to its artificial path according to the rules of this
 * project-layout. Sections are tried in order; the first one whose best
 * matching rule produces a rewritten path wins. If the path is excluded
 * (either explicitly, or because it is not mentioned in the project-layout),
 * <code>null</code> is returned.
 * <p>
 * Paths should start with a leading forward-slash
 *
 * @param path the path to map
 * @return the artificial path, or <code>null</code> if the path is excluded
 */
public String artificialPath(String path) {
    // If there is no leading slash, the path does not conform to the expected
    // format and there is no match. (An exception is made for a completely
    // empty string, which will get the sole prefix '/' and be mapped as usual).
    boolean hasLeadingSlash = path.length() == 0 || path.charAt(0) == '/';
    if (!hasLeadingSlash)
        return null;

    List<String> candidates = Section.prefixes(path);
    for (Section section : sections.values()) {
        Rewrite winner = section.match(candidates);
        if (winner == null)
            continue;
        String mapped = winner.rewrite(path);
        if (mapped != null)
            return mapped;
    }
    return null;
}
/**
 * Decide whether a path belongs to the project described by this layout. A
 * path is included exactly when {@link #artificialPath(String)} maps it to
 * some location.
 * <p>
 * Paths should start with a leading forward-slash
 *
 * @param path the path to check
 * @return <code>true</code> if the path should be included
 */
public boolean includeFile(String path) {
    String mapped = artificialPath(path);
    return mapped != null;
}
/**
 * Write this project-layout in textual form: the '@' project-name line (if a
 * name is set), then every section's rules, each non-initial section preceded
 * by its '#' heading.
 *
 * @param writer where to write the layout
 * @throws IOException if the writer fails
 */
public void writeTo(Writer writer) throws IOException {
    if (project != null)
        writer.append(PROJECT_NAME_PREFIX).append(project).append('\n');

    for (Section section : sections.values()) {
        // The initial section (empty virtual path) has no heading line.
        boolean hasHeading = !section.virtual.isEmpty();
        if (hasHeading)
            writer.append('#').append(section.virtual).append('\n');
        section.outputRules(writer);
    }
}
/**
 * Append an include or exclude pattern to a section, creating the section
 * if it does not exist yet. A pattern starting with '-' is an exclude.
 *
 * @param section the virtual path of the section to extend
 * @param pattern the pattern to add; must be non-empty
 * @throws IllegalArgumentException if <code>pattern</code> is null or empty
 */
public void addPattern(String section, String pattern) {
    if (pattern == null || pattern.isEmpty()) {
        throw new IllegalArgumentException("ProjectLayout.addPattern: pattern must be a non-empty string");
    }

    // Build (and thereby validate) the rule before touching the sections map.
    final Rewrite rule;
    if (pattern.charAt(0) == '-')
        rule = new Rewrite(pattern.substring(1), null, 0);
    else
        rule = new Rewrite(pattern, section, null, 0);

    Section target = sections.get(section);
    if (target == null) {
        target = new Section(section);
        sections.put(section, target);
    }
    target.add(rule);
}
/**
 * Build a {@link UserError} for a problem that is not tied to a particular
 * line of the layout.
 */
private static UserError error(String message, String source) {
    final int noLine = 0;
    return error(message, source, noLine);
}
/**
 * Build a {@link UserError} whose message is followed by the location of the
 * problem: " (line N of SOURCE)" when <code>line</code> is positive,
 * " (SOURCE)" otherwise. Without a source name the message is used as is.
 */
private static UserError error(String message, String source, int line) {
    if (source == null)
        return new UserError(message);

    StringBuilder text = new StringBuilder(message).append(" (");
    if (line > 0)
        text.append("line ").append(line).append(" of ");
    text.append(source).append(")");
    return new UserError(text.toString());
}
/**
 * One block of a project-layout: the rules listed under a '#some/path'
 * heading. The patterns that precede the first '#' form an initial section
 * of their own.
 *
 * Rules that report themselves as simple are kept in a map keyed by their
 * simple prefix; all others are kept in a list, in the order they were added.
 */
private static class Section {
    private String virtual;
    private final Map<String, Rewrite> simpleRewrites = new LinkedHashMap<String, Rewrite>();
    private final List<Rewrite> complexRewrites = new ArrayList<Rewrite>();

    public Section(String virtual) {
        this.virtual = virtual;
    }

    /** Render this section as a stand-alone layout: an '@' name line followed by the rules. */
    public String toLayout() {
        StringWriter buffer = new StringWriter();
        buffer.append('@').append(virtual).append('\n');
        try {
            outputRules(buffer);
        } catch (IOException e) {
            throw new CatastrophicError("StringWriter.append threw an IOException", e);
        }
        return buffer.toString();
    }

    /** Write every rule of this section, one per line, sorted by {@link Rewrite#COMPARATOR}. */
    private void outputRules(Writer writer) throws IOException {
        List<Rewrite> ordered = new ArrayList<Rewrite>(simpleRewrites.values());
        ordered.addAll(complexRewrites);
        ordered.sort(Rewrite.COMPARATOR);
        for (Rewrite rule : ordered)
            writer.append(rule.toString()).append('\n');
    }

    /** Change the virtual path of this section and of every rule in it. */
    public void rename(String newName) {
        virtual = newName;
        for (Rewrite rule : simpleRewrites.values())
            rule.virtual = newName;
        for (Rewrite rule : complexRewrites)
            rule.virtual = newName;
    }

    /** Add a rule; its index is set to the number of rules currently stored. */
    public void add(Rewrite rewrite) {
        rewrite.setIndex(simpleRewrites.size() + complexRewrites.size());
        if (!rewrite.isSimple()) {
            complexRewrites.add(rewrite);
            return;
        }
        simpleRewrites.put(rewrite.simplePrefix(), rewrite);
    }

    public boolean isEmpty() {
        return complexRewrites.isEmpty() && simpleRewrites.isEmpty();
    }

    /**
     * List the path itself followed by each successively shorter prefix
     * obtained by cutting at a '/', and finally "/".
     */
    private static List<String> prefixes(String path) {
        List<String> chain = new ArrayList<String>();
        chain.add(path);
        for (int cut = path.length(); cut > 1;) {
            cut = path.lastIndexOf('/', cut - 1);
            chain.add(path.substring(0, cut));
        }
        chain.add("/");
        return chain;
    }

    /**
     * Find the rule with the highest index that applies: among the simple
     * rules, any whose prefix is one of <code>prefixes</code>; among the
     * complex rules, the last one matching the first entry of
     * <code>prefixes</code>.
     *
     * @return the winning rule, or <code>null</code> if none applies
     */
    public Rewrite match(List<String> prefixes) {
        Rewrite best = null;
        for (String prefix : prefixes) {
            Rewrite candidate = simpleRewrites.get(prefix);
            if (candidate != null && (best == null || best.index < candidate.index))
                best = candidate;
        }
        // Last matching rewrite 'wins', so walk the complex rules backwards
        // and stop at the first hit.
        int position = complexRewrites.size();
        while (--position >= 0) {
            Rewrite candidate = complexRewrites.get(position);
            if (!candidate.matches(prefixes.get(0)))
                continue;
            if (best == null || best.index < candidate.index)
                best = candidate;
            break;
        }
        return best;
    }
}
/**
* Each Rewrite corresponds to a single include or exclude line in the project-layout.
* For example, for following clause there would be three Rewrite objects:
*
* #Source
* /src
* /lib
* -/src/tests
*
* For includes use the two-argument constructor; for excludes the one-argument constructor.
*/
private static class Rewrite {
/** Orders rewrites by ascending {@link #index}. */
private static final Comparator<Rewrite> COMPARATOR = new Comparator<Rewrite>() {
    @Override
    public int compare(Rewrite t, Rewrite o) {
        return Integer.compare(t.index, o.index);
    }
};
/** Position assigned through {@link #setIndex(int)}; used to order rewrites and to pick the later of two matches. */
private int index;
/** The rule as written in the layout; exclude rules are stored with their leading '-'. */
private final String original;
/** The regular expression compiled from the (normalised) path by {@code compilePrefix}. */
private final Pattern pattern;
/** The virtual prefix of an include rule; not assigned by the exclude constructor. */
private String virtual;
/** The normalised path if it contains no '*', otherwise <code>null</code>. */
private final String simple;
/**
* The intention is to allow the ** wildcard when followed by a slash only. The
* following should be invalid:
* - a / *** / b (too many stars)
* - a / ** (** at the end should be omitted)
* - a / **b (illegal)
* - a / b** (illegal)
* - ** (the same as a singleton '/')
* This regex matches ** when followed by a non-/ character, when at the end of
* the string, or when preceded by a non-/ character.
*/
private static final Pattern verifyStars = Pattern.compile(".*(?:\\*\\*[^/].*|\\*\\*$|[^/]\\*\\*.*)");
public Rewrite(String exclude, String source, int line) {
original = '-' + exclude;
if (!exclude.startsWith("/"))
exclude = '/' + exclude;
if (exclude.indexOf("//") != -1)
throw error("Illegal '//' in exclude path", source, line);
if (verifyStars.matcher(exclude).matches())
throw error("Illegal use of '**' in exclude path", source, line);
if (exclude.endsWith("/"))
exclude = exclude.substring(0, exclude.length() - 1);
pattern = compilePrefix(exclude);
exclude = exclude.replace("//", "/");
if (exclude.length() > 1 && exclude.endsWith("/"))
exclude = exclude.substring(0, exclude.length() - 1);
simple = exclude.contains("*") ? null : exclude;
}
/**
* Record the position of this rewrite. Within this file it is called from
* {@code Section.add}, which passes the number of rules already in the section.
*/
public void setIndex(int index) {
this.index = index;
}
/**
 * Create an include rule.
 *
 * @param include the included path; may contain at most one '//'
 * @param virtual the virtual prefix of the section the rule belongs to
 * @param source name to quote in error messages, or <code>null</code>
 * @param line line number to quote in error messages
 */
public Rewrite(String include, String virtual, String source, int line) {
    original = include;
    String path = include.startsWith("/") ? include : '/' + include;
    // First and last occurrence differ exactly when '//' occurs more than once.
    if (path.indexOf("//") != path.lastIndexOf("//"))
        throw error("More than one '//' in include path", source, line);
    if (verifyStars.matcher(path).matches())
        throw error("Illegal use of '**' in include path", source, line);

    // Normalise the virtual prefix: leading slash, no trailing slash.
    String prefix = virtual.startsWith("/") ? virtual : "/" + virtual;
    if (prefix.endsWith("/"))
        prefix = prefix.substring(0, prefix.length() - 1);
    this.virtual = prefix;

    // The pattern is compiled from the path with its '//' still in place.
    this.pattern = compilePrefix(path);

    String flattened = path.replace("//", "/");
    if (flattened.length() > 1 && flattened.endsWith("/"))
        flattened = flattened.substring(0, flattened.length() - 1);
    simple = flattened.contains("*") ? null : flattened;
}
/**
* Patterns are matched by translation to regex. The following invariants
* are assumed to hold:
*
* - The pattern starts with a '/'.
* - There are no occurrences of '**' that is not surrounded by slashes
* (unless it is at the start of a pattern).
* - There is at most one double slash.
*
* The result of the translation has precisely one capture group, which
* (after successful matching) will contain the part of the path that
* should be glued to the virtual prefix.
*
* It proceeds by starting the capture group either after the double
* slash or at the start of the pattern, and then replacing '*' with
* '[^/]*' (meaning any number of non-slash characters) and '/**' with
* '(?:|/.*)' (meaning empty string or a slash followed by any number of
* characters including '/').
*
* The pattern is terminated by the term '(?:/.*|$)', saying 'either the
* next character is a '/' or the string ends' -- this avoids accidental
* matching of partial directory/file names.
*
* <b>IMPORTANT:</b> Run the ProjectLayoutTests when changing this!
*/
private static Pattern compilePrefix(String pattern) {
pattern = StringUtil.escapeStringLiteralForRegexp(pattern, "*");
if (pattern.contains("//"))
pattern = pattern.replace("//", "(/");
else
pattern = "(" + pattern;
if (pattern.endsWith("/"))
pattern = pattern.substring(0, pattern.length() - 1);
pattern = pattern.replace("/**", "-///-")
.replace("*", "[^/]*")
.replace("-///-", "(?:|/.*)");
return Pattern.compile(pattern + "(?:/.*|$))");
}
/** Is this rewrite simple? (i.e. contains no wildcards) */
public boolean isSimple() {
return simple != null;
}
/** Returns the path included/excluded by this rewrite, if it is
* simple, or <code>null</code> if it is not.
*
* @return included/excluded path, or <code>null</code>
*/
public String simplePrefix() {
return simple;
}
public boolean matches(String path) {
return pattern.matcher(path).matches();
}
public String rewrite(String path) {
if (virtual == null)
return null;
Matcher matcher = pattern.matcher(path);
if (!matcher.matches())
return null;
return virtual + matcher.group(1);
}
@Override
public String toString() {
return original;
}
}
}

View File

@@ -0,0 +1,29 @@
package com.semmle.util.trap;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import com.semmle.util.zip.MultiMemberGZIPInputStream;
public class CompressedFileInputStream {
    /**
     * Create an input stream for reading the uncompressed data from a (possibly) compressed file, with
     * the decompression method chosen based on the file extension.
     *
     * <p>
     * If wrapping the file in a decompressing stream fails (the gzip constructor reads the header
     * eagerly and throws on a truncated or non-gzip file), the underlying file stream is closed
     * before the exception is propagated, so no file handle is leaked.
     * </p>
     *
     * @param f The compressed file to read
     * @return An input stream from which you can read the file's uncompressed data.
     * @throws IOException From opening the file or from the underlying decompression input stream.
     */
    public static InputStream fromFile(Path f) throws IOException {
        InputStream fileInputStream = Files.newInputStream(f);
        try {
            if (f.getFileName().toString().endsWith(".gz")) {
                return new MultiMemberGZIPInputStream(fileInputStream, 8192);
            }
            // Brotli support is currently disabled:
            //if (f.getFileName().toString().endsWith(".br")) {
            //    return new BrotliInputStream(fileInputStream);
            //}
            return fileInputStream;
        } catch (IOException | RuntimeException e) {
            // The caller never receives a stream in this case, so it must be closed here.
            try {
                fileInputStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}

View File

@@ -0,0 +1,125 @@
package com.semmle.util.trap.dependencies;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.io.StreamUtil;
import com.semmle.util.io.WholeIO;
import com.semmle.util.trap.CompressedFileInputStream;
/**
* Base class for the line-oriented text files handled in this package: a header
* line carrying a format version, followed by named sets of lines. Subclasses
* decide which set labels exist ({@link #getSet(Path, String)}) and how a
* malformed file is reported ({@link #parseError(Path)}).
*/
public abstract class TextFile {
/** Label of the set stored in {@link #traps}. */
static final String TRAPS = "TRAPS";
/** Header line: arbitrary text, one space, then a version of the form digit '.' digit. */
private static final Pattern HEADER = Pattern.compile("([^\r\n]+?) (\\d\\.\\d)");
/** Format version: the constructor argument, overwritten by {@link #load(String, Path)} with the version read from the header. */
protected String version;
/** Entries of the {@link #TRAPS} set, in insertion order. */
protected final Set<String> traps = new LinkedHashSet<String>();
/**
* Maps a line to the set it names.
*
* @param path the file being loaded (passed through from {@link #load(String, Path)})
* @param label a non-empty line of the file
* @return the set that following lines should be added to, or <code>null</code>
* if <code>label</code> is not a set name (the line is then treated as an entry)
*/
protected abstract Set<String> getSet(Path path, String label);
/**
* Reports that <code>path</code> is malformed. {@link #load(String, Path)} assumes
* this does not return normally.
*/
protected abstract void parseError(Path path);
/** @param version the initial format version (see {@link #version()}) */
public TextFile(String version) {
this.version = version;
}
/**
* Load the current text file, checking that it matches the expected header.
*
* <p>
* This method is somewhat performance-sensitive, as at least our C++ extractors
* can generate very large input files. The format is therefore parsed by hand.
* </p>
*
* <p>
* The accepted format consists of:
* <ul>
* <li>Zero or more EOL comments, marked with {@code //}.
* <li>Precisely one header line, of the form {@code $HEADER $VERSION}; this is
* checked against {@code expected_header}.
* <li>Zero or more "file lists", each beginning with the name of a set (see
* {@link #getSet(Path, String)}) on a line by itself, followed by file paths,
* one per line.
* </ul>
*
* <p>
* Empty lines are permitted throughout.
* </p>
*
* @param expected_header the text the header line must carry before its version
* @param path the file to read; decompressed according to its extension
* @throws ResourceError if the file cannot be read
*/
protected void load(String expected_header, Path path) {
try (InputStream is = CompressedFileInputStream.fromFile(path);
BufferedReader lines = StreamUtil.newUTF8BufferedReader(is)) {
// True until the header line has been consumed.
boolean commentsPermitted = true;
Set<String> currentSet = null;
for (String line = lines.readLine(); line != null; line = lines.readLine()) {
// Skip empty lines.
if (line.isEmpty())
continue;
// If comments are still permitted, skip comment lines.
if (commentsPermitted && line.startsWith("//"))
continue;
// If comments are still permitted, the first non-comment line is the header.
// In addition, we allow no further comments.
if (commentsPermitted) {
Matcher matcher = HEADER.matcher(line);
// NOTE(review): relies on parseError throwing; if it returned after a failed
// match, matcher.group(2) below would throw IllegalStateException.
if (!matcher.matches() || !matcher.group(1).equals(expected_header))
parseError(path);
commentsPermitted = false;
version = matcher.group(2);
continue;
}
// We have a non-blank line; this either names the new set, or is a line that
// should be put into the current set.
Set<String> newSet = getSet(path, line);
if (newSet != null) {
currentSet = newSet;
} else {
// An entry before any set name is malformed input.
if (currentSet == null)
parseError(path);
else
currentSet.add(line);
}
}
} catch (IOException e) {
throw new ResourceError("Couldn't read " + path, e);
}
}
/**
* @return the format version of the loaded file
*/
public String version() {
return version;
}
/**
* Save this object to a file (or throw a ResourceError on failure)
*
* @param file the file in which to save this object
*/
public void save(Path file) {
new WholeIO().strictwrite(file, toString());
}
/** Appends the header line: <code>header</code>, a space, <code>version</code>, newline. */
protected void appendHeaderString(StringBuilder sb, String header, String version) {
sb.append(header).append(' ').append(version).append('\n');
}
/** Appends a blank line, the set's <code>title</code>, then each element of <code>set</code> on its own line. */
protected void appendSet(StringBuilder sb, String title, Set<String> set) {
sb.append('\n').append(title).append('\n');
for (String s : set)
sb.append(s).append('\n');
}
/** Appends a blank line, the <code>title</code>, then the single entry <code>s</code> on its own line. */
protected void appendSingleton(StringBuilder sb, String title, String s) {
sb.append('\n').append(title).append('\n');
sb.append(s).append('\n');
}
}

View File

@@ -0,0 +1,109 @@
package com.semmle.util.trap.dependencies;
import java.io.File;
import java.nio.file.Path;
import java.util.AbstractSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
import com.semmle.util.exception.ResourceError;
/**
 * The immediate dependencies of one particular TRAP file, as stored in a
 * .dep file: the TRAP file itself plus the set of TRAP files it depends on.
 */
public class TrapDependencies extends TextFile {
    static final String TRAP = "TRAP";

    /** Path of the TRAP file whose dependencies are held here. */
    private String trap;

    /**
     * Create an empty dependencies node for a TRAP file.
     *
     * @param trap the path of the TRAP file the dependencies belong to
     */
    public TrapDependencies(String trap) {
        super(TrapSet.LATEST_VERSION);
        this.trap = trap;
    }

    /**
     * Load a TRAP dependencies (.dep) file. A file that names no TRAP is
     * reported through {@link #parseError(Path)}.
     *
     * @param file the file to load
     */
    public TrapDependencies(Path file) {
        super(null);
        load(TrapSet.HEADER, file);
        if (trap == null) {
            parseError(file);
        }
    }

    @Override
    protected Set<String> getSet(final Path file, String label) {
        if (label.equals(TRAP)) {
            // Write-only sink: the loader only ever calls add() on the returned set,
            // which captures the single permitted entry of the TRAP section.
            return new AbstractSet<String>() {
                @Override
                public boolean add(String entry) {
                    if (trap != null) {
                        // A second entry under TRAP is malformed input.
                        parseError(file);
                    }
                    trap = entry;
                    return true;
                }

                @Override
                public Iterator<String> iterator() {
                    return null;
                }

                @Override
                public int size() {
                    return 0;
                }
            };
        }
        return label.equals(TRAPS) ? traps : null;
    }

    @Override
    protected void parseError(Path file) {
        throw new ResourceError("Corrupt TRAP dependencies: " + file);
    }

    /**
     * @return the path of the TRAP with the dependencies stored in this object
     * (relative to the source location)
     */
    public String trapFile() {
        return trap;
    }

    /**
     * @return an unmodifiable view of the paths of the TRAP file dependencies
     * (relative to the trap directory)
     */
    public Set<String> dependencies() {
        return Collections.unmodifiableSet(traps);
    }

    /**
     * Add a path to a TRAP file (relative to the trap directory).
     *
     * @param trap the path to the trap file to add
     */
    public void addDependency(String trap) {
        traps.add(trap);
    }

    /**
     * Renders this object in the .dep file format: header line, the TRAP
     * section, then the TRAPS section.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        appendHeaderString(out, TrapSet.HEADER, TrapSet.LATEST_VERSION);
        appendSingleton(out, TRAP, trap);
        appendSet(out, TRAPS, traps);
        return out.toString();
    }
}

View File

@@ -0,0 +1,196 @@
package com.semmle.util.trap.dependencies;
import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import com.semmle.util.exception.ResourceError;
/**
 * A set of source files and the TRAP files that were generated when
 * compiling them.
 * <p>
 * The set of TRAP files is not necessarily sufficient to create a
 * consistent database, unless combined with inter-TRAP dependency
 * information from .dep files (see {@link TrapDependencies}).
 */
public class TrapSet extends TextFile {
    static final String HEADER = "TRAP dependencies";
    static final String LATEST_VERSION = "1.2";
    static final String SOURCES = "SOURCES";
    static final String INCLUDES = "INCLUDES";
    static final String OBJECTS = "OBJECTS";
    static final String INPUT_OBJECTS = "INPUT_OBJECTS";

    // state
    private final Set<String> sources = new LinkedHashSet<String>();
    private final Set<String> includes = new LinkedHashSet<String>();
    private final Set<String> objects = new LinkedHashSet<String>();
    private final Set<String> inputObjects = new LinkedHashSet<String>();
    /** The file this set was last loaded from or saved to, if any. */
    private Path file;

    /**
     * Create an empty TRAP set
     */
    public TrapSet() {
        super(LATEST_VERSION);
    }

    /**
     * Load a TRAP set (.set) file
     *
     * @param path the file to load
     */
    public TrapSet(Path path) {
        super(null);
        load(HEADER, path);
        this.file = path;
    }

    @Override
    protected Set<String> getSet(Path file, String label) {
        if (label.equals(SOURCES)) return sources;
        if (label.equals(INCLUDES)) return includes;
        if (label.equals(OBJECTS)) return objects;
        if (label.equals(INPUT_OBJECTS)) return inputObjects;
        if (label.equals(TRAPS)) return traps;
        return null;
    }

    /**
     * Return the most recent file used when loading or saving this
     * trap set. If this set was constructed, rather than loaded, and
     * has not been saved then the result is <code>null</code>.
     *
     * @return the file or <code>null</code>
     */
    public Path getFile() {
        return file;
    }

    @Override
    protected void parseError(Path file) {
        throw new ResourceError("Corrupt TRAP set: " + file);
    }

    /**
     * @return the paths of the source files contained in this TRAP set
     */
    public Set<String> sourceFiles() {
        return Collections.unmodifiableSet(sources);
    }

    /**
     * @return the paths to the include files contained in this TRAP set
     */
    public Set<String> includeFiles() {
        return Collections.unmodifiableSet(includes);
    }

    /**
     * @return the paths of the TRAP files contained in this TRAP set
     * (relative to the trap directory)
     */
    public Set<String> trapFiles() {
        return Collections.unmodifiableSet(traps);
    }

    /**
     * @return the object names in this TRAP set
     */
    public Set<String> objectNames() {
        return Collections.unmodifiableSet(objects);
    }

    /**
     * @return the input object names in this TRAP set
     */
    public Set<String> inputObjectNames() {
        return Collections.unmodifiableSet(inputObjects);
    }

    /**
     * Add a fully-qualified path to a source-file.
     *
     * @param source the path to the source file to add
     */
    public void addSource(String source) {
        sources.add(source);
    }

    /**
     * Add a fully-qualified path to an include-file.
     *
     * @param include the path to the include file to add
     */
    public void addInclude(String include) {
        includes.add(include);
    }

    /**
     * Add a path to a TRAP file (relative to the trap directory).
     *
     * @param trap the path to the trap file to add
     * @return true if the path was not already present
     */
    public boolean addTrap(String trap) {
        return traps.add(trap);
    }

    /**
     * Check if this set contains a TRAP path
     *
     * @param trap the path to check
     * @return true if this set contains the path
     */
    public boolean containsTrap(String trap) {
        // Must query the set of TRAP paths; testing the argument against itself is always true.
        return traps.contains(trap);
    }

    /**
     * Are the sources mentioned in this TRAP set disjoint from the given
     * set of paths?
     *
     * @param paths the set of paths to check disjointness with
     * @return true if and only if the paths are disjoint
     */
    public boolean sourcesDisjointFrom(Set<String> paths) {
        for (String source : sources)
            if (paths.contains(source))
                return false;
        return true;
    }

    /**
     * Save this TRAP set to a .set file (or throw a ResourceError on failure)
     * and remember the file as the one returned by {@link #getFile()}.
     *
     * @param file the file in which to save this set
     */
    @Override
    public void save(Path file) {
        super.save(file);
        this.file = file;
    }

    /**
     * Renders this set in the .set file format: the header line followed by the
     * SOURCES, INCLUDES, OBJECTS, INPUT_OBJECTS and TRAPS sections.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        appendHeaderString(sb, HEADER, LATEST_VERSION);
        appendSet(sb, SOURCES, sources);
        appendSet(sb, INCLUDES, includes);
        appendSet(sb, OBJECTS, objects);
        appendSet(sb, INPUT_OBJECTS, inputObjects);
        appendSet(sb, TRAPS, traps);
        return sb.toString();
    }
}

View File

@@ -0,0 +1,8 @@
package com.semmle.util.trap.pathtransformers;
/**
 * A {@link PathTransformer} whose {@link #transform(String)} leaves paths untouched.
 */
public class NoopTransformer extends PathTransformer {
    /**
     * @param input the path to transform
     * @return {@code input}, unchanged
     */
    @Override
    public String transform(final String input) {
        return input;
    }
}

View File

@@ -0,0 +1,46 @@
package com.semmle.util.trap.pathtransformers;
import java.io.File;
import com.semmle.util.files.FileUtil;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
/**
 * Maps normalised file paths to the paths under which they are recorded.
 */
public abstract class PathTransformer {
    /** Transformer handed out by {@link #std()}; chosen once, when this class is initialised. */
    private static final PathTransformer DEFAULT_TRANSFORMER = createDefaultTransformer();

    /**
     * Picks the default transformer: a {@link ProjectLayoutTransformer} over the file named by
     * {@code SEMMLE_PATH_TRANSFORMER} when that variable is set, a {@link NoopTransformer} otherwise.
     */
    private static PathTransformer createDefaultTransformer() {
        String layout = Env.systemEnv().get(Var.SEMMLE_PATH_TRANSFORMER);
        return layout == null
                ? new NoopTransformer()
                : new ProjectLayoutTransformer(new File(layout));
    }

    /**
     * @param input a normalised path
     * @return the transformed path
     */
    public abstract String transform(String input);

    /**
     * Convert a file to its path in the (code) database. Turns file paths into
     * canonical, absolute, strings and normalises away Unix/Windows differences.
     * When {@code SEMMLE_PRESERVE_SYMLINKS} is true the path is only simplified
     * rather than made canonical.
     */
    public String fileAsDatabaseString(File file) {
        boolean preserveSymlinks =
                Boolean.parseBoolean(Env.systemEnv().get(Var.SEMMLE_PRESERVE_SYMLINKS));
        String path = preserveSymlinks
                ? FileUtil.simplifyPath(file)
                : FileUtil.tryMakeCanonical(file).getPath();
        return transform(FileUtil.normalisePath(path));
    }

    /**
     * Utility method for extractors: Canonicalise the given path as required
     * for the current extraction. Unlike {@link FileUtil#tryMakeCanonical(File)},
     * this method is consistent with {@link #fileAsDatabaseString(File)}.
     */
    public File canonicalFile(String path) {
        File asGiven = new File(path);
        return new File(fileAsDatabaseString(asGiven));
    }

    /** @return the process-wide default transformer */
    public static PathTransformer std() {
        return DEFAULT_TRANSFORMER;
    }
}

View File

@@ -0,0 +1,37 @@
package com.semmle.util.trap.pathtransformers;
import java.io.File;
import com.semmle.util.projectstructure.ProjectLayout;
/**
 * A {@link PathTransformer} that maps paths through a {@link ProjectLayout}.
 */
public class ProjectLayoutTransformer extends PathTransformer {
    private final ProjectLayout layout;

    /** @param file the file the {@link ProjectLayout} is constructed from */
    public ProjectLayoutTransformer(File file) {
        layout = new ProjectLayout(file);
    }

    /**
     * Maps {@code input} through the layout, returning {@code input} itself when
     * the layout yields no artificial path. Drive-letter paths are looked up with a
     * leading '/'; that slash is removed again if the result is still a
     * drive-letter path.
     */
    @Override
    public String transform(String input) {
        if (!isWindowsPath(input, 0)) {
            String mapped = layout.artificialPath(input);
            return mapped == null ? input : mapped;
        }

        String mapped = layout.artificialPath('/' + input);
        if (mapped == null) {
            return input;
        }
        boolean slashBeforeDrive = isWindowsPath(mapped, 1) && mapped.charAt(0) == '/';
        return slashBeforeDrive ? mapped.substring(1) : mapped;
    }

    /**
     * Does {@code s}, read from index {@code startAt}, begin with a non-slash
     * character followed by ":/"?
     */
    private static boolean isWindowsPath(String s, int startAt) {
        if (s.length() < startAt + 3) {
            return false;
        }
        return s.charAt(startAt) != '/'
                && s.charAt(startAt + 1) == ':'
                && s.charAt(startAt + 2) == '/';
    }
}

View File

@@ -0,0 +1,71 @@
package com.semmle.util.zip;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.zip.GZIPInputStream;
/**
* A {@link GZIPInputStream} that keeps reading across concatenated gzip members:
* when one member ends and more input is available, a fresh instance is created
* over the same pushback stream and all further reads are delegated to it.
*/
public class MultiMemberGZIPInputStream extends GZIPInputStream {
/**
* @param in the compressed input
* @param size used both as the pushback capacity and as the inflater buffer size
*/
public MultiMemberGZIPInputStream(InputStream in, int size) throws IOException {
// Wrap the stream in a PushbackInputStream...
super(new PushbackInputStream(in, size), size);
this.size = size;
}
/**
* Uses a 1024-byte pushback capacity and the superclass's default buffer size.
*
* @param in the compressed input
*/
public MultiMemberGZIPInputStream(InputStream in) throws IOException {
// Wrap the stream in a PushbackInputStream...
super(new PushbackInputStream(in, 1024));
this.size = -1;
}
/** Stream for the next gzip member; once set, every read is forwarded to it. */
private MultiMemberGZIPInputStream child;
/** Buffer size to hand to child streams, or -1 to use the single-argument constructor. */
private int size;
/** Set once the underlying stream is known to hold no further data. */
private boolean eos;
/**
* Reads uncompressed data, moving on to the next gzip member when the current one is exhausted.
*
* @return the number of bytes read, or -1 once no member has any data left
*/
@Override
public int read(byte[] inputBuffer, int inputBufferOffset, int inputBufferLen) throws IOException {
if (eos) {
return -1;
}
else if (child != null) {
return child.read(inputBuffer, inputBufferOffset, inputBufferLen);
}
// Despite the name, this counts bytes.
int charsRead = super.read(inputBuffer, inputBufferOffset, inputBufferLen);
if (charsRead == -1) {
// Push any remaining buffered data back onto the stream
// If the stream is then not empty, use it to construct
// a new instance of this class and delegate this and any
// future calls to it...
int n = inf.getRemaining() - 8;
if (n > 0) {
// More than 8 bytes remaining in the inflater's input.
// First 8 are gzip trailer. Add the rest to
// any un-read data...
((PushbackInputStream) this.in).unread(buf, len - n, n);
} else {
// Nothing in the buffer. We need to know whether or not
// there is unread data available in the underlying stream
// since the base class will not handle an empty file.
// Read a byte to see if there is data and if so,
// push it back onto the stream...
byte[] b = new byte[1];
int ret = in.read(b, 0, 1);
if (ret == -1) {
eos = true;
return -1;
} else {
((PushbackInputStream) this.in).unread(b, 0, 1);
}
}
// 'in' is the PushbackInputStream created by our constructor; the child wraps it again.
if(size == -1)
child = new MultiMemberGZIPInputStream(in);
else
child = new MultiMemberGZIPInputStream(in, size);
return child.read(inputBuffer, inputBufferOffset, inputBufferLen);
} else {
return charsRead;
}
}
}

View File

@@ -22,6 +22,10 @@ import java.io.StringWriter
import java.nio.file.Files
import java.nio.file.Paths
import java.util.*
import com.intellij.openapi.vfs.StandardFileSystems
import com.semmle.extractor.java.OdasaOutput
import com.semmle.extractor.java.OdasaOutput.TrapFileManager
import com.semmle.util.files.FileUtil
import kotlin.system.exitProcess
class KotlinExtractorExtension(private val invocationTrapFile: String, private val checkTrapIdentical: Boolean) : IrGenerationExtension {
@@ -40,10 +44,13 @@ class KotlinExtractorExtension(private val invocationTrapFile: String, private v
val logger = Logger(logCounter, tw)
logger.info("Extraction started")
logger.flush()
// FIXME: FileUtil expects a static global logger
// which should be provided by SLF4J's factory facility. For now we set it here.
FileUtil.logger = logger
val srcDir = File(System.getenv("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR").takeUnless { it.isNullOrEmpty() } ?: "kotlin-extractor/src")
srcDir.mkdirs()
moduleFragment.files.mapIndexed { index: Int, file: IrFile ->
val fileTrapWriter = FileTrapWriter(lm, invocationTrapFileBW, file)
val fileTrapWriter = SourceFileTrapWriter(lm, invocationTrapFileBW, file)
fileTrapWriter.writeCompilation_compiling_files(compilation, index, fileTrapWriter.fileId)
doFile(invocationTrapFile, fileTrapWriter, checkTrapIdentical, logCounter, trapDir, srcDir, file, pluginContext)
}
@@ -124,9 +131,11 @@ fun doFile(invocationTrapFile: String,
val trapTmpFile = File.createTempFile("$filePath.", ".trap.tmp", trapFileDir)
trapTmpFile.bufferedWriter().use { trapFileBW ->
trapFileBW.write("// Generated by invocation ${invocationTrapFile.replace("\n", "\n// ")}\n")
val tw = FileTrapWriter(TrapLabelManager(), trapFileBW, file)
val fileExtractor = KotlinFileExtractor(logger, tw, file, pluginContext)
val tw = SourceFileTrapWriter(TrapLabelManager(), trapFileBW, file)
val externalClassExtractor = ExternalClassExtractor(logger, file.path, pluginContext)
val fileExtractor = KotlinSourceFileExtractor(logger, tw, file, externalClassExtractor, pluginContext)
fileExtractor.extractFileContents(tw.fileId)
externalClassExtractor.extractExternalClasses()
}
if (checkTrapIdentical && trapFile.exists()) {
if(equivalentTrap(trapTmpFile, trapFile)) {
@@ -160,7 +169,51 @@ fun <T> fakeLabel(): Label<T> {
return IntLabel(0)
}
class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val file: IrFile, val pluginContext: IrPluginContext) {
/**
 * Collects classes referenced from the source file at [sourceFilePath] that are
 * defined outside it, and extracts each of them into its own TRAP file.
 *
 * Classes are queued with [extractLater] and processed by [extractExternalClasses].
 */
class ExternalClassExtractor(val logger: FileLogger, val sourceFilePath: String, val pluginContext: IrPluginContext) {

    /** Every class that has ever been queued; used to avoid queuing a class twice. */
    val externalClassesDone = HashSet<IrClass>()

    /** Classes queued but not yet processed. */
    val externalClassWorkList = ArrayList<IrClass>()

    /**
     * Queues [c] for extraction unless it was queued before.
     *
     * @return true if [c] was newly queued
     */
    fun extractLater(c: IrClass): Boolean {
        val ret = externalClassesDone.add(c)
        if (ret) externalClassWorkList.add(c)
        return ret
    }

    /**
     * Extracts every queued class, including classes that get queued while
     * earlier ones are being extracted.
     *
     * A class for which no TRAP file manager is available is skipped; the
     * remaining classes are still processed.
     */
    fun extractExternalClasses() {
        val output = OdasaOutput(false, logger)
        output.setCurrentSourceFile(File(sourceFilePath))
        do {
            // Work on a snapshot: extracting a class may queue further classes.
            val nextBatch = ArrayList<IrClass>(externalClassWorkList)
            externalClassWorkList.clear()
            nextBatch.forEach { irClass ->
                output.getTrapLockerForClassFile(irClass).useAC { locker ->
                    locker.getTrapFileManager().useAC { manager ->
                        // A bare `return` here would leave extractExternalClasses altogether
                        // (these lambdas are inlined), dropping the rest of the work list.
                        if (manager == null) {
                            logger.info("Skipping extracting class ${irClass.name}")
                        } else {
                            manager.getFile().bufferedWriter().use { trapFileBW ->
                                val tw = ClassFileTrapWriter(TrapLabelManager(), trapFileBW, getIrClassBinaryPath(irClass))
                                val fileExtractor = KotlinFileExtractor(logger, tw, manager, this, pluginContext)
                                fileExtractor.extractClassSource(irClass)
                            }
                        }
                    }
                }
            }
        } while (externalClassWorkList.isNotEmpty())
    }
}
class KotlinSourceFileExtractor(
logger: FileLogger,
tw: FileTrapWriter,
val file: IrFile,
externalClassExtractor: ExternalClassExtractor,
pluginContext: IrPluginContext) :
KotlinFileExtractor(logger, tw, null, externalClassExtractor, pluginContext) {
val fileClass by lazy {
extractFileClass(file)
}
@@ -171,7 +224,7 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
val pkgId = extractPackage(pkg)
tw.writeHasLocation(id, locId)
tw.writeCupackage(id, pkgId)
file.declarations.map { extractDeclaration(it) }
file.declarations.map { extractDeclaration(it, fileClass) }
CommentExtractor(this).extract()
}
@@ -207,6 +260,15 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
return id
}
}
open class KotlinFileExtractor(
val logger: FileLogger,
val tw: FileTrapWriter,
val dependencyCollector: TrapFileManager?,
val externalClassExtractor: ExternalClassExtractor,
val pluginContext: IrPluginContext) {
fun usePackage(pkg: String): Label<out DbPackage> {
return extractPackage(pkg)
}
@@ -219,14 +281,14 @@ class KotlinFileExtractor(val logger: FileLogger, val tw: FileTrapWriter, val fi
return id
}
fun extractDeclaration(declaration: IrDeclaration) {
fun extractDeclaration(declaration: IrDeclaration, parentId: Label<out DbReftype>) {
when (declaration) {
is IrClass -> extractClassSource(declaration)
is IrFunction -> extractFunction(declaration)
is IrFunction -> extractFunction(declaration, parentId)
is IrAnonymousInitializer -> {
// Leaving this intentionally empty. init blocks are extracted during class extraction.
}
is IrProperty -> extractProperty(declaration)
is IrProperty -> extractProperty(declaration, parentId)
else -> logger.warnElement(Severity.ErrorSevere, "Unrecognised IrDeclaration: " + declaration.javaClass, declaration)
}
}
@@ -476,6 +538,11 @@ class X {
return id
}
/**
 * Records [c] as a dependency with the [dependencyCollector] (when there is one)
 * and queues it with the [externalClassExtractor] for later extraction.
 */
fun extractExternalClassLater(c: IrClass) {
dependencyCollector?.addDependency(c)
externalClassExtractor.extractLater(c)
}
private fun getClassLabel(c: IrClass, typeArgs: List<IrTypeArgument>): String {
val pkg = c.packageFqName?.asString() ?: ""
val cls = c.name.asString()
@@ -548,7 +615,7 @@ class X {
// so for now we extract the source class for those too
if (c.origin == IrDeclarationOrigin.IR_EXTERNAL_DECLARATION_STUB ||
c.origin == IrDeclarationOrigin.IR_EXTERNAL_JAVA_DECLARATION_STUB) {
extractClassSource(c)
extractExternalClassLater(c)
}
})
}
@@ -598,7 +665,7 @@ class X {
extractClassCommon(c, id)
c.typeParameters.map { extractTypeParameter(it) }
c.declarations.map { extractDeclaration(it) }
c.declarations.map { extractDeclaration(it, id) }
extractObjectInitializerFunction(c, id)
return id
@@ -821,7 +888,7 @@ class X {
}
}
fun extractFunction(f: IrFunction) {
fun extractFunction(f: IrFunction, parentId: Label<out DbReftype>) {
currentFunction = f
f.typeParameters.map { extractTypeParameter(it) }
@@ -829,16 +896,6 @@ class X {
val locId = tw.getLocation(f)
val signature = "TODO"
val parent = f.parent
val parentId = when (parent) {
is IrClass -> useClassSource(parent)
is IrFile -> fileClass
else -> {
logger.warnElement(Severity.ErrorSevere, "Unrecognised function parent: " + parent.javaClass, parent)
fakeLabel()
}
}
val id: Label<out DbCallable>
if (f.symbol is IrConstructorSymbol) {
val returnTypeId = useTypeOld(erase(f.returnType))
@@ -880,7 +937,7 @@ class X {
return id
}
fun extractProperty(p: IrProperty) {
fun extractProperty(p: IrProperty, parentId: Label<out DbReftype>) {
val bf = p.backingField
if(bf == null) {
logger.warnElement(Severity.ErrorSevere, "IrProperty without backing field", p)
@@ -888,7 +945,6 @@ class X {
val id = useProperty(p)
val locId = tw.getLocation(p)
val typeId = useTypeOld(bf.type)
val parentId = if (p.parent is IrClass) useClassSource(p.parent as IrClass) else fileClass
tw.writeFields(id, p.name.asString(), typeId, parentId, id)
tw.writeHasLocation(id, locId)
}
@@ -1407,5 +1463,5 @@ class X {
tw.writeKtBreakContinueTargets(id, loopId)
}
}
}

View File

@@ -3,6 +3,7 @@ package com.github.codeql
import java.io.BufferedWriter
import java.io.File
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.IrFileEntry
import org.jetbrains.kotlin.ir.declarations.path
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.ir.declarations.IrVariable
@@ -60,14 +61,42 @@ open class TrapWriter (val lm: TrapLabelManager, val bw: BufferedWriter) {
}
}
class FileTrapWriter (
/**
 * Translates an offset into a line number and a column number.
 * Callers in this file add 1 to some of the results, so implementations are
 * presumably zero-based -- confirm against the implementation in use.
 */
abstract class SourceOffsetResolver {
/** Returns the line number for [offset]. */
abstract fun getLineNumber(offset: Int): Int
/** Returns the column number for [offset]. */
abstract fun getColumnNumber(offset: Int): Int
}
/**
 * A [SourceOffsetResolver] that answers from an [IrFileEntry], i.e. from the
 * line/column information the compiler holds for a source file.
 */
class FileSourceOffsetResolver(val fileEntry: IrFileEntry) : SourceOffsetResolver() {
    /** Returns the line number that [fileEntry] reports for [offset]. */
    override fun getLineNumber(offset: Int) = fileEntry.getLineNumber(offset)

    /** Returns the column number that [fileEntry] reports for [offset]. */
    override fun getColumnNumber(offset: Int) = fileEntry.getColumnNumber(offset)
}
/**
 * A [SourceOffsetResolver] with no source text behind it: every offset
 * resolves to line 0, column 0.
 */
object NullSourceOffsetResolver : SourceOffsetResolver() {
override fun getLineNumber(offset: Int) = 0
override fun getColumnNumber(offset: Int) = 0
}
class SourceFileTrapWriter (
lm: TrapLabelManager,
bw: BufferedWriter,
val irFile: IrFile
irFile: IrFile) :
FileTrapWriter(lm, bw, irFile.path, FileSourceOffsetResolver(irFile.fileEntry)) {
}
/**
 * A [FileTrapWriter] for the file at [filePath] that resolves offsets with
 * [NullSourceOffsetResolver], so every offset maps to line 0, column 0.
 */
class ClassFileTrapWriter (
lm: TrapLabelManager,
bw: BufferedWriter,
filePath: String) :
FileTrapWriter(lm, bw, filePath, NullSourceOffsetResolver) {
}
open class FileTrapWriter (
lm: TrapLabelManager,
bw: BufferedWriter,
val filePath: String,
val sourceOffsetResolver: SourceOffsetResolver
): TrapWriter (lm, bw) {
private val fileEntry = irFile.fileEntry
val fileId = {
val filePath = irFile.path
val fileLabel = "@\"$filePath;sourcefile\""
val id: Label<DbFile> = getLabelFor(fileLabel)
writeFiles(id, filePath)
@@ -87,24 +116,23 @@ class FileTrapWriter (
// be a zero-width location. QL doesn't support these, so we translate it
// into a one-width location.
val zeroWidthLoc = !unknownLoc && startOffset == endOffset
val startLine = if(unknownLoc) 0 else fileEntry.getLineNumber(startOffset) + 1
val startColumn = if(unknownLoc) 0 else fileEntry.getColumnNumber(startOffset) + 1
val endLine = if(unknownLoc) 0 else fileEntry.getLineNumber(endOffset) + 1
val endColumn = if(unknownLoc) 0 else fileEntry.getColumnNumber(endOffset)
val startLine = if(unknownLoc) 0 else sourceOffsetResolver.getLineNumber(startOffset) + 1
val startColumn = if(unknownLoc) 0 else sourceOffsetResolver.getColumnNumber(startOffset) + 1
val endLine = if(unknownLoc) 0 else sourceOffsetResolver.getLineNumber(endOffset) + 1
val endColumn = if(unknownLoc) 0 else sourceOffsetResolver.getColumnNumber(endOffset)
val endColumn2 = if(zeroWidthLoc) endColumn + 1 else endColumn
val locFileId: Label<DbFile> = if (unknownLoc) unknownFileId else fileId
return getLocation(locFileId, startLine, startColumn, endLine, endColumn2)
}
fun getLocationString(e: IrElement): String {
val path = irFile.path
if (e.startOffset == -1 && e.endOffset == -1) {
return "unknown location, while processing $path"
return "unknown location, while processing $filePath"
} else {
val startLine = fileEntry.getLineNumber(e.startOffset) + 1
val startColumn = fileEntry.getColumnNumber(e.startOffset) + 1
val endLine = fileEntry.getLineNumber(e.endOffset) + 1
val endColumn = fileEntry.getColumnNumber(e.endOffset)
return "file://$path:$startLine:$startColumn:$endLine:$endColumn"
val startLine = sourceOffsetResolver.getLineNumber(e.startOffset) + 1
val startColumn = sourceOffsetResolver.getColumnNumber(e.startOffset) + 1
val endLine = sourceOffsetResolver.getLineNumber(e.endOffset) + 1
val endColumn = sourceOffsetResolver.getColumnNumber(e.endOffset)
return "file://$filePath:$startLine:$startColumn:$endLine:$endColumn"
}
}
val variableLabelMapping: MutableMap<IrVariable, Label<out DbLocalvar>> = mutableMapOf<IrVariable, Label<out DbLocalvar>>()

View File

@@ -13,7 +13,7 @@ import org.jetbrains.kotlin.psi.KtVisitor
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
class CommentExtractor(private val fileExtractor: KotlinFileExtractor) {
class CommentExtractor(private val fileExtractor: KotlinSourceFileExtractor) {
private val file = fileExtractor.file
private val tw = fileExtractor.tw
private val logger = fileExtractor.logger
@@ -109,4 +109,3 @@ class CommentExtractor(private val fileExtractor: KotlinFileExtractor) {
})
}
}

View File

@@ -0,0 +1,43 @@
package com.github.codeql
// Functions copied from stdlib/jdk7/src/kotlin/AutoCloseable.kt, which is not available within kotlinc,
// but allows the `.use` pattern to be applied to JDK7 AutoCloseables:
/**
 * Executes the given [block] function on this resource and then closes it down correctly whether an exception
 * is thrown or not.
 *
 * If the resource is being closed because an exception occurred in [block], and closing also fails with an exception,
 * the latter is added to the [suppressed][java.lang.Throwable.addSuppressed] exceptions of the former.
 *
 * A `null` receiver is allowed: [block] is still invoked with it, and nothing is closed afterwards.
 *
 * @param block a function to process this [AutoCloseable] resource.
 * @return the result of [block] function invoked on this resource.
 */
public inline fun <T : AutoCloseable?, R> T.useAC(block: (T) -> R): R {
// Remembers the failure from [block], if any, so that closing can attach its own failure to it.
var exception: Throwable? = null
try {
return block(this)
} catch (e: Throwable) {
exception = e
throw e
} finally {
this.closeFinallyAC(exception)
}
}
/**
 * Closes this [AutoCloseable] on behalf of [useAC].
 *
 * - A `null` receiver is ignored.
 * - With no [cause], the resource is closed and a failure to close propagates to the caller.
 * - With a [cause] (the exception already in flight), a failure to close is not thrown but
 *   added to the suppressed exceptions of [cause].
 */
fun AutoCloseable?.closeFinallyAC(cause: Throwable?) {
    if (this == null) {
        return
    }
    if (cause == null) {
        close()
        return
    }
    try {
        close()
    } catch (closeException: Throwable) {
        cause.addSuppressed(closeException)
    }
}

View File

@@ -0,0 +1,47 @@
package com.github.codeql
import org.jetbrains.kotlin.ir.declarations.IrClass
import org.jetbrains.kotlin.ir.declarations.IrDeclaration
import org.jetbrains.kotlin.ir.declarations.IrDeclarationParent
import org.jetbrains.kotlin.ir.declarations.IrPackageFragment
import org.jetbrains.kotlin.load.java.sources.JavaSourceElement
import org.jetbrains.kotlin.load.java.structure.impl.classFiles.BinaryJavaClass
import org.jetbrains.kotlin.load.kotlin.KotlinJvmBinarySourceElement
// Adapted from the 'internalName' function in Kotlin's interpreter/Utils.kt.
// Builds the JLS section 13.1 binary name of a class: each enclosing class name is
// prepended with a '$' separator, and a non-empty package name is prepended with a '.'.
fun getClassBinaryName(that: IrClass): String {
    val binaryName = StringBuilder(that.name.asString())
    // Walk outwards from the class's immediate parent, prepending one segment per level.
    var cursor: IrDeclarationParent? = that.parent
    while (cursor != null) {
        val enclosing: IrDeclarationParent = cursor
        when (enclosing) {
            is IrClass -> binaryName.insert(0, enclosing.name.asString() + "$")
            is IrPackageFragment -> {
                val packageName = enclosing.fqName.asString()
                // The root (empty) package contributes no prefix.
                if (packageName.isNotEmpty()) {
                    binaryName.insert(0, "$packageName.")
                }
            }
        }
        // A parent that is not an IrDeclaration has no further parent to follow, so the walk ends.
        cursor = (enclosing as? IrDeclaration)?.parent
    }
    return binaryName.toString()
}
// Returns the location recorded for the binary that [irClass] was loaded from, or null when
// the class's source element is neither a Java binary class nor a Kotlin JVM binary class.
fun getRawIrClassBinaryPath(irClass: IrClass): String? =
    when (val origin = irClass.source) {
        // Java source elements only carry a file location when backed by a binary class file.
        is JavaSourceElement -> (origin.javaElement as? BinaryJavaClass)?.virtualFile?.getPath()
        is KotlinJvmBinarySourceElement -> origin.binaryClass.location
        else -> null
    }
// Returns a path identifying the binary for [irClass]. Never null: when no real location is
// known, a placeholder path derived from the class's binary name is returned instead.
fun getIrClassBinaryPath(irClass: IrClass): String {
    val knownLocation = getRawIrClassBinaryPath(irClass)
    if (knownLocation != null) {
        // A class location is known: replace the first JAR delimiter "!/" with a plain "/".
        return knownLocation.replaceFirst("!/", "/")
    }
    // No location is known: make up a fake one from the binary name, with '.' turned into '/'.
    val classFileStem = getClassBinaryName(irClass).replace(".", "/")
    return "/!unknown-binary-location/${classFileStem}.class"
}

View File

@@ -42,6 +42,15 @@ open class Logger(val logCounter: LogCounter, open val tw: TrapWriter) {
tw.writeTrap("// " + fullMsg.replace("\n", "\n//") + "\n")
println(fullMsg)
}
/**
 * Logs a trace-level message. This forwards [msg] unchanged to [info]; no separate
 * trace handling is done here.
 */
fun trace(msg: String) {
    info(msg)
}
/**
 * Logs a debug-level message. This forwards [msg] unchanged to [info]; no separate
 * debug handling is done here.
 */
fun debug(msg: String) {
    info(msg)
}
/**
 * Logs a trace-level message together with an exception. The text passed to [info] is
 * [msg], then the separator " // ", then the string form of [exn].
 */
fun trace(msg: String, exn: Exception) {
    info("$msg // $exn")
}
fun warn(severity: Severity, msg: String, locationString: String? = null, locationId: Label<DbLocation> = tw.unknownLocation, stackIndex: Int = 2) {
val st = Exception().stackTrace
val suffix =
@@ -63,6 +72,18 @@ open class Logger(val logCounter: LogCounter, open val tw: TrapWriter) {
val locStr = if (locationString == null) "" else "At " + locationString + ": "
print("$ts Warning: $locStr$msg\n$suffix")
}
/**
 * Logs a warning of severity [Severity.Warn] whose text is [msg], then the separator " // ",
 * then the string form of [exn]. Location arguments keep their defaults.
 */
fun warn(msg: String, exn: Exception) {
    // This convenience overload adds one stack frame between the real caller and the full
    // `warn` overload, so the frame index is bumped from its default of 2 to 3.
    // NOTE(review): assumes the full overload picks the caller from its captured stack trace
    // at `stackIndex` (its body is only partly visible here) - confirm.
    warn(Severity.Warn, msg + " // " + exn, stackIndex = 3)
}
/**
 * Logs [msg] as a warning of severity [Severity.Warn], with default location arguments.
 */
fun warn(msg: String) {
    // This convenience overload adds one stack frame between the real caller and the full
    // `warn` overload, so the frame index is bumped from its default of 2 to 3. Without this,
    // every warning routed through here would presumably be attributed to this wrapper and
    // share a single per-caller warning count.
    // NOTE(review): assumes the full overload picks the caller from its captured stack trace
    // at `stackIndex` (its body is only partly visible here) - confirm.
    warn(Severity.Warn, msg, stackIndex = 3)
}
/**
 * Logs [msg] through the warning machinery with severity [Severity.Error] and default
 * location arguments.
 */
fun error(msg: String) {
    // This convenience overload adds one stack frame between the real caller and the full
    // `warn` overload, so the frame index is bumped from its default of 2 to 3.
    // NOTE(review): assumes the full overload picks the caller from its captured stack trace
    // at `stackIndex` (its body is only partly visible here) - confirm.
    warn(Severity.Error, msg, stackIndex = 3)
}
/**
 * Logs an error whose text is [msg], then the separator " // ", then the string form of [exn].
 * The message is reported with severity [Severity.Error] and default location arguments.
 */
fun error(msg: String, exn: Exception) {
    // Calls the full `warn` overload directly rather than going through `error(msg)`, so that
    // exactly one wrapper frame sits between the real caller and `warn`; the frame index is
    // bumped from its default of 2 to 3 to skip that frame.
    // NOTE(review): assumes the full overload picks the caller from its captured stack trace
    // at `stackIndex` (its body is only partly visible here) - confirm.
    warn(Severity.Error, msg + " // " + exn, stackIndex = 3)
}
fun printLimitedWarningCounts() {
for((caller, count) in logCounter.warningCounts) {
if(count >= logCounter.warningLimit) {