Merge pull request #9109 from igfoo/igfoo/kotlin_merge

Initial Kotlin support
This commit is contained in:
Ian Lynagh
2022-05-11 16:16:22 +01:00
committed by GitHub
539 changed files with 55991 additions and 7783 deletions

10
java/kotlin-explorer/.gitignore vendored Normal file
View File

@@ -0,0 +1,10 @@
.classpath
.gradle
.idea
.project
.settings
bin/
build/
gradle/
gradlew
gradlew.bat

View File

@@ -0,0 +1,9 @@
This shows what is encoded in the kotlin.Metadata section, as seen in the
output of `javap -v SomeKotlinClass`.
It is not currently able to extract the information from .class files
itself; the values are hard coded in src/main/kotlin/Explorer.kt
Run `gradle run` in this directory to run it.

View File

@@ -0,0 +1,28 @@
// Build script for the kotlin-explorer debugging tool.
plugins {
    // kotlinVersion is defined in gradle.properties
    id 'org.jetbrains.kotlin.jvm' version "${kotlinVersion}"
    id 'org.jetbrains.dokka' version '1.4.32'
    id "com.vanniktech.maven.publish" version '0.15.1'
    id 'application'
}
group 'com.github.codeql'
version '0.0.1'
dependencies {
    implementation "org.jetbrains.kotlin:kotlin-stdlib"
    // kotlinx-metadata-jvm provides the KotlinClassHeader/Km*Visitor APIs used by Explorer.kt
    implementation "org.jetbrains.kotlinx:kotlinx-metadata-jvm:0.3.0"
}
repositories {
    mavenCentral()
}
tasks.withType(org.jetbrains.kotlin.gradle.tasks.KotlinCompile).configureEach {
    kotlinOptions {
        jvmTarget = "1.8"
    }
}
application {
    // Entry point: the top-level main() in Explorer.kt
    mainClass = 'com.github.codeql.ExplorerKt'
}

View File

@@ -0,0 +1,7 @@
kotlin.code.style=official
kotlinVersion=1.5.21
GROUP=com.github.codeql
VERSION_NAME=0.0.1
POM_DESCRIPTION=CodeQL Kotlin explorer

View File

@@ -0,0 +1,8 @@
pluginManagement {
repositories {
mavenCentral()
gradlePluginPortal()
}
}
rootProject.name = 'codeql-kotlin-explorer'

View File

@@ -0,0 +1,217 @@
package com.github.codeql
import kotlinx.metadata.internal.metadata.jvm.deserialization.JvmMetadataVersion
import kotlinx.metadata.jvm.*
import kotlinx.metadata.*
/**
 * Demonstrates decoding a kotlin.Metadata annotation with kotlinx-metadata-jvm.
 *
 * The header values below are hard-coded, copied from `javap -v` output (see
 * the comment inside); the parsed metadata is walked with the My*Visitor
 * classes, which print an indented trace of what they encounter.
 */
fun main(args : Array<String>) {
    /*
    Values from `javap -v` on TestKt.class from:
    class MyClass {}
    class MyParamClass<T> {}
    fun f(x: MyClass, y: MyClass?,
        l1: MyParamClass<MyClass>,
        l2: MyParamClass<MyClass?>,
        l3: MyParamClass<MyClass>?,
        l4: MyParamClass<MyClass?>?) {
    }
    */
    val kind = 2 // presumably the FileFacade kind, given the branch taken below — confirm against KotlinClassHeader docs
    val metadataVersion = intArrayOf(1, 5, 1)
    // data1/data2: opaque payload copied verbatim from the javap output.
    val data1 = arrayOf("\u0000\u0018\n\u0000\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\u001aX\u0010\u0000\u001a\u00020\u00012\u0006\u0010\u0002\u001a\u00020\u00032\b\u0010\u0004\u001a\u0004\u0018\u00010\u00032\u000c\u0010\u0005\u001a\b\u0012\u0004\u0012\u00020\u00030\u00062\u000e\u0010\u0007\u001a\n\u0012\u0006\u0012\u0004\u0018\u00010\u00030\u00062\u000e\u0010\b\u001a\n\u0012\u0004\u0012\u00020\u0003\u0018\u00010\u00062\u0010\u0010\t\u001a\u000c\u0012\u0006\u0012\u0004\u0018\u00010\u0003\u0018\u00010\u0006")
    val data2 = arrayOf("f","","x","LMyClass;","y","l1","LMyParamClass;","l2","l3","l4")
    val extraString = null
    val packageName = null
    val extraInt = 48
    val kch = KotlinClassHeader(kind, metadataVersion, data1, data2, extraString, packageName, extraInt)
    val md = KotlinClassMetadata.read(kch)
    // Only FileFacade metadata is walked; every other kind is just reported.
    when (md) {
        is KotlinClassMetadata.Class -> println("Metadata for Class not yet supported")
        is KotlinClassMetadata.FileFacade -> {
            println("Metadata for FileFacade:")
            val kmp = md.toKmPackage()
            kmp.accept(MyPackageVisitor(0))
        }
        is KotlinClassMetadata.SyntheticClass -> println("Metadata for SyntheticClass not yet supported")
        is KotlinClassMetadata.MultiFileClassFacade -> println("Metadata for MultiFileClassFacade not yet supported")
        is KotlinClassMetadata.MultiFileClassPart -> println("Metadata for MultiFileClassPart not yet supported")
        is KotlinClassMetadata.Unknown -> println("Unknown kind")
        else -> println("Unexpected kind") // also covers a null result from read() — confirm
    }
}
/** Prints [s] preceded by [indent] spaces. */
fun pr(indent: Int, s: String) {
    val padding = " ".repeat(indent)
    println(padding + s)
}
/** Prints package-level declarations from the metadata, indented by [indent]. */
class MyPackageVisitor(val indent: Int): KmPackageVisitor() {
    override fun visitFunction(flags: Flags, name: String): KmFunctionVisitor? {
        pr(indent, "=> Function; flags:$flags, name:$name")
        // Descend into the function with one extra level of indentation.
        return MyFunctionVisitor(indent + 1)
    }
    override fun visitProperty(flags: Flags, name: String, getterFlags: Flags, setterFlags: Flags): KmPropertyVisitor? {
        pr(indent, "=> Properties not yet handled")
        return null
    }
    override fun visitTypeAlias(flags: Flags, name: String): KmTypeAliasVisitor? {
        pr(indent, "=> Type aliases not yet handled")
        return null
    }
    override fun visitExtensions(type: KmExtensionType): KmPackageExtensionVisitor? {
        pr(indent, "=> Package extensions; type:$type")
        when (type) {
            // Only the JVM flavour of package extensions is understood.
            JvmPackageExtensionVisitor.TYPE -> return MyJvmPackageExtensionVisitor(indent + 1)
            else -> {
                pr(indent, "- Not yet handled")
                return null
            }
        }
    }
}
/** Prints the parts of a function's metadata, indented by [indent]. */
class MyFunctionVisitor(val indent: Int): KmFunctionVisitor() {
    override fun visitTypeParameter(flags: Flags, name: String, id: Int, variance: KmVariance): KmTypeParameterVisitor? {
        pr(indent, "=> Type parameter; flags:$flags, name:$name, id:$id, variance:$variance")
        pr(indent, " -> Not yet handled")
        return null
    }
    override fun visitReceiverParameterType(flags: Flags): KmTypeVisitor? {
        pr(indent, "=> Receiver parameter type; flags:$flags")
        pr(indent, " -> Not yet handled")
        return null
    }
    override fun visitValueParameter(flags: Flags, name: String): KmValueParameterVisitor? {
        pr(indent, "=> Value parameter; flags:$flags, name:$name")
        return MyValueParameterVisitor(indent + 1)
    }
    override fun visitReturnType(flags: Flags): KmTypeVisitor? {
        pr(indent, "=> Return type; flags:$flags")
        return MyTypeVisitor(indent + 1)
    }
    override fun visitVersionRequirement(): KmVersionRequirementVisitor? {
        pr(indent, "=> VersionRequirement not yet handled")
        return null
    }
    override fun visitContract(): KmContractVisitor? {
        pr(indent, "=> Contract not yet handled")
        return null
    }
    override fun visitExtensions(type: KmExtensionType): KmFunctionExtensionVisitor? {
        pr(indent, "=> Function extensions; type:$type")
        when (type) {
            // Only the JVM flavour of function extensions is understood.
            JvmFunctionExtensionVisitor.TYPE -> return MyJvmFunctionExtensionVisitor(indent + 1)
            else -> {
                pr(indent, "- Not yet handled")
                return null
            }
        }
    }
}
/** Prints a value parameter's metadata, indented by [indent]. */
class MyValueParameterVisitor(val indent: Int): KmValueParameterVisitor() {
    override fun visitType(flags: Flags): KmTypeVisitor? {
        pr(indent, "=> Type; flags:$flags")
        return MyTypeVisitor(indent + 1)
    }
    override fun visitVarargElementType(flags: Flags): KmTypeVisitor? {
        pr(indent, "=> VarargElementType not yet handled")
        return null
    }
    override fun visitExtensions(type: KmExtensionType): KmValueParameterExtensionVisitor? {
        pr(indent, "=> Value parameter extensions; type:$type; not yet handled")
        return null
    }
}
/** Prints the structure of a type in the metadata, indented by [indent]. */
class MyTypeVisitor(val indent: Int): KmTypeVisitor() {
    override fun visitClass(name: ClassName) {
        pr(indent, "=> Class; name:$name")
    }
    override fun visitTypeAlias(name: ClassName) {
        pr(indent, "=> Type alias; name:$name")
    }
    override fun visitTypeParameter(id: Int) {
        pr(indent, "=> Type parameter; id:$id")
    }
    override fun visitArgument(flags: Flags, variance: KmVariance): KmTypeVisitor? {
        pr(indent, "=> Argument; flags:$flags, variance:$variance")
        // Type arguments are themselves types: recurse with extra indent.
        return MyTypeVisitor(indent + 1)
    }
    override fun visitStarProjection() {
        pr(indent, "=> Star projection")
    }
    override fun visitAbbreviatedType(flags: Flags): KmTypeVisitor? {
        pr(indent, "=> AbbreviatedType not yet handled")
        return null
    }
    override fun visitOuterType(flags: Flags): KmTypeVisitor? {
        pr(indent, "=> OuterType not yet handled")
        return null
    }
    override fun visitFlexibleTypeUpperBound(flags: Flags, typeFlexibilityId: String?): KmTypeVisitor? {
        pr(indent, "=> FlexibleTypeUpperBound not yet handled")
        return null
    }
    override fun visitExtensions(type: KmExtensionType): KmTypeExtensionVisitor? {
        pr(indent, "=> Type extensions; type:$type")
        when (type) {
            // Only the JVM flavour of type extensions is understood.
            JvmTypeExtensionVisitor.TYPE -> return MyJvmTypeExtensionVisitor(indent + 1)
            else -> {
                pr(indent, "- Not yet handled")
                return null
            }
        }
    }
}
/** Prints the JVM-specific extensions of a type, indented by [indent]. */
class MyJvmTypeExtensionVisitor(val indent: Int): JvmTypeExtensionVisitor() {
    override fun visit(isRaw: Boolean) {
        pr(indent, "=> isRaw:$isRaw")
    }
    override fun visitAnnotation(annotation: KmAnnotation) {
        pr(indent, "=> Annotation; annotation:$annotation")
    }
}
/** Prints the JVM-specific extensions of a package, indented by [indent]. */
class MyJvmPackageExtensionVisitor(val indent: Int): JvmPackageExtensionVisitor() {
    override fun visitLocalDelegatedProperty(flags: Flags, name: String, getterFlags: Flags, setterFlags: Flags): KmPropertyVisitor? {
        pr(indent, "=> Local delegate not yet handled")
        return null
    }
    override fun visitModuleName(name: String) {
        pr(indent, "=> Module name; name:$name")
    }
}
/** Prints the JVM-specific extensions of a function, indented by [indent]. */
class MyJvmFunctionExtensionVisitor(val indent: Int): JvmFunctionExtensionVisitor() {
    override fun visit(signature: JvmMethodSignature?) {
        pr(indent, "=> signature:$signature")
    }
    override fun visitLambdaClassOriginName(internalName: String) {
        pr(indent, "=> LambdaClassOriginName; internalName:$internalName")
    }
}

12
java/kotlin-extractor/.gitignore vendored Normal file
View File

@@ -0,0 +1,12 @@
.classpath
.gradle
.idea
.project
.settings
bin/
build/
gradle/
gradlew
gradlew.bat
src/main/kotlin/KotlinExtractorDbScheme.kt

View File

@@ -0,0 +1,53 @@
// Build script for the CodeQL Kotlin extractor compiler plugin.
plugins {
    // kotlinVersion is defined in gradle.properties
    id 'org.jetbrains.kotlin.jvm' version "${kotlinVersion}"
    id 'org.jetbrains.dokka' version '1.4.32'
}
group 'com.github.codeql'
version '0.0.1'
dependencies {
    implementation "org.jetbrains.kotlin:kotlin-stdlib"
    // compileOnly: the compiler API is supplied at runtime by the hosting kotlinc
    compileOnly("org.jetbrains.kotlin:kotlin-compiler")
}
repositories {
    mavenCentral()
}
tasks.withType(org.jetbrains.kotlin.gradle.tasks.KotlinCompile).configureEach {
    kotlinOptions {
        jvmTarget = "1.8"
        freeCompilerArgs += "-Xopt-in=kotlin.RequiresOptIn"
        // enable the below for building with kotlinVersion=1.4.32:
        // languageVersion = "1.5"
    }
}
sourceSets {
    main {
        kotlin {
            // change the excludes for building with other versions:
            excludes = [
                "utils/versions/v_1_4_32/*.kt",
                "utils/versions/v_1_5_31/*.kt",
                "utils/versions/v_1_6_10/*.kt"]
        }
    }
}
jar {
    // OUTPUT_JAR_NAME is defined in gradle.properties
    archiveName = "${OUTPUT_JAR_NAME}"
}
// Used by build.py (get_gradle_lib_folder) to locate gradle's home directory.
task getHomeDir {
    doLast {
        println gradle.gradleHomeDir
    }
}
java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(8))
    }
}

206
java/kotlin-extractor/build.py Executable file
View File

@@ -0,0 +1,206 @@
#!/usr/bin/env python3
import argparse
import kotlin_plugin_versions
import glob
import platform
import re
import subprocess
import shutil
import os
import os.path
import sys
import shlex
def parse_args():
    """Parse the build script's command-line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dependencies', default='../../../resources/kotlin-dependencies',
                        help='Folder containing the dependencies')
    # --many/--single are mutually exclusive flags sharing the `many` destination.
    parser.add_argument('--many', action='store_true',
                        help='Build for all versions/kinds')
    parser.add_argument('--single', action='store_false',
                        dest='many', help='Build for a single version/kind')
    return parser.parse_args()

# Parsed once at import time; used by the module-level globals below.
args = parse_args()
def is_windows():
    """True when running on native Windows or under Cygwin."""
    system = platform.system()
    return system == 'Windows' or system.startswith('CYGWIN')
# Tool names and dependency location, derived from the platform and arguments.
kotlinc = 'kotlinc.bat' if is_windows() else 'kotlinc'
javac = 'javac'
kotlin_dependency_folder = args.dependencies
def quote_for_batch(arg):
    """Quote a single argument for a Windows batch/cmd command line.

    Arguments containing characters that cmd.exe would split on (';', '=',
    or a space) are wrapped in double quotes. Embedded double quotes cannot
    be escaped reliably, so such arguments are rejected.

    Raises:
        Exception: if `arg` needs quoting but already contains a double quote.
    """
    # Fix: arguments containing spaces were previously left unquoted, which
    # splits them apart when the joined command string reaches cmd.exe.
    if ';' in arg or '=' in arg or ' ' in arg:
        if '"' in arg:
            raise Exception('Need to quote something containing a quote')
        return '"' + arg + '"'
    else:
        return arg
def run_process(cmd, capture_output=False):
    """Run `cmd` (a list of arguments), echoing it first.

    On Windows the list is joined into a single batch-quoted string. On
    failure, diagnostics (cwd, command, and captured output if any) are
    written to stderr before the CalledProcessError is re-raised.
    """
    print("Running command: " + shlex.join(cmd))
    if is_windows():
        cmd = ' '.join(map(quote_for_batch, cmd))
        print("Converted to Windows command: " + cmd)
    try:
        return subprocess.run(cmd, check=True, capture_output=capture_output)
    except subprocess.CalledProcessError as failure:
        print("In: " + os.getcwd(), file=sys.stderr)
        shell_cmd = cmd if is_windows() else shlex.join(cmd)
        print("Command failed: " + shell_cmd, file=sys.stderr)
        if capture_output:
            for label, data in (("stdout", failure.stdout), ("stderr", failure.stderr)):
                print(label + " output:\n" + data.decode(encoding='UTF-8',
                                                         errors='replace'), file=sys.stderr)
        raise failure
def compile_to_dir(srcs, classpath, java_classpath, output):
    """Compile mixed Kotlin/Java sources into the `output` class directory."""
    # Use kotlinc to compile .kt files:
    # Note: the full source list, .java files included, is passed to kotlinc —
    # presumably so Kotlin code can resolve Java symbols; confirm.
    run_process([kotlinc,
                 # kotlinc can default to 256M, which isn't enough when we are extracting the build
                 '-J-Xmx2G',
                 '-Xopt-in=kotlin.RequiresOptIn',
                 '-d', output,
                 '-module-name', 'codeql-kotlin-extractor',
                 '-no-reflect', '-no-stdlib',
                 '-jvm-target', '1.8',
                 '-classpath', classpath] + srcs)
    # Use javac to compile .java files, referencing the Kotlin class files:
    run_process([javac,
                 '-d', output,
                 '-source', '8', '-target', '8',
                 '-classpath', os.path.pathsep.join([output, classpath, java_classpath])] + [s for s in srcs if s.endswith(".java")])
def compile_to_jar(srcs, classpath, java_classpath, output):
    """Compile sources into a fresh build directory and package them as a jar."""
    classes_dir = 'build/classes'
    if os.path.exists(classes_dir):
        shutil.rmtree(classes_dir)
    os.makedirs(classes_dir)
    compile_to_dir(srcs, classpath, java_classpath, classes_dir)
    run_process(['jar', 'cf', output,
                 '-C', classes_dir, '.',
                 '-C', 'src/main/resources', 'META-INF'])
    shutil.rmtree(classes_dir)
def find_sources(path):
    """All .kt sources under `path` (recursively), followed by all .java sources."""
    sources = []
    for extension in ('kt', 'java'):
        sources += glob.glob(path + '/**/*.' + extension, recursive=True)
    return sources
def get_kotlin_lib_folder():
    """Locate the installed kotlinc's home directory and return its lib folder.

    Parses the 'logging: using Kotlin home directory ...' line that
    `kotlinc -version -verbose` writes to stderr.

    Raises:
        Exception: if the logging line cannot be found in the output.
    """
    x = run_process([kotlinc, '-version', '-verbose'], capture_output=True)
    output = x.stderr.decode(encoding='UTF-8', errors='strict')
    # Fix: the previous `re.match(r'.*\n...')` (no DOTALL) only matched when
    # the logging line was exactly the second line of output; search all
    # lines instead, and exclude '\r' so a trailing CR is not captured.
    m = re.search(r'^logging: using Kotlin home directory ([^\r\n]+)$',
                  output, re.MULTILINE)
    if m is None:
        raise Exception('Cannot determine kotlinc home directory')
    kotlin_home = m.group(1)
    print("Kotlin home directory: " + kotlin_home)
    return kotlin_home + '/lib'
def get_gradle_lib_folder():
    """Locate gradle's home directory via the getHomeDir task and return its lib folder."""
    proc = run_process(['gradle', 'getHomeDir'], capture_output=True)
    stdout = proc.stdout.decode(encoding='UTF-8', errors='strict')
    # The task prints the home directory on the line after its banner.
    match = re.search(r'(?m)^> Task :getHomeDir\n([^\n]+)$', stdout)
    if match is None:
        print("gradle getHomeDir output:\n" + stdout, file=sys.stderr)
        raise Exception('Cannot determine gradle home directory')
    gradle_home = match.group(1)
    print("Gradle home directory: " + gradle_home)
    return gradle_home + '/lib'
def find_jar(path, pattern):
    """Jar files directly under `path` whose names start with `pattern`.

    Raises:
        Exception: if no matching jar exists.
    """
    matches = glob.glob(path + '/' + pattern + '*.jar')
    if not matches:
        raise Exception('Cannot find jar file %s under path %s' %
                        (pattern, path))
    return matches
def patterns_to_classpath(path, patterns):
    """Build a platform classpath string from all jars matching the given patterns."""
    jars = []
    for jar_pattern in patterns:
        jars.extend(find_jar(path, jar_pattern))
    return os.path.pathsep.join(jars)
def transform_to_embeddable(srcs):
    """Rewrite `com.intellij` imports in each source file to the shaded package
    used by the embeddable compiler, in place."""
    for src in srcs:
        with open(src, 'r') as f:
            original = f.read()
        rewritten = original.replace('import com.intellij',
                                     'import org.jetbrains.kotlin.com.intellij')
        with open(src, 'w') as f:
            f.write(rewritten)
def compile(jars, java_jars, dependency_folder, transform_to_embeddable, output, tmp_dir, version):
    """Copy sources into `tmp_dir`, prune other versions' sources, apply the
    given source transformation, and build the extractor jar `output`."""
    classpath = patterns_to_classpath(dependency_folder, jars)
    java_classpath = patterns_to_classpath(dependency_folder, java_jars)
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    shutil.copytree('src', tmp_dir)
    # Keep only the version-specific sources matching `version`.
    for v in kotlin_plugin_versions.many_versions:
        if v != version:
            shutil.rmtree(
                tmp_dir + '/main/kotlin/utils/versions/v_' + v.replace('.', '_'))
    srcs = find_sources(tmp_dir)
    # Note: this parameter shadows the module-level function of the same name.
    transform_to_embeddable(srcs)
    compile_to_jar(srcs, classpath, java_classpath, output)
    shutil.rmtree(tmp_dir)
def compile_embeddable(version):
    """Build the extractor jar against the embeddable (shaded) compiler for `version`."""
    compile(['kotlin-stdlib-' + version, 'kotlin-compiler-embeddable-' + version],
            ['kotlin-stdlib-' + version],
            kotlin_dependency_folder,
            transform_to_embeddable,
            'codeql-extractor-kotlin-embeddable-%s.jar' % (version),
            'build/temp_src',
            version)
def compile_standalone(version):
    """Build the extractor jar against the standalone compiler for `version`
    (no source transformation needed)."""
    compile(['kotlin-stdlib-' + version, 'kotlin-compiler-' + version],
            ['kotlin-stdlib-' + version],
            kotlin_dependency_folder,
            lambda srcs: None,
            'codeql-extractor-kotlin-standalone-%s.jar' % (version),
            'build/temp_src',
            version)
# Entry point: build every known version (standalone + embeddable) with
# --many, or just the single version matching the installed kotlinc.
if args.many:
    for version in kotlin_plugin_versions.many_versions:
        compile_standalone(version)
        compile_embeddable(version)
else:
    compile_standalone(kotlin_plugin_versions.get_single_version())

View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
import re
import sys
# Accumulators filled in by parse_dbscheme():
#   enums:  name -> (kind column name, [(tag number, leaf type name), ...])
#   unions: name -> list of member type names (without the '@')
#   tables: relation name -> [(column name, db type, 'ref' or ''), ...]
enums = {}
unions = {}
tables = {}
# Path to the dbscheme; overridable as the first CLI argument.
dbscheme = sys.argv[1] if len(sys.argv) >= 2 else '../ql/lib/config/semmlecode.dbscheme'
def parse_dbscheme(filename):
    """Populate the global enums/unions/tables dicts from a dbscheme file."""
    with open(filename, 'r') as f:
        dbscheme = f.read()
    # Remove comments
    dbscheme = re.sub(r'/\*.*?\*/', '', dbscheme, flags=re.DOTALL)
    dbscheme = re.sub(r'//[^\r\n]*', '', dbscheme)
    # kind enums: `case @type.kind of 0 = @a | 1 = @b ... ;`
    for name, kind, body in re.findall(r'case\s+@([^.\s]*)\.([^.\s]*)\s+of\b(.*?);',
                                       dbscheme,
                                       flags=re.DOTALL):
        mapping = []
        for num, typ in re.findall(r'(\d+)\s*=\s*@(\S+)', body):
            mapping.append((int(num), typ))
        enums[name] = (kind, mapping)
    # unions: `@u = @a | @b | ...`
    for name, rhs in re.findall(r'@(\w+)\s*=\s*(@\w+(?:\s*\|\s*@\w+)*)',
                                dbscheme,
                                flags=re.DOTALL):
        typs = re.findall(r'@(\w+)', rhs)
        unions[name] = typs
    # tables: a relation name at the start of a line, followed by its columns
    for relname, body in re.findall('\n([\w_]+)(\([^)]*\))',
                                    dbscheme,
                                    flags=re.DOTALL):
        columns = list(re.findall('(\S+)\s*:\s*([^\s,]+)(?:\s+(ref)|)', body))
        tables[relname] = columns
parse_dbscheme(dbscheme)
# A union with exactly one member is treated as an alias for that member;
# chase one level of indirection so aliases-of-aliases resolve further.
# NOTE(review): resolution depends on dict iteration order — an alias defined
# before its target union would not be flattened; confirm this is acceptable.
type_aliases = {}
for alias, typs in unions.items():
    if len(typs) == 1:
        real = typs[0]
        if real in type_aliases:
            real = type_aliases[real]
        type_aliases[alias] = real
def unalias(t):
    # Resolve a single-member-union alias to its underlying type name,
    # leaving non-aliases untouched.
    if t in type_aliases:
        return type_aliases[t]
    return t
# type_leaf: concrete (non-union) type names; type_union: union -> direct members.
type_leaf = set()
type_union = {}
for name, (kind, mapping) in enums.items():
    s = set()
    for num, typ in mapping:
        s.add(typ)
        type_leaf.add(typ)
    type_union[name] = s
for name, typs in unions.items():
    if name not in type_aliases:
        type_union[name] = set(map(unalias, typs))
# Any @type used as a non-ref column that isn't an enum must itself be a leaf.
for relname, columns in tables.items():
    for _, db_type, ref in columns:
        if db_type[0] == '@' and ref == '':
            db_type_name = db_type[1:]
            if db_type_name not in enums:
                type_leaf.add(db_type_name)
# type_union_of_leaves: union name -> the set of leaf types transitively under it.
type_union_of_leaves = {}
def to_leaves(t):
    """Recursively flatten union `t` to its leaf types, memoising the result."""
    if t not in type_union_of_leaves:
        xs = type_union[t]
        leaves = set()
        for x in xs:
            if x in type_leaf:
                leaves.add(x)
            else:
                to_leaves(x)
                leaves.update(type_union_of_leaves[x])
        type_union_of_leaves[t] = leaves
for t in type_union:
    to_leaves(t)
# supertypes: type name -> every union whose flattened leaf set contains it.
supertypes = {}
for t in type_leaf:
    supers = set()
    for sup, s in type_union_of_leaves.items():
        if t in s:
            supers.add(sup)
    supertypes[t] = supers
# A union's supertypes are the other unions whose leaf sets contain its own.
# NOTE(review): two distinct unions with identical leaf sets would each list
# the other as a supertype — confirm the dbscheme contains no such pairs.
for t, leaves in type_union_of_leaves.items():
    supers = set()
    for sup, s in type_union_of_leaves.items():
        if t != sup and leaves.issubset(s):
            supers.add(sup)
    supertypes[t] = supers
def upperFirst(string):
    """Capitalise the first character of `string`, leaving the rest untouched.

    Fix: guard against the empty string, which previously raised IndexError.
    """
    if not string:
        return string
    return string[0].upper() + string[1:]
def genTable(kt, relname, columns, enum = None, kind = None, num = None, typ = None):
    """Emit one Kotlin `writeXxx` extension on TrapWriter for a relation.

    When the relation has a kind-enum column (`enum`), a specialised writer is
    generated per leaf type `typ`: the kind column is omitted from the
    parameter list and its numeric tag (`num`) is written as a literal.
    """
    kt.write('fun TrapWriter.write' + upperFirst(relname))
    if kind is not None:
        kt.write('_' + typ)
    kt.write('(')
    # Parameter list: map dbscheme column types onto Kotlin parameter types.
    for colname, db_type, _ in columns:
        if colname != kind:
            kt.write(colname + ': ')
            if db_type == 'int':
                kt.write('Int')
            elif db_type == 'float':
                kt.write('Double')
            elif db_type == 'string':
                kt.write('String')
            elif db_type == 'date':
                kt.write('Date')
            elif db_type == 'boolean':
                kt.write('Boolean')
            elif db_type[0] == '@':
                label = db_type[1:]
                # The enum column's own type is narrowed to the specific leaf.
                if label == enum:
                    label = typ
                kt.write('Label<out Db' + upperFirst(label) + '>')
            else:
                raise Exception('Bad db_type: ' + db_type)
            kt.write(', ')
    kt.write(') {\n')
    # Body: a single writeTrap call with the TRAP encoding of each column.
    kt.write(' this.writeTrap("' + relname + '(')
    comma = ''
    for colname, db_type, _ in columns:
        kt.write(comma)
        if colname == kind:
            kt.write(str(num))
        elif db_type == 'string':
            # Strings are truncated, escaped, and quoted in TRAP.
            kt.write('\\"${this.escapeTrapString(this.truncateString(' + colname + '))}\\"')
        elif db_type == 'date':
            kt.write('D\\"${' + colname + '}\\"')
        else:
            kt.write('$' + colname)
        comma = ', '
    kt.write(')\\n")\n')
    kt.write('}\n')
# Generate KotlinExtractorDbScheme.kt: one writeXxx function per relation
# (or per kind for enum relations), plus the Db* interface hierarchy.
with open('src/main/kotlin/KotlinExtractorDbScheme.kt', 'w') as kt:
    kt.write('/* Generated by ' + sys.argv[0] + ': Do not edit manually. */\n')
    kt.write('package com.github.codeql\n')
    kt.write('import java.util.Date\n')
    for relname, columns in tables.items():
        # Find the relation's kind-enum column, if any.
        enum = None
        for _, db_type, ref in columns:
            if db_type[0] == '@' and ref == '':
                db_type_name = db_type[1:]
                if db_type_name in enums:
                    enum = db_type_name
        if enum is None:
            genTable(kt, relname, columns)
        else:
            (kind, mapping) = enums[enum]
            for num, typ in mapping:
                genTable(kt, relname, columns, enum, kind, num, typ)
    # Emit the Db* interface hierarchy from the computed supertype relation.
    for typ in sorted(supertypes):
        kt.write('sealed interface Db' + upperFirst(typ))
        # Sorting makes the output deterministic.
        names = sorted(supertypes[typ])
        if names:
            kt.write(': ')
            kt.write(', '.join(map(lambda name: 'Db' + upperFirst(name), names)))
        kt.write('\n')
    for alias in sorted(type_aliases):
        kt.write('typealias Db' + upperFirst(alias) + ' = Db' + upperFirst(type_aliases[alias]) + '\n')

View File

@@ -0,0 +1,8 @@
kotlin.code.style=official
kotlinVersion=1.6.20
GROUP=com.github.codeql
VERSION_NAME=0.0.1
POM_DESCRIPTION=CodeQL Kotlin extractor
OUTPUT_JAR_NAME=codeql-extractor-kotlin.jar

View File

@@ -0,0 +1,62 @@
import platform
import re
import subprocess
import sys
def is_windows():
    """True when running on native Windows or under Cygwin."""
    system = platform.system()
    return system == 'Windows' or system.startswith('CYGWIN')
def version_tuple_to_string(version):
    """Render an (X, Y, Z) version tuple as 'X.Y.Z'."""
    major, minor, patch = version[0], version[1], version[2]
    return '%d.%d.%d' % (major, minor, patch)
def version_string_to_tuple(version):
    """Parse an 'X.Y.Z' version string into an (X, Y, Z) tuple of ints."""
    parts = re.match(r'([0-9]+)\.([0-9]+)\.([0-9]+)', version).groups()
    return tuple(int(part) for part in parts)
# All supported kotlinc versions, oldest first (get_single_version relies on
# ascending order when picking a fallback), plus the same list as tuples.
many_versions = [ '1.4.32', '1.5.31', '1.6.10', '1.6.20' ]
many_versions_tuples = [version_string_to_tuple(v) for v in many_versions]
def get_single_version(fakeVersionOutput = None):
    """Pick the best supported plugin version for the installed kotlinc.

    Parses `kotlinc -version` output (or `fakeVersionOutput`, for testing) and
    returns the smallest known version with the same major.minor that is >= the
    installed version, falling back to the newest known version for that
    major.minor when the installed compiler is newer than all of them.

    Raises:
        Exception: if the version cannot be parsed, or no known version shares
            the installed compiler's major.minor.
    """
    # TODO: `shell=True` is a workaround to get CI working on Windows. It breaks the build on Linux.
    versionOutput = subprocess.run(['kotlinc', '-version'], capture_output=True, text=True, shell=is_windows()).stderr if fakeVersionOutput is None else fakeVersionOutput
    m = re.match(r'.* kotlinc-jvm ([0-9]+\.[0-9]+\.[0-9]+) .*', versionOutput)
    if m is None:
        raise Exception('Cannot detect version of kotlinc (got ' + str(versionOutput) + ')')
    current_version = version_string_to_tuple(m.group(1))
    matching_minor_versions = [ version for version in many_versions_tuples if version[0:2] == current_version[0:2] ]
    if len(matching_minor_versions) == 0:
        raise Exception(f'Cannot find a matching minor version for kotlinc version {current_version} (got {versionOutput}; know about {str(many_versions)})')
    matching_minor_versions.sort()
    for version in matching_minor_versions:
        if version >= current_version:
            return version_tuple_to_string(version)
    # Installed compiler is newer than every known version with this
    # major.minor: use the newest one we know about.
    # (Fix: removed an unreachable `raise` that followed this unconditional
    # return; the empty-list case is already handled above.)
    return version_tuple_to_string(matching_minor_versions[-1])
def get_latest_url():
    """Download URL of the kotlin-compiler zip for the newest known version."""
    version = many_versions[-1]
    return ('https://github.com/JetBrains/kotlin/releases/download/v' + version
            + '/kotlin-compiler-' + version + '.zip')
if __name__ == "__main__":
    # Tiny CLI so shell scripts can query this module:
    #   latest-url     -> download URL for the newest supported compiler
    #   single-version -> version matching the installed kotlinc
    args = sys.argv
    if len(args) < 2:
        raise Exception("Bad arguments")
    command = args[1]
    if command == 'latest-url':
        print(get_latest_url())
    elif command == 'single-version':
        print(get_single_version(*args[2:]))
    else:
        raise Exception("Unknown command: " + command)

View File

@@ -0,0 +1,8 @@
pluginManagement {
repositories {
mavenCentral()
gradlePluginPortal()
}
}
rootProject.name = 'codeql-kotlin-extractor'

View File

@@ -0,0 +1,628 @@
package com.semmle.extractor.java;
import java.lang.reflect.*;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import com.github.codeql.Logger;
import static com.github.codeql.ClassNamesKt.getIrDeclBinaryName;
import static com.github.codeql.ClassNamesKt.getIrClassVirtualFile;
import org.jetbrains.kotlin.ir.declarations.IrClass;
import com.intellij.openapi.vfs.VirtualFile;
import org.jetbrains.kotlin.ir.declarations.IrDeclaration;
import org.jetbrains.kotlin.ir.declarations.IrDeclarationWithName;
import org.jetbrains.org.objectweb.asm.ClassVisitor;
import org.jetbrains.org.objectweb.asm.ClassReader;
import org.jetbrains.org.objectweb.asm.Opcodes;
import com.semmle.util.concurrent.LockDirectory;
import com.semmle.util.concurrent.LockDirectory.LockingMode;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.NestedError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.extraction.PopulationSpecFile;
import com.semmle.util.extraction.SpecFileEntry;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.WholeIO;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
import com.semmle.util.trap.dependencies.TrapDependencies;
import com.semmle.util.trap.dependencies.TrapSet;
import com.semmle.util.trap.pathtransformers.PathTransformer;
public class OdasaOutput {
    // either these are set ...
    private final File trapFolder;
    private final File sourceArchiveFolder;
    // ... or this one is set
    private final PopulationSpecFile specFile;
    // State for the source file currently being processed (see setCurrentSourceFile).
    private File currentSourceFile;
    private TrapSet trapsCreated;
    private TrapDependencies trapDependenciesForSource;
    private SpecFileEntry currentSpecFileEntry;
    // should origin tracking be used?
    private final boolean trackClassOrigins;
    private final Logger log;
    /** DEBUG only: just use the given file as the root for TRAP, source archive etc */
    OdasaOutput(File outputRoot, Logger log) {
        this.trapFolder = new File(outputRoot, "trap");
        this.sourceArchiveFolder = new File(outputRoot, "src_archive");
        this.specFile = null;
        this.trackClassOrigins = false;
        this.log = log;
    }
    /**
     * Production constructor: locate output folders from the environment.
     * Prefers CODEQL_EXTRACTOR_JAVA_TRAP_DIR / TRAP_FOLDER (which requires a
     * matching source-archive variable); otherwise falls back to a population
     * spec file named by ODASA_JAVA_LAYOUT.
     */
    public OdasaOutput(boolean trackClassOrigins, Logger log) {
        String trapFolderVar = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_TRAP_DIR", Var.TRAP_FOLDER.name());
        if (trapFolderVar != null) {
            String sourceArchiveVar = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR", Var.SOURCE_ARCHIVE.name());
            if (sourceArchiveVar == null)
                throw new ResourceError(Var.TRAP_FOLDER + " was set to '" + trapFolderVar + "', but "
                        + Var.SOURCE_ARCHIVE + " was not set");
            this.trapFolder = new File(trapFolderVar);
            this.sourceArchiveFolder = new File(sourceArchiveVar);
            this.specFile = null;
        } else {
            this.trapFolder = null;
            this.sourceArchiveFolder = null;
            String specFileVar = Env.systemEnv().get(Var.ODASA_JAVA_LAYOUT);
            if (specFileVar == null)
                throw new ResourceError("Neither " + Var.TRAP_FOLDER + " nor " + Var.ODASA_JAVA_LAYOUT + " was set");
            this.specFile = new PopulationSpecFile(new File(specFileVar));
        }
        this.trackClassOrigins = trackClassOrigins;
        this.log = log;
    }
    public File getTrapFolder() {
        return trapFolder;
    }
    public boolean getTrackClassOrigins() {
        return trackClassOrigins;
    }
    /**
     * Set the source file that is currently being processed. This may affect
     * things like trap and source archive directories, and persists as a
     * setting until this method is called again.
     * @param f the current source file
     */
    public void setCurrentSourceFile(File f) {
        currentSourceFile = f;
        currentSpecFileEntry = entryFor();
        // Start a fresh trap set for this source file.
        trapsCreated = new TrapSet();
        trapsCreated.addSource(PathTransformer.std().fileAsDatabaseString(f));
        trapDependenciesForSource = null;
    }
    /** The output paths for that file, or null if it shouldn't be included */
    private SpecFileEntry entryFor() {
        if (specFile != null)
            return specFile.getEntryFor(currentSourceFile);
        else
            // No spec file: use the fixed trap/source-archive folders.
            return new SpecFileEntry(trapFolder, sourceArchiveFolder,
                    Arrays.asList(PathTransformer.std().fileAsDatabaseString(currentSourceFile)));
    }
    /*
     * Trap sets and dependencies.
     */
    /** Persist the set of trap files created for the current source file. */
    public void writeTrapSet() {
        trapsCreated.save(trapSetFor(currentSourceFile).toPath());
    }
    // Location of the ".set" file recording the traps created for `file`.
    private File trapSetFor(File file) {
        return FileUtil.appendAbsolutePath(
                currentSpecFileEntry.getTrapFolder(), PathTransformer.std().fileAsDatabaseString(file) + ".set");
    }
    /** Record that the current source file depends on the trap file for `sym`. */
    // NOTE(review): setCurrentSourceFile resets trapDependenciesForSource to
    // null and nothing visible here re-initialises it — confirm callers set it
    // up before invoking addDependency.
    public void addDependency(IrDeclaration sym, String signature) {
        String path = trapFilePathForDecl(sym, signature);
        trapDependenciesForSource.addDependency(path);
    }
    /*
     * Source archive.
     */
    /**
     * Write the given source file to the right source archive, encoded in UTF-8,
     * or do nothing if the file shouldn't be populated.
     */
    public void writeCurrentSourceFileToSourceArchive(String contents) {
        if (currentSpecFileEntry != null && currentSpecFileEntry.getSourceArchivePath() != null) {
            File target = sourceArchiveFileFor(currentSourceFile);
            target.getParentFile().mkdirs();
            new WholeIO().write(target, contents);
        }
    }
    /** Copy `srcFile` verbatim into the source archive (no population check). */
    public void writeFileToSourceArchive(File srcFile) {
        File target = sourceArchiveFileFor(srcFile);
        target.getParentFile().mkdirs();
        String contents = new WholeIO().strictread(srcFile);
        new WholeIO().write(target, contents);
    }
    // Source-archive destination path for `file`.
    private File sourceArchiveFileFor(File file) {
        return FileUtil.appendAbsolutePath(currentSpecFileEntry.getSourceArchivePath(),
                PathTransformer.std().fileAsDatabaseString(file));
    }
    /*
     * Trap file names and paths.
     */
    private static final String CLASSES_DIR = "classes";
    private static final String JARS_DIR = "jars";
    private static final String MODULES_DIR = "modules";
    /** Trap file for the current source file, or null if it shouldn't be populated. */
    private File getTrapFileForCurrentSourceFile() {
        if (currentSpecFileEntry == null)
            return null;
        return trapFileFor(currentSourceFile);
    }
    /** Trap file for a jar, or null if `jarFile` doesn't end in ".jar". */
    private File getTrapFileForJarFile(File jarFile) {
        if (!jarFile.getAbsolutePath().endsWith(".jar"))
            return null;
        return FileUtil.appendAbsolutePath(
                currentSpecFileEntry.getTrapFolder(),
                JARS_DIR + "/" + PathTransformer.std().fileAsDatabaseString(jarFile) + ".trap.gz");
    }
    private File getTrapFileForModule(String moduleName) {
        return FileUtil.appendAbsolutePath(
                currentSpecFileEntry.getTrapFolder(),
                MODULES_DIR + "/" + moduleName + ".trap.gz");
    }
    private File trapFileFor(File file) {
        return FileUtil.appendAbsolutePath(currentSpecFileEntry.getTrapFolder(),
                PathTransformer.std().fileAsDatabaseString(file) + ".trap.gz");
    }
    /** Trap file for a declaration, or null if there is no current spec entry. */
    private File getTrapFileForDecl(IrDeclaration sym, String signature) {
        if (currentSpecFileEntry == null)
            return null;
        return trapFileForDecl(sym, signature);
    }
    private File trapFileForDecl(IrDeclaration sym, String signature) {
        return FileUtil.fileRelativeTo(currentSpecFileEntry.getTrapFolder(),
                trapFilePathForDecl(sym, signature));
    }
    // Cache: binaryName+signature -> relative members-trap path.
    private final Map<String, String> memberTrapPaths = new LinkedHashMap<String, String>();
    private static final Pattern dots = Pattern.compile(".", Pattern.LITERAL);
    /**
     * Relative path of the members trap file for a declaration:
     * classes/&lt;binary name with '.' replaced by '/'&gt;&lt;signature&gt;.members.trap.gz
     */
    private String trapFilePathForDecl(IrDeclaration sym, String signature) {
        String binaryName = getIrDeclBinaryName(sym);
        String binaryNameWithSignature = binaryName + signature;
        // TODO: Reinstate this?
        //if (getTrackClassOrigins())
        //    classId += "-" + StringDigestor.digest(sym.getSourceFileId());
        String result = memberTrapPaths.get(binaryNameWithSignature);
        if (result == null) {
            result = CLASSES_DIR + "/" +
                    dots.matcher(binaryName).replaceAll("/") +
                    signature +
                    ".members" +
                    ".trap.gz";
            memberTrapPaths.put(binaryNameWithSignature, result);
        }
        return result;
    }
    /*
     * Deletion of existing trap files.
     */
    /** Delete the trap file for `sym` plus its sibling .dep and .metadata files, if present. */
    private void deleteTrapFileAndDependencies(IrDeclaration sym, String signature) {
        File trap = trapFileForDecl(sym, signature);
        if (trap.exists()) {
            trap.delete();
            File depFile = new File(trap.getParentFile(), trap.getName().replace(".trap.gz", ".dep"));
            if (depFile.exists())
                depFile.delete();
            File metadataFile = new File(trap.getParentFile(), trap.getName().replace(".trap.gz", ".metadata"));
            if (metadataFile.exists())
                metadataFile.delete();
        }
    }
/*
* Trap writers.
*/
/**
* A {@link TrapFileManager} to output facts for the given source file,
* or <code>null</code> if the source file should not be populated.
*/
private TrapFileManager getTrapWriterForCurrentSourceFile() {
File trapFile = getTrapFileForCurrentSourceFile();
if (trapFile==null)
return null;
return trapWriter(trapFile, null, null);
}
/**
 * Get a {@link TrapFileManager} to write members
 * about a declaration, or <code>null</code> if the declaration shouldn't be populated.
 *
 * @param sym
 *            The declaration's symbol, including, in particular, its fully qualified
 *            binary class name.
 * @param signature
 *            Any unique suffix needed to distinguish `sym` from other declarations with the same name.
 *            For functions for example, this means its parameter signature.
 */
private TrapFileManager getMembersWriterForDecl(IrDeclaration sym, String signature) {
    File trap = getTrapFileForDecl(sym, signature);
    if (trap==null)
        return null;
    TrapClassVersion currVersion = TrapClassVersion.fromSymbol(sym, log);
    String shortName = sym instanceof IrDeclarationWithName ? ((IrDeclarationWithName)sym).getName().asString() : "(name unknown)";
    if (trap.exists()) {
        // Only re-write an existing trap file if we encountered a newer version of the same class.
        TrapClassVersion trapVersion = readVersionInfo(trap);
        if (!currVersion.isValid()) {
            // Can't compare versions, so keep the existing trap file. We
            // still fall through to trapWriter below, but concurrentWriter
            // returns null for an existing file, so nothing is rewritten --
            // hence the "Not rewriting" message.
            log.warn("Not rewriting trap file for: " + shortName + " " + trapVersion + " " + currVersion + " " + trap);
        } else if (currVersion.newerThan(trapVersion)) {
            // A newer version of the class: delete the stale trap file and
            // its .dep/.metadata siblings so it can be regenerated below.
            log.trace("Rewriting trap file for: " + shortName + " " + trapVersion + " " + currVersion + " " + trap);
            deleteTrapFileAndDependencies(sym, signature);
        } else {
            // The existing trap file is at least as new; keep it.
            return null;
        }
    } else {
        log.trace("Writing trap file for: " + shortName + " " + currVersion + " " + trap);
    }
    return trapWriter(trap, sym, signature);
}
/**
 * Creates a {@link TrapFileManager} for the given compressed trap file,
 * registering the file's relative path with the set of traps created so
 * far. May return <code>null</code> (see concurrentWriter).
 */
private TrapFileManager trapWriter(File trapFile, IrDeclaration sym, String signature) {
    String fileName = trapFile.getName();
    if (!fileName.endsWith(".trap.gz"))
        throw new CatastrophicError("OdasaOutput only supports writing to compressed trap files");
    String relative = FileUtil.relativePath(trapFile, currentSpecFileEntry.getTrapFolder());
    // Ensure the destination directory exists before anything is written.
    trapFile.getParentFile().mkdirs();
    trapsCreated.addTrap(relative);
    return concurrentWriter(trapFile, relative, log, sym, signature);
}
/**
 * Constructs a {@link TrapFileManager} for the given trap file, yielding
 * <code>null</code> when the file already exists (another writer got
 * there first).
 */
private TrapFileManager concurrentWriter(File trapFile, String relative, Logger log, IrDeclaration sym, String signature) {
    return trapFile.exists()
            ? null
            : new TrapFileManager(trapFile, relative, true, log, sym, signature);
}
/**
 * Manages writing a single trap file: accumulates the trap files it depends
 * on, and on {@link #close()} writes a ".dep" file listing them plus a
 * ".metadata" file recording the version of the class the trap was
 * extracted from (later read back by readVersionInfo to decide whether to
 * re-extract).
 */
public class TrapFileManager implements AutoCloseable {
    // Accumulates the trap file paths this trap file depends on.
    private TrapDependencies trapDependenciesForClass;
    private File trapFile;
    // The declaration this trap file is written for; may be null for
    // source-file trap writers (see getTrapWriterForCurrentSourceFile).
    private IrDeclaration sym;
    private String signature;
    // Set via setHasError(); makes close() skip writing .dep/.metadata.
    private boolean hasError = false;
    private TrapFileManager(File trapFile, String relative, boolean concurrentCreation, Logger log, IrDeclaration sym, String signature) {
        // NOTE(review): the 'concurrentCreation' and 'log' parameters are
        // accepted but not stored; 'log' used in close() resolves to the
        // enclosing class's field.
        trapDependenciesForClass = new TrapDependencies(relative);
        this.trapFile = trapFile;
        this.sym = sym;
        this.signature = signature;
    }
    public File getFile() {
        return trapFile;
    }
    // Record that this trap file depends on the trap file for 'dep'.
    public void addDependency(IrDeclaration dep, String signature) {
        trapDependenciesForClass.addDependency(trapFilePathForDecl(dep, signature));
    }
    // Convenience overload: classes need no disambiguating signature.
    public void addDependency(IrClass c) {
        addDependency(c, "");
    }
    public void close() {
        if (hasError) {
            // Don't persist dependency/version data for a failed trap file.
            return;
        }
        writeTrapDependencies(trapDependenciesForClass);
        // Record major/minor version information for extracted class files.
        // This is subsequently used to determine whether to re-extract (a newer version of) the same class.
        File metadataFile = new File(trapFile.getAbsolutePath().replace(".trap.gz", ".metadata"));
        try {
            Map<String, String> versionMap = new LinkedHashMap<>();
            // NOTE(review): 'sym' may be null here (source-file traps);
            // confirm TrapClassVersion.fromSymbol tolerates a null symbol.
            TrapClassVersion tcv = TrapClassVersion.fromSymbol(sym, log);
            versionMap.put(MAJOR_VERSION, String.valueOf(tcv.getMajorVersion()));
            versionMap.put(MINOR_VERSION, String.valueOf(tcv.getMinorVersion()));
            versionMap.put(LAST_MODIFIED, String.valueOf(tcv.getLastModified()));
            versionMap.put(EXTRACTOR_NAME, tcv.getExtractorName());
            FileUtil.writePropertiesCSV(metadataFile, versionMap);
        } catch (IOException e) {
            log.warn("Could not save trap metadata file: " + metadataFile.getAbsolutePath(), e);
        }
    }
    private void writeTrapDependencies(TrapDependencies trapDependencies) {
        // The .dep file sits next to the trap file, inside the trap folder.
        String dep = trapDependencies.trapFile().replace(".trap.gz", ".dep");
        trapDependencies.save(
            currentSpecFileEntry.getTrapFolder().toPath().resolve(dep));
    }
    public void setHasError() {
        hasError = true;
    }
}
/*
* Trap file locking.
*/
/**
 * <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
 * only one source file {@link TrapLocker} may be open at any time, and the lock must be obtained
 * <b>before</b> any <b>class</b> file lock.
 *
 * Trap file extensions (and paths) ensure that source and class file locks are distinct.
 *
 * @return a {@link TrapLocker} for the currently processed source file, which must have been
 *         previously set by a call to {@link OdasaOutput#setCurrentSourceFile(File)}.
 */
public TrapLocker getTrapLockerForCurrentSourceFile() {
    // A null declaration makes the locker target the current source file's
    // trap; the IrClass-typed local just selects the right constructor.
    IrClass noDeclaration = null;
    return new TrapLocker(noDeclaration, null);
}
/**
 * <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
 * only one jar file {@link TrapLocker} may be open at any time, and the lock must be obtained
 * <b>after</b> any <b>source</b> file lock. Only one jar or class file lock may be open at any time.
 *
 * Trap file extensions (and paths) ensure that source and jar file locks are distinct.
 *
 * @return a {@link TrapLocker} for the trap file corresponding to the given jar file.
 */
public TrapLocker getTrapLockerForJarFile(File jarFile) {
    // Uses the jar-file constructor, which marks the locker as non-source.
    return new TrapLocker(jarFile);
}
/**
 * <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
 * only one module {@link TrapLocker} may be open at any time, and the lock must be obtained
 * <b>after</b> any <b>source</b> file lock. Only one jar or class file or module lock may be open at any time.
 *
 * Trap file extensions (and paths) ensure that source and module file locks are distinct.
 *
 * @return a {@link TrapLocker} for the trap file corresponding to the given module.
 */
public TrapLocker getTrapLockerForModule(String moduleName) {
    // Uses the module-name constructor, which marks the locker as non-source.
    return new TrapLocker(moduleName);
}
/**
 * <b>CAUTION</b>: to avoid the potential for deadlock between multiple concurrent extractor processes,
 * only one class file {@link TrapLocker} may be open at any time, and the lock must be obtained
 * <b>after</b> any <b>source</b> file lock. Only one jar or class file lock may be open at any time.
 *
 * Trap file extensions (and paths) ensure that source and class file locks are distinct.
 *
 * @return a {@link TrapLocker} for the trap file corresponding to the given class symbol.
 */
public TrapLocker getTrapLockerForDecl(IrDeclaration sym, String signature) {
    // Uses the declaration constructor; 'signature' disambiguates
    // declarations that share a name (e.g. overloaded functions).
    return new TrapLocker(sym, signature);
}
/**
 * Holds an exclusive inter-process lock on a trap file -- for a
 * declaration, a jar file, a module, or the current source file -- for the
 * duration of a try-with-resources block. Obtain instances via the
 * getTrapLockerFor* factory methods, whose javadoc documents the required
 * lock ordering.
 */
public class TrapLocker implements AutoCloseable {
    // The declaration being locked, or null for source/jar/module lockers.
    private final IrDeclaration sym;
    // The trap file to lock; may be null, in which case locking is a no-op.
    private final File trapFile;
    private final String signature;
    // NOTE(review): not read anywhere in this class; presumably consumed
    // elsewhere -- verify before removing.
    private final boolean isNonSourceTrapFile;
    // Locker for a declaration's trap file, or -- when 'decl' is null --
    // for the current source file's trap file.
    private TrapLocker(IrDeclaration decl, String signature) {
        this.sym = decl;
        this.signature = signature;
        if (sym==null) {
            trapFile = getTrapFileForCurrentSourceFile();
        } else {
            trapFile = getTrapFileForDecl(sym, signature);
        }
        isNonSourceTrapFile = false;
    }
    // Locker for the trap file corresponding to a jar file.
    private TrapLocker(File jarFile) {
        sym = null;
        signature = null;
        trapFile = getTrapFileForJarFile(jarFile);
        isNonSourceTrapFile = true;
    }
    // Locker for the trap file corresponding to a named module.
    private TrapLocker(String moduleName) {
        sym = null;
        signature = null;
        trapFile = getTrapFileForModule(moduleName);
        isNonSourceTrapFile = true;
    }
    /**
     * Locks the trap file and returns a manager for writing it, or null if
     * there is no trap file, or it should not be (re)written.
     */
    public TrapFileManager getTrapFileManager() {
        if (trapFile!=null) {
            lockTrapFile(trapFile);
            // NOTE(review): for lockers created without a declaration
            // (source/jar/module), 'sym' and 'signature' are null here;
            // confirm getMembersWriterForDecl handles null arguments.
            return getMembersWriterForDecl(sym, signature);
        } else {
            return null;
        }
    }
    @Override
    public void close() {
        // Best-effort unlock: log rather than propagate, since close() runs
        // during try-with-resources cleanup.
        if (trapFile!=null) {
            try {
                unlockTrapFile(trapFile);
            } catch (NestedError e) {
                log.warn("Error unlocking trap file " + trapFile.getAbsolutePath(), e);
            }
        }
    }
    private LockDirectory getExtractorLockDir() {
        return LockDirectory.instance(currentSpecFileEntry.getTrapFolder(), log);
    }
    // Blocks until an exclusive lock on 'trapFile' is obtained.
    private void lockTrapFile(File trapFile) {
        getExtractorLockDir().blockingLock(LockingMode.Exclusive, trapFile, "Java extractor lock");
    }
    private void unlockTrapFile(File trapFile) {
        boolean success = getExtractorLockDir().maybeUnlock(LockingMode.Exclusive, trapFile);
        if (!success) {
            log.warn("Trap file was not locked: " + trapFile);
        }
    }
}
/*
 * Class version tracking.
 */
// Keys of the ".metadata" properties-CSV file written alongside each trap
// file; written by TrapFileManager.close() and read by readVersionInfo().
private static final String MAJOR_VERSION = "majorVersion";
private static final String MINOR_VERSION = "minorVersion";
private static final String LAST_MODIFIED = "lastModified";
private static final String EXTRACTOR_NAME = "extractorName";
/**
 * A class-file version: the class file format's major/minor version, the
 * file's timestamp, and the name of the extractor that produced the trap.
 * Used by getMembersWriterForDecl/newerThan to decide whether a class seen
 * now should replace a previously extracted trap file.
 */
private static class TrapClassVersion {
    private int majorVersion;
    private int minorVersion;
    private long lastModified;
    private String extractorName; // May be null if not given

    public int getMajorVersion() {
        return majorVersion;
    }
    public int getMinorVersion() {
        return minorVersion;
    }
    public long getLastModified() {
        return lastModified;
    }
    public String getExtractorName() { return extractorName; }

    private TrapClassVersion(int majorVersion, int minorVersion, long lastModified, String extractorName) {
        this.majorVersion = majorVersion;
        this.minorVersion = minorVersion;
        this.lastModified = lastModified;
        this.extractorName = extractorName;
    }

    /** Whether this version should take precedence over {@code tcv}. */
    private boolean newerThan(TrapClassVersion tcv) {
        // Classes being compiled from source have major version 0 but should take precedence
        // over any classes with the same qualified name loaded from the classpath
        // in previous or subsequent extractor invocations.
        if (tcv.majorVersion==0)
            return false;
        else if (majorVersion==0)
            return true;
        // Always consider the Kotlin extractor superior to the Java extractor, because we may decode and extract
        // Kotlin metadata that the Java extractor can't understand:
        if(!Objects.equals(tcv.extractorName, extractorName)) {
            if (Objects.equals(tcv.extractorName, "kotlin"))
                return false;
            if (Objects.equals(extractorName, "kotlin"))
                return true;
        }
        // Otherwise, determine precedence in the following order:
        // majorVersion, minorVersion, lastModified.
        return tcv.majorVersion < majorVersion ||
            (tcv.majorVersion == majorVersion && tcv.minorVersion < minorVersion) ||
            (tcv.majorVersion == majorVersion && tcv.minorVersion == minorVersion &&
             tcv.lastModified < lastModified);
    }

    /**
     * Builds a TrapClassVersion for the class file backing {@code sym} (or
     * its parent class), reading the class-file format version via ASM.
     * Returns an invalid version (major version -1; see isValid) when no
     * class file can be found or read. Safe to call with a null {@code sym}.
     */
    private static TrapClassVersion fromSymbol(IrDeclaration sym, Logger log) {
        // Guard against a null symbol (e.g. source-file trap writers are
        // created without a declaration): previously 'sym' was dereferenced
        // unconditionally, which could throw a NullPointerException.
        VirtualFile vf;
        if (sym instanceof IrClass)
            vf = getIrClassVirtualFile((IrClass)sym);
        else if (sym != null && sym.getParent() instanceof IrClass)
            vf = getIrClassVirtualFile((IrClass)sym.getParent());
        else
            vf = null;
        if(vf == null)
            return new TrapClassVersion(-1, 0, 0, null);
        final int[] versionStore = new int[1];
        try {
            // Opcodes has fields called ASM4, ASM5, ...
            // We want to use the latest one that there is.
            Field asmField = null;
            int asmNum = -1;
            for(Field f : Opcodes.class.getDeclaredFields()) {
                String name = f.getName();
                if(name.startsWith("ASM")) {
                    try {
                        int i = Integer.parseInt(name.substring(3));
                        if(i > asmNum) {
                            asmNum = i;
                            asmField = f;
                        }
                    } catch (NumberFormatException ex) {
                        // Do nothing; this field doesn't have a name of the right format
                    }
                }
            }
            if (asmField == null) {
                // No ASM<n> field found at all; previously this fell through
                // to a NullPointerException on the getInt call below.
                log.warn("Failed to find an ASM version field in Opcodes");
                return new TrapClassVersion(-1, 0, 0, null);
            }
            int asm = asmField.getInt(null);
            ClassVisitor versionGetter = new ClassVisitor(asm) {
                public void visit(int version, int access, java.lang.String name, java.lang.String signature, java.lang.String superName, java.lang.String[] interfaces) {
                    versionStore[0] = version;
                }
            };
            (new ClassReader(vf.contentsToByteArray())).accept(versionGetter, ClassReader.SKIP_CODE | ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES);
            // The class-file version word packs the minor version into the
            // high 16 bits and the major version into the low 16 bits.
            return new TrapClassVersion(versionStore[0] & 0xffff, versionStore[0] >> 16, vf.getTimeStamp(), "kotlin");
        }
        catch(IllegalAccessException | IOException e) {
            // Merged catch: both failure modes were previously handled by
            // two identical catch blocks.
            log.warn("Failed to read class file version information", e);
            return new TrapClassVersion(-1, 0, 0, null);
        }
    }

    /**
     * Valid versions have non-negative major and minor versions; fromSymbol
     * returns major version -1 on failure.
     */
    private boolean isValid() {
        return majorVersion>=0 && minorVersion>=0;
    }

    @Override
    public String toString() {
        return majorVersion + "." + minorVersion + "-" + lastModified + "-" + extractorName;
    }
}
/**
 * Reads the ".metadata" file written alongside the given trap file and
 * reconstructs the class version recorded in it. Missing files or
 * unparsable numbers are logged, and the affected fields keep their
 * zero/null defaults.
 */
private TrapClassVersion readVersionInfo(File trap) {
    File metadataFile = new File(trap.getAbsolutePath().replace(".trap.gz", ".metadata"));
    int major = 0;
    int minor = 0;
    long modified = 0;
    String extractor = null;
    if (!metadataFile.exists()) {
        log.warn("Trap metadata file does not exist: " + metadataFile.getAbsolutePath());
        return new TrapClassVersion(major, minor, modified, extractor);
    }
    Map<String,String> metadataMap = FileUtil.readPropertiesCSV(metadataFile);
    try {
        // Parsed sequentially: if one value fails to parse, the fields
        // parsed before it keep their values and the rest stay defaulted.
        major = Integer.parseInt(metadataMap.get(MAJOR_VERSION));
        minor = Integer.parseInt(metadataMap.get(MINOR_VERSION));
        modified = Long.parseLong(metadataMap.get(LAST_MODIFIED));
        extractor = metadataMap.get(EXTRACTOR_NAME);
    } catch (NumberFormatException e) {
        log.warn("Invalid class file version for " + trap.getAbsolutePath(), e);
    }
    return new TrapClassVersion(major, minor, modified, extractor);
}
}

View File

@@ -0,0 +1,152 @@
package com.semmle.extractor.java;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import com.github.codeql.Label;
import com.github.codeql.DbFile;
import com.github.codeql.TrapWriter;
import com.github.codeql.KotlinExtractorDbSchemeKt;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.trap.pathtransformers.PathTransformer;
import kotlin.Unit;
/**
 * Maps {@link File}s (and paths inside jar files) to TRAP labels, writing
 * the corresponding `files`, `folders` and `containerparent` tuples as a
 * side effect.
 */
public class PopulateFile {
    // Writer for the TRAP file currently being produced.
    private TrapWriter tw;
    // Converts absolute filesystem paths to their database representation.
    private PathTransformer transformer;
    public PopulateFile(TrapWriter tw) {
        this.tw = tw;
        this.transformer = PathTransformer.std();
    }
    // Replacement HTML entity for each character that is special in TRAP
    // keys, indexed by character code; all special characters are ASCII,
    // hence the array size of 127. Entries for ordinary characters are null.
    private static final String[] keyReplacementMap = new String[127];
    static {
        keyReplacementMap['&'] = "&amp;";
        keyReplacementMap['{'] = "&lbrace;";
        keyReplacementMap['}'] = "&rbrace;";
        keyReplacementMap['"'] = "&quot;";
        keyReplacementMap['@'] = "&commat;";
        keyReplacementMap['#'] = "&num;";
    }
    /**
     * Escape a string for use in a TRAP key, by replacing special characters with HTML entities.
     * <p>
     * The given string cannot contain any sub-keys, as the delimiters <code>{</code> and <code>}</code>
     * are escaped.
     * <p>
     * To construct a key containing both sub-keys and arbitrary input data, escape the individual parts of
     * the key rather than the key as a whole, for example:
     * <pre>
     * "foo;{" + label.toString() + "};" + escapeKey(data)
     * </pre>
     */
    public static String escapeKey(String s) {
        // The builder is allocated lazily: when no special character occurs,
        // the input string is returned unchanged, without copying.
        StringBuilder sb = null;
        int lastIndex = 0;
        for (int i = 0; i < s.length(); ++i) {
            char ch = s.charAt(i);
            switch (ch) {
                case '&':
                case '{':
                case '}':
                case '"':
                case '@':
                case '#':
                    if (sb == null) {
                        sb = new StringBuilder();
                    }
                    // Copy the unescaped run preceding this character, then
                    // append its entity replacement.
                    sb.append(s, lastIndex, i);
                    sb.append(keyReplacementMap[ch]);
                    lastIndex = i + 1;
                    break;
            }
        }
        if (sb != null) {
            // Copy the trailing unescaped run.
            sb.append(s, lastIndex, s.length());
            return sb.toString();
        } else {
            return s;
        }
    }
    // Get the label for the given file, populating the file tables.
    public Label populateFile(File absoluteFile) {
        return getFileLabel(absoluteFile, true);
    }
    /**
     * Get the label for the given file. When {@code populateTables} is true,
     * `files` and `containerparent` tuples are written via the label
     * callback (presumably only run when the label is fresh -- confirm
     * against TrapWriter.getLabelFor).
     */
    public Label<DbFile> getFileLabel(File absoluteFile, boolean populateTables) {
        String databasePath = transformer.fileAsDatabaseString(absoluteFile);
        Label result = tw.<DbFile>getLabelFor("@\"" + escapeKey(databasePath) + ";sourcefile" + "\"", label -> {
            if(populateTables) {
                KotlinExtractorDbSchemeKt.writeFiles(tw, label, databasePath);
                populateParents(new File(databasePath), label);
            }
            return Unit.INSTANCE;
        });
        return result;
    }
    // Get the label for a folder, writing its `folders` tuple.
    private Label addFolderTuple(String databasePath) {
        Label result = tw.getLabelFor("@\"" + escapeKey(databasePath) + ";folder" + "\"");
        KotlinExtractorDbSchemeKt.writeFolders(tw, result, databasePath);
        return result;
    }
    /**
     * Populate the parents of an already-normalised file. The path transformers
     * and canonicalisation of {@link PathTransformer#fileAsDatabaseString(File)} will not be
     * re-applied to this, so it should only be called after proper normalisation
     * has happened. It will fill in all parent folders in the current TRAP file.
     */
    private void populateParents(File normalisedFile, Label label) {
        File parent = normalisedFile.getParentFile();
        // Recursion terminates at the filesystem root.
        if (parent == null) return;
        Label parentLabel = addFolderTuple(FileUtil.normalisePath(parent.getPath()));
        populateParents(parent, parentLabel);
        KotlinExtractorDbSchemeKt.writeContainerparent(tw, parentLabel, label);
    }
    // Get the label for a path within a jar, populating the file tables.
    public Label relativeFileId(File jarFile, String pathWithinJar) {
        return getFileInJarLabel(jarFile, pathWithinJar, true);
    }
    /**
     * Get the label for {@code pathWithinJar} (a '/'-separated path) inside
     * {@code jarFile}. When {@code populateTables} is true, writes
     * `files`/`folders`/`containerparent` tuples for the jar and each path
     * component.
     * NOTE(review): unlike getFileLabel, the keys built here do not pass the
     * path through escapeKey -- presumably jar paths never contain the
     * escaped characters; confirm.
     */
    public Label<DbFile> getFileInJarLabel(File jarFile, String pathWithinJar, boolean populateTables) {
        if (pathWithinJar.contains("\\"))
            throw new CatastrophicError("Invalid jar path: '" + pathWithinJar + "' should not contain '\\'.");
        String databasePath = transformer.fileAsDatabaseString(jarFile);
        if(!populateTables)
            return tw.getLabelFor("@\"" + databasePath + "/" + pathWithinJar + ";jarFile\"");
        Label jarFileId = this.populateFile(jarFile);
        // The jar file itself gets a (0,0,0,0) whole-file location.
        Label jarFileLocation = tw.getLocation(jarFileId, 0, 0, 0, 0);
        KotlinExtractorDbSchemeKt.writeHasLocation(tw, jarFileId, jarFileLocation);
        StringBuilder fullName = new StringBuilder(databasePath);
        String[] split = pathWithinJar.split("/");
        Label current = jarFileId;
        // Walk down the path: each intermediate component is a folder, the
        // last component is a file, and each is linked to its parent.
        for (int i = 0; i < split.length; i++) {
            String shortName = split[i];
            fullName.append("/");
            fullName.append(shortName);
            Label fileId = tw.getLabelFor("@\"" + fullName + ";jarFile" + "\"");
            boolean file = i == split.length - 1;
            if (file) {
                KotlinExtractorDbSchemeKt.writeFiles(tw, fileId, fullName.toString());
            } else {
                KotlinExtractorDbSchemeKt.writeFolders(tw, fileId, fullName.toString());
            }
            KotlinExtractorDbSchemeKt.writeContainerparent(tw, current, fileId);
            current = fileId;
        }
        return current;
    }
}

View File

@@ -0,0 +1,246 @@
package com.semmle.util.array;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import com.semmle.util.basic.ObjectUtil;
/**
* Convenience methods for manipulating arrays.
*/
public class ArrayUtil
{
/**
* A number slightly smaller than the maximum length of an array on most vms.
* This matches the constant in ArrayList.
*/
public static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;
/**
* Comparator for primitive int values.
*/
public static interface IntComparator
{
/**
* Compare ints {@code a} and {@code b}, returning a negative value if {@code a} is 'less' than
* {@code b}, zero if they are equal, otherwise a positive value.
*/
public int compare (int a, int b);
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no such element.
*/
public static int findFirst(boolean[] array, boolean value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no such element.
*/
public static int findFirst(byte[] array, byte value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no such element.
*/
public static int findFirst(char[] array, char value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no such element.
*/
public static int findFirst(double[] array, double value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no such element.
*/
public static int findFirst(float[] array, float value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no such element.
*/
public static int findFirst(int[] array, int value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no element for which {@code value.equals(element)} is true.
*
* @see #findFirstSame(Object[], Object)
*/
public static <T> int findFirst(T[] array, T value)
{
for(int i=0; i<array.length; ++i) {
if (ObjectUtil.equals(value, array[i])) {
return i;
}
}
return -1;
}
/**
* Find the index of the first occurrence of the given {@code value} in the given {@code array},
* returning -1 if there is no element for which {@code value == element}.
*
* @see #findFirstSame(Object[], Object)
*/
public static <T> int findFirstSame(T[] array, T value)
{
for(int i=0; i<array.length; ++i) {
if (value == array[i])
return i;
}
return -1;
}
/**
* Query whether the given {@code array} contains any element equal to the given {@code element}.
*/
public static boolean contains (int element, int ... array)
{
return findFirst(array, element) != -1;
}
/**
* Query whether the given {@code array} contains any element equal to the given {@code element}.
*/
@SafeVarargs
public static <T> boolean contains (T element, T ... array)
{
return findFirst(array, element) != -1;
}
/**
* Construct a new array with length increased by one, containing all elements of a given array
* followed by an additional element.
*/
public static <T> T[] append (T[] array, T element)
{
array = Arrays.copyOf(array, array.length + 1);
array[array.length-1] = element;
return array;
}
/**
* Construct a new array containing the concatenation of the elements in a number of arrays.
*
* @param arrays The arrays to concatenate; may be null (in which case the result will be null).
* Null elements will be treated as empty arrays.
* @return If {@code arrays} is null, a null array, otherwise a newly allocated array containing
* the elements of every non-null array in {@code arrays} concatenated consecutively.
*/
public static byte[] concatenate (byte[] ... arrays)
{
// Quick break-out if arrays is null
if (arrays == null) {
return null;
}
// Find the total length that will be required
int totalLength = 0;
for(byte[] array : arrays) {
totalLength += array == null ? 0 : array.length;
}
// Allocate a new array for the concatenation
byte[] concatenation = new byte[totalLength];
// Copy each non-null array into the concatenation
int offset = 0;
for(byte[] array : arrays) {
if (array != null) {
System.arraycopy(array, 0, concatenation, offset, array.length);
offset += array.length;
}
}
return concatenation;
}
/** Trivial short-hand for building an array (returns {@code elements} unchanged). */
public static <T> T[] toArray (T ... elements)
{
return elements;
}
/**
* Swap two elements in an array.
*
* @param array The array containing the elements to be swapped; must be non-null.
* @param index1 The index of the first element to swap; must be in-bounds.
* @param index2 The index of the second element to swap; must be in-bounds.
* @return The given {@code array}.
*/
public static int[] swap (int[] array, int index1, int index2)
{
int value = array[index1];
array[index1] = array[index2];
array[index2] = value;
return array;
}
/**
* Returns a fresh Set containing all the elements in the array.
*
* @param <T>
* the class of the objects in the array
* @param array
* the array containing the elements
* @return a Set containing all the elements in the array.
*/
@SafeVarargs
public static <T> Set<T> asSet (T ... array)
{
Set<T> ts = new LinkedHashSet<>();
Collections.addAll(ts, array);
return ts;
}
}

View File

@@ -0,0 +1,73 @@
package com.semmle.util.basic;
/**
 * Trivial utility methods.
 */
public class ObjectUtil {
    /** Query if {@code object1} and {@code object2} are reference-equal, or both null. */
    public static boolean isSame (Object object1, Object object2)
    {
        // Deliberate reference comparison: equals() is never consulted.
        return object1 == object2;
    }

    /**
     * Query if {@code object1} and {@code object2} are both null, or both non-null and equal
     * according to {@link Object#equals(Object)} (applied as {@code object1.equals(object2)}).
     */
    public static boolean equals (Object object1, Object object2)
    {
        if (object1 == null) {
            return object2 == null;
        }
        return object1.equals(object2);
    }

    /**
     * Query whether {@code object} is equal to any element in {@code objects}, short-circuiting
     * the evaluation if possible.
     */
    public static boolean equalsAny (Object object, Object ... objects)
    {
        // A null or empty varargs array can match nothing.
        if (objects == null) {
            return false;
        }
        for (int i = 0; i < objects.length; ++i) {
            if (equals(object, objects[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Return {@code object1.compareTo(object2)}, but handle the case of null input by returning 0 if
     * both objects are null, or 1 if only {@code object1} is null (implying that null is always
     * 'greater' than non-null).
     */
    public static <T1, T2 extends T1> int compareTo (Comparable<T1> object1, T2 object2)
    {
        if (object1 != null) {
            // Non-null receiver: delegate to its natural ordering.
            return object1.compareTo(object2);
        }
        return object2 == null ? 0 : 1;
    }

    /**
     * Return {@code value} if non-null, otherwise {@code replacement}.
     */
    public static <T> T replaceNull (T value, T replacement)
    {
        if (value != null) {
            return value;
        }
        return replacement;
    }

    /** Return the first non-null value, or null if every value is null. */
    @SafeVarargs
    public static <T> T nullCoalesce(T... values) {
        for (int i = 0; i < values.length; ++i) {
            T candidate = values[i];
            if (candidate != null) {
                return candidate;
            }
        }
        return null;
    }
}

View File

@@ -0,0 +1,395 @@
package com.semmle.util.concurrent;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.util.LinkedHashMap;
import java.util.Map;
import com.semmle.util.data.StringDigestor;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.WholeIO;
import com.github.codeql.Logger;
import com.github.codeql.Severity;
/**
* Helper class to simplify handling of file-system-based inter-process
* locking and mutual exclusion.
*
* Both files and directories can be locked; locks are provided in the
* usual flavours of "shared" and "exclusive", plus a no-op variety to
* help unify code -- see the {@link LockingMode} enum.
*
* Note that each locked file requires one file descriptor to be held open.
* It is vital for clients to avoid creating too many locks, and to release
* locks when possible.
*
* The locks obtained by this class are VM-wide, and cannot be used to
* ensure mutual exclusion between threads of the same VM. Rather, they
* can enforce mutual exclusion between separate VMs trying to acquire
* locks for the same paths.
*/
public class LockDirectory {
private final Logger logger;
private final File lockDir;
/**
 * An enum describing the possible locking modes.
 */
public enum LockingMode {
    /**
     * Shared mode: A shared lock can be taken any number of times, but only
     * if no exclusive lock is in place.
     */
    Shared(true),
    /**
     * An exclusive lock can only be taken if no other lock is in place; it
     * prevents all other locks.
     */
    Exclusive(false),
    /**
     * A dummy mode: Lock and unlock operations are no-ops.
     */
    None(true),
    ;
    // Whether locks of this mode can coexist with other shared locks;
    // passed as the 'shared' argument of FileChannel.tryLock by LockFile.
    private boolean shared;
    private LockingMode(boolean shared) {
        this.shared = shared;
    }
    public boolean isShared() { return shared; }
}
/**
* An internal representation of a locked path. Contains some immutable state: The canonical
* path being locked, and the (derived) lock and status files. When the {@link #lock(LockDirectory.LockingMode, String)}
* method is called, a file descriptor to the lock file is opened; {@link #unlock(LockDirectory.LockingMode)} must be
* called to release it when the lock is no longer required.
*
* This class is not thread-safe, but it is expected that its clients ({@link LockDirectory})
* enforce thread-safe access to instances.
*/
private class LockFile {
private final String lockedPath;
private final File lockFile;
private final File statusFile;
private LockingMode mode = null;
private RandomAccessFile lockStream = null;
private FileChannel lockChannel = null;
private FileLock lock = null;
public LockFile(File f) {
try {
lockedPath = f.getCanonicalPath();
} catch (IOException e) {
throw new ResourceError("Failed to canonicalise path for locking: " + f, e);
}
String sha = StringDigestor.digest(lockedPath);
lockFile = new File(lockDir, sha);
statusFile = new File(lockDir, sha + ".log");
}
/**
* Get the (canonical) path associated with this lock file -- this is the
* path that is being locked.
*/
public String getLockedPath() {
return lockedPath;
}
/**
* Acquire a lock with the given mode. If this method returns normally,
* the lock has been acquired -- an exception is thrown otherwise. This
* method does not block.
*
* If no exception is thrown, a file descriptor is kept open until
* {@link #unlock(LockDirectory.LockingMode)} is called.
* @param mode The desired locking mode. If {@link LockingMode#None}, this
* operation is a no-op (and does not in fact open a file descriptor).
* @param message A message to be recorded alongside the lock file. This
* is included in the exception message of other processes using this
* infrastructure when the lock acquisition fails.
* @throws CatastrophicError if a lock has already been obtained and not released.
* @throws ResourceError if an exception occurs while obtaining the lock, including
* if it cannot be acquired because another process holds it.
*/
public void lock(LockingMode mode, String message) {
if (mode == LockingMode.None) return;
if (lock != null)
throw new CatastrophicError("Trying to re-lock existing lock for " + lockedPath);
this.mode = mode;
try {
lockStream = new RandomAccessFile(lockFile, "rw");
lockChannel = lockStream.getChannel();
tryLock(mode);
new WholeIO().strictwrite(statusFile, mode + " lock acquired for " + lockedPath + ": " + message);
} catch (IOException e) {
throw new ResourceError("Failed to obtain lock for " + lockedPath + " at " + lockFile, e);
}
}
/**
* Acquire a lock with the given mode. If this method returns normally,
* the lock has been acquired -- an exception is thrown otherwise. This
* method blocks indefinitely while waiting to acquire the lock.
*
* If no exception is thrown, a file descriptor is kept open until
* {@link #unlock(LockDirectory.LockingMode)} is called.
* @param mode The desired locking mode. If {@link LockingMode#None}, this
* operation is a no-op (and does not in fact open a file descriptor).
* @param message A message to be recorded alongside the lock file. This
* is included in the exception message of other processes using this
* infrastructure when the lock acquisition fails.
* @throws ResourceError if an exception occurs while obtaining the lock,.
*/
public void blockingLock(LockingMode mode, String message) {
if (mode == LockingMode.None) return;
if (lock != null)
throw new CatastrophicError("Trying to re-lock existing lock for " + lockedPath);
this.mode = mode;
try {
lockStream = new RandomAccessFile(lockFile, "rw");
lockChannel = lockStream.getChannel();
lock = lockChannel.tryLock(0, Long.MAX_VALUE, mode.isShared());
while (lock == null) {
ThreadUtil.sleep(500, true);
lock = lockChannel.tryLock(0, Long.MAX_VALUE, mode.isShared());
}
new WholeIO().strictwrite(statusFile, mode + " lock acquired for " + lockedPath + ": " + message);
} catch (IOException e) {
throw new ResourceError("Failed to obtain lock for " + lockedPath + " at " + lockFile, e);
}
}
/**
* Internal helper method: Try to acquire a particular kind of lock, assuming the
* {@link #lockChannel} has been set up. Throws if acquisition fails, rather than
* blocking.
* @param mode The desired lock mode -- exclusive or shared.
* @throws IOException if acquisition of the lock fails for reasons other than
* an incompatible lock already being held by another process.
* @throws ResourceError if the lock is already held by another process. The exception
* message includes the status string, if it can be determined.
*/
private void tryLock(LockingMode mode) throws IOException {
lock = lockChannel.tryLock(0, Long.MAX_VALUE, mode.isShared());
if (lock == null) {
String status = new WholeIO().read(statusFile);
throw new ResourceError("Failed to acquire " + mode + " lock for " + lockedPath + "." +
(status == null ? "" : "\nExisting lock message: " + status));
}
}
/**
* Release this lock. This will close the file descriptor opened by {@link #lock(LockDirectory.LockingMode, String)}.
* @param mode A mode, which must match the mode passed into {@link #lock(LockDirectory.LockingMode, String)}
* (unless it is {@link LockingMode#None}, in which case the method is a no-op).
* @throws CatastrophicError if the passed mode does not match the one used for locking.
* @throws ResourceError if releasing the lock or clearing up temporary files fails.
*/
public void unlock(LockingMode mode) {
if (mode == LockingMode.None)
return;
if (mode != this.mode)
throw new CatastrophicError("Attempting to unlock " + lockedPath + " with incompatible mode: " +
this.mode + " lock was obtained, but " + mode + " lock is being released.");
release(mode);
}
/**
 * Release the underlying {@link java.nio.channels.FileLock} (if any), close the
 * associated stream/channel and remove the status and lock files. All lock-related
 * fields are reset afterwards, even if cleanup fails part-way.
 * @param mode The mode the lock was held in; affects how failure to delete the
 * status file is treated (shared locks tolerate it, exclusive ones do not).
 * @throws ResourceError if releasing the lock or clearing up temporary files fails.
 */
private void release(LockingMode mode) {
    try {
        if (lock != null)
            try {
                // On Windows, the lockChannel/lockStream prevents the lockFile from being
                // deleted. The statusFile should only be written after the lock is held,
                // so deleting it before releasing the lock is not expected to fail if the
                // lock is exclusive.
                // Deleting the lock file may fail, if another process just acquires it
                // after we release it.
                try {
                    if (statusFile.exists() && !statusFile.delete()) {
                        if (!mode.isShared()) throw new ResourceError("Could not clear status file " + statusFile);
                    }
                } finally {
                    lock.release();
                    FileUtil.close(lockStream);
                    FileUtil.close(lockChannel);
                    if (!lockFile.delete())
                        logger.error("Could not clear lock file " + lockFile + " (it might have been locked by another process).");
                }
            } catch (IOException e) {
                throw new ResourceError("Couldn't release lock for " + lockedPath, e);
            }
    } finally {
        // Reset all lock state so a stale mode can't poison a future unlock().
        // Bug fix: `mode = null` previously assigned the *parameter*, which
        // shadows the field checked by unlock(LockingMode); clear the field.
        this.mode = null;
        lockStream = null;
        lockChannel = null;
        lock = null;
    }
}
}
private static final Map<File, LockDirectory> instances = new LinkedHashMap<File, LockDirectory>();
/**
 * Obtain the {@link LockDirectory} instance for a given lock directory. The directory
 * in question will be created if it doesn't exist.
 * @param lockDir A directory -- must be writable, and will be created if it doesn't
 * already exist.
 * @return The {@link LockDirectory} instance responsible for the specified lock directory.
 * @throws ResourceError if the directory cannot be created, exists as a non-directory
 * or cannot be canonicalised.
 */
public static synchronized LockDirectory instance(File lockDir) {
    // Delegate to the two-argument variant with the default (null) logger.
    return instance(lockDir, null);
}
/**
 * See {@link #instance(File)}.
 * Use this method only if log output should be directed to a custom {@link Logger}.
 */
public static synchronized LockDirectory instance(File lockDir, Logger logger) {
    // The directory must exist before canonicalisation, which fails for
    // missing paths, so create it first.
    try {
        FileUtil.mkdirs(lockDir);
    } catch (ResourceError e) {
        throw new ResourceError("Couldn't ensure lock directory " + lockDir + " exists.", e);
    }
    // Canonicalise so that different spellings of the same path share one instance.
    try {
        lockDir = lockDir.getCanonicalFile();
    } catch (IOException e) {
        throw new ResourceError("Couldn't canonicalise requested lock directory " + lockDir, e);
    }
    // Return the cached instance for this directory, creating it on first use.
    LockDirectory result = instances.get(lockDir);
    if (result != null)
        return result;
    result = new LockDirectory(lockDir, logger);
    instances.put(lockDir, result);
    return result;
}
/**
 * A map from canonical locked paths to the associated {@link LockFile} instances.
 * Guarded by this instance's monitor: every method that reads or mutates it
 * is declared synchronized.
 */
private final Map<String, LockFile> locks = new LinkedHashMap<String, LockFile>();
/**
 * Create a new instance of {@link LockDirectory}, holding all locks in the
 * specified log directory.
 * @param lockDir A writable directory in which locks will be stored.
 * @param logger The {@link Logger} to use, if non-null.
 */
private LockDirectory(File lockDir, Logger logger) {
    // Private: instances are created and cached only via the static instance() methods.
    this.lockDir = lockDir;
    this.logger = logger;
}
/**
 * Acquire a lock of the specified kind for the path represented by the given file.
 * The file should exist, and its path should be canonicalisable. Does not block:
 * acquisition failures surface immediately.
 * <p>
 * Calling this method keeps one file descriptor open.
 * @param mode The desired locking mode. {@link LockingMode#None} makes this a no-op;
 * otherwise it determines whether a shared or exclusive lock is acquired.
 * @param f The path that should be locked -- it is not opened and need not be writable.
 * @param message Describes the purpose of the lock acquisition; potentially shown to
 * other processes that fail to acquire a lock on the same path.
 * @throws CatastrophicError if an attempt is made to lock an already locked path.
 */
public synchronized void lock(LockingMode mode, File f, String message) {
    if (mode == LockingMode.None)
        return;
    LockFile newLock = new LockFile(f);
    String path = newLock.getLockedPath();
    if (locks.containsKey(path))
        throw new CatastrophicError("Trying to lock already locked path " + path + ".");
    newLock.lock(mode, message);
    locks.put(path, newLock);
}
/**
 * Acquire a lock of the specified kind for the path represented by the given file,
 * waiting indefinitely for the lock to become available. There is no ordering on
 * processes waiting to acquire the lock in this manner. The file should exist, and
 * its path should be canonicalisable.
 * <p>
 * Calling this method keeps one file descriptor open.
 * @param mode The desired locking mode. {@link LockingMode#None} makes this a no-op;
 * otherwise it determines whether a shared or exclusive lock is acquired.
 * @param f The path that should be locked -- it is not opened and need not be writable.
 * @param message Describes the purpose of the lock acquisition; potentially shown to
 * other processes that fail to acquire a lock on the same path.
 */
public synchronized void blockingLock(LockingMode mode, File f, String message) {
    if (mode == LockingMode.None)
        return;
    LockFile newLock = new LockFile(f);
    String path = newLock.getLockedPath();
    if (locks.containsKey(path))
        throw new CatastrophicError("Trying to lock already locked path " + path + ".");
    newLock.blockingLock(mode, message);
    locks.put(path, newLock);
}
/**
 * Release a lock held on a particular path, closing the file descriptor associated
 * with the lock and freeing related resources.
 * @param mode the mode of the lock. If it equals {@link LockingMode#None}, this is a no-op; otherwise
 * it is expected to match the mode passed to the corresponding {@link #lock(LockingMode, File, String)}
 * call.
 * @param f The path which should be unlocked. As with {@link #lock(LockingMode, File, String)}, it is
 * expected to exist and be canonicalisable. It also must be currently locked.
 * @throws CatastrophicError on API contract violation: The path isn't currently locked, or the
 * mode doesn't correspond to the mode specified when it was locked.
 * @throws ResourceError if something goes wrong while releasing resources.
 */
public synchronized void unlock(LockingMode mode, File f) {
    boolean wasLocked = maybeUnlock(mode, f);
    if (!wasLocked)
        throw new CatastrophicError("Trying to unlock " + new LockFile(f).getLockedPath() + ", but it is not locked.");
}
/**
 * Release a lock that may be held on a particular path, closing the associated file
 * descriptor and freeing related resources. Unlike {@link #unlock(LockingMode, File)},
 * this method will not throw if the specified {@link File} is not locked, making it
 * more suitable for post-exception cleanup -- <code>false</code> is returned instead.
 * @param mode the mode of the lock. If it equals {@link LockingMode#None}, this is a no-op; otherwise
 * it is expected to match the mode passed to the corresponding {@link #lock(LockingMode, File, String)}
 * call.
 * @param f The path which should be unlocked. As with {@link #lock(LockingMode, File, String)}, it is
 * expected to exist and be canonicalisable.
 * @return <code>true</code> if <code>mode == LockingMode.None</code>, or the unlock operation completed
 * successfully; <code>false</code> if the path <code>f</code> isn't currently locked.
 * @throws ResourceError if something goes wrong while releasing resources.
 */
public synchronized boolean maybeUnlock(LockingMode mode, File f) {
    if (mode == LockingMode.None)
        return true;
    // A throwaway LockFile is built purely to reuse its canonical-path logic.
    String path = new LockFile(f).getLockedPath();
    // Remove the entry before releasing, mirroring the original get/remove order.
    LockFile held = locks.remove(path);
    if (held == null)
        return false;
    held.unlock(mode);
    return true;
}
public File getDir(){ return lockDir; }
}

View File

@@ -0,0 +1,43 @@
package com.semmle.util.concurrent;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.Exceptions;
/**
 * Utility methods related to Threads.
 */
public enum ThreadUtil
{
    /** Singleton instance of {@link ThreadUtil}. */
    SINGLETON;
    /**
     * Sleep for {@code millis} milliseconds.
     * <p>
     * Unlike {@link Thread#sleep(long)} (which is wrapped), this method does not throw an
     * {@link InterruptedException}. If the sleep is interrupted, it either throws a
     * {@link CatastrophicError} (when {@code allowInterrupt} is false), or accepts the
     * interruption and returns false.
     * </p>
     *
     * @param millis How long to sleep, in milliseconds.
     * @param allowInterrupt Whether interruption is an acceptable outcome.
     * @return true if a sleep of {@code millis} milliseconds was performed without interruption, or
     *         false if an interruption occurred.
     */
    public static boolean sleep(long millis, boolean allowInterrupt)
    {
        try {
            Thread.sleep(millis);
            return true;
        }
        catch (InterruptedException ie) {
            if (!allowInterrupt)
                throw new CatastrophicError("Interrupted", ie);
            Exceptions.ignore(ie, "explicitly permitted interruption");
            return false;
        }
    }
}

View File

@@ -0,0 +1,19 @@
package com.semmle.util.data;
/**
 * A mutable reference to a primitive int. Specialised to avoid
 * boxing.
 */
public class IntRef {
    // The wrapped primitive value.
    private int value;
    /** Create a reference initially holding {@code value}. */
    public IntRef(int value) {
        this.value = value;
    }
    /** @return the current value. */
    public int get() { return value; }
    /** Replace the current value with {@code value}. */
    public void set(int value) { this.value = value; }
    /** Increment the current value by one. */
    public void inc() { value++; }
    /** Add {@code val} to the current value. */
    public void add(int val) { value += val; }
}

View File

@@ -0,0 +1,62 @@
package com.semmle.util.data;
/**
 * An (immutable) ordered pair of values.
 * <p>
 * Pairs are compared with structural equality: <code>(x,y) = (x', y')</code> iff <code>x=x'</code>
 * and <code>y=y'</code>.
 * </p>
 *
 * @param <X> the type of the first component of the pair
 * @param <Y> the type of the second component of the pair
 */
public class Pair<X,Y> extends Tuple2<X, Y>
{
    private static final long serialVersionUID = -2871892357006076659L;
    /**
     * Create a new pair of values.
     * @param x the first component of the pair
     * @param y the second component of the pair
     */
    public Pair(X x, Y y) {
        super(x, y);
    }
    /**
     * Factory equivalent of the constructor; benefits from type inference.
     * @param x the first component of the pair
     * @param y the second component of the pair
     */
    public static <X,Y> Pair<X,Y> make(X x, Y y) {
        return new Pair<X,Y>(x, y);
    }
    /** @return the first component of the pair (alias of {@link #value0()}). */
    public X fst() { return value0(); }
    /** @return the second component of the pair (alias of {@link #value1()}). */
    public Y snd() { return value1(); }
}

View File

@@ -0,0 +1,173 @@
package com.semmle.util.data;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import com.semmle.util.exception.CatastrophicError;
/**
 * Encapsulate the creation of message digests from strings.
 *
 * <p>
 * This class acts as a (partial) output stream, until the <code>getDigest()</code> method is
 * called. After this the class can no longer be used, except to repeatedly call
 * {@link #getDigest()} (or {@link #reset()} to start over).
 *
 * <p>
 * UTF-8 is used internally as the {@link Charset} for this class when converting Strings to bytes.
 */
public class StringDigestor {
    // Charset for all String-to-byte conversions performed by this class.
    private static final Charset UTF8 = Charset.forName("UTF-8");
    // Placeholder digested in place of a null object in write(Object).
    private static final String NULL_STRING = "<null>";
    // Number of chars encoded per writeUTF8 call in writeNoLength.
    private static final int CHUNK_SIZE = 32;
    // The underlying digest; updated incrementally by the write* methods.
    private MessageDigest digest;
    // Non-null once getDigest() has been called; doubles as the "finished" flag.
    private byte[] digestBytes;
    // Scratch buffer shared by writeUTF8 and writeBinaryInt (single-threaded use assumed).
    private final byte[] buf = new byte[CHUNK_SIZE * 3]; // A Java char becomes at most 3 bytes of UTF-8
    /**
     * Create a StringDigestor using SHA-1, ready to accept data
     */
    public StringDigestor() {
        this("SHA-1");
    }
    /**
     * @param digestAlgorithm the algorithm to use in the internal {@link MessageDigest}.
     */
    public StringDigestor(String digestAlgorithm) {
        try {
            digest = MessageDigest.getInstance(digestAlgorithm);
        } catch (NoSuchAlgorithmException e) {
            throw new CatastrophicError("StringDigestor failed to find the required digest algorithm: " + digestAlgorithm, e);
        }
    }
    /**
     * Make this instance reusable again after {@link #getDigest()} has been called.
     * May only be called on a finished digestor.
     */
    public void reset() {
        if (digestBytes == null) throw new CatastrophicError("API violation: Digestor is not finished.");
        digest.reset();
        digestBytes = null;
    }
    /**
     * Write an object into this digestor. This converts the object to a
     * string using toString(), writes the length, and then writes the
     * string itself.
     */
    public StringDigestor write(Object toAppend) {
        String str;
        if (toAppend == null) {
            str = NULL_STRING;
        } else {
            str = toAppend.toString();
        }
        // Length prefix guards against ambiguity between consecutive writes
        // (e.g. "ab"+"c" vs "a"+"bc").
        writeBinaryInt(str.length());
        writeNoLength(str);
        return this;
    }
    /**
     * Write the given string without prefixing it by its length.
     */
    public StringDigestor writeNoLength(Object toAppend) {
        String s = toAppend.toString();
        int len = s.length();
        int i = 0;
        // Encode in CHUNK_SIZE-char slices; writeUTF8 returns the index of the
        // first char it did NOT consume (it may back off one char at a chunk
        // boundary to keep a surrogate pair together).
        while(i + CHUNK_SIZE < len) {
            i = writeUTF8(s, i, i + CHUNK_SIZE);
        }
        writeUTF8(s, i, len);
        return this;
    }
    /**
     * UTF-8-encode s[begin..end) into {@link #buf} and feed it to the digest.
     * @return the index of the first char not consumed (normally {@code end};
     * one less if a high surrogate sits at the chunk boundary).
     */
    private int writeUTF8(String s, int begin, int end) {
        if (digestBytes != null) throw new CatastrophicError("API violation: Digestor is finished.");
        byte[] buf = this.buf;
        int len = 0;
        for(int i = begin; i < end; ++i) {
            int c = s.charAt(i);
            if (c <= 0x7f) {
                // 1-byte sequence (ASCII).
                buf[len++] = (byte)c;
            } else if (c <= 0x7ff) {
                // 2-byte sequence.
                buf[len] = (byte)(0xc0 | (c >> 6));
                buf[len+1] = (byte)(0x80 | (c & 0x3f));
                len += 2;
            } else if (c < 0xd800 || c > 0xdfff) {
                // 3-byte sequence: BMP char outside the surrogate range.
                buf[len] = (byte)(0xe0 | (c >> 12));
                buf[len+1] = (byte)(0x80 | ((c >> 6) & 0x3f));
                buf[len+2] = (byte)(0x80 | (c & 0x3f));
                len += 3;
            } else if (i + 1 < end) {
                // c is a surrogate and at least one more char is available.
                int c2 = s.charAt(i + 1);
                if (c > 0xdbff || c2 < 0xdc00 || c2 > 0xdfff) {
                    // Invalid UTF-16 (lone/misordered surrogate): c is silently dropped.
                } else {
                    // Valid surrogate pair: combine into a code point, emit 4 bytes.
                    c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00);
                    buf[len] = (byte)(0xf0 | (c >> 18));
                    buf[len+1] = (byte)(0x80 | ((c >> 12) & 0x3f));
                    buf[len+2] = (byte)(0x80 | ((c >> 6) & 0x3f));
                    buf[len+3] = (byte)(0x80 | (c & 0x3f));
                    len += 4;
                    ++i;
                }
            } else {
                // Surrogate at the very end of this chunk: back off one char so a
                // potential pair is handled whole in the next call. NB: a lone
                // trailing surrogate at the end of the final chunk is dropped.
                --end;
                break;
            }
        }
        digest.update(buf, 0, len);
        return end;
    }
    /**
     * Write an array of raw bytes to the digestor. This appends the contents
     * of the array to the accumulated data used for the digest.
     */
    public StringDigestor writeBytes(byte[] data) {
        if (digestBytes != null) throw new CatastrophicError("API violation: Digestor is finished.");
        digest.update(data);
        return this;
    }
    /**
     * Return the hex-encoded digest as a {@link String}.
     *
     * Get the digest from the data previously appended using <code>write(Object)</code>.
     * After this is called, the instance's {@link #write(Object)} and {@link #writeBytes(byte[])}
     * methods may no longer be used (until {@link #reset()}).
     */
    public String getDigest() {
        if (digestBytes == null) {
            digestBytes = digest.digest();
        }
        return StringUtil.toHex(digestBytes);
    }
    /** Convenience: SHA-1 digest of {@code o.toString()} with no length prefix. */
    public static String digest(Object o) {
        StringDigestor digestor = new StringDigestor();
        digestor.writeNoLength(o);
        return digestor.getDigest();
    }
    /** Compute a git-style SHA for the given string. */
    public static String gitBlobSha(String content) {
        // Git hashes "blob <byte-length>\0<content>"; the length is the UTF-8 byte count.
        byte[] bytes = content.getBytes(UTF8);
        return digest("blob " + bytes.length + "\0" + content);
    }
    /**
     * Convert an int to a byte[4] using its little-endian 32bit representation, and append the
     * resulting bytes to the accumulated data used for the digest.
     */
    public StringDigestor writeBinaryInt(int i) {
        if (digestBytes != null) throw new CatastrophicError("API violation: Digestor is finished.");
        byte[] buf = this.buf;
        buf[0] = (byte)(i & 0xff);
        buf[1] = (byte)((i >>> 8) & 0xff);
        buf[2] = (byte)((i >>> 16) & 0xff);
        buf[3] = (byte)((i >>> 24) & 0xff);
        digest.update(buf, 0, 4);
        return this;
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,106 @@
package com.semmle.util.data;
import java.io.Serializable;
/**
 * Tuple of one typed element.
 * <p>
 * Note that this is a sub-class of {@link TupleN} and a super-class of {@link Tuple2},
 * {@link Tuple3}, and any subsequent extensions in a similar vein.
 * </p>
 */
public class Tuple1 <Type0> extends TupleN
{
    /**
     * Serializable variant of {@link Tuple1}.
     */
    public static class SerializableTuple1<T0 extends Serializable>
        extends Tuple1<T0> implements Serializable {
        private static final long serialVersionUID = -7989122667707773448L;
        public SerializableTuple1() {
        }
        public SerializableTuple1(T0 t0) {
            super(t0);
        }
    }
    private static final long serialVersionUID = -4317563803154647477L;
    /** The single contained value. */
    protected Type0 _value0;
    /** Construct a new {@link Tuple1} with a null value. */
    public Tuple1 () {}
    /** Construct a new {@link Tuple1} with the given value. */
    public Tuple1 (Type0 value0)
    {
        _value0 = value0;
    }
    /** Construct a new {@link Tuple1} with the given value. */
    public static <Type0> Tuple1<Type0> make(Type0 value0)
    {
        return new Tuple1<Type0>(value0);
    }
    /**
     * Get the value contained by this {@link Tuple1}.
     */
    public final Type0 value0 ()
    {
        return _value0;
    }
    // Returns the single value regardless of n; relies on the range check in
    // TupleN.value(int) to only pass in-range indices.
    // NOTE(review): TupleN.value currently admits n == size(), so value(1) on a
    // Tuple1 also yields this value rather than null -- confirm intended.
    @Override
    protected Object value_ (int n)
    {
        return _value0;
    }
    /**
     * Return the number of elements in this {@link Tuple1}.
     * <p>
     * Sub-classes shall override this method to increase its value accordingly.
     * </p>
     */
    @Override
    public int size ()
    {
        return 1;
    }
    /**
     * Return a plain string representation of the contained value (where null is represented by the
     * empty string).
     * <p>
     * Sub-classes shall implement a comma-separated concatenation.
     * </p>
     */
    @Override
    public String toPlainString ()
    {
        return _value0 == null ? "" : _value0.toString();
    }
    @Override
    public int hashCode ()
    {
        final int prime = 31;
        int result = super.hashCode();
        result = prime * result + ((_value0 == null) ? 0 : _value0.hashCode());
        return result;
    }
    // Structural equality: super.equals checks the runtime classes match exactly,
    // so the cast below is safe.
    @Override
    public boolean equals (Object obj)
    {
        return obj == this || (super.equals(obj) && equal(((Tuple1<?>)obj)._value0, _value0));
    }
}

View File

@@ -0,0 +1,93 @@
package com.semmle.util.data;
import java.io.Serializable;
/**
 * Tuple of two typed elements.
 * <p>
 * Note that this is an extension of {@link Tuple1} and a super-class of {@link Tuple3} (and any
 * subsequent additions).
 * </p>
 */
public class Tuple2 <Type0, Type1> extends Tuple1<Type0>
{
    /**
     * Serializable variant of {@link Tuple2}.
     */
    public static class SerializableTuple2<T0 extends Serializable, T1 extends Serializable>
        extends Tuple2<T0, T1> implements Serializable {
        private static final long serialVersionUID = 1624467154864321244L;
        public SerializableTuple2() {
        }
        public SerializableTuple2(T0 t0, T1 t1) {
            super(t0, t1);
        }
    }
    private static final long serialVersionUID = -400406676673562583L;
    /** The additional element contained by this {@link Tuple2}. */
    protected Type1 _value1;
    /** Construct a new {@link Tuple2} with null values. */
    public Tuple2 () {}
    /** Construct a new {@link Tuple2} with the given values. */
    public Tuple2 (Type0 value0, Type1 value1)
    {
        super(value0);
        _value1 = value1;
    }
    /** Construct a new {@link Tuple2} with the given value. */
    public static <Type1, Type2> Tuple2<Type1, Type2> make(Type1 value0, Type2 value1)
    {
        return new Tuple2<Type1,Type2>(value0, value1);
    }
    /**
     * Get the second value in this {@link Tuple2}.
     */
    public final Type1 value1 ()
    {
        return _value1;
    }
    @Override
    protected Object value_ (int n)
    {
        // Bug fix: TupleN.value documents n as zero-based, so the second
        // component lives at index 1 (the previous test for n == 2 made
        // value(1) return the *first* component via super.value_).
        return n == 1 ? _value1 : super.value_(n);
    }
    @Override
    public int size ()
    {
        return 2;
    }
    @Override
    public String toPlainString ()
    {
        return super.toPlainString() + ", " + (_value1 == null ? "" : _value1.toString());
    }
    @Override
    public int hashCode ()
    {
        final int prime = 31;
        int result = super.hashCode();
        result = prime * result + ((_value1 == null) ? 0 : _value1.hashCode());
        return result;
    }
    @Override
    public boolean equals (Object obj)
    {
        return obj == this || (super.equals(obj) && equal(((Tuple2<?,?>)obj)._value1, _value1));
    }
}

View File

@@ -0,0 +1,85 @@
package com.semmle.util.data;
import java.io.Serializable;
/**
 * Untyped base-class for the generic {@link Tuple1}, {@link Tuple2}, ... <i>etc.</i>
 * <p>
 * This class also functions as a zero-element tuple.
 * </p>
 */
public class TupleN implements Serializable
{
    private static final long serialVersionUID = -1799116497122427806L;
    /**
     * Get the n'th value contained by this {@link TupleN}.
     *
     * @param n The zero-based index of the value to be returned.
     * @return The n'th value, or null if n is out of range.
     */
    public final Object value (int n)
    {
        // Valid zero-based indices are 0 .. size()-1. Bug fix: the previous
        // check (n > size()) let n == size() slip through to value_(),
        // returning a value for an out-of-range index instead of null.
        return n < 0 || n >= size() ? null : value_(n);
    }
    /** Internal method for obtaining the n'th value (n is guaranteed to be in-range). */
    protected Object value_ (int n)
    {
        return null;
    }
    /**
     * Get the number of values contained by this {@link TupleN}.
     */
    public int size ()
    {
        return 0;
    }
    /**
     * Return a plain string representation of the contained value (where null is represented by the
     * empty string).
     * <p>
     * Sub-classes shall implement a comma-separated concatenation.
     * </p>
     */
    public String toPlainString ()
    {
        return "";
    }
    /**
     * Get a parenthesized, comma-separated string representing the values contained by this
     * {@link TupleN}. Null values are represented by an empty string.
     */
    @Override
    public final String toString ()
    {
        return "(" + toPlainString() + ")";
    }
    @Override
    public int hashCode ()
    {
        return 0;
    }
    // Two tuples are equal only if their runtime classes match exactly;
    // subclasses extend this with per-component comparisons.
    @Override
    public boolean equals (Object obj)
    {
        return obj == this || (obj != null && obj.getClass().equals(getClass()));
    }
    /**
     * Convenience method implementing objects.equals(object, object), which is not available due to a
     * java version restriction.
     */
    protected static boolean equal(Object obj1, Object obj2)
    {
        if (obj1 == null) {
            return obj2 == null;
        }
        return obj1.equals(obj2);
    }
}

View File

@@ -0,0 +1,117 @@
package com.semmle.util.exception;
import java.util.Arrays;
/**
 * This is a standard Semmle unchecked exception.
 * Usage of this should follow the guidelines described in docs/semmle-unchecked-exceptions.md
 */
public class CatastrophicError extends NestedError {
    private static final long serialVersionUID = 4132771414092814913L;
    /** Create a CatastrophicError with a message only. */
    public CatastrophicError(String message) {
        super(message);
    }
    /** Create a CatastrophicError wrapping an underlying cause. */
    public CatastrophicError(Throwable throwable) {
        super(throwable);
    }
    /** Create a CatastrophicError with both a message and an underlying cause. */
    public CatastrophicError(String message, Throwable throwable) {
        super(message,throwable);
    }
    /**
     * Utility method for throwing a {@link CatastrophicError} with the given {@code message} if the given
     * {@code condition} is true.
     */
    public static void throwIf(boolean condition, String message)
    {
        if (condition) {
            throw new CatastrophicError(message);
        }
    }
    /**
     * Utility method for throwing a {@link CatastrophicError} if the given {@code object} is null.
     * <p>
     * See {@link #throwIfAnyNull(Object...)} which may be more convenient for checking multiple
     * arguments.
     * </p>
     */
    public static void throwIfNull(Object object)
    {
        if (object == null) {
            throw new CatastrophicError("null object");
        }
    }
    /**
     * Utility method for throwing a {@link CatastrophicError} with the given {@code message} if the given
     * {@code object} is null.
     * <p>
     * See {@link #throwIfAnyNull(Object...)} which may be more convenient for checking multiple
     * arguments.
     * </p>
     */
    public static void throwIfNull (Object object, String message)
    {
        if (object == null) {
            throw new CatastrophicError(message);
        }
    }
    /**
     * Throw a {@link CatastrophicError} if any of the given {@code objects} is null.
     * <p>
     * If a {@link CatastrophicError} is thrown, its message will indicate <i>all</i> null arguments by index.
     * </p>
     * <p>
     * See {@link #throwIfNull(Object, String)} which may be a fraction more efficient if there's only
     * one argument, and allows an 'optional' message parameter.
     * </p>
     */
    public static void throwIfAnyNull (Object ... objects)
    {
        /*
         * Check each argument for nullity, and start building a set of index strings iff at least one
         * is non-null
         */
        String[] nullArgs = null;
        for (int argNum = 0; argNum < objects.length; ++argNum) {
            if (objects[argNum] == null) {
                nullArgs = nullArgs == null ? new String[1] : Arrays.copyOf(nullArgs, nullArgs.length+1);
                nullArgs[nullArgs.length-1] = "" + argNum;
            }
        }
        if (nullArgs != null) {
            // Compose a message describing which arguments are null. nullArgs is
            // only non-null when it has at least one entry, so the previous
            // length == 0 branch was unreachable and has been removed.
            // (StringBuffer replaced with the unsynchronized StringBuilder.)
            StringBuilder strBuf = new StringBuilder();
            strBuf.append("null argument" + (nullArgs.length > 1 ? "s: " : ": ") + nullArgs[0]);
            for (int i = 1; i < nullArgs.length; ++i) {
                strBuf.append(", " + nullArgs[i]);
            }
            throw new CatastrophicError(strBuf.toString());
        }
    }
    /**
     * Convenience method for use in constructors that assign a parameter to a
     * field, assuming the former to be non-null.
     *
     * @param t A non-null value of type {@code T}.
     * @return {@code t}
     * @throws CatastrophicError if {@code t} is null.
     * @see #throwIfNull(Object)
     */
    public static <T> T nonNull(T t) {
        throwIfNull(t);
        return t;
    }
}

View File

@@ -0,0 +1,120 @@
package com.semmle.util.exception;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Simple functions for printing exceptions. This is intended for use
 * in debug output, not for formatting for user consumption
 */
public class Exceptions {
    /**
     * Compose a String with the same format as that output by {@link Throwable#printStackTrace()}.
     */
    public static String printStackTrace(Throwable t)
    {
        StringWriter sw = new StringWriter();
        t.printStackTrace(new PrintWriter(sw));
        return sw.toString();
    }
    /**
     * Print an exception in a readable format with all information,
     * including the type, message, stack trace, and nested exceptions
     */
    public static String print(Throwable t) {
        return printDetailed(t, true);
    }
    /**
     * Print an exception in a somewhat readable format fitting on one line.
     * Most of the time simply using <code>print</code> is preferable
     */
    public static String printShort(Throwable t) {
        return printDetailed(t, false);
    }
    /**
     * Ignore an exception. This method does nothing, but should be called
     * (with a reasonable message) to document the reason why the exception does
     * not need to be used.
     */
    public static void ignore(Throwable e, String message) {
    }
    /**
     * Print an exception in a long format, possibly producing multiple
     * lines if the appropriate flag is passed
     * @param multiline if <code>true</code>, produce multiple lines of output
     */
    private static String printDetailed(Throwable t, boolean multiline) {
        StringBuilder out = new StringBuilder();
        // Walk the cause chain; self-referential causes terminate the loop.
        for (Throwable current = t; current != null; ) {
            printOneException(current, multiline, out);
            Throwable cause = current.getCause();
            current = (cause == current) ? null : cause;
            if (current != null)
                out.append(multiline ? "\n\n ... caused by:\n\n" : ", caused by: ");
        }
        return out.toString();
    }
    // Append a single throwable (header plus stack frames) to sb in the chosen format.
    private static void printOneException(Throwable t, boolean multiline, StringBuilder sb) {
        String header = t.toString();
        if (!multiline)
            header = header.replace('\n', ' ').replace('\r', ' ');
        sb.append(header);
        StackTraceElement[] frames = t.getStackTrace();
        for (int i = 0; i < frames.length; ++i) {
            sb.append(i == 0 ? (multiline ? "\n" : " - [") : (multiline ? "\n" : ", "));
            sb.append(frames[i].toString());
        }
        if (!multiline)
            sb.append("]");
    }
    /** A stand-in replacement for `assert` that throws a {@link CatastrophicError} and isn't compiled out. */
    public static void assertion(boolean cond, String message) {
        if (!cond)
            throw new CatastrophicError(message);
    }
    /**
     * Turn the given {@link Throwable} into a {@link RuntimeException} by wrapping it if necessary.
     */
    public static RuntimeException asUnchecked(Throwable t) {
        return (t instanceof RuntimeException)
            ? (RuntimeException) t
            : new RuntimeException(t);
    }
    /**
     * Throws an arbitrary {@link Throwable}, wrapping in a runtime exception if necessary.
     * Unlike {@link #asUnchecked} it preserves subclasses of {@link Error}.
     */
    public static <T> T rethrowUnchecked(Throwable t) {
        if (t instanceof RuntimeException)
            throw (RuntimeException) t;
        if (t instanceof Error)
            throw (Error) t;
        throw new RuntimeException(t);
    }
}

View File

@@ -0,0 +1,26 @@
package com.semmle.util.exception;
/**
 * An exception thrown in cases where it is impossible to
 * throw the (checked) Java {@link InterruptedException},
 * eg. in visitors
 */
public class InterruptedError extends RuntimeException {
    private static final long serialVersionUID = 9163340147606765395L;
    /** Create an InterruptedError with neither message nor cause. */
    public InterruptedError() { }
    /** Create an InterruptedError carrying only a message. */
    public InterruptedError(String message) {
        super(message);
    }
    /** Create an InterruptedError carrying only an underlying cause. */
    public InterruptedError(Throwable cause) {
        super(cause);
    }
    /** Create an InterruptedError with both a message and an underlying cause. */
    public InterruptedError(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,47 @@
package com.semmle.util.exception;
/**
 * Base class for Semmle unchecked exceptions. When constructed with both a message
 * and a cause, the message is augmented with a one-line banner describing the root
 * cause, so users see the underlying reason without needing the full stack trace.
 */
public abstract class NestedError extends RuntimeException {
    private static final long serialVersionUID = -3145876396931008989L;
    public NestedError(String message) {
        super(message);
    }
    public NestedError(Throwable throwable) {
        super(throwable);
    }
    public NestedError(String message, Throwable throwable) {
        super(buildMessage(message, throwable), throwable);
    }
    /**
     * Subclasses should not need to call this directly -- just call the
     * two-argument super constructor.
     *
     * Appends an "eventual cause" banner (derived from the root cause of
     * {@code throwable}) to {@code message}, unless the message already contains it.
     */
    private static String buildMessage(String message, Throwable throwable) {
        if (throwable == null)
            return message;
        // Walk to the root cause; the self-reference check guards against cycles.
        while (throwable.getCause() != null && throwable.getCause() != throwable)
            throwable = throwable.getCause();
        String banner = "eventual cause: " + throwable.getClass().getSimpleName();
        String rootmsg = throwable.getMessage();
        if (rootmsg != null) {
            // Keep only the first line of the root message, truncated for brevity.
            int p = rootmsg.indexOf('\n');
            if (p >= 0)
                rootmsg = rootmsg.substring(0, p) + "...";
            if (rootmsg.length() > 100)
                rootmsg = rootmsg.substring(0, 80) + "...";
            banner += " \"" + rootmsg + "\"";
        }
        // Bug fix: a null message previously caused an NPE at message.contains(banner)
        // below; degrade gracefully by reporting just the banner.
        if (message == null)
            return "(" + banner + ")";
        if (message.contains(banner))
            return message;
        return message + "\n(" + banner + ")";
    }
}

View File

@@ -0,0 +1,30 @@
package com.semmle.util.exception;
/**
 * This is a standard Semmle unchecked exception.
 * Usage of this should follow the guidelines described in docs/semmle-unchecked-exceptions.md
 */
public class ResourceError extends NestedError {
    private static final long serialVersionUID = 4132771414092814913L;
    /** Create a ResourceError carrying the given user-facing message. */
    public ResourceError(String message) {
        super(message);
    }
    /** Create a ResourceError with both a message and an underlying cause. */
    public ResourceError(String message, Throwable throwable) {
        super(message, throwable);
    }
    @Deprecated // A ResourceError may be presented to the user, so should always have a message
    public ResourceError(Throwable throwable) {
        super(throwable);
    }
    @Override
    public String toString() {
        // The message here should always be meaningful enough that we can return that.
        String message = getMessage();
        return message == null ? super.toString() : message;
    }
}

View File

@@ -0,0 +1,46 @@
package com.semmle.util.exception;
/**
 * This is a standard Semmle unchecked exception.
 * Usage of this should follow the guidelines described in docs/semmle-unchecked-exceptions.md
 */
public class UserError extends NestedError {
    private static final long serialVersionUID = 4132771414092814913L;
    // When true, the message is informational rather than a fatal error.
    private final boolean reportAsInfoMessage;
    /** Create a user-visible error reported as a fatal error. */
    public UserError(String message) {
        this(message, false);
    }
    /**
     * A user-visible error
     *
     * @param message The message to display
     * @param reportAsInfoMessage If <code>true</code>, report as information only - not an error
     */
    public UserError(String message, boolean reportAsInfoMessage) {
        super(message);
        this.reportAsInfoMessage = reportAsInfoMessage;
    }
    /** Create a user-visible error with an underlying cause; always fatal. */
    public UserError(String message, Throwable throwable) {
        super(message, throwable);
        this.reportAsInfoMessage = false;
    }
    /**
     * If <code>true</code>, report the message without interpreting it as a fatal error
     */
    public boolean reportAsInfoMessage() {
        return reportAsInfoMessage;
    }
    @Override
    public String toString() {
        // The message here should always be meaningful enough that we can return that.
        String message = getMessage();
        return message == null ? super.toString() : message;
    }
}

View File

@@ -0,0 +1,893 @@
package com.semmle.util.expansion;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.process.Builder;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
import com.semmle.util.process.LeakPrevention;
/**
* An environment for performing variable expansions.
*
* <p>
* The environment is defined by a set of variable definitions, which are
* name/value pairs of strings. Once this has been populated (via the
* {@link #defineVar(String, String)} and {@link #defineVars(Map)} methods),
* arbitrary strings can be expanded.
* </p>
*
* <p>
* Two modes of expansion are supported:
* </p>
* <ul>
* <li>String mode ({@link #strExpand(String)}): The result is intended to be a
* single string.</li>
* <li>List mode ({@link #listExpand(String)}): The result will be interpreted
* as a command line, and hence is a list of strings.
* </ul>
*
* <p>
* Variables are referenced by <code>${name}</code> to trigger a string-mode
* expansion, and by <code>${=name}</code> to trigger a list-mode expansion.
* This makes {@code $} a meta-character, and so it has to be escaped; the
* escape sequence for it is <code>${}</code>.
* </p>
*
* <p>
* In list mode, strings are split in a platform-independent way similar (but
* not identical) to normal shell argument splitting. Runs of white-space
* separate arguments, and double-quotes can be used to protect whitespace from
* splitting. The escape character is backslash. All of these metacharacters
* have no special meaning in string mode.
* </p>
*
* <p>
* The {@code define*} and {@link #doNotExpand(String...)} methods of this
* class are not thread-safe; they mutate instance state in an unsynchronized
* way. By contrast, the expansion methods ({@link #strExpand(String)},
* {@link #strExpandVar(String)}, {@link #listExpand(String)},
* {@link #listExpandVar(String)} and {@link #varLookup(String)})
* are thread safe relative to each
* other. This means that it's fine to construct an expansion environment once,
* and then use it from multiple threads concurrently, as long as no new variables
* are defined. In addition, {@link #validate(String)} is safe to call once an
* {@link ExpansionEnvironment} is fully initialised, even concurrently.
* </p>
*
* <p>
* Upon encountering any error (malformed variable expansion, malformed quoted
* string (in list mode), reference to unknown variable, cyclic variable
* definitions), the {@link #strExpand(String)} and {@link #listExpand(String)}
* methods will throw {@link UserError} with a suitable message.
* </p>
*
* <p>
* As an advanced feature, command substitutions can be supported. They take the
* form of <code>$(cmd arg1 arg2)</code> for string-mode expansion, and
* <code>$(=cmd arg1
* arg2)</code> for list-mode. The contents of the <code>$(..)</code> operator
* undergo normal splitting, and are then run as a new process with the given
* list of arguments. The working directory is unspecified, and it is an error
* to depend upon it. A non-zero exit code, or a non-empty {@code stderr} stream
* of the command, will result in a {@link UserError} indicating that something
* went wrong; otherwise, the {@code stdout} output is collected and substituted
* (possibly undergoing splitting, in the second form).
* </p>
*/
public class ExpansionEnvironment {
/**
 * A source for variable definitions to be used in an expansion environment.
 */
public static interface VariableSource {
/**
 * A callback which is expected to add all variables in the source to
 * the given environment.
 *
 * @param env
 * The environment that should be filled in.
 */
public void fillIn(ExpansionEnvironment env);
}
// Variable name -> raw (unexpanded) value. Insertion order is preserved so
// error messages and iteration are deterministic.
private final Map<String, String> vars = new LinkedHashMap<String, String>();
// Names whose references are left verbatim in the output (see doNotExpand).
private final Set<String> unexpandedVars = new LinkedHashSet<String>();
// Whether $(cmd ...) command substitutions are honoured; see the class docs
// for the security implications of enabling this.
private final boolean commandSubstitutions;
/**
 * Construct an empty {@link ExpansionEnvironment}.
 */
public ExpansionEnvironment(boolean commandSubstitutions) {
this.commandSubstitutions = commandSubstitutions;
}
/**
 * This the old default constructor, which always enables command substitutions.
 * <b>Doing so is a security risk</b> whenever the string you expand may come
 * from an untrusted source, so you should only do that when you explicitly want
 * to do it and have decided that it is safe. (And then use the constructor that
 * has an explicit argument to say so!)
 */
@Deprecated
public ExpansionEnvironment() {
this(true);
}
/**
 * Construct an environment based on an existing map.
 */
public ExpansionEnvironment(boolean commandSubstitutions, Map<String, String> vars) {
this(commandSubstitutions);
this.vars.putAll(vars);
}
/**
 * Construct a copy of an existing {@link ExpansionEnvironment}.
 */
public ExpansionEnvironment(ExpansionEnvironment other) {
this(other.commandSubstitutions);
this.vars.putAll(other.vars);
this.unexpandedVars.addAll(other.unexpandedVars);
}
/**
 * Add a set of variable definitions to this environment.
 *
 * @param vars
 * A mapping from variable names to variable values. Recursive
 * variable references are allowed, but cycles are an error.
 */
public void defineVars(Map<String, String> vars) {
this.vars.putAll(vars);
}
/**
 * Add the specified variable definition to this environment.
 *
 * @param name
 * A variable name.
 * @param value
 * The value that the variable should expand to. References to
 * other variables or expansions are allowed, but cycles are an
 * error.
 */
public void defineVar(String name, String value) {
this.vars.put(name, value);
}
/**
 * Try to load a file as a Java properties file and add all of its key/value
 * pairs as variable definitions.
 *
 * @param vars
 * A {@link File} that will be loaded as a Java properties file,
 * if it exists. May be <code>null</code> or a file whose
 * existence has not been checked.
 * @throws ResourceError
 * if the file exists but can't be read, or exists as a
 * directory, or reading it fails.
 */
public void defineVarsFromFile(File vars) {
if (vars == null || !vars.exists())
return;
if (vars.isDirectory())
throw new ResourceError(vars
+ " is a directory, cannot load variables from it.");
Properties properties = FileUtil.loadProperties(vars);
for (String key : properties.stringPropertyNames())
defineVar(key, properties.getProperty(key));
}
/**
 * Add a variable definition of {@code env.foo=bar} for each system
 * environment variable {@code foo=bar}. Typically it is desirable to allow
 * the environment to override previously specified variables, so this
 * should be called once all other variables have been defined.
 *
 * <p>
 * The values of variables taken from the environment are escaped to prevent
 * recursive expansion; in particular, this prevents accidental command
 * execution if a command substitution is encountered in the environment.
 * </p>
 */
public void defineVarsFromEnvironment(Env environment) {
String extraVars = environment.get(Var.ODASA_EXTRA_VARIABLES);
if (extraVars != null)
defineVarsFromFile(new File(extraVars));
// "${}" is the escape sequence for a literal '$', so this neutralises any
// expansion syntax occurring in environment values.
for (Entry<String, String> var : environment.getenv().entrySet())
defineVar("env." + var.getKey(), var.getValue().replace("$", "${}"));
environment.addEnvironmentToNewEnv(this);
}
/**
 * Indicate that references to the given set of variable names should not be
 * expanded. This means that they need not be defined, and the output will
 * contain the literal variable expansion sequences.
 *
 * @param vars
 * A list of variable names.
 */
public void doNotExpand(String... vars) {
for (String var : vars)
unexpandedVars.add(var);
}
/**
 * Supply a "default value" for a variable, meaning that the variable will
 * be set to the given default value if it hasn't already been defined. No
 * change is made to this environment if a definition exists.
 * @param var A variable name.
 * @param defaultValue The default value for the named variable.
 */
public void setDefault(String var, String defaultValue) {
if (!vars.containsKey(var))
vars.put(var, defaultValue);
}
/**
 * Expand the given string in "string mode", resolving variable references
 * and command substitutions.
 */
public String strExpand(String s) {
try {
return new Expander().new ExpansionParser(s).parseAsString().expandAsString();
} catch (UserError e) {
throw new UserError("Failed to expand '" + s + "'.", e);
}
}
/**
 * Expand the given string in "list mode", resolving variable references and
 * command substitutions.
 */
public List<String> listExpand(String s) {
try {
return new Expander().new ExpansionParser(s).parseAsList().expandAsList();
} catch (UserError e) {
throw new UserError("Failed to expand '" + s
+ "' as an argument list.", e);
}
}
/**
 * Expand the given variable fully in "string mode", resolving variable
 * references and command substitutions. The entire string is interpreted as
 * the name of the initial variable.
 */
public String strExpandVar(String varName) {
return new Expander().new Variable(varName).expandAsString();
}
/**
 * Expand the given variable fully in "list mode", resolving variable
 * references and command substitutions. The entire string is interpreted as
 * the name of the initial variable.
 */
public List<String> listExpandVar(String varName) {
return new Expander().new SplitVariable(varName).expandAsList();
}
/**
 * Validate the given string for expansion. This verifies the absence of
 * parse errors, and the fact that all directly referenced variables are
 * defined by this environment.
 *
 * <p>
 * Expansion using {@link #strExpand(String)} or {@link #listExpand(String)}
 * may still not succeed, if there are semantic errors (like circular
 * variable definitions) or a command substitution introduces a reference to
 * an undefined variable.
 * </p>
 *
 * @param str
 * A string that should be validated.
 * @throws UserError
 * if validation fails, with a suitable error message.
 */
public void validate(String str) {
new Expander().new ExpansionParser(str).parseAsList().validate();
}
/**
 * Look up the (raw) value of a given variable, without performing expansion
 * on it.
 *
 * @param name
 * The variable name.
 * @return The value that this variable is mapped to.
 * @throws UserError
 * if the variable is not defined.
 */
public synchronized String varLookup(String name) {
String value = vars.get(name);
if (value == null) {
// Sort the variable names so the error message is stable and scannable.
ArrayList<String> available = new ArrayList<String>(vars.keySet());
Collections.sort(available);
throw new UserError("Attempting to expand unknown variable: "
+ name + ", available variables are: " + available);
}
return value;
}
/**
 * Check whether this environment defines a variable of the given name, without
 * performing expansion on it -- such full expansion may still fail.
 *
 * @param name The variable name.
 * @return <code>true</code> if this environment contains a direct definition
 * for the given variable name.
 */
public boolean definesVar(String name) {
return vars.containsKey(name);
}
// Splits an input string into a sequence of delimiter and non-delimiter
// tokens, remembering each token's start offset for error reporting.
private static class ExpansionTokeniser {
/**
 * The delimiters which should be returned as their own tokens. Order of
 * alternatives matters! The recognised tokens are, in order:
 *
 * <ul>
 * <li>{@code \\}</li>
 * <li>{@code \"}</li>
 * <li>{@code "}</li>
 * <li><code>${}</code></li>
 * <li><code>${=</code></li>
 * <li><code>${</code></li>
 * <li><code>$(=</code></li>
 * <li><code>$(</code></li>
 * <li><code>$</code></li>
 * <li><code>}</code></li>
 * <li><code>)</code></li>
 * <li>Runs of whitespace.</li>
 * </ul>
 *
 * <p>
 * By defining the alternatives in this order, longer matches will be
 * preferred, so that checking for escape sequences is easy. Note that
 * in the regular expression source, a literal {@code \} must undergo
 * two levels of escaping: Java strings and regular expression
 * metacharacters; it thus becomes {@code \\\\}.
 * </p>
 */
private static final Pattern delims = Pattern
.compile("\\\\\\\\|\\\\\"|\"|\\$\\{\\}|\\$\\{=|\\$\\{|"
+ "\\$\\(=|\\$\\(|\\$|\\}|\\)|\\s+");
private final List<String> tokens = new ArrayList<String>();
// Start offset of tokens[i] within the original input string.
private final int[] positions;
private int nextToken = 0;
public ExpansionTokeniser(String str) {
// Walk through the delimiter matches; text between matches becomes
// literal tokens, and each delimiter match becomes its own token.
Matcher matcher = delims.matcher(str);
StringBuffer tmp = new StringBuffer();
while (matcher.find()) {
matcher.appendReplacement(tmp, "");
if (tmp.length() > 0) {
tokens.add(tmp.toString());
tmp = new StringBuffer();
}
tokens.add(matcher.group());
}
matcher.appendTail(tmp);
if (tmp.length() > 0)
tokens.add(tmp.toString());
positions = new int[tokens.size()];
int pos = 0;
for (int i = 0; i < tokens.size(); i++) {
positions[i] = pos;
pos += tokens.get(i).length();
}
}
public boolean hasMoreTokens() {
return nextToken < tokens.size();
}
public String nextToken() {
return tokens.get(nextToken++);
}
public boolean isDelimiter(String token) {
return delims.matcher(token).matches();
}
// 1-based position of the most recently returned token, for error messages.
public int pos() {
return positions[nextToken - 1] + 1;
}
}
/**
 * A wrapper around the various expansion classes, holding some expansion
 * state to detect things like circular variable definitions.
 */
private class Expander {
// Names of variables currently being expanded on the call stack; a repeat
// indicates a circular definition.
private final Set<String> expansionsInProgress = new LinkedHashSet<String>();
/**
 * A string expansion. This can be a literal string, a variable reference or
 * a command substitution; the latter two can optionally be "split". Each
 * expansion can be interpreted to yield a single string or a list of
 * strings (typically as program arguments).
 */
abstract class Expansion {
public abstract String expandAsString();
public abstract List<String> expandAsList();
public abstract void validate();
}
// A sequence of whitespace-separated "words", each word being a list of
// adjacent expansions that are concatenated without separators.
class Sentence extends Expansion {
private final List<List<Expansion>> words = new ArrayList<List<Expansion>>();
public Sentence(List<List<Expansion>> words) {
this.words.addAll(words);
}
@Override
public void validate() {
for (List<Expansion> expansions : words)
for (Expansion expansion : expansions)
expansion.validate();
}
private String expandWord(List<Expansion> word) {
StringBuilder result = new StringBuilder();
for (Expansion e : word)
result.append(e.expandAsString());
return result.toString();
}
@Override
public String expandAsString() {
StringBuilder result = new StringBuilder();
for (List<Expansion> word : words) {
if (result.length() > 0)
result.append(' ');
result.append(expandWord(word));
}
return result.toString();
}
@Override
public List<String> expandAsList() {
List<String> result = new ArrayList<String>();
for (List<Expansion> word : words) {
List<List<String>> segments = new ArrayList<List<String>>();
for (Expansion e : word) {
segments.add(e.expandAsList());
}
result.addAll(glue(segments));
}
return result;
}
/**
 * This is a non-quadratic implementation of the following Haskell code:
 *
 * <pre>
 * <code>
 * glue :: [[String]] -&gt; [String]
 * glue = foldr join []
 * where join [] xs = xs
 * join xs [] = xs
 * join xs ys = init xs ++ [last xs ++ head ys] ++ tail ys
 * </code>
 * </pre>
 */
private List<String> glue(List<List<String>> segments) {
String trailingWord = null;
List<String> result = new ArrayList<String>();
for (List<String> segment : segments)
trailingWord = glue_join_accum(result, segment, trailingWord);
if (trailingWord != null)
result.add(trailingWord);
return result;
}
// Append one segment to the accumulated result, joining the segment's first
// element onto the pending trailing word; returns the new trailing word
// (i.e. the last element of the segment, which may yet be joined further).
private String glue_join_accum(List<String> result,
List<String> segment, String trailingWord) {
int n = segment.size();
switch (n) {
case 0:
return trailingWord;
case 1:
return combine(trailingWord, segment.get(0));
default:
result.add(combine(trailingWord, segment.get(0)));
result.addAll(segment.subList(1, n - 1));
return segment.get(n - 1);
}
}
private String combine(String a, String b) {
if (a == null)
return b;
return a + b;
}
}
// A fixed string that expands to itself.
class Literal extends Expansion {
private final String value;
public Literal(String value) {
this.value = value;
}
@Override
public void validate() {
// Always valid.
}
@Override
public String expandAsString() {
return value;
}
@Override
public List<String> expandAsList() {
return Collections.singletonList(value);
}
}
// A double-quoted region: expands like a Sentence, but in list mode the
// whole region always produces a single element (quoting inhibits splitting).
class QuotedString extends Sentence {
public QuotedString(List<Expansion> content) {
super(Collections.singletonList(content));
}
@Override
public List<String> expandAsList() {
return Collections.singletonList(this.expandAsString());
}
}
// A ${name} reference; the variable's value is itself parsed and expanded.
class Variable extends Expansion {
protected final String name;
public Variable(String name) {
this.name = name;
}
@Override
public void validate() {
varLookup(name); // Will throw if variable is undefined.
}
protected void startExpanding(String name) {
if (!expansionsInProgress.add(name))
throw new UserError("Circular expansion of variable " + name);
}
protected void doneWith(String name) {
if (!expansionsInProgress.remove(name))
throw new CatastrophicError("Not currently expanding " + name);
}
// The literal source form of this reference, used when the variable is
// in the do-not-expand set.
protected String ref() {
return "${" + name + "}";
}
@Override
public final String expandAsString() {
if (unexpandedVars.contains(name))
return ref();
startExpanding(name);
String result = expandAsStringImpl();
doneWith(name);
return result;
}
public String expandAsStringImpl() {
// Not calling ExpansionEnvironment.strExpand(), since
// we must run in the same enclosing instance of Expander.
return new ExpansionParser(varLookup(name)).parseAsString().expandAsString();
}
@Override
public final List<String> expandAsList() {
if (unexpandedVars.contains(name))
return Collections.singletonList(ref());
startExpanding(name);
List<String> result = expandAsListImpl();
doneWith(name);
return result;
}
public List<String> expandAsListImpl() {
return Collections.singletonList(expandAsStringImpl());
}
}
// A ${=name} reference: the variable's value undergoes list-mode splitting.
class SplitVariable extends Variable {
public SplitVariable(String name) {
super(name);
}
@Override
protected String ref() {
return "${=" + name + "}";
}
@Override
public String expandAsStringImpl() {
return StringUtil.glue(" ", expandAsListImpl());
}
@Override
public List<String> expandAsListImpl() {
return listExpand(varLookup(name));
}
}
// A $(cmd ...) command substitution: runs the command and substitutes its
// stdout. A non-zero exit code or any stderr output is treated as failure.
class Command extends Expansion {
private final Sentence argv;
public Command(List<List<Expansion>> args) {
this.argv = new Sentence(args);
}
@Override
public void validate() {
argv.validate();
}
protected String run() {
List<String> args = argv.expandAsList();
ByteArrayOutputStream result = new ByteArrayOutputStream();
ByteArrayOutputStream err = new ByteArrayOutputStream();
Builder builder = new Builder(args, result, err);
builder.setLeakPrevention(LeakPrevention.ALL);
try {
int exitCode = builder.execute();
if (exitCode != 0)
throw new UserError("Exit code " + exitCode
+ " from command "
+ builder.toString());
if (err.size() > 0)
throw new UserError("Command \""
+ builder.toString()
+ "\" produced output on stderr: " + err.toString());
} catch (RuntimeException e) {
throw new UserError("Could not execute command "
+ builder.toString(), e);
}
return result.toString();
}
@Override
public String expandAsString() {
return run();
}
@Override
public List<String> expandAsList() {
return Collections.singletonList(expandAsString());
}
}
// A $(=cmd ...) command substitution: the command's output is re-parsed and
// split into a list of words.
class SplitCommand extends Command {
public SplitCommand(List<List<Expansion>> argv) {
super(argv);
}
@Override
public String expandAsString() {
return StringUtil.glue(" ", expandAsList());
}
@Override
public List<String> expandAsList() {
return new ExpansionParser(run()).splitAsString().expandAsList();
}
}
// Recursive-descent parser turning a token stream into an Expansion tree.
private class ExpansionParser {
private final ExpansionTokeniser tokens;
public ExpansionParser(String str) {
tokens = new ExpansionTokeniser(str);
}
public Sentence parseAsString() {
List<List<Expansion>> words = new ArrayList<List<Expansion>>();
words.add(parseTerminatedString(null));
return new Sentence(words);
}
public Sentence parseAsList() {
return new Sentence(parseTerminatedList(null, false));
}
// Like parseAsList, but without re-interpreting expansion syntax; used for
// splitting the output of a command substitution.
public Sentence splitAsString() {
return new Sentence(parseTerminatedList(null, true));
}
// Parse in string mode until the given terminator token (or end of input
// when the terminator is null); whitespace is kept as literal text.
private List<Expansion> parseTerminatedString(String terminator) {
List<Expansion> result = new ArrayList<Expansion>();
while (tokens.hasMoreTokens()) {
String next = tokens.nextToken();
if (next.equals(terminator)) {
return result;
} else if (next.equals("\\\"")) {
result.add(new Literal("\""));
} else if (next.equals("\\\\")) {
result.add(new Literal("\\"));
} else if (!tryParseExpansion(result, next)) {
result.add(new Literal(next));
}
}
if (terminator != null)
throw new UserError(
"Premature end of input while looking for matching '"
+ terminator + "'.");
return result;
}
// Parse in list mode until the given terminator token (or end of input when
// the terminator is null), splitting on whitespace and honouring quotes.
private List<List<Expansion>> parseTerminatedList(String terminator,
boolean noExpansions) {
List<List<Expansion>> result = new ArrayList<List<Expansion>>();
List<Expansion> accum = new ArrayList<Expansion>();
// Set after a closing quote: the next token must be whitespace (or the
// terminator), so quoted strings are always whole arguments.
boolean mustSeeSpace = false;
while (tokens.hasMoreTokens()) {
String next = tokens.nextToken();
if (next.equals(terminator)) {
if (accum.size() > 0)
result.add(accum);
return result;
} else if (mustSeeSpace
&& !Character.isWhitespace(next.charAt(0))) {
throw new UserError("The quoted string ending at "
+ tokens.pos()
+ " must be surrounded by whitespace.");
} else if (next.length() > 0
&& Character.isWhitespace(next.charAt(0))) {
mustSeeSpace = false;
if (accum.size() > 0) {
result.add(accum);
accum = new ArrayList<Expansion>();
}
} else if (next.equals("\"")) {
if (!accum.isEmpty())
throw new UserError(
"At position "
+ tokens.pos()
+ ", the quote should "
+ "either be preceded by a space (if it is intended to start an argument) "
+ "or escaped as \\\".");
accum.add(new QuotedString(parseTerminatedString("\"")));
result.add(accum);
accum = new ArrayList<Expansion>();
mustSeeSpace = true;
} else if (next.equals("\\\"")) {
// An escaped quote means a literal quote.
accum.add(new Literal("\""));
} else if (next.equals("\\\\")) {
// An escaped backslash means a literal backslash.
accum.add(new Literal("\\"));
} else if (noExpansions || !tryParseExpansion(accum, next)) {
accum.add(new Literal(next));
}
}
if (terminator != null)
throw new UserError(
"Premature end of expansion while looking for '"
+ terminator + "'.");
if (accum.size() > 0)
result.add(accum);
return result;
}
// If curToken starts an expansion construct, parse it fully, append the
// resulting Expansion to result and return true; otherwise return false.
private boolean tryParseExpansion(List<Expansion> result,
String curToken) {
if (curToken.equals("${}")) {
result.add(new Literal("$"));
} else if (curToken.equals("$(=") && commandSubstitutions) {
result.add(new SplitCommand(parseTerminatedList(")", false)));
} else if (curToken.equals("$(") && commandSubstitutions) {
result.add(new Command(parseTerminatedList(")", false)));
} else if (curToken.equals("${=")) {
result.add(new SplitVariable(parseVarName()));
} else if (curToken.equals("${")) {
result.add(new Variable(parseVarName()));
} else if (curToken.equals("$")) {
throw new UserError(
"Malformed expansion: A standalone '$' character should be escaped as '${}'.");
} else {
return false;
}
return true;
}
// Parse the "name}" remainder of a ${name} or ${=name} reference.
protected String parseVarName() {
if (!tokens.hasMoreTokens())
throw new UserError(
"Malformed variable substitution: stray '${' at " + tokens.pos());
String name = tokens.nextToken();
if (tokens.isDelimiter(name))
throw new UserError(
"Malformed variable substitution: Unexpected '" + name
+ "' at " + tokens.pos());
if (!tokens.hasMoreTokens())
throw new UserError(
"Malformed variable substitution for '" + name +
"': Missing '}' at " + tokens.pos());
String next = tokens.nextToken();
if (!next.equals("}"))
throw new UserError(
"Malformed variable substitution: Expecting '}' at "
+ tokens.pos() + ", found '" + next + "'.");
return name;
}
}
}
/**
 * Resolve a path. Any variables in the path will be expanded. If
 * the path is an absolute path after expansion, it is returned as is.
 * Otherwise, it is combined with the given base path.
 */
public File expandPath(File base, String path) {
String expanded = strExpand(path);
if (FileUtil.isAbsolute(expanded)) {
return new File(expanded);
} else {
return FileUtil.fileRelativeTo(base, expanded);
}
}
/**
 * Escape a string so that any '$'s inside it will be interpreted literally, rather than
 * as parts of variable references.
 */
public static String escape(String base) {
return base.replace("$", "${}");
}
/**
 * Escape {@code argument} as an argument, so that any {@code $}, {@code \} or {@code "} is interpreted literally.
 *
 * @param argument - the String to escape.
 * @return the escaped String.
 */
public static String escapeArgument(String argument) {
// NOTE(review): Matcher.quoteReplacement is intended for replacement strings,
// not patterns; it happens to produce strings that also work as patterns for
// '\' and '"' here, but String.replace would express the intent more directly.
return escape(argument).replaceAll(Matcher.quoteReplacement("\\"), Matcher.quoteReplacement("\\\\")).replaceAll(Matcher.quoteReplacement("\""), Matcher.quoteReplacement("\\\""));
}
}

View File

@@ -0,0 +1,100 @@
package com.semmle.util.extraction;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.process.Env;
import com.semmle.util.trap.pathtransformers.PathTransformer;
/**
* A file listing patterns of source files and which ODASA project
* each should be populated to (if any).
*/
public class PopulationSpecFile {
// Entries in file order; getEntryFor returns the first entry whose patterns
// match, so earlier sections take precedence.
private final List<SpecFileEntry> specs = new ArrayList<SpecFileEntry>();
// Parse the given specification file. The format (as handled below) is:
//   - blank lines and lines starting with '@' are ignored;
//   - a line starting with '#' closes the current section (which is recorded
//     only if an ODASA_DB= line was seen) and starts a new one;
//   - TRAP_FOLDER=/ODASA_DB=/SOURCE_ARCHIVE= lines set section properties;
//   - BUILD_ERROR_DIR= is accepted and ignored for backwards compatibility;
//   - a line starting with '-' is an exclusion path pattern;
//   - any other line is an inclusion path pattern.
public PopulationSpecFile(File specFile) {
FileReader fileReader = null;
BufferedReader reader = null;
try {
fileReader = new FileReader(specFile);
reader = new BufferedReader(fileReader);
File dbPath = null;
File trapFolder = null;
File sourceArchivePath = null;
List<String> patterns = new ArrayList<String>();
String line;
while ((line = reader.readLine()) != null) {
line = line.trim();
if (line.length() == 0 || line.startsWith("@"))
continue;
if (line.startsWith("#")) {
if (dbPath != null)
specs.add(new SpecFileEntry(trapFolder, sourceArchivePath, patterns));
// NOTE(review): trapFolder is not reset here, unlike dbPath and
// sourceArchivePath, so it carries over into the next section --
// confirm whether that is intentional.
dbPath = null;
sourceArchivePath = null;
patterns = new ArrayList<String>();
} else if (line.startsWith("TRAP_FOLDER=")) {
trapFolder = new File(line.substring("TRAP_FOLDER=".length()));
} else if (line.startsWith("ODASA_DB=")) {
dbPath = new File(line.substring("ODASA_DB=".length()));
} else if (line.startsWith("SOURCE_ARCHIVE=")) {
sourceArchivePath = new File(line.substring("SOURCE_ARCHIVE=".length()));
} else if (line.startsWith("BUILD_ERROR_DIR=")) {
// Accept and ignore for backwards compatibility
} else if (line.startsWith("-")) {
// Exclusion pattern; the trailing '/' ensures prefix matches only hit
// whole path components.
File path = new File(line.substring(1).trim());
patterns.add("-" + normalisePathAndCase(path) + "/");
} else {
File path = new File(line);
patterns.add(normalisePathAndCase(path) + "/");
}
}
// Record the final section (there is no trailing '#' to close it).
if (dbPath != null)
specs.add(new SpecFileEntry(trapFolder, sourceArchivePath, patterns));
} catch (IOException e) {
throw new ResourceError("I/O error while reading specification file at " + specFile, e);
} finally {
FileUtil.close(reader);
FileUtil.close(fileReader);
}
}
/**
 * Get the entry for a file, or <code>null</code> if there is no matching entry
 */
public SpecFileEntry getEntryFor(File f) {
String path = normalisePathAndCase(f);
for (SpecFileEntry entry : specs)
if (entry.matches(path))
return entry;
return null;
}
/**
 * Normalises the path like {@link PathTransformer#fileAsDatabaseString(File)}, and, in
 * addition, converts it to all-lowercase if we're on a case-insensitive
 * filesystem.
 * @param file the file to normalise
 * @return a normalised path that is lowercased if the file system is case-insensitive.
 */
private static String normalisePathAndCase(File file) {
String path = PathTransformer.std().fileAsDatabaseString(file);
if (!Env.getOS().isFileSystemCaseSensitive())
path = path.toLowerCase();
return path;
}
}

View File

@@ -0,0 +1,48 @@
package com.semmle.util.extraction;
import java.io.File;
import java.util.List;
import com.semmle.util.data.StringUtil;
public class SpecFileEntry {
// Destination folder for TRAP files produced for matching sources.
private final File trapFolder;
// Root of the source archive that matching sources belong to.
private final File sourceArchivePath;
// Ordered path-prefix patterns; entries starting with '-' are exclusions.
private final List<String> patterns;
/**
 * Create an entry from a TRAP folder, a source archive path and a list of
 * path-prefix patterns, where a leading '-' marks an exclusion.
 */
public SpecFileEntry(File trapFolder, File sourceArchivePath, List<String> patterns) {
this.trapFolder = trapFolder;
this.sourceArchivePath = sourceArchivePath;
this.patterns = patterns;
}
/**
 * Decide whether {@code path} is covered by this entry. Patterns are applied
 * in order, so the last pattern whose prefix matches the path wins.
 */
public boolean matches(String path) {
boolean included = false;
for (String pattern : patterns) {
boolean exclusion = pattern.startsWith("-");
String prefix = exclusion ? pattern.substring(1) : pattern;
if (path.startsWith(prefix))
included = !exclusion;
}
return included;
}
/** The folder that TRAP files for matching sources are written to. */
public File getTrapFolder() {
return trapFolder;
}
/** The root of the source archive for matching sources. */
public File getSourceArchivePath() {
return sourceArchivePath;
}
@Override
public String toString() {
StringBuilder result = new StringBuilder();
result.append("TRAP_FOLDER=").append(trapFolder).append('\n');
result.append("SOURCE_ARCHIVE=").append(sourceArchivePath).append('\n');
result.append(StringUtil.glue("\n", patterns));
return result.toString();
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,160 @@
package com.semmle.util.files;
import java.util.regex.Pattern;
import com.semmle.util.data.StringUtil;
/**
* Utility class to match a string to a pattern, which can either be
* an ant-like include/exclude pattern (with wildcards), or a rsync-like
* pattern.
* <p>
* In ant-like mode:
* <ul>
* <li>'**' matches zero or more characters (most notably including '/').
* <li>'*' matches zero or more characters except for '/'.
* <li>'?' matches any character (other than '/').
* </ul>
* <p>
* In rsync-like mode:
* <ul>
* <li>A pattern is matched only at the root if it starts with '/', and otherwise
* it is matched against each level of the directory tree.
* <li>'**', '*' and '?' have the same meaning as for ant.
* <li>Other rsync features (like [:..:] groups and backslash-escapes) are not supported.
* </ul>
*/
public class PathMatcher {
public enum Mode {
Ant, Rsync;
}
// Whether patterns are interpreted ant-style or rsync-style; see class docs.
private final Mode mode;
// The compiled regular expression that the pattern(s) were translated into.
private final Pattern pattern;
// Kept only for toString(), so diagnostics show what was asked for.
private final String originalPattern;
/**
 * Create a {@link PathMatcher}.
 *
 * @param pattern An ant-like pattern
 */
public PathMatcher(String pattern) {
this(Mode.Ant, pattern);
}
/** Create a {@link PathMatcher}.
 *
 * @param mode The {@link Mode} to use
 * @param pattern A pattern, interpreted as ant-like or rsync-like depending on
 * the value of {@code mode}
 */
public PathMatcher(Mode mode, String pattern) {
this.mode = mode;
this.originalPattern = pattern;
StringBuilder b = new StringBuilder();
toRegex(b, pattern);
this.pattern = Pattern.compile(b.toString());
}
/** Create a {@link PathMatcher}.
 *
 * @param patterns Several ant-like patterns
 */
public PathMatcher(Iterable<String> patterns) {
this(Mode.Ant, patterns);
}
/** Create a {@link PathMatcher}.
 *
 * @param mode The {@link Mode} to use.
 * @param patterns Several patterns, interpreted as ant-like or rsync-like depending
 * on the value of {@code mode}.
 */
public PathMatcher(Mode mode, Iterable<String> patterns) {
this.mode = mode;
this.originalPattern = patterns.toString();
StringBuilder b = new StringBuilder();
// Combine the patterns as regex alternatives.
// NOTE(review): if a later pattern is empty, toRegex appends nothing after
// the '|' already added here, leaving an empty alternative that matches the
// empty string -- confirm whether empty patterns can occur here.
for (String pattern : patterns) {
if (b.length() > 0)
b.append('|');
toRegex(b, pattern);
}
this.pattern = Pattern.compile(b.toString());
}
// Translate one glob-style pattern into regex text appended to b.
private void toRegex(StringBuilder b, String pattern) {
if (pattern.length() == 0) return;
//normalize pattern path separators
pattern = pattern.replace('\\', '/');
//replace double slashes
pattern = pattern.replaceAll("//+", "/");
// escape
pattern = StringUtil.escapeStringLiteralForRegexp(pattern, "*?");
// for ant, ending with '/' is shorthand for "/**"
if (mode == Mode.Ant && pattern.endsWith("/")) pattern = pattern + "**";
// replace "**/" with (^|.*/)"
// replace "**" with ".*"
// replace "*" with "[^/]*
// replace "?" with "[^/]"
int i = 0;
// In rsync-mode, a leading slash is an 'anchor' -- the pattern is only matched
// when rooted at the start of the path. This is the default behaviour for ant-like
// patterns.
if (mode == Mode.Rsync) {
if (pattern.charAt(0) == '/') {
// The slash is just anchoring, and may actually be missing
// in the case of a relative path.
b.append("/?");
i++;
} else {
// Non-anchored rsync pattern: the pattern can match at any level in the tree.
b.append("(.*/)?");
}
}
// Scan the pattern character by character, translating wildcards; longer
// wildcard forms ("**/", then "**") must be recognised before shorter ones.
while (i < pattern.length()) {
char c = pattern.charAt(i);
if (c == '*' && i < pattern.length() - 2 && pattern.charAt(i+1) == '*' && pattern.charAt(i+2) == '/') {
// "**/" matches either nothing at the start or any directory prefix.
b.append("(?:^|.*/)");
i += 3;
}
else if (c == '*' && i < pattern.length() - 1 && pattern.charAt(i+1) == '*') {
b.append(".*");
i += 2;
}
else if(c == '*') {
b.append("[^/]*");
i++;
}
else if(c == '?') {
b.append("[^/]");
i++;
}
else {
// Already regex-escaped above, so append verbatim.
b.append(c);
i++;
}
}
}
/**
 * Match the specified path against a shell pattern. The path is normalised by replacing '\' with '/'.
 * @param path The path to match.
 */
public boolean matches(String path) {
// normalise path
path = path.replace('\\', '/');
// A trailing slash carries no information for matching, so drop it.
if(path.endsWith("/"))
path = path.substring(0, path.length()-1);
return pattern.matcher(path).matches();
}
@Override
public String toString() {
return "Matches " + originalPattern + " [" + pattern + "]";
}
}

View File

@@ -0,0 +1,103 @@
package com.semmle.util.io;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import com.semmle.util.files.FileUtil;
/**
 * A custom buffered reader akin to {@link BufferedReader}, except that it preserves
 * line terminators (and so its {@code readLine()} method is called
 * {@link #readLineAndTerminator()}). The other {@link Reader} methods should not
 * be called, and will throw.
 */
public class BufferedLineReader implements Closeable {
  // Fixed-size character buffer; a line longer than this is returned in chunks.
  private final char[] buffer = new char[8192];
  // nextChar: index of the first unconsumed character in 'buffer';
  // nChars: number of valid characters currently stored in 'buffer'.
  private int nextChar = 0, nChars = 0;
  // The underlying reader; closed by close().
  private final Reader in;
  public BufferedLineReader(Reader in) {
    this.in = in;
  }
  /**
   * Read the string up to and including the next CRLF or LF terminator. This method
   * may return a non-terminated string at EOF, or if a line is too long to fit in the
   * internal buffer. Calls will block until enough data has been read to fill the
   * buffer or find a line terminator.
   * @return The next line (or buffer-full) of text, or null at EOF.
   * @throws IOException if the underlying stream throws.
   */
  public String readLineAndTerminator() throws IOException {
    int terminator = findNextLineTerminator();
    if (terminator == -1)
      return null;
    // Include the terminator character(s) in the returned string.
    String result = new String(buffer, nextChar, terminator - nextChar + 1);
    nextChar = terminator + 1;
    return result;
  }
  /**
   * Get the index of the last character that should be included in the next line.
   * Usually, this is the LF in a LF or CRLF line terminator, but it might be the
   * end of the buffer (if it is full, and no newlines are present), or it may be
   * -1 (but only if EOF has been reached, and the buffer is currently empty).
   * The first character of the line is pointed to by {@link #nextChar}, which
   * may be modified by this method if the buffer is refilled.
   */
  private int findNextLineTerminator() throws IOException {
    // Number of characters already scanned without finding a terminator;
    // lets us avoid re-scanning after each refill.
    int alreadyChecked = 0;
    do {
      for (int i = nextChar + alreadyChecked; i < nChars; i++) {
        if (buffer[i] == '\r' && i+1 < nChars && buffer[i+1] == '\n')
          return i+1; // CRLF
        else if (buffer[i] == '\n')
          return i; // LF
      }
      // We didn't find a full newline in the existing buffer: Try to fill.
      // (fill() may shift unread data to the start of the buffer, resetting nextChar.)
      alreadyChecked = nChars - nextChar;
      int newlyRead = fill();
      if (newlyRead <= 0)
        // EOF (or a full buffer): hand back everything we have; -1 iff empty.
        return nChars - 1;
    } while (true);
  }
  /**
   * Block until at least one character from the underlying stream is read,
   * or EOF is reached.
   * @return the number of characters read, 0 if the buffer is full, or a
   *         negative value at EOF.
   */
  private int fill() throws IOException {
    if (nextChar >= nChars) {
      // No unread characters.
      nextChar = 0;
      nChars = 0;
    } else if (nextChar > 0) {
      // Some unread characters: slide them to the front to make room.
      System.arraycopy(buffer, nextChar, buffer, 0, nChars - nextChar);
      nChars = nChars - nextChar;
      nextChar = 0;
    }
    // Is the buffer full?
    if (nChars == buffer.length)
      return 0;
    int read;
    do {
      // Reader.read may legally return 0 when asked for 0 chars; loop to be safe.
      read = in.read(buffer, nChars, buffer.length - nChars);
    } while (read == 0);
    if (read > 0) {
      nChars += read;
    }
    return read;
  }
  @Override
  public void close() {
    FileUtil.close(in);
  }
}

View File

@@ -0,0 +1,34 @@
package com.semmle.util.io;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.files.FileUtil;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * A thread that pumps raw bytes from an input stream into an output stream.
 * Once the input is exhausted, both the input and output streams are closed.
 */
public class RawStreamMuncher extends Thread {
  private final InputStream source;
  private final OutputStream sink;

  public RawStreamMuncher(InputStream in, OutputStream out) {
    this.source = in;
    this.sink = out;
  }

  @Override
  public void run() {
    try {
      StreamUtil.copy(source, sink);
    } catch (IOException e) {
      Exceptions.ignore(e, "When the process exits, a harmless IOException will occur here");
    } finally {
      // Close both ends regardless of how the copy terminated.
      FileUtil.close(source);
      FileUtil.close(sink);
    }
  }
}

View File

@@ -0,0 +1,49 @@
package com.semmle.util.io;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.BufferedLineReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
/**
 * A thread that forwards data from one stream to another. It waits for
 * entire lines of input (terminators included) before writing to the
 * destination, flushing after every line. A null destination discards
 * the input.
 */
public class StreamMuncher extends Thread {
  private final InputStream in;
  private PrintStream sink;
  private BufferedLineReader lines;

  public StreamMuncher(InputStream is, OutputStream output) {
    this.in = is;
    if (output != null)
      this.sink = new PrintStream(output);
  }

  @Override
  public void run() {
    InputStreamReader streamReader = null;
    try {
      streamReader = new InputStreamReader(in);
      lines = new BufferedLineReader(streamReader);
      for (String line = lines.readLineAndTerminator(); line != null; line = lines.readLineAndTerminator()) {
        if (sink == null)
          continue;
        sink.print(line);
        sink.flush();
      }
    } catch (IOException e) {
      Exceptions.ignore(e, "When the process exits, a harmless IOException will occur here");
    } finally {
      FileUtil.close(lines);
      FileUtil.close(streamReader);
    }
  }
}

View File

@@ -0,0 +1,201 @@
package com.semmle.util.io;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import com.semmle.util.exception.CatastrophicError;
/**
 * Utility methods concerning {@link InputStream}s and {@link OutputStream}s.
 */
public class StreamUtil
{
  /**
   * Copy all bytes that can be read from an {@link InputStream}, into an {@link OutputStream}.
   *
   * @param inputStream The InputStream from which to read, until an
   *     {@link InputStream#read(byte[])} operation returns indicating that the input stream
   *     has reached its end.
   * @param outputStream The OutputStream to which all bytes read from {@code inputStream} should be
   *     written.
   * @return The number of bytes copied.
   * @throws IOException from {@link InputStream#read(byte[])} or
   *     {@link OutputStream#write(byte[], int, int)}
   * @throws CatastrophicError if either of the streams is {@code null}
   */
  public static long copy(InputStream inputStream, OutputStream outputStream) throws IOException
  {
    nullCheck(inputStream, outputStream);
    // Copy byte data
    long total = 0;
    byte[] bytes = new byte[1024];
    int read;
    while ((read = inputStream.read(bytes)) > 0) {
      outputStream.write(bytes, 0, read);
      total += read;
    }
    return total;
  }
  /**
   * Copy all chars that can be read from a {@link Reader}, into a {@link Writer}.
   *
   * @param reader The Reader from which to read, until a {@link Reader#read(char[])} operation
   *     returns indicating that the reader has reached its end.
   * @param writer The Writer to which all characters read from {@code reader} should be written.
   * @return The number of chars copied.
   * @throws IOException from {@link Reader#read(char[])} or
   *     {@link Writer#write(char[], int, int)}
   * @throws CatastrophicError if either of the streams is {@code null}
   */
  public static long copy(Reader reader, Writer writer) throws IOException
  {
    nullCheck(reader, writer);
    // Copy char data
    long total = 0;
    char[] chars = new char[1024];
    int read;
    while ((read = reader.read(chars)) > 0) {
      writer.write(chars, 0, read);
      total += read;
    }
    return total;
  }
  /**
   * Copy at most {@code length} bytes from an {@link InputStream}, into an {@link OutputStream}.
   * <p>
   * Note that this method will busy-wait during periods for which the {@code inputStream} cannot
   * supply any data, but has not reached its end.
   * </p>
   *
   * @param inputStream The InputStream from which to read, until {@code length} bytes have
   *     been read or {@link InputStream#read(byte[], int, int)} operation returns
   *     indicating that the input stream has reached its end.
   * @param outputStream The OutputStream to which all bytes read from {@code inputStream} should be
   *     written.
   * @param length The maximum number of bytes to copy
   * @return The number of bytes copied.
   * @throws IOException from {@link InputStream#read(byte[], int, int)} or
   *     {@link OutputStream#write(byte[], int, int)}
   * @throws CatastrophicError if either of the streams is {@code null}
   */
  public static long limitedCopy(InputStream inputStream, OutputStream outputStream, long length) throws IOException
  {
    nullCheck(inputStream, outputStream);
    // Copy byte data; the remaining budget 'length' shrinks as we go, and the
    // loop ends when the budget reaches 0 (a zero-length read() returns 0).
    long total = 0;
    byte[] bytes = new byte[1024];
    int read;
    while ((read = inputStream.read(bytes, 0, (int) Math.min(bytes.length, length))) > 0) {
      outputStream.write(bytes, 0, read);
      length -= read;
      total += read;
    }
    return total;
  }
  /** Throw a CatastrophicError if either argument is null. */
  private static void nullCheck(Object input, Object output) {
    CatastrophicError.throwIfAnyNull(input, output);
  }
  /**
   * Skips over and discards n bytes of data from an input stream. If n is negative then no bytes are skipped.
   * @param stream the InputStream
   * @param n the number of bytes to be skipped.
   * @return false if the end-of-file was reached before successfully skipping n bytes
   */
  public static boolean skip(InputStream stream, long n) throws IOException {
    if (n <= 0)
      return true;
    // skip() may skip fewer bytes than requested (or 0) without reaching EOF,
    // so loop, falling back to single-byte read()s to distinguish EOF.
    // The final byte is consumed with read() so that EOF is detected reliably.
    long toSkip = n - 1;
    while (toSkip > 0) {
      long skipped = stream.skip(toSkip);
      if (skipped == 0) {
        if(stream.read() == -1)
          return false;
        else
          skipped++;
      }
      toSkip -= skipped;
    }
    if(stream.read() == -1)
      return false;
    else
      return true;
  }
  /**
   * Reads n bytes from the input stream and returns them. This method will block
   * until all n bytes are available. If the end of the stream is reached before n bytes are
   * read it returns just the read bytes.
   *
   * @param stream the InputStream
   * @param n the number of bytes to read
   * @return the read bytes
   * @throws IOException if an IOException occurs when accessing the stream
   * @throws IllegalArgumentException if n is negative
   */
  public static byte[] readN(InputStream stream, int n) throws IOException {
    if (n < 0) throw new IllegalArgumentException("n must be positive");
    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
    limitedCopy(stream, bOut, n);
    return bOut.toByteArray();
  }
  /**
   * Reads bytes from the input stream into the given buffer. This method will block
   * until all bytes are available. If the end of the stream is reached before enough bytes are
   * read it reads as much as it can.
   *
   * @param stream the InputStream
   * @param buf the buffer to read into
   * @param offset the offset to read into
   * @param length the number of bytes to read
   * @return the total number of read bytes
   * @throws IOException if an IOException occurs when accessing the stream
   * @throws IllegalArgumentException if length is negative
   */
  public static int read(InputStream stream, byte[] buf, int offset, int length) throws IOException {
    if (length < 0) throw new IllegalArgumentException("length must be positive");
    // Copy byte data. The offset must advance with each chunk so that
    // successive reads append to the buffer; the previous implementation
    // kept reading at the same offset, overwriting earlier data.
    int total = 0;
    int read;
    while (length > 0 && (read = stream.read(buf, offset, length)) > 0) {
      offset += read;
      length -= read;
      total += read;
    }
    return total;
  }
  /**
   * Convenience method for constructing a buffered reader with a UTF8 charset.
   */
  public static BufferedReader newUTF8BufferedReader(InputStream inputStream) {
    return new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
  }
  /**
   * Convenience method for constructing a buffered writer with a UTF8 charset.
   */
  public static BufferedWriter newUTF8BufferedWriter(OutputStream outputStream) {
    return new BufferedWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8));
  }
}

View File

@@ -0,0 +1,548 @@
package com.semmle.util.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.regex.Pattern;
import com.semmle.util.array.ArrayUtil;
import com.semmle.util.data.IntRef;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
/**
 * A class that allows bulk operations on entire files,
 * reading or writing them as {@link String} values.
 *
 * This is intended to address the woeful inadequacy of
 * the Java standard libraries in this area.
 *
 * Instances are not thread-safe: the last exception is stored in a field.
 */
public class WholeIO {
  // The exception (if any) caught by the most recent read/write call;
  // exposed via getLastException().
  private IOException e;
  /**
   * Regular expression {@link Pattern} matching a CRLF line ending.
   */
  private final static Pattern rpLineEndingCRLF = Pattern.compile("\r\n");
  /**
   * The default encoding to use for writing, and for reading if no
   * encoding can be detected.
   */
  private final String defaultEncoding;
  /**
   * Construct a new {@link WholeIO} instance using ODASA's default
   * charset ({@code "UTF-8"}) for all input and output (unless a
   * different encoding is detected for a file being read).
   */
  public WholeIO() {
    this("UTF-8");
  }
  /**
   * Construct a new {@link WholeIO} instance using the specified
   * encoding for all input and output (unless a different encoding
   * is detected for a file being read).
   *
   * @param encoding The encoding name, e.g. {@code "UTF-8"}.
   */
  public WholeIO(String encoding) {
    defaultEncoding = encoding;
  }
  /**
   * Open the given file for reading, get the entire content
   * and return it as a {@link String}. Returns <code>null</code>
   * on error, in which case you can check the getLastException()
   * method for the exception that occurred.
   *
   * <b>Warning:</b> This method trims the content of the file, removing
   * leading and trailing whitespace. Do not use it if you care about file
   * locations being preserved; use 'read' instead.
   *
   * @param file The file to read
   * @return The <b>trimmed</b> contents of the file, or <code>null</code> on error.
   */
  public String readAndTrim(File file) {
    e = null;
    FileInputStream f = null;
    try {
      f = new FileInputStream(file);
      String contents = readString(f);
      return contents == null ? null : contents.trim();
    } catch (IOException e) {
      this.e = e;
      return null;
    } finally {
      FileUtil.close(f);
    }
  }
  /**
   * Open the given filename for writing and dump the given
   * {@link String} into it. Returns <code>false</code>
   * on error, in which case you can check the getLastException()
   * method for the exception that occurred. Tries to create any
   * enclosing directories that do not exist.
   *
   * @param filename The name of the file to write to
   * @param contents the string to write out
   * @return the success state
   */
  public boolean write(String filename, String contents) {
    return write(new File(filename), contents);
  }
  /**
   * Open the given filename for writing and dump the given
   * {@link String} into it. Returns <code>false</code>
   * on error, in which case you can check the getLastException()
   * method for the exception that occurred. Tries to create any
   * enclosing directories that do not exist.
   *
   * @param file The file to write to
   * @param contents the string to write out
   * @return the success state
   */
  public boolean write(File file, String contents) {
    return write(file, contents, false);
  }
  /**
   * Open the given path for writing and dump the given
   * {@link String} into it. Returns <code>false</code>
   * on error, in which case you can check the getLastException()
   * method for the exception that occurred. Tries to create any
   * enclosing directories that do not exist.
   *
   * @param path The path to write to
   * @param contents the string to write out
   * @return the success state
   */
  public boolean write(Path path, String contents) {
    return write(path, contents, false);
  }
  /**
   * Open the given filename for writing and dump the given
   * {@link String} into it. Throws {@link ResourceError}
   * if we fail.
   *
   * @param file The file to write to
   * @param contents the string to write out
   */
  public void strictwrite(File file, String contents) {
    strictwrite(file, contents, false);
  }
  /**
   * Open the given path for writing and dump the given
   * {@link String} into it. Throws {@link ResourceError}
   * if we fail.
   *
   * @param path The path to write to
   * @param contents the string to write out
   */
  public void strictwrite(Path path, String contents) {
    strictwrite(path, contents, false);
  }
  /**
   * This is the same as {@link #write(File,String)},
   * except that this method allows appending to an existing file.
   *
   * @param file the file to write to
   * @param contents the string to write out
   * @param append whether or not to append to any existing file
   * @return the success state
   */
  public boolean write(File file, String contents, boolean append) {
    if (file.getParentFile() != null)
      file.getParentFile().mkdirs();
    FileOutputStream fos = null;
    try {
      fos = new FileOutputStream(file, append);
      Writer writer = new OutputStreamWriter(fos, Charset.forName(defaultEncoding));
      writer.append(contents);
      writer.close();
      return true;
    } catch (IOException e) {
      this.e = e;
      return false;
    } finally {
      // Safe even after writer.close(): FileOutputStream.close is idempotent.
      FileUtil.close(fos);
    }
  }
  /**
   * This is the same as {@link #write(Path,String)},
   * except that this method allows appending to an existing file.
   *
   * @param path the path to write to
   * @param contents the string to write out
   * @param append whether or not to append to any existing file
   * @return the success state
   */
  public boolean write(Path path, String contents, boolean append) {
    try {
      if (path.getParent() != null)
        Files.createDirectories(path.getParent());
      try (Writer writer = Files.newBufferedWriter(path, Charset.forName(defaultEncoding),
          StandardOpenOption.CREATE, StandardOpenOption.WRITE,
          append ? StandardOpenOption.APPEND : StandardOpenOption.TRUNCATE_EXISTING)) {
        writer.append(contents);
      }
    } catch (IOException e) {
      this.e = e;
      return false;
    }
    return true;
  }
  /**
   * This is the same as {@link #strictwrite(File,String)},
   * except that this method allows appending to an existing file.
   */
  public void strictwrite(File file, String contents, boolean append) {
    if (!write(file, contents, append))
      throw new ResourceError("Failed to write file " + file, getLastException());
  }
  /**
   * This is the same as {@link #strictwrite(Path,String)},
   * except that this method allows appending to an existing file.
   */
  public void strictwrite(Path path, String contents, boolean append) {
    if (!write(path, contents, append))
      throw new ResourceError("Failed to write path " + path, getLastException());
  }
  /**
   * Get the exception that occurred during the last call to
   * read(), if any. If the last read() call completed normally,
   * this returns null.
   * @return The last caught exception, or <code>null</code> if N/A.
   */
  public IOException getLastException() {
    return e;
  }
  /**
   * Read the entire contents of the given {@link File} as a {@link String},
   * detecting the character set from any byte-order mark.
   * @return the contents, or <code>null</code> on error (see {@link #getLastException()}).
   */
  public String read(File file) {
    InputStream is = null;
    try {
      is = new FileInputStream(file);
      return readString(is);
    }
    catch (IOException e) {
      this.e = e;
      return null;
    }
    finally {
      FileUtil.close(is);
    }
  }
  /**
   * Read the entire contents of the given {@link Path} as a {@link String},
   * detecting the character set from any byte-order mark.
   * @return the contents, or <code>null</code> on error (see {@link #getLastException()}).
   */
  public String read(Path path) {
    InputStream is = null;
    try {
      is = Files.newInputStream(path);
      return readString(is);
    }
    catch (IOException e) {
      this.e = e;
      return null;
    }
    finally {
      FileUtil.close(is);
    }
  }
  /**
   * Read the contents of the given {@link File} as text (line endings are normalised to "\n" in the output).
   *
   * @param file The file to read.
   * @return The text contents of the file, if possible, or null if the file cannot be read.
   */
  public String readText(File file) {
    String result = read(file);
    return result != null ? result.replaceAll("\r\n", "\n") : null;
  }
  /**
   * Read the contents of the given {@link Path} as text (line endings are normalised to "\n" in the output).
   *
   * @param path The path to read.
   * @return The text contents of the path, if possible, or null if the file cannot be read.
   */
  public String readText(Path path) {
    String result = read(path);
    return result != null ? result.replaceAll("\r\n", "\n") : null;
  }
  /**
   * Read the contents of the given {@link File}, throwing a {@link ResourceError}
   * if we fail.
   */
  public String strictread(File f) {
    String content = read(f);
    if (content == null)
      throw new ResourceError("Failed to read file " + f, getLastException());
    return content;
  }
  /**
   * Read the contents of the given {@link Path}, throwing a {@link ResourceError}
   * if we fail.
   */
  public String strictread(Path f) {
    String content = read(f);
    if (content == null)
      throw new ResourceError("Failed to read path " + f, getLastException());
    return content;
  }
  /**
   * Read the contents of the given {@link File} as text (line endings are normalised to "\n" in the output).
   *
   * @param file The file to read.
   * @return The text contents of the file, if possible.
   * @throws ResourceError If the file cannot be read.
   */
  public String strictreadText(File file) {
    return rpLineEndingCRLF.matcher(strictread(file)).replaceAll("\n");
  }
  /**
   * Read the contents of the given {@link Path} as text (line endings are normalised to "\n" in the output).
   *
   * @param path The path to read.
   * @return The text contents of the path, if possible.
   * @throws ResourceError If the path cannot be read.
   */
  public String strictreadText(Path path) {
    return rpLineEndingCRLF.matcher(strictread(path)).replaceAll("\n");
  }
  /**
   * Get the entire content of an {@link InputStream}
   * and interpret it as a {@link String} trying to detect its character set.
   * Returns <code>null</code> on error, in which case you can check
   * the getLastException() method for the exception that occurred.
   *
   * @param stream the stream to read from
   * @return The contents of the file, or <code>null</code> on error.
   */
  public String readString(InputStream stream) {
    IntRef length = new IntRef(0);
    byte[] bytes = readBinary(stream, length);
    if (bytes == null) return null;
    try {
      // Skip any byte-order mark and decode with the detected charset.
      IntRef start = new IntRef(0);
      String charset = determineCharset(bytes, length.get(), start);
      return new String(bytes, start.get(), length.get() - start.get(), charset);
    } catch (UnsupportedEncodingException e) {
      this.e = e;
      return null;
    }
  }
  /**
   * Get the entire content of an {@link InputStream}
   * and interpret it as a {@link String} trying to detect its character set.
   * Throws a {@link ResourceError} on error.
   *
   * @param stream the stream to read from
   * @return the contents of the input stream
   */
  public String strictReadString(InputStream stream) {
    String content = readString(stream);
    if (content == null)
      throw new ResourceError("Could not read from stream", getLastException());
    return content;
  }
  /**
   * Get the entire content of an {@link InputStream}, interpreting it
   * as a sequence of bytes. This removes restrictions regarding invalid
   * code points that would potentially prevent reading a file's contents
   * as a String.
   *
   * This method returns <code>null</code> on error, in which case you can
   * check {@link #getLastException()} for the exception that occurred.
   *
   * @param stream the stream to read from
   * @return The binary contents of the file, or <code>null</code> on error.
   */
  public byte[] readBinary(InputStream stream) {
    IntRef length = new IntRef(0);
    byte[] bytes = readBinary(stream, length);
    // Trim the over-allocated buffer down to the bytes actually read.
    return bytes == null ? null : Arrays.copyOf(bytes, length.get());
  }
  /**
   * Get the entire content of an {@link InputStream}, interpreting it
   * as a sequence of bytes. This removes restrictions regarding invalid
   * code points that would potentially prevent reading a file's contents
   * as a String.
   *
   * @param stream the stream to read from
   * @return The binary contents of the file -- always non-null.
   * @throws ResourceError if an exception occurs during IO.
   */
  public byte[] strictReadBinary(InputStream stream) {
    byte[] result = readBinary(stream);
    if (result == null)
      throw new ResourceError("Couldn't read from stream", e);
    return result;
  }
  /**
   * Get the entire binary contents of a {@link File} as a sequence of bytes.
   *
   * @param file the file to read
   * @return the file's contents as a byte[] -- always non-null.
   * @throws ResourceError if an exception occurs during IO.
   */
  public byte[] strictReadBinary(File file) {
    FileInputStream stream = null;
    try {
      stream = new FileInputStream(file);
      byte[] result = readBinary(stream);
      if (result == null)
        throw new ResourceError("Couldn't read from file " + file + ".", e);
      return result;
    } catch (FileNotFoundException e) {
      throw new ResourceError("Couldn't read from file " + file + ".", e);
    } finally {
      FileUtil.close(stream);
    }
  }
  /**
   * Get the entire binary contents of a {@link Path} as a sequence of bytes.
   *
   * @param path the path to read
   * @return the file's contents as a byte[] -- always non-null.
   * @throws ResourceError if an exception occurs during IO.
   */
  public byte[] strictReadBinary(Path path) {
    InputStream stream = null;
    try {
      stream = Files.newInputStream(path);
      byte[] result = readBinary(stream);
      if (result == null)
        throw new ResourceError("Couldn't read from path " + path + ".", e);
      return result;
    } catch (IOException e) {
      throw new ResourceError("Couldn't read from path " + path + ".", e);
    } finally {
      FileUtil.close(stream);
    }
  }
  /**
   * Get the entire binary contents of a {@link Path} as a sequence of bytes.
   *
   * @param path the path to read
   * @return the file's contents as a byte[] -- always non-null.
   * @throws IOException if the path cannot be opened.
   * @throws ResourceError if reading the opened stream fails.
   */
  public byte[] readBinary(Path path) throws IOException {
    InputStream stream = null;
    try {
      stream = Files.newInputStream(path);
      byte[] result = readBinary(stream);
      if (result == null)
        throw new ResourceError("Couldn't read from path " + path + ".", e);
      return result;
    } finally {
      FileUtil.close(stream);
    }
  }
  /**
   * Read the whole of {@code stream} into a (possibly over-allocated) byte
   * array, storing the number of bytes actually read in {@code offsetHolder}.
   * Returns null on IOException (recorded in {@link #e}).
   */
  private byte[] readBinary(InputStream stream, IntRef offsetHolder) {
    try {
      byte[] bytes = new byte[16384];
      int offset = 0;
      int readThisTime;
      do {
        readThisTime = stream.read(bytes, offset, bytes.length - offset);
        if (readThisTime > 0) {
          offset += readThisTime;
          // Grow the buffer once it is full so the next read has room.
          if (offset == bytes.length)
            bytes = safeArrayDouble(bytes);
        }
      } while (readThisTime > 0);
      offsetHolder.set(offset);
      return bytes;
    } catch (IOException e) {
      this.e = e;
      return null;
    }
  }
  /**
   * Safely attempt to double the length of an array.
   * @param array The array which want to be doubled
   * @return a new array that is longer than array
   * @throws ResourceError if the array is already at the maximum array size.
   */
  private byte[] safeArrayDouble(byte[] array) {
    if (array.length >= ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new ResourceError("Cannot stream into array as it exceed the maximum array size");
    }
    // Compute desired capacity
    long newCapacity = array.length * 2L;
    // Ensure it is at least as large as minCapacity
    if (newCapacity < 16)
      newCapacity = 16;
    // Ensure it is at most MAX_ARRAY_LENGTH
    if (newCapacity > ArrayUtil.MAX_ARRAY_LENGTH) {
      newCapacity = ArrayUtil.MAX_ARRAY_LENGTH;
    }
    return Arrays.copyOf(array, (int)newCapacity);
  }
  /**
   * Try to determine the encoding of a byte[] using a byte-order mark (if present)
   * Defaults to UTF-8 if none found.
   * @param bom the raw bytes (the BOM, if any, is at the start)
   * @param length the number of valid bytes in {@code bom}
   * @param start out-parameter: set to the index of the first content byte after the BOM
   */
  private String determineCharset(byte[] bom, int length, IntRef start) {
    start.set(0);
    String ret = defaultEncoding;
    if(length < 2)
      return ret;
    if (length >= 3 && byteToInt(bom[0]) == 0xEF && byteToInt(bom[1]) == 0xBB && byteToInt(bom[2]) == 0xBF) {
      ret = "UTF-8";
      start.set(3);
    } else if (byteToInt(bom[0]) == 0xFE && byteToInt(bom[1]) == 0xFF) {
      ret = "UTF-16BE";
      start.set(2);
    } else if (byteToInt(bom[0]) == 0xFF && byteToInt(bom[1]) == 0xFE) {
      ret = "UTF-16LE";
      start.set(2);
    }
    return ret;
  }
  // Widen a byte to its unsigned 0-255 value for BOM comparison.
  private static int byteToInt(byte b) {
    return b & 0xFF;
  }
}

View File

@@ -0,0 +1,207 @@
package com.semmle.util.io.csv;
/**
Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* A very simple CSV parser released under a commercial-friendly license.
* This just implements splitting a single line into fields.
*
* @author Glen Smith
* @author Rainer Pruy
*
*/
public class CSVParser {
private final char separator;
private final char quotechar;
private final char escape;
private final boolean strictQuotes;
private StringBuilder buf = new StringBuilder(INITIAL_READ_SIZE);
/** The default separator to use if none is supplied to the constructor. */
public static final char DEFAULT_SEPARATOR = ',';
private static final int INITIAL_READ_SIZE = 128;
/**
* The default quote character to use if none is supplied to the
* constructor.
*/
public static final char DEFAULT_QUOTE_CHARACTER = '"';
/**
* The default escape character to use if none is supplied to the
* constructor.
*/
public static final char DEFAULT_ESCAPE_CHARACTER = '"';
/**
* The default strict quote behavior to use if none is supplied to the
* constructor
*/
public static final boolean DEFAULT_STRICT_QUOTES = false;
/**
* Constructs CSVReader with supplied separator and quote char.
* Allows setting the "strict quotes" flag
* @param separator
* the delimiter to use for separating entries
* @param quotechar
* the character to use for quoted elements
* @param escape
* the character to use for escaping a separator or quote
* @param strictQuotes
* if true, characters outside the quotes are ignored
*/
CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) {
this.separator = separator;
this.quotechar = quotechar;
this.escape = escape;
this.strictQuotes = strictQuotes;
}
/**
*
* @return true if something was left over from last call(s)
*/
public boolean isPending() {
return buf.length() != 0;
}
public String[] parseLineMulti(String nextLine) throws IOException {
return parseLine(nextLine, true);
}
public String[] parseLine(String nextLine) throws IOException {
return parseLine(nextLine, false);
}
/**
* Parses an incoming String and returns an array of elements.
*
* @param nextLine
* the string to parse
* @return the comma-tokenized list of elements, or null if nextLine is null
* @throws IOException if bad things happen during the read
*/
private String[] parseLine(String nextLine, boolean multi) throws IOException {
if (!multi && isPending()) {
clear();
}
if (nextLine == null) {
if (isPending()) {
String s = buf.toString();
clear();
return new String[] {s};
} else {
return null;
}
}
List<String>tokensOnThisLine = new ArrayList<String>();
boolean inQuotes = isPending();
for (int i = 0; i < nextLine.length(); i++) {
char c = nextLine.charAt(i);
if (c == this.escape && isNextCharacterEscapable(nextLine, inQuotes, i)) {
buf.append(nextLine.charAt(i+1));
i++;
} else if (c == quotechar) {
if( isNextCharacterEscapedQuote(nextLine, inQuotes, i) ){
buf.append(nextLine.charAt(i+1));
i++;
}else{
inQuotes = !inQuotes;
// the tricky case of an embedded quote in the middle: a,bc"d"ef,g
if (!strictQuotes) {
if(i>2 //not on the beginning of the line
&& nextLine.charAt(i-1) != this.separator //not at the beginning of an escape sequence
&& nextLine.length()>(i+1) &&
nextLine.charAt(i+1) != this.separator //not at the end of an escape sequence
){
buf.append(c);
}
}
}
} else if (c == separator && !inQuotes) {
tokensOnThisLine.add(buf.toString());
clear(); // start work on next token
} else {
if (!strictQuotes || inQuotes)
buf.append(c);
}
}
// line is done - check status
if (inQuotes) {
if (multi) {
// continuing a quoted section, re-append newline
buf.append('\n');
// this partial content is not to be added to field list yet
} else {
throw new IOException("Un-terminated quoted field at end of CSV line");
}
} else {
tokensOnThisLine.add(buf.toString());
clear();
}
return tokensOnThisLine.toArray(new String[tokensOnThisLine.size()]);
}
/**
* precondition: the current character is a quote or an escape
* @param nextLine the current line
* @param inQuotes true if the current context is quoted
* @param i current index in line
* @return true if the following character is a quote
*/
private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {
return inQuotes // we are in quotes, therefore there can be escaped quotes in here.
&& nextLine.length() > (i+1) // there is indeed another character to check.
&& nextLine.charAt(i+1) == quotechar;
}
/**
 * Decides whether the escape character at position {@code i} actually escapes
 * the character that follows it. Precondition: the current character is an
 * escape.
 *
 * @param nextLine the line currently being parsed
 * @param inQuotes whether the parser is currently inside a quoted section
 * @param i        index of the current character within {@code nextLine}
 * @return true if a character follows position {@code i} and it is a quote or
 *         an escape
 */
protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {
    // Escapes only take effect inside quotes, and only when something follows.
    if (!inQuotes || nextLine.length() <= i + 1) {
        return false;
    }
    char following = nextLine.charAt(i + 1);
    return following == quotechar || following == this.escape;
}
/**
 * Reset the buffer used for storing the current field's value.
 */
private void clear() {
    // Equivalent to buf.setLength(0): discard everything accumulated so far
    // so the buffer can start collecting the next field.
    buf.delete(0, buf.length());
}
}

View File

@@ -0,0 +1,192 @@
package com.semmle.util.io.csv;
/**
Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/**
 * A very simple CSV reader released under a commercial-friendly license.
 *
 * Reads logical CSV lines (which may span several physical lines when a
 * quoted field contains embedded newlines) and splits them into tokens
 * using a {@link CSVParser}.
 *
 * @author Glen Smith
 *
 */
public class CSVReader implements Closeable {
// Underlying source of physical lines.
private final BufferedReader br;
// False once the underlying reader has been exhausted.
private boolean hasNext = true;
// Parser used to split each logical line into fields.
private final CSVParser parser;
// Number of leading physical lines to discard before parsing starts.
private final int skipLines;
// True once the initial skipLines lines have actually been consumed.
private boolean linesSkipped;
/** The line number of the last physical line read (one-based). */
private int curline = 0;
/** The physical line number at which the last logical line read started (one-based). */
private int startLine = 0;
/**
 * The default line to start reading.
 */
private static final int DEFAULT_SKIP_LINES = 0;
/**
 * Constructs CSVReader using a comma for the separator.
 *
 * @param reader
 *            the reader to an underlying CSV source.
 */
public CSVReader(Reader reader) {
this(reader,
CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER,
CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES,
CSVParser.DEFAULT_STRICT_QUOTES);
}
/**
 * Constructs CSVReader with supplied separator and quote char.
 *
 * @param reader
 *            the reader to an underlying CSV source.
 * @param separator
 *            the delimiter to use for separating entries
 * @param quotechar
 *            the character to use for quoted elements
 * @param escape
 *            the character to use for escaping a separator or quote
 * @param line
 *            the line number to skip for start reading
 * @param strictQuotes
 *            sets if characters outside the quotes are ignored
 */
private CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) {
this.br = new BufferedReader(reader);
this.parser = new CSVParser(separator, quotechar, escape, strictQuotes);
this.skipLines = line;
}
/**
 * Reads the entire file into a List with each element being a String[] of
 * tokens.
 *
 * @return a List of String[], with each String[] representing a line of the
 *         file.
 *
 * @throws IOException
 *             if bad things happen during the read
 */
public List<String[]> readAll() throws IOException {
List<String[]> allElements = new ArrayList<String[]>();
while (hasNext) {
String[] nextLineAsTokens = readNext();
// readNext() returns null at end of input; don't record that.
if (nextLineAsTokens != null)
allElements.add(nextLineAsTokens);
}
return allElements;
}
/**
 * Reads the next line from the buffer and converts to a string array.
 *
 * @return a string array with each comma-separated element as a separate
 *         entry, or null if there are no more lines to read.
 *
 * @throws IOException
 *             if bad things happen during the read
 */
public String[] readNext() throws IOException {
boolean first = true;
String[] result = null;
do {
String nextLine = getNextLine();
if (first) {
// Record where this logical line starts, for getStartLine().
startLine = curline;
first = false;
}
if (!hasNext) {
// EOF reached. NOTE(review): if the parser still has a pending
// (unterminated) quoted field at this point, its partial content
// is not included in the returned tokens.
return result; // should throw if still pending?
}
String[] r = parser.parseLineMulti(nextLine);
if (r.length > 0) {
if (result == null) {
result = r;
} else {
// A quoted field spanned physical lines: concatenate the token
// arrays produced for each physical line.
String[] t = new String[result.length+r.length];
System.arraycopy(result, 0, t, 0, result.length);
System.arraycopy(r, 0, t, result.length, r.length);
result = t;
}
}
// Keep reading physical lines while the parser is inside an open quote.
} while (parser.isPending());
return result;
}
/**
 * Reads the next line from the file.
 *
 * @return the next line from the file without trailing newline
 * @throws IOException
 *             if bad things happen during the read
 */
private String getNextLine() throws IOException {
// Consume the configured number of header lines exactly once.
if (!this.linesSkipped) {
for (int i = 0; i < skipLines; i++) {
br.readLine();
++curline;
}
this.linesSkipped = true;
}
String nextLine = br.readLine();
if (nextLine == null) {
hasNext = false;
} else {
++curline;
}
return hasNext ? nextLine : null;
}
/**
 * Closes the underlying reader.
 *
 * @throws IOException if the close fails
 */
@Override
public void close() throws IOException{
br.close();
}
/**
 * Return the physical line number (one-based) at which the last logical line read started,
 * or zero if no line has been read yet.
 */
public int getStartLine() {
return startLine;
}
}

View File

@@ -0,0 +1,226 @@
package com.semmle.util.io.csv;
/**
Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
/**
 * A very simple CSV writer released under a commercial-friendly license.
 *
 * Fields are separated by a configurable separator and quoted/escaped as
 * needed; see {@link #writeNext(String...)} for the quoting rules.
 *
 * @author Glen Smith
 *
 */
public class CSVWriter implements Closeable {
// Initial capacity for the per-line StringBuilder.
private static final int INITIAL_STRING_SIZE = 128;
private Writer rawWriter;
private char separator;
private char quotechar;
private char escapechar;
private String lineEnd;
/** The quote constant to use when you wish to suppress all quoting. */
public static final char NO_QUOTE_CHARACTER = '\u0000';
/** The escape constant to use when you wish to suppress all escaping. */
private static final char NO_ESCAPE_CHARACTER = '\u0000';
/** Default line terminator: a bare newline, regardless of platform. */
private static final String DEFAULT_LINE_END = "\n";
// Per-column eager-quoting configuration; see setEagerQuotingColumns().
private boolean[] eagerQuotingFlags = {};
/**
 * Constructs CSVWriter using a comma for the separator.
 *
 * @param writer
 *            the writer to an underlying CSV source.
 */
public CSVWriter(Writer writer) {
this(writer,
CSVParser.DEFAULT_SEPARATOR,
CSVParser.DEFAULT_QUOTE_CHARACTER,
CSVParser.DEFAULT_ESCAPE_CHARACTER
);
}
/**
 * Constructs CSVWriter with supplied separator and quote char.
 *
 * @param writer
 *            the writer to an underlying CSV source.
 * @param separator
 *            the delimiter to use for separating entries
 * @param quotechar
 *            the character to use for quoted elements
 * @param escapechar
 *            the character to use for escaping quotechars or escapechars
 */
public CSVWriter(Writer writer, char separator, char quotechar, char escapechar) {
this(writer, separator, quotechar, escapechar, DEFAULT_LINE_END);
}
/**
 * Constructs CSVWriter with supplied separator, quote char, escape char and line ending.
 *
 * @param writer
 *            the writer to an underlying CSV source.
 * @param separator
 *            the delimiter to use for separating entries
 * @param quotechar
 *            the character to use for quoted elements
 * @param escapechar
 *            the character to use for escaping quotechars or escapechars
 * @param lineEnd
 *            the line feed terminator to use
 */
private CSVWriter(Writer writer, char separator, char quotechar, char escapechar, String lineEnd) {
this.rawWriter = writer;
this.separator = separator;
this.quotechar = quotechar;
this.escapechar = escapechar;
this.lineEnd = lineEnd;
}
/**
 * Call with an array of booleans, corresponding to columns, where columns that have
 * <code>false</code> will not be quoted unless they contain special characters.
 * <p>
 * If there are more columns to print than have been configured here, any additional
 * columns will be treated as if <code>true</code> was passed.
 */
public void setEagerQuotingColumns(boolean... flags) {
eagerQuotingFlags = flags;
}
/**
 * Writes the entire list to a CSV file. The list is assumed to be a
 * String[]
 *
 * @param allLines
 *            a List of String[], with each String[] representing a line of
 *            the file.
 * @throws IOException
 *            if writing any of the lines fails
 */
public void writeAll(List<String[]> allLines) throws IOException {
for (String[] line : allLines) {
writeNext(line);
}
}
/**
 * Writes the next line to the file.
 *
 * A field is quoted when it needs escaping, when its column is configured
 * for eager quoting (columns beyond the configured flags are always eagerly
 * quoted), or when it contains "somewhat special" characters. A null field
 * is emitted as an empty field.
 *
 * @param nextLine
 *            a string array with each comma-separated element as a separate
 *            entry.
 */
public void writeNext(String... nextLine) throws IOException {
if (nextLine == null)
return;
StringBuilder sb = new StringBuilder(INITIAL_STRING_SIZE);
for (int i = 0; i < nextLine.length; i++) {
// Separator goes before every field except the first.
if (i != 0) {
sb.append(separator);
}
String nextElement = nextLine[i];
// A null element leaves the field empty (the separator was already written).
if (nextElement == null)
continue;
boolean hasSpecials = stringContainsSpecialCharacters(nextElement);
if (hasSpecials || i >= eagerQuotingFlags.length || eagerQuotingFlags[i]
|| stringContainsSomewhatSpecialCharacter(nextElement)) {
if (quotechar != NO_QUOTE_CHARACTER)
sb.append(quotechar);
// Only run the (relatively expensive) escaping pass when needed.
sb.append(hasSpecials ? processLine(nextElement) : nextElement);
if (quotechar != NO_QUOTE_CHARACTER)
sb.append(quotechar);
} else {
sb.append(nextElement);
}
}
sb.append(lineEnd);
rawWriter.write(sb.toString());
}
/**
 * Return true if there are characters that need to be escaped in addition to
 * being quoted.
 */
private boolean stringContainsSpecialCharacters(String line) {
return line.indexOf(quotechar) != -1 || line.indexOf(escapechar) != -1;
}
/**
 * Return true if there are characters that should not appear in a completely
 * unquoted field.
 */
private boolean stringContainsSomewhatSpecialCharacter(String s) {
return s.indexOf('"') != -1 || s.indexOf('\'') != -1 || s.indexOf('\t') != -1 || s.indexOf(separator) != -1;
}
// Copies nextElement, prefixing each quotechar or escapechar with escapechar
// (no-op when escaping is suppressed via NO_ESCAPE_CHARACTER).
protected StringBuilder processLine(String nextElement)
{
StringBuilder sb = new StringBuilder(INITIAL_STRING_SIZE);
for (int j = 0; j < nextElement.length(); j++) {
char nextChar = nextElement.charAt(j);
if (escapechar != NO_ESCAPE_CHARACTER && nextChar == quotechar) {
sb.append(escapechar).append(nextChar);
} else if (escapechar != NO_ESCAPE_CHARACTER && nextChar == escapechar) {
sb.append(escapechar).append(nextChar);
} else {
sb.append(nextChar);
}
}
return sb;
}
/**
 * Flush underlying stream to writer.
 *
 * @throws IOException if bad things happen
 */
public void flush() throws IOException {
rawWriter.flush();
}
/**
 * Close the underlying stream writer flushing any buffered content.
 *
 * @throws IOException if bad things happen
 *
 */
@Override
public void close() throws IOException {
rawWriter.close();
}
}

View File

@@ -0,0 +1,101 @@
package com.semmle.util.logging;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Stack;
import com.semmle.util.exception.CatastrophicError;
/**
 * Wraps access to {@link System#out}, {@link System#err} and {@link System#in}
 * so that tools behave consistently whether they run in-process or
 * out-of-process. Each thread (and, via inheritance, its child threads) sees
 * its own stream bindings, which can be replaced and restored in a stack
 * discipline via {@link #pushContext} / {@link #popContext}.
 */
public class Streams {
/** Snapshot of the three stream bindings taken when a context is pushed. */
private static class SavedContext {
public PrintStream out, err;
public InputStream in;
}
private static final InheritableThreadLocal<PrintStream> out =
new InheritableThreadLocal<PrintStream>() {
@Override
protected PrintStream initialValue() {
return System.out;
}
};
private static final InheritableThreadLocal<PrintStream> err =
new InheritableThreadLocal<PrintStream>() {
@Override
protected PrintStream initialValue() {
return System.err;
}
};
private static final InheritableThreadLocal<InputStream> in =
new InheritableThreadLocal<InputStream>() {
@Override
protected InputStream initialValue() {
return System.in;
}
};
// Per-thread stack of saved bindings, one entry per un-popped pushContext().
private static final ThreadLocal<Stack<SavedContext>> contexts =
ThreadLocal.withInitial(Stack::new);
/** The current standard-output binding for this thread. */
public static PrintStream out() {
return out.get();
}
/** The current standard-error binding for this thread. */
public static PrintStream err() {
return err.get();
}
/** The current standard-input binding for this thread. */
public static InputStream in() {
return in.get();
}
/**
 * Save the current stream bindings and replace them with the given streams
 * until the matching {@link #popContext()}.
 */
public static void pushContext(OutputStream stdout, OutputStream stderr, InputStream stdin) {
// Remember the bindings in effect right now so popContext() can restore them.
SavedContext saved = new SavedContext();
saved.out = out.get();
saved.err = err.get();
saved.in = in.get();
// Running in-process does not give us the clean slate a freshly started
// process would have, so anything we care about must be reset manually.
// In particular, the parent VM may well have set showAllLogs=True, and we
// don't want the extra noise when executing the child, so a fresh log
// state is used for the duration of the child.
contexts.get().push(saved);
out.set(asPrintStream(stdout));
err.set(asPrintStream(stderr));
in.set(stdin);
}
// Wrap an arbitrary OutputStream as a PrintStream, reusing it if it already is one.
private static PrintStream asPrintStream(OutputStream stream) {
if (stream instanceof PrintStream)
return (PrintStream) stream;
return new PrintStream(stream);
}
/**
 * Restore the bindings saved by the matching {@link #pushContext}, flushing
 * the outgoing output streams first.
 */
public static void popContext() {
Stack<SavedContext> stack = contexts.get();
// Flush whatever the current bindings have buffered before swapping them out.
out.get().flush();
err.get().flush();
if (stack.isEmpty())
throw new CatastrophicError("Popping logging context without preceding push.");
SavedContext saved = stack.pop();
out.set(saved.out);
err.set(saved.err);
in.set(saved.in);
}
}

View File

@@ -0,0 +1,398 @@
package com.semmle.util.process;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Timer;
import java.util.TimerTask;
import com.github.codeql.Logger;
import com.github.codeql.Severity;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.exception.InterruptedError;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.files.FileUtil;
import com.semmle.util.io.RawStreamMuncher;
/**
 * A builder for an external process. This class wraps {@link ProcessBuilder},
 * adding support for spawning threads to manage the input and output streams of
 * the created process.
 */
public abstract class AbstractProcessBuilder {
// Logger used for debug/error reporting; assigned externally before use.
public static Logger logger = null;
// timeout for the muncher threads in seconds
protected static final long MUNCH_TIMEOUT = 20;
private final ProcessBuilder builder;
// When false, a non-zero exit code is expected and not logged as an error.
private boolean logFailure = true;
// Optional stream piped to the child's standard input; see setIn().
private InputStream in;
private LeakPrevention leakPrevention;
// Set by kill(); checked under `synchronized (this)` together with threadToInterrupt.
private volatile boolean interrupted = false;
// The thread currently blocked in execute(), if any, so kill() can interrupt it.
private volatile Thread threadToInterrupt = null;
// Set by the watchdog timer when the execute(timeout) deadline fires.
private volatile boolean hitTimeout = false;
// Maps lower-cased env var names to their canonical spelling; see getCanonicalVarName().
private final Map<String, String> canonicalEnvVarNames = new LinkedHashMap<>();
// Thread pumping `in` into the child's stdin, if one was started.
private RawStreamMuncher inMuncher;
/**
 * Create a builder for the given command line.
 *
 * @param args the command and its arguments; neither the list nor any element may be null.
 * @param cwd working directory for the child, or null for the current directory.
 * @param env extra environment entries to merge in, or null for none.
 */
public AbstractProcessBuilder (List<String> args, File cwd, Map<String, String> env)
{
// Sanity checks
CatastrophicError.throwIfNull(args);
for (int i = 0; i < args.size(); ++i)
CatastrophicError.throwIfNull(args.get(i));
leakPrevention = LeakPrevention.NONE;
builder = new ProcessBuilder(new ArrayList<>(args));
if (cwd != null) {
builder.directory(cwd);
}
// Make sure that values that have been explicitly removed from Env.systemEnv()
// -- such as the variables representing command-line arguments --
// are not taken over by the new ProcessBuilder.
Map<String, String> keepThese = Env.systemEnv().getenv();
for (Iterator<String> it = builder.environment().keySet().iterator(); it.hasNext();) {
String name = it.next();
if (!keepThese.containsKey(name))
it.remove();
}
if (env != null) {
addEnvironment(env);
}
}
/** Set the argument-scrubbing policy used when the command line is logged or printed. */
public void setLeakPrevention(LeakPrevention leakPrevention) {
CatastrophicError.throwIfNull(leakPrevention);
this.leakPrevention = leakPrevention;
}
/**
 * See {@link ProcessBuilder#redirectErrorStream(boolean)}.
 */
public void setRedirectErrorStream(boolean redirectErrorStream) {
this.builder.redirectErrorStream(redirectErrorStream);
}
/** Whether the builder's environment already defines the given variable (case handled per OS). */
public final boolean hasEnvVar(String name) {
return builder.environment().containsKey(getCanonicalVarName(name));
}
/**
 * Add the specified key/value pair to the environment of the builder,
 * overriding any previous environment entry of that name. This method
 * provides additional logic to handle systems where environment
 * variable names are case-insensitive, ensuring the last-added value
 * for a name ends up in the final environment regardless of case.
 * @param name The name of the environment variable. Whether case matters
 *        is OS-dependent.
 * @param value The value for the environment variable.
 */
public final void addEnvVar(String name, String value) {
builder.environment().put(getCanonicalVarName(name), value);
}
/**
 * Prepend a specified set of arguments to this process builder's command line.
 * This only makes sense before the builder is started.
 */
public void prependArgs(List<String> args) {
builder.command().addAll(0, args);
}
/**
 * Compute a canonical environment variable name relative to this process
 * builder.
 *
 * The need for this method arises on platforms where the environment is
 * case-insensitive -- any inspection of it in such a situation needs to
 * canonicalise the variable name to have well-defined behaviour. This is
 * builder-specific, because it depends on its existing environment. For
 * example, if it already defines a variable called <code>Path</code>, and the
 * environment is case-insensitive, then setting a variable called
 * <code>PATH</code> should overwrite this, and checking whether a variable
 * called <code>PATH</code> is already defined should return <code>true</code>.
 */
public String getCanonicalVarName(String name) {
if (!Env.getOS().isEnvironmentCaseSensitive()) {
// We need to canonicalise the variable name to work around Java API limitations.
// Lazily build the lower-case -> canonical-spelling map from the current environment.
if (canonicalEnvVarNames.isEmpty())
for (String var : builder.environment().keySet())
canonicalEnvVarNames.put(StringUtil.lc(var), var);
String canonical = canonicalEnvVarNames.get(StringUtil.lc(name));
if (canonical == null)
// First time we see this name: it becomes the canonical spelling.
canonicalEnvVarNames.put(StringUtil.lc(name), name);
else
name = canonical;
}
return name;
}
/**
 * Get a snapshot of this builder's environment, using canonical variable names
 * (as per {@link #getCanonicalVarName(String)}) as keys. Modifications to this
 * map do not propagate back to the builder; use
 * {@link #addEnvVar(String, String)} or {@link #addEnvironment(Map)} to extend
 * its environment.
 */
public Map<String, String> getCanonicalCurrentEnv() {
Map<String, String> result = new LinkedHashMap<>();
for (Entry<String, String> e : builder.environment().entrySet())
result.put(getCanonicalVarName(e.getKey()), e.getValue());
return result;
}
/**
 * Specify an input stream of data that will be piped to the process's
 * standard input.
 *
 * CAUTION: if this stream is the current process' standard in and no
 * input is ever received, then we will leak an uninterruptible thread
 * waiting for some input. This will terminate only when the standard in
 * is closed, i.e. when the current process terminates.
 */
public final void setIn(InputStream in) {
this.in = in;
}
/**
 * Set the environment of this builder to the given map. Any
 * existing environment entries (either from the current process
 * environment or from previous calls to {@link #addEnvVar(String, String)},
 * {@link #addEnvironment(Map)} or {@link #setEnvironment(Map)})
 * are discarded.
 * @param env The environment to use.
 */
public final void setEnvironment(Map<String, String> env) {
builder.environment().clear();
canonicalEnvVarNames.clear();
addEnvironment(env);
}
/**
 * Add the specified set of environment variables to the environment for
 * the builder. This leaves existing variable definitions in place, but
 * can override them.
 * @param env The environment to merge into the current environment.
 */
public final void addEnvironment(Map<String, String> env) {
for (Entry<String, String> entry : env.entrySet())
addEnvVar(entry.getKey(), entry.getValue());
}
/** Run the process with no timeout; see {@link #execute(long)}. */
public final int execute() {
return execute(0);
}
/**
 * Set the flag indicating that a non-zero exit code may be expected. This
 * will suppress the log of failed commands.
 */
public final void expectFailure() {
logFailure = false;
}
/**
 * Start the process and block until it exits, returning its exit code.
 *
 * @param timeout if non-zero, a watchdog timer interrupts this thread after
 *        that many milliseconds (see {@link java.util.Timer#schedule}),
 *        which surfaces as an InterruptedError; {@link #processTimedOut()}
 *        then reports true.
 */
public final int execute(long timeout) {
Process process = null;
boolean processStopped = true;
Timer timer = null;
try {
synchronized (this) {
// Handle the case where we called kill() too early to use
// Thread.interrupt()
if (interrupted)
throw new InterruptedException();
threadToInterrupt = Thread.currentThread();
}
processStopped = false;
String directory;
if (builder.directory() == null) {
directory = "current directory ('" + System.getProperty("user.dir") + "')";
} else {
directory = "'" + builder.directory().toString() + "'";
}
logger.debug("Running command: '" + toString() + "' in " + directory);
process = builder.start();
// Wire up stdin/stdout/stderr pumping before waiting on the child.
setupInputHandling(process.getOutputStream());
setupOutputHandling(process.getInputStream(),
process.getErrorStream());
if (timeout != 0) {
// create the timer's thread as a "daemon" thread, so it does not
// prevent the jvm from terminating
timer = new Timer(true);
final Thread current = Thread.currentThread();
timer.schedule(new TimerTask() {
@Override
public void run() {
hitTimeout = true;
current.interrupt();
}
}, timeout);
}
int result = process.waitFor();
processStopped = true;
if (result != 0 && logFailure)
logger.error("Spawned process exited abnormally (code " + result
+ "; tried to run: " + getBuilderCommand() + ")");
return result;
} catch (IOException e) {
throw new ResourceError(
"IOException while executing process with args: "
+ getBuilderCommand(), e);
} catch (InterruptedException e) {
throw new InterruptedError(
"InterruptedException while executing process with args: "
+ getBuilderCommand(), e);
} finally {
// cancel the timer
if (timer != null) {
timer.cancel();
}
// clear the interrupted flag of the current thread
// in case it was set earlier (ie by the Timer or a call to kill())
synchronized (this) {
threadToInterrupt = null;
Thread.interrupted();
}
// get rid of the process, in case it is still running.
if (process != null && !processStopped) {
killProcess(process);
}
try {
cleanupInputHandling();
cleanupOutputHandling();
} finally {
// Close the child's stream handles regardless of muncher cleanup outcome.
if (process != null) {
FileUtil.close(process.getErrorStream());
FileUtil.close(process.getInputStream());
FileUtil.close(process.getOutputStream());
}
}
}
}
/**
 * Provides the implementation of actually stopping the child
 * process. Provided as an extension point so that this can
 * be customised for later Java versions or for other reasons.
 */
protected void killProcess(Process process) {
process.destroy();
}
/**
 * Setup handling of the process input stream (stdin).
 *
 * @param outputStream OutputStream connected to the process's standard input.
 */
protected void setupInputHandling(OutputStream outputStream) {
// No input configured: close the child's stdin immediately so it sees EOF.
if (in == null) {
FileUtil.close(outputStream);
return;
}
inMuncher = new RawStreamMuncher(in, outputStream);
inMuncher.start();
}
/**
 * Setup handling of the process' output streams (stdout and stderr).
 *
 * @param stdout
 *            InputStream connected to the process' standard output stream.
 * @param stderr
 *            InputStream connected to the process' standard error stream.
 */
protected abstract void setupOutputHandling(InputStream stdout, InputStream stderr);
/**
 * Cleanup resources related to output handling. The method is always called, either after the process
 * has exited normally, or after an abnormal termination due to an exception. As a result cleanupOutputHandling()
 * might be called, without a previous call to setupOutputHandling. The implementation of this method should
 * handle this case.
 */
protected abstract void cleanupOutputHandling();
// Interrupt the stdin-pumping thread if it is still running after process exit.
private void cleanupInputHandling() {
if (inMuncher != null && inMuncher.isAlive()) {
// There's no real need to wait for the muncher to terminate -- on the contrary,
// if it's still alive it will typically be waiting for a closing action that
// will only happen after execute() returns anyway.
// The best we can do is try to interrupt it.
inMuncher.interrupt();
}
}
/**
 * Wait up to {@code timeout} ms for an output-pumping thread to finish,
 * interrupting it and logging an error if it is still alive afterwards.
 *
 * @param which label for the stream ("output" or "error"), used in the error message.
 * @param muncher the pumping thread to join.
 * @param timeout maximum time to wait, in milliseconds.
 */
protected void waitForMuncher(String which, Thread muncher, long timeout) {
// wait for termination of the muncher until a deadline is reached
try {
muncher.join(timeout);
} catch (InterruptedException e) {
Exceptions.ignore(e,"Further interruption attempts are ineffective --"
+ " we're already waiting for termination.");
}
// if muncher is still alive, report an error
if(muncher.isAlive()){
muncher.interrupt();
logger.error(String.format("Standard %s stream hasn't closed %s seconds after termination of subprocess '%s'.", which, MUNCH_TIMEOUT, this));
}
}
/**
 * Request that a concurrent (or future) {@link #execute} call be aborted,
 * by interrupting the executing thread or pre-setting the interrupted flag.
 */
public final void kill() {
synchronized (this) {
interrupted = true;
if (threadToInterrupt != null)
threadToInterrupt.interrupt();
}
}
/** True if the last {@link #execute(long)} was aborted by its watchdog timer. */
public boolean processTimedOut() {
return hitTimeout;
}
@Override
public String toString() {
return commandLineToString(getBuilderCommand());
}
// The command line, scrubbed according to the configured leak-prevention policy.
private List<String> getBuilderCommand() {
return leakPrevention.cleanUpArguments(builder.command());
}
// Render a command line for display: quote empty/space-containing arguments
// and escape embedded double quotes.
private static String commandLineToString(List<String> commandLine) {
StringBuilder sb = new StringBuilder();
boolean first = true;
for (String s : commandLine) {
boolean tricky = s.isEmpty() || s.contains(" ") ;
if (!first)
sb.append(" ");
first = false;
if (tricky)
sb.append("\"");
sb.append(s.replace("\"", "\\\""));
if (tricky)
sb.append("\"");
}
return sb.toString();
}
}

View File

@@ -0,0 +1,81 @@
package com.semmle.util.process;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import com.semmle.util.io.StreamMuncher;
import com.semmle.util.logging.Streams;
/**
 * Concrete {@link AbstractProcessBuilder} that pumps the child's stdout and
 * stderr into caller-supplied {@link OutputStream}s via {@link StreamMuncher}
 * threads.
 */
public class Builder extends AbstractProcessBuilder {
// Destination for the child's standard error.
private final OutputStream err;
// Destination for the child's standard output.
private final OutputStream out;
protected StreamMuncher errMuncher;
protected StreamMuncher outMuncher;
/** Varargs convenience constructor with no extra environment. */
public Builder(OutputStream out, OutputStream err, File cwd, String... args) {
this(out, err, cwd, null, args);
}
/** Varargs convenience constructor with an optional environment. */
public Builder(OutputStream out, OutputStream err, File cwd,
Map<String, String> env, String... args) {
this(Arrays.asList(args), out, err, env, cwd);
}
/** Run in the current directory with the current environment. */
public Builder(List<String> args, OutputStream out, OutputStream err) {
this(args, out, err, null, null);
}
/** Run in the given directory with the current environment. */
public Builder(List<String> args, OutputStream out, OutputStream err,
File cwd) {
this(args, out, err, null, cwd);
}
/** Run in the current directory with extra environment entries. */
public Builder(List<String> args, OutputStream out, OutputStream err,
Map<String, String> env) {
this(args, out, err, env, null);
}
/**
 * Primary constructor; all other constructors delegate here.
 *
 * @param args command line to run.
 * @param out destination for the child's standard output.
 * @param err destination for the child's standard error.
 * @param env extra environment entries, or null for none.
 * @param cwd working directory, or null for the current directory.
 */
public Builder(List<String> args, OutputStream out, OutputStream err,
Map<String, String> env, File cwd) {
super(args, cwd, env);
this.out = out;
this.err = err;
}
/**
 * Convenience method that executes the given command line in the current
 * working directory with the current environment, blocking until
 * completion. The process's output stream is redirected to System.out, and
 * its error stream to System.err. It returns the exit code of the command.
 */
public static int run(List<String> commandLine) {
return new Builder(commandLine, Streams.out(), Streams.err()).execute();
}
@Override
protected void cleanupOutputHandling() {
// wait for munchers to finish munching.
// MUNCH_TIMEOUT is in seconds; waitForMuncher expects milliseconds.
long deadline = 1000*MUNCH_TIMEOUT;
// note: check that munchers are not null, in case setupOutputHandling was
// not called to initialize them
if(outMuncher != null) {
waitForMuncher("output", outMuncher,deadline);
}
if(errMuncher != null) {
waitForMuncher("error", errMuncher,deadline);
}
}
@Override
protected void setupOutputHandling(InputStream stdout, InputStream stderr) {
// One pumping thread per stream, started as soon as the process is launched.
errMuncher = new StreamMuncher(stderr, err);
errMuncher.start();
outMuncher = new StreamMuncher(stdout, out);
outMuncher.start();
}
}

View File

@@ -0,0 +1,725 @@
package com.semmle.util.process;
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.expansion.ExpansionEnvironment;
/**
* Helper methods for finding out environment properties like the OS type.
*/
public class Env {
/**
* Enum for commonly used environment variables.
*
* <p>
* The intention is that the name of the enum constant is the same as the environment
* variable itself. This means that the <code>toString</code> method does the right thing,
* as does calling {@link Enum#name() }.
* </p>
*
* <p>
* Should you wish to rename an environment variable (which you're unlikely to, due to the
* fact that there are many non-Java consumers), you can do a rename refactoring to make the
* Java consumers do the right thing.
* </p>
*/
public enum Var {
/*
* Core toolchain variables
*/
/**
* The location of the toolchain.
*
* Deprecated in favour of {@link Var#SEMMLE_DIST}, {@link Var#SEMMLE_HOME}, and
* {@link Var#SEMMLE_DATA}.
*/
@Deprecated
ODASA_HOME,
/**
* The location of the user's configuration files, including project configurations,
* dashboard configurations, team insight configurations, licenses etc.
*/
SEMMLE_HOME,
/**
* The location of the user's data, including snapshots, built dashboards, team
* insight data, etc.
*/
SEMMLE_DATA,
/**
* The location of any caches used by the toolchain, including compilation caches, trap caches, etc.
*/
SEMMLE_CACHE,
/**
* The location of the toolchain files, including the odasa jar, our queries etc.
*/
SEMMLE_DIST,
/**
* If running from a git tree, the root of the tree.
*/
SEMMLE_GIT_ROOT,
/**
* The root from which relative paths in a DOOD file are resolved.
*/
SEMMLE_QUERY_ROOT,
/**
* The directory where lock files are kept.
*/
SEMMLE_LOCK_DIR,
/**
* The directory which will be checked for licenses.
*/
SEMMLE_LICENSE_DIR,
/**
* The location where our queries are kept.
*/
ODASA_QUERIES,
/**
* The location of the 'tools' directory
*/
ODASA_TOOLS,
/**
* Whether we are running in 'prototyping mode'.
*/
ODASA_PROTOTYPE_MODE,
/**
* The location of the default compilation cache, as a space-separated list of URIs.
*
* Multiple entries are tried in sequence.
*/
SEMMLE_COMPILATION_CACHE,
/**
* Override the versions used in compilation caching.
*
* This is useful for testing without modifying the version manually.
*/
SEMMLE_OVERRIDE_OPTIMISER_VERSION,
/**
* If set, do not use compilation caching.
*/
SEMMLE_NO_COMPILATION_CACHING,
/**
* If set, use this as the size of compilation caches, in bytes. If set to 'INFINITY', no
* limit will be placed on the size.
*/
SEMMLE_COMPILATION_CACHE_SIZE,
/*
* Other toolchain variables
*/
SEMMLE_JAVA_HOME,
ODASA_JAVA_HOME,
ODASA_TRACER_CONFIGURATION,
/**
* The Java tracer agent to propagate to JVM processes.
*/
SEMMLE_JAVA_TOOL_OPTIONS,
/**
* Whether to run jar-based subprocesses in-process instead.
*/
ODASA_IN_PROCESS,
/**
* The executable to use for importing trap files.
*/
SEMMLE_TRAP_IMPORTER,
SEMMLE_PRESERVE_SYMLINKS,
SEMMLE_PATH_TRANSFORMER,
/*
* Environment variables for password for credential stores.
* Either is accepted to allow a single entry point in the code
* while documenting as appropriate for the audience.
*/
SEMMLE_CREDENTIALS_PASSWORD,
LGTM_CREDENTIALS_PASSWORD,
/*
*
* Internal config variables
*/
/**
* Extra arguments to pass to JVMs launched by Semmle tools.
*/
SEMMLE_JAVA_ARGS,
/**
* A list of log levels to set, of the form:
* "foo.bar=TRACE,bar.baz=DEBUG"
*/
SEMMLE_LOG_LEVELS,
/**
* The default heap size for commands that accept a ram parameter.
*/
SEMMLE_DEFAULT_HEAP_SIZE,
SEMMLE_MAX_RAM_MB,
/**
* Whether to disable asynchronous logging in the query server (otherwise it may drop messages).
*/
SEMMLE_SYNCHRONOUS_LOGGING,
/**
* Whether or not to use memory mapping
*/
SEMMLE_MEMORY_MAPPING,
SEMMLE_METRICS_DIR,
/**
* Whether we are running in our own unit tests.
*/
SEMMLE_UNIT_TEST_MODE,
/**
* Whether to include the source QL in a QLO.
*/
SEMMLE_DEBUG_QL_IN_QLO,
/**
* Whether to enable extra assertions
*/
ODASA_ASSERTIONS,
/**
* A file containing extra variables for ExpansionEnvironments.
*/
ODASA_EXTRA_VARIABLES,
ODASA_TUNE_GC,
/**
* Whether to run PI in hosted mode.
*/
SEMMLE_ODASA_DEBUG,
/**
* The python executable to use for Qltest.
*/
SEMMLE_PYTHON,
/**
* The platform we are running on; one of "linux", "osx" and "win".
*/
SEMMLE_PLATFORM,
/**
* Location of platform specific tools, currently only used in universal LGTM distributions
*/
SEMMLE_PLATFORM_TOOLS,
/**
* PATH to use to look up tooling required by macOS Relocator scripts.
*/
CODEQL_TOOL_PATH,
/**
* This can override the heuristics for BDD factory resetting. Most useful for measurements
* and debugging.
*/
CODEQL_BDD_RESET_FRACTION,
/**
* How many TRAPLinker errors to report.
*/
SEMMLE_MAX_TRAP_ERRORS,
/**
* How many tuples to accumulate in memory before pushing to disk.
*/
SEMMLE_MAX_TRAP_INMEMORY_TUPLES,
/**
* How many files to merge at each merge step.
*/
SEMMLE_MAX_TRAP_MERGE,
/*
* Variables used by extractors.
*/
/**
* Whether the C++ extractor should copy executables before
* running them (works around System Integrity Protection
* on OS X 10.11+).
*/
SEMMLE_COPY_EXECUTABLES,
/**
* When SEMMLE_COPY_EXECUTABLES is in operation, where to
* create the directory to copy the executables to.
*/
SEMMLE_COPY_EXECUTABLES_SUPER_ROOT,
/**
* When SEMMLE_COPY_EXECUTABLES is in operation, the
* directory we are copying executables to.
*/
SEMMLE_COPY_EXECUTABLES_ROOT,
/**
* The executable which should be used as an implicit runner on Windows.
*/
SEMMLE_WINDOWS_RUNNER_BINARY,
/**
* Verbosity level for the Java interceptor.
*/
SEMMLE_INTERCEPT_VERBOSITY,
/**
* Verbosity level for the Java extractor.
*/
ODASA_JAVAC_VERBOSE,
/**
* Whether to use class origin tracking for the Java extractor.
*/
ODASA_JAVA_CLASS_ORIGIN_TRACKING,
ODASA_JAVAC_CORRECT_EXCEPTIONS,
ODASA_JAVAC_EXTRA_CLASSPATH,
ODASA_NO_ECLIPSE_BUILD,
/*
* Variables set during snapshot builds
*/
/**
* The location of the project being built.
*/
ODASA_PROJECT,
/**
* The location of the snapshot being built.
*/
ODASA_SNAPSHOT,
ODASA_SNAPSHOT_NAME,
ODASA_SRC,
ODASA_DB,
ODASA_BUILD_ERROR_DIR,
TRAP_FOLDER,
SOURCE_ARCHIVE,
ODASA_OUTPUT,
ODASA_SUBPROJECT_THREADS,
/*
* Layout variables
*/
ODASA_JAVA_LAYOUT,
ODASA_CPP_LAYOUT,
ODASA_CSHARP_LAYOUT,
ODASA_PYTHON_LAYOUT,
ODASA_JAVASCRIPT_LAYOUT,
/*
* External variables
*/
JAVA_HOME,
PATH,
LINUX_VARIANT,
/*
* If set, use this proxy for HTTP requests
*/
HTTP_PROXY,
http_proxy,
/*
* If set, use this proxy for HTTPS requests
*/
HTTPS_PROXY,
https_proxy,
/*
* If set, ignore the variables above and do not use any proxies for requests
*/
NO_PROXY,
no_proxy,
/*
* Variables set by the codeql-action. All variables will
* be unset if the CLI is not in the context of the
* codeql-action.
*/
/**
* Either {@code actions} or {@code runner}.
*/
CODEQL_ACTION_RUN_MODE,
/**
* Semantic version of the codeql-action.
*/
CODEQL_ACTION_VERSION,
/*
* tracer variables
*/
/**
* Colon-separated list of enabled tracing languages
*/
CODEQL_TRACER_LANGUAGES,
/**
* Path to the build-tracer log file
*/
CODEQL_TRACER_LOG,
/**
* Prefix to a language-specific root directory
*/
CODEQL_TRACER_ROOT_,
;
}
private static final int DEFAULT_RAM_MB_32 = 1024;
private static final int DEFAULT_RAM_MB = 4096;
private static final Env instance = new Env();
private final Stack<Map<String, String>> envVarContexts;
public static synchronized Env systemEnv() {
return instance;
}
/**
 * Create an instance of Env containing no variables. Intended for use in
 * testing to isolate the test from the local machine environment.
 *
 * @return a fresh {@code Env} whose single variable context is empty
 */
public static Env emptyEnv() {
Env env = new Env();
// The constructor pushed a context populated from System.getenv();
// discard it and push an empty, unmodifiable context in its place.
env.envVarContexts.clear();
env.envVarContexts.push(Collections.unmodifiableMap(makeContext()));
return env;
}
private static Map<String, String> makeContext() {
if (getOS().equals(OS.WINDOWS)) {
// We want to compare in the same way Windows does, which means
// upper-casing. For example, '_' needs to come after 'Z', but
// would come before 'z'.
return new TreeMap<>((a, b) -> a.toUpperCase(Locale.ENGLISH).compareTo(b.toUpperCase(Locale.ENGLISH)));
} else {
return new LinkedHashMap<>();
}
}
public Env() {
envVarContexts = new Stack<>();
Map<String, String> env = makeContext();
try {
env.putAll(System.getenv());
} catch (SecurityException ex) {
Exceptions.ignore(ex, "Treat an inaccessible environment variable as not existing");
}
envVarContexts.push(Collections.unmodifiableMap(env));
}
public synchronized void unsetAll(Collection<String> names) {
if (!names.isEmpty()) {
Map<String, String> map = envVarContexts.pop();
map = new LinkedHashMap<>(map);
for (String name : names)
map.remove(name);
envVarContexts.push(Collections.unmodifiableMap(map));
}
}
public synchronized Map<String, String> getenv() {
return envVarContexts.peek();
}
/**
* Get the value of an environment variable, or <code>null</code> if
* the environment variable is not set. WARNING: not all systems may
* make a difference between an empty variable or <code>null</code>,
* so don't rely on that behavior.
*/
public synchronized String get(Var var) {
return get(var.name());
}
/**
* Get the value of an environment variable, or <code>null</code> if
* the environment variable is not set. WARNING: not all systems may
* make a difference between an empty variable or <code>null</code>,
* so don't rely on that behavior.
*/
public synchronized String get(String envVarName) {
return getenv().get(envVarName);
}
/**
* Get the non-empty value of an environment variable, or <code>null</code>
* if the environment variable is not set or set to an empty value.
*/
public synchronized String getNonEmpty(Var var) {
return getNonEmpty(var.name());
}
/**
* Get the value of an environment variable, or the empty string if it is not
* set.
*/
public synchronized String getPossiblyEmpty(String envVarName) {
String got = getenv().get(envVarName);
return got != null ? got : "";
}
/**
 * Look up an environment variable, treating an empty value the same as an
 * unset one.
 *
 * @param envVarName the name of the environment variable
 * @return the variable's value, or <code>null</code> when it is unset or empty
 */
public synchronized String getNonEmpty(String envVarName) {
String value = get(envVarName);
if (value == null || value.isEmpty())
return null;
return value;
}
/**
 * Gets the value of the first environment variable among <code>envVarNames</code>
 * whose value is non-empty, or <code>null</code> if all variables have empty values.
 */
public synchronized String getFirstNonEmpty(String... envVarNames) {
// Stop scanning as soon as a non-empty value is found.
String found = null;
for (int i = 0; found == null && i < envVarNames.length; i++)
found = getNonEmpty(envVarNames[i]);
return found;
}
/**
* Gets the value of the first environment variable among <code>envVars</code>
* whose value is non-empty, or <code>null</code> if all variables have empty values.
*/
public synchronized String getFirstNonEmpty(Var... envVars) {
String[] envVarNames = new String[envVars.length];
for (int i = 0; i < envVars.length; ++i)
envVarNames[i] = envVars[i].name();
return getFirstNonEmpty(envVarNames);
}
/**
* Read a boolean from the given environment variable. If the variable
* is not set, then return <code>false</code>. Otherwise, interpret the
* environment variable using {@link Boolean#parseBoolean(String)}.
*/
public boolean getBoolean(Var var) {
return getBoolean(var.name());
}
/**
* Read a boolean from the given environment variable name. If the variable
* is not set, then return <code>false</code>. Otherwise, interpret the
* environment variable using {@link Boolean#parseBoolean(String)}.
*/
public boolean getBoolean(String envVarName) {
return getBoolean(envVarName, false);
}
/**
* Read a boolean from the given environment variable. If the variable
* is not set, then return <code>def</code>. Otherwise, interpret the
* environment variable using {@link Boolean#parseBoolean(String)}.
*/
public boolean getBoolean(Var var, boolean def) {
return getBoolean(var.name(), def);
}
/**
 * Read a boolean from the given environment variable name. An unset
 * variable yields <code>def</code>; any other value is interpreted with
 * {@link Boolean#parseBoolean(String)}.
 */
public boolean getBoolean(String envVarName, boolean def) {
String raw = get(envVarName);
if (raw == null)
return def;
return Boolean.parseBoolean(raw);
}
/**
* Read an integer setting from the given environment variable name. If the
* variable is not set, or fails to parse, return the supplied default value.
*/
public int getInt(Var var, int defaultValue) {
return getInt(var.name(), defaultValue);
}
/**
 * Read an integer setting from the given environment variable name. If the
 * variable is not set, or its value fails to parse as a decimal integer,
 * the supplied default value is returned instead.
 */
public int getInt(String envVarName, int defaultValue) {
String raw = get(envVarName);
if (raw != null) {
try {
return Integer.parseInt(raw);
} catch (NumberFormatException e) {
Exceptions.ignore(e, "We'll just use the default value.");
}
}
return defaultValue;
}
/**
* Enter a new context for environment variables, with the given
* new variable values. The values will override the current environment
* values if they define the same variables.
*/
public synchronized void pushEnvironmentContext(Map<String, String> addedValues) {
Map<String, String> newValues = makeContext();
newValues.putAll(envVarContexts.peek());
newValues.putAll(addedValues);
envVarContexts.push(Collections.unmodifiableMap(newValues));
}
/**
* Leave a context for environment variables that was created with
* <code>pushEnvironmentContext</code>
*/
public synchronized void popEnvironmentContext() {
envVarContexts.pop();
}
/**
* Add all the custom environment variables to a process builder, so that
* they are passed on to the child process.
*/
public synchronized void addEnvironmentToNewProcess(ProcessBuilder builder) {
if (envVarContexts.size() > 1)
builder.environment().putAll(envVarContexts.peek());
}
public synchronized void addEnvironmentToNewEnv(ExpansionEnvironment env) {
if (envVarContexts.size() > 1)
env.defineVars(envVarContexts.peek());
}
/**
* Get a string representing the OS type. This
* is not guaranteed to have any particular form, and
* is for displaying to users. Might return <code>null</code> if
* the property is not defined by the JVM.
*/
public static String getOSName() {
return System.getProperty("os.name");
}
/**
* Determine which OS is currently being run (somewhat best-effort).
* Does not determine whether a program is being run under Cygwin
* or not - Windows will be the OS even under Cygwin.
*/
public static OS getOS() {
String name = getOSName();
if (name == null)
return OS.UNKNOWN;
if (name.contains("Windows"))
return OS.WINDOWS;
else if (name.contains("Mac OS X"))
return OS.MACOS;
else if (name.contains("Linux"))
return OS.LINUX;
else
// Guess that we are probably some Unix flavour
return OS.UNKNOWN_UNIX;
}
/**
* Kinds of operating systems. A notable absence is Cygwin: this just
* gets reported as Windows.
*/
public static enum OS {
WINDOWS(false, false), LINUX(true, true), MACOS(false, true), UNKNOWN_UNIX(true, true), UNKNOWN(true, true),;
private final boolean fileSystemCaseSensitive;
private final boolean envVarsCaseSensitive;
private OS(boolean fileSystemCaseSensitive, boolean envVarsCaseSensitive) {
this.fileSystemCaseSensitive = fileSystemCaseSensitive;
this.envVarsCaseSensitive = envVarsCaseSensitive;
}
/**
 * Get an OS value from the short display name. Acceptable
 * inputs (case insensitive) are: Windows, Linux, MacOS or
 * Mac OS.
 *
 * @throws IllegalArgumentException if the given name does not
 *         correspond to an OS
 */
public static OS fromDisplayName(String name) {
if (name != null) {
// Upper-case in a fixed locale rather than the default one: in a
// Turkish locale "linux".toUpperCase() yields "LİNUX" (dotted capital
// I), which would fail to match "LINUX". Locale.ENGLISH matches the
// case-mapping convention already used in makeContext().
String upper = name.toUpperCase(Locale.ENGLISH);
if ("WINDOWS".equals(upper))
return WINDOWS;
if ("LINUX".equals(upper))
return LINUX;
// Accept both "MacOS" and "Mac OS" by dropping spaces.
if ("MACOS".equals(upper.replace(" ", "")))
return MACOS;
}
throw new IllegalArgumentException("No OS type found with name " + name);
}
public boolean isFileSystemCaseSensitive() {
return fileSystemCaseSensitive;
}
public boolean isEnvironmentCaseSensitive() {
return envVarsCaseSensitive;
}
/** The short name of this operating system, in the style of {@link Var#SEMMLE_PLATFORM}. */
public String getShortName() {
switch (this) {
case WINDOWS:
return "win";
case LINUX:
return "linux";
case MACOS:
return "osx";
default:
return "unknown";
}
}
}
public static enum Architecture {
X86(true, false), X64(false, true), UNDETERMINED(false, false);
private final boolean is32Bit;
private final boolean is64Bit;
private Architecture(boolean is32Bit, boolean is64Bit) {
this.is32Bit = is32Bit;
this.is64Bit = is64Bit;
}
/** Is this definitely a 32-bit architecture? */
public boolean is32Bit() {
return is32Bit;
}
/** Is this definitely a 64-bit architecture? */
public boolean is64Bit() {
return is64Bit;
}
}
/**
 * Try to detect whether the JVM is 32-bit or 64-bit. Since there is no documented,
 * portable way to do this it is best effort.
 *
 * <p>Heuristics, tried in order:
 * <ol>
 * <li>the (undocumented, HotSpot-specific) {@code sun.arch.data.model} property;</li>
 * <li>the configured max heap -- a heap of 4GiB or more implies 64-bit;</li>
 * <li>the {@code os.arch} property, which appears to report the JVM's
 * bitness rather than the OS's.</li>
 * </ol>
 *
 * @return the detected architecture, or {@link Architecture#UNDETERMINED}
 *         if none of the heuristics gave an answer
 */
public Architecture tryDetermineJvmArchitecture() {
String value = System.getProperty("sun.arch.data.model");
if ("32".equals(value))
return Architecture.X86;
else if ("64".equals(value))
return Architecture.X64;
// Look at the max heap value - if >= 4G we *must* be in 64-bit
// (4096L << 20 == 4GiB; maxMemory() reports Long.MAX_VALUE when there is
// no limit, which tells us nothing -- hence the extra check).
long maxHeap = Runtime.getRuntime().maxMemory();
if (maxHeap < Long.MAX_VALUE && maxHeap >= 4096L << 20)
return Architecture.X64;
// Try to get the OS arch - it *appears* to give JVM bitness
String osArch = System.getProperty("os.arch");
if ("x86".equals(osArch) || "i386".equals(osArch))
return Architecture.X86;
else if ("x86_64".equals(osArch) || "amd64".equals(osArch))
return Architecture.X64;
return Architecture.UNDETERMINED;
}
/**
* Get the default amount of ram to use for new JVMs, depending on the
* current architecture. If it looks like we're running on a 32-bit
* machine, the result is sufficiently small to be representable.
*/
public int defaultRamMb() {
return getInt(
Var.SEMMLE_DEFAULT_HEAP_SIZE,
tryDetermineJvmArchitecture().is32Bit() ? DEFAULT_RAM_MB_32 : DEFAULT_RAM_MB);
}
}

View File

@@ -0,0 +1,95 @@
package com.semmle.util.process;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
public abstract class LeakPrevention {
public abstract List<String> cleanUpArguments(List<String> args);
/**
* What to put in place of any suppressed arguments.
*/
static final String REPLACEMENT_STRING = "*****";
/**
* Hides all arguments. Will only show the command name.
* e.g. "foo bar baz" is changed to "foo"
*/
public static final LeakPrevention ALL = new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
return args.isEmpty() ? args : Collections.singletonList(args.get(0));
}
};
/**
* Does not hide any arguments.
*/
public static final LeakPrevention NONE = new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
return args;
}
};
/**
* Hides the arguments at the given indexes.
*/
public static LeakPrevention suppressedArguments(int... args) {
if (args.length == 0)
return NONE;
final BitSet suppressed = new BitSet();
for (int index : args) {
suppressed.set(index);
}
return new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
List<String> result = new ArrayList<>(args.size());
int index = 0;
for (String arg : args) {
if (suppressed.get(index))
result.add(REPLACEMENT_STRING);
else
result.add(arg);
index++;
}
return result;
}
};
}
/**
* Hides the given string from any arguments that it appears in.
* The substring will be replaced while leaving the rest of the
* argument unmodified.
* <p>
* There are some potential pitfalls to be aware of when using this
* method.
* <ul>
* <li>This only suppresses exact textual matches. If the argument that
* appears is only derived from the secret instead of being an exact
* copy then it will not be suppressed.
* <li>If the secret value appears elsewhere in a known string, then it
* could leak the contents of the secret because the viewer knows what
* should have been there in the known case.
* </ul>
*/
public static LeakPrevention suppressSubstring(final String substringToSuppress) {
return new LeakPrevention() {
@Override
public List<String> cleanUpArguments(List<String> args) {
List<String> result = new ArrayList<>(args.size());
for (String arg : args) {
result.add(arg.replace(substringToSuppress, REPLACEMENT_STRING));
}
return result;
}
};
}
}

View File

@@ -0,0 +1,529 @@
package com.semmle.util.projectstructure;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.CatastrophicError;
import com.semmle.util.exception.UserError;
import com.semmle.util.io.WholeIO;
/**
* A project-layout file optionally begins with an '@'
* followed by the name the project should be renamed to.
* Optionally, it can then be followed by a list of
* include/exclude patterns (see below) which are kept
* as untransformed paths. This is followed by one or
* more clauses. Each clause has the following form:
*
* #virtual-path
* path/to/include
* another/path/to/include
* -/path/to/include/except/this
*
* i.e. one or more paths (to include) and zero or more paths
* prefixed by minus-signs (to exclude).
*/
public class ProjectLayout
{
public static final char PROJECT_NAME_PREFIX = '@';
private String project;
/**
* Map from virtual path prefixes (following the '#' in the project-layout)
* to the sequence of patterns that fall into that section. Declared as a
* {@link LinkedHashMap} since iteration order matters -- we process blocks in
* the same order as they occur in the project-layout.
*/
private final LinkedHashMap<String, Section> sections = new LinkedHashMap<String, Section>();
/**
* A file name, or similar string, to use in error messages so that the
* user knows what to fix.
*/
private String source;
/**
* Load a project-layout file.
*
* @param file the project-layout to load
*/
public ProjectLayout(File file) {
this(StringUtil.lines(new WholeIO().strictread(file)), file.toString());
}
/**
* Construct a project-layout object from an array of strings, each
* corresponding to one line of the project-layout. This constructor
* is for testing. For other uses see {@link ProjectLayout#ProjectLayout(File)}.
*
* @param lines the lines of the project-layout
*/
public ProjectLayout(String... lines) {
this(lines, null);
}
private ProjectLayout(String[] lines, String source) {
this.source = source;
String virtual = "";
Section section = new Section("");
sections.put("", section);
int num = 0;
for (String line : lines) {
num++;
line = line.trim();
if (line.isEmpty())
continue;
switch (line.charAt(0)) {
case PROJECT_NAME_PREFIX:
if (project != null)
throw error("Only one project name is allowed", source, num);
project = tail(line);
break;
case '#':
virtual = tail(line);
if (sections.containsKey(virtual))
throw error("Duplicate virtual path prefix " + virtual, source, num);
section = new Section(virtual);
sections.put(virtual, section);
break;
case '-':
section.add(new Rewrite(tail(line), source, num));
break;
default:
section.add(new Rewrite(line, virtual, source, num));
}
}
}
private static String tail(String line) {
return line.substring(1).trim();
}
/**
* Get the project name, if specified by the project-layout. This
* method should only be called if it is guaranteed that the
* project-layout will contain a project name, and it throws
* a {@link UserError} if it doesn't.
* @return the project name -- guaranteed not <code>null</code>.
* @throws UserError if the project-layout file did not specify a
* project name.
*/
public String projectName() {
if (project == null)
throw error("No project name is defined", source);
return project;
}
/**
* Get the project name, if specified by the project-layout file.
* If the file contains no renaming specification, return the
* given default value.
* @param defaultName The name to use if the project-layout doesn't
* specify a target project name.
* @return the specified name or default value.
*/
public String projectName(String defaultName) {
return project == null ? defaultName : project;
}
/**
 * @return the section headings (aka virtual paths)
 */
public List<String> sections() {
// Hand back a copy so callers cannot mutate our key set.
return new ArrayList<String>(sections.keySet());
}
/**
* Determine whether or not a particular section in this
* project-layout is empty (has no include/exclude patterns).
*
* @param section the name of the section
* @return <code>true</code> if the section is empty
*/
public boolean sectionIsEmpty(String section) {
if (!sections.containsKey(section))
throw new CatastrophicError("Section does not exist: " + section);
return sections.get(section).isEmpty();
}
/**
 * Rename a section in this project-layout.
 *
 * @param oldName the old name of the section
 * @param newName the new name
 * @throws CatastrophicError if no section named {@code oldName} exists
 */
public void renameSection(String oldName, String newName) {
if (!sections.containsKey(oldName))
throw new CatastrophicError("Section does not exist: " + oldName);
// NOTE: remove+put re-inserts the section, so it moves to the end of the
// LinkedHashMap iteration order (which is the order sections are matched).
Section section = sections.remove(oldName);
// Keep the section's own virtual-path field in sync with its map key.
section.rename(newName);
sections.put(newName, section);
}
/**
* Return a project-layout file for just one of the sections in this
* project-layout. This is done by copying all the rules from the
* section, and changing the section heading (beginning with '#')
* to a project name (beginning with '@').
*
* @param sectionName the section to create a project-layout from
* @return the text of the newly created project-layout
*/
public String subLayout(String sectionName) {
Section section = sections.get(sectionName);
if (section == null)
throw new CatastrophicError("Section does not exist: " + section);
return section.toLayout();
}
/**
 * Maps a path to its corresponding artificial path according to the
 * rules in this project-layout. If the path is excluded (either
 * explicitly, or because it is not mentioned in the project-layout)
 * then <code>null</code> is returned.
 * <p>
 * Paths should start with a leading forward-slash
 *
 * @param path the path to map
 * @return the artificial path, or <code>null</code> if the path is excluded
 */
public String artificialPath(String path) {
// If there is no leading slash, the path does not conform to the expected
// format and there is no match. (An exception is made for a completely
// empty string, which will get the sole prefix '/' and be mapped as usual).
if (path.length() > 0 && path.charAt(0) != '/')
return null;
// Compute all directory prefixes of the path once, then let each section
// (in declaration order) pick its best-matching rewrite rule.
List<String> prefixes = Section.prefixes(path);
for (Section section : sections.values()) {
Rewrite rewrite = section.match(prefixes);
String rewritten = null;
if (rewrite != null)
rewritten = rewrite.rewrite(path);
// A match whose rewrite yields null does not end the search -- presumably
// that is how exclude rules behave (TODO(review): confirm against
// Rewrite.rewrite()); later sections may still map the path.
if (rewritten != null)
return rewritten;
}
return null;
}
/**
* Checks whether a path should be included in the project specified by
* this file. A file is included if it is mapped to some location.
* <p>
* Paths should start with a leading forward-slash
*
* @param path the path to check
* @return <code>true</code> if the path should be included
*/
public boolean includeFile(String path) {
return artificialPath(path) != null;
}
public void writeTo(Writer writer) throws IOException {
if (project != null) {
writer.write(PROJECT_NAME_PREFIX);
writer.write(project);
writer.write("\n");
}
for (Section section : sections.values()) {
if (!section.virtual.isEmpty()) {
writer.write("#");
writer.write(section.virtual);
writer.write("\n");
}
section.outputRules(writer);
}
}
public void addPattern(String section, String pattern) {
if (pattern == null || pattern.isEmpty()) {
throw new IllegalArgumentException("ProjectLayout.addPattern: pattern must be a non-empty string");
}
boolean exclude = pattern.charAt(0) == '-';
Rewrite rewrite = exclude ?
new Rewrite(pattern.substring(1), null, 0) :
new Rewrite(pattern, section, null, 0);
Section s = sections.get(section);
if (s == null) {
s = new Section(section);
sections.put(section, s);
}
s.add(rewrite);
}
private static UserError error(String message, String source) {
return error(message, source, 0);
}
private static UserError error(String message, String source, int line) {
if (source == null)
return new UserError(message);
StringBuilder sb = new StringBuilder(message);
sb.append(" (");
if (line > 0)
sb.append("line ").append(line).append(" of ");
sb.append(source).append(")");
return new UserError(sb.toString());
}
/**
* Each section corresponds to a block beginning with '#some/path'. There
* is also an initial section for any include/exclude patterns before the
* first '#'.
*/
private static class Section {
private String virtual;
private final Map<String, Rewrite> simpleRewrites;
private final List<Rewrite> complexRewrites;
public Section(String virtual) {
this.virtual = virtual;
simpleRewrites = new LinkedHashMap<String, Rewrite>();
complexRewrites = new ArrayList<Rewrite>();
}
public String toLayout() {
StringWriter result = new StringWriter();
result.append('@').append(virtual).append('\n');
try {
outputRules(result);
} catch (IOException e) {
throw new CatastrophicError("StringWriter.append threw an IOException", e);
}
return result.toString();
}
private void outputRules(Writer writer) throws IOException {
List<Rewrite> all = new ArrayList<Rewrite>();
all.addAll(simpleRewrites.values());
all.addAll(complexRewrites);
Collections.sort(all, Rewrite.COMPARATOR);
for (Rewrite rewrite : all)
writer.append(rewrite.toString()).append('\n');
}
public void rename(String newName) {
virtual = newName;
for (Rewrite rewrite : simpleRewrites.values())
rewrite.virtual = newName;
for (Rewrite rewrite : complexRewrites)
rewrite.virtual = newName;
}
public void add(Rewrite rewrite) {
int index = simpleRewrites.size() + complexRewrites.size();
rewrite.setIndex(index);
if (rewrite.isSimple())
simpleRewrites.put(rewrite.simplePrefix(), rewrite);
else
complexRewrites.add(rewrite);
}
public boolean isEmpty() {
return simpleRewrites.isEmpty() && complexRewrites.isEmpty();
}
/**
 * Compute the chain of directory prefixes of a path, longest first,
 * starting with the path itself and always ending with "/". Note that for
 * a path like "/a/b" the chain includes the empty string (the prefix
 * ending just before the leading slash) ahead of the final "/".
 */
private static List<String> prefixes(String path) {
List<String> out = new ArrayList<String>();
out.add(path);
for (int cut = path.length(); cut > 1; ) {
cut = path.lastIndexOf('/', cut - 1);
out.add(path.substring(0, cut));
}
out.add("/");
return out;
}
public Rewrite match(List<String> prefixes) {
Rewrite best = null;
for (String prefix : prefixes) {
Rewrite match = simpleRewrites.get(prefix);
if (match != null)
if (best == null || best.index < match.index)
best = match;
}
// Last matching rewrite 'wins'
for (int i = complexRewrites.size() - 1; i >= 0; i--) {
Rewrite rewrite = complexRewrites.get(i);
if (rewrite.matches(prefixes.get(0))) {
if (best == null || best.index < rewrite.index)
best = rewrite;
// no point continuing
break;
}
}
return best;
}
}
/**
 * Each Rewrite corresponds to a single include or exclude line in the project-layout.
 * For example, for the following clause there would be three Rewrite objects:
 *
 *   #Source
 *   /src
 *   /lib
 *   -/src/tests
 *
 * For includes use the constructor taking both an include path and a virtual
 * path; for excludes use the constructor taking just the exclude path (the
 * source/line arguments are only used for error reporting).
 */
private static class Rewrite {
    /** Orders rewrites by their position in the project-layout file. */
    private static final Comparator<Rewrite> COMPARATOR = new Comparator<Rewrite>() {
        @Override
        public int compare(Rewrite t, Rewrite o) {
            if (t.index < o.index)
                return -1;
            if (t.index == o.index)
                return 0;
            return 1;
        }
    };
    // Position of this rewrite within the project-layout file; set via setIndex.
    private int index;
    // The original text of the layout line (excludes get their '-' back); only used by toString().
    private final String original;
    // Compiled matcher for the include/exclude path; see compilePrefix.
    private final Pattern pattern;
    // Virtual prefix glued onto matched paths; stays null for excludes, so
    // rewrite() returns null for them.
    private String virtual;
    // The literal path when the pattern contains no wildcards, otherwise null.
    private final String simple;
    /**
     * The intention is to allow the ** wildcard when followed by a slash only. The
     * following should be invalid:
     * - a / *** / b (too many stars)
     * - a / ** (** at the end should be omitted)
     * - a / **b (illegal)
     * - a / b** (illegal)
     * - ** (the same as a singleton '/')
     * This regex matches ** when followed by a non-/ character, or the end of string.
     */
    private static final Pattern verifyStars = Pattern.compile(".*(?:\\*\\*[^/].*|\\*\\*$|[^/]\\*\\*.*)");
    /**
     * Create an exclusion rewrite.
     *
     * @param exclude the path to exclude (without the leading '-')
     * @param source  name of the project-layout source, for error messages
     * @param line    line number within the source, for error messages
     */
    public Rewrite(String exclude, String source, int line) {
        original = '-' + exclude;
        // Normalise to an absolute-style path.
        if (!exclude.startsWith("/"))
            exclude = '/' + exclude;
        // Unlike includes, excludes may not contain a '//' marker at all.
        if (exclude.indexOf("//") != -1)
            throw error("Illegal '//' in exclude path", source, line);
        if (verifyStars.matcher(exclude).matches())
            throw error("Illegal use of '**' in exclude path", source, line);
        // Drop a trailing slash before compiling the matcher.
        if (exclude.endsWith("/"))
            exclude = exclude.substring(0, exclude.length() - 1);
        pattern = compilePrefix(exclude);
        // NOTE(review): '//' was rejected above, so this replace and the second
        // trailing-slash strip appear to be no-ops kept for symmetry with the
        // include constructor.
        exclude = exclude.replace("//", "/");
        if (exclude.length() > 1 && exclude.endsWith("/"))
            exclude = exclude.substring(0, exclude.length() - 1);
        // Wildcard-free excludes can be matched by simple prefix comparison.
        simple = exclude.contains("*") ? null : exclude;
    }
    /** Record this rewrite's position in the layout file (used by COMPARATOR). */
    public void setIndex(int index) {
        this.index = index;
    }
    /**
     * Create an inclusion rewrite.
     *
     * @param include the real path to include; may contain one '//' marker
     *                separating the stripped prefix from the captured suffix
     * @param virtual the artificial prefix that replaces the stripped part
     * @param source  name of the project-layout source, for error messages
     * @param line    line number within the source, for error messages
     */
    public Rewrite(String include, String virtual, String source, int line) {
        original = include;
        if (!include.startsWith("/"))
            include = '/' + include;
        // At most one '//' marker is allowed in an include.
        int doubleslash = include.indexOf("//");
        if (doubleslash != include.lastIndexOf("//"))
            throw error("More than one '//' in include path", source, line);
        if (verifyStars.matcher(include).matches())
            throw error("Illegal use of '**' in include path", source, line);
        // Normalise the virtual prefix: leading slash, no trailing slash.
        if (!virtual.startsWith("/"))
            virtual = "/" + virtual;
        if (virtual.endsWith("/"))
            virtual = virtual.substring(0, virtual.length() - 1);
        this.virtual = virtual;
        this.pattern = compilePrefix(include);
        // Collapse the '//' marker and strip any trailing slash before
        // deciding whether this include is a simple (wildcard-free) prefix.
        include = include.replace("//", "/");
        if (include.length() > 1 && include.endsWith("/"))
            include = include.substring(0, include.length() - 1);
        simple = include.contains("*") ? null : include;
    }
    /**
     * Patterns are matched by translation to regex. The following invariants
     * are assumed to hold:
     *
     * - The pattern starts with a '/'.
     * - There are no occurrences of '**' that is not surrounded by slashes
     *   (unless it is at the start of a pattern).
     * - There is at most one double slash.
     *
     * The result of the translation has precisely one capture group, which
     * (after successful matching) will contain the part of the path that
     * should be glued to the virtual prefix.
     *
     * It proceeds by starting the capture group either after the double
     * slash or at the start of the pattern, and then replacing '*' with
     * '[^/]*' (meaning any number of non-slash characters) and '/**' with
     * '(?:|/.*)' (meaning empty string or a slash followed by any number of
     * characters including '/').
     *
     * The pattern is terminated by the term '(?:/.*|$)', saying 'either the
     * next character is a '/' or the string ends' -- this avoids accidental
     * matching of partial directory/file names.
     *
     * <b>IMPORTANT:</b> Run the ProjectLayoutTests when changing this!
     */
    private static Pattern compilePrefix(String pattern) {
        // Escape regex metacharacters, keeping '*' live for wildcard handling.
        pattern = StringUtil.escapeStringLiteralForRegexp(pattern, "*");
        // Open the capture group at the '//' marker, or at the very start.
        if (pattern.contains("//"))
            pattern = pattern.replace("//", "(/");
        else
            pattern = "(" + pattern;
        if (pattern.endsWith("/"))
            pattern = pattern.substring(0, pattern.length() - 1);
        // Use a placeholder for '/**' so the plain-'*' replace can't corrupt it.
        pattern = pattern.replace("/**", "-///-")
                .replace("*", "[^/]*")
                .replace("-///-", "(?:|/.*)");
        // The trailing alternation also closes the capture group opened above.
        return Pattern.compile(pattern + "(?:/.*|$))");
    }
    /** Is this rewrite simple? (i.e. contains no wildcards) */
    public boolean isSimple() {
        return simple != null;
    }
    /** Returns the path included/excluded by this rewrite, if it is
     * simple, or <code>null</code> if it is not.
     *
     * @return included/excluded path, or <code>null</code>
     */
    public String simplePrefix() {
        return simple;
    }
    /** @return true if {@code path} is matched by this rewrite's pattern */
    public boolean matches(String path) {
        return pattern.matcher(path).matches();
    }
    /**
     * Map a real path to its artificial path under this rewrite.
     *
     * @param path the real path to rewrite
     * @return the virtual path, or <code>null</code> if this is an exclude
     *         rewrite or the path does not match
     */
    public String rewrite(String path) {
        if (virtual == null)
            return null;
        Matcher matcher = pattern.matcher(path);
        if (!matcher.matches())
            return null;
        return virtual + matcher.group(1);
    }
    @Override
    public String toString() {
        return original;
    }
}
}

View File

@@ -0,0 +1,29 @@
package com.semmle.util.trap;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import com.semmle.util.zip.MultiMemberGZIPInputStream;
/**
 * Opens (possibly) compressed files for reading, choosing the decompression
 * scheme from the file extension.
 */
public class CompressedFileInputStream {
    /**
     * Create an input stream yielding the uncompressed contents of a
     * (possibly) compressed file. Files named {@code *.gz} are decompressed
     * on the fly, including multi-member gzip archives; any other file is
     * returned as a plain stream.
     *
     * @param f The compressed file to read
     * @return An input stream from which you can read the file's uncompressed data.
     * @throws IOException From the underlying decompression input stream.
     */
    public static InputStream fromFile(Path f) throws IOException {
        InputStream raw = Files.newInputStream(f);
        String name = f.getFileName().toString();
        if (name.endsWith(".gz")) {
            return new MultiMemberGZIPInputStream(raw, 8192);
        }
        // Brotli (".br") support has been sketched but is not currently
        // enabled; it would wrap `raw` in a BrotliInputStream here.
        return raw;
    }
}

View File

@@ -0,0 +1,125 @@
package com.semmle.util.trap.dependencies;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.semmle.util.exception.ResourceError;
import com.semmle.util.io.StreamUtil;
import com.semmle.util.io.WholeIO;
import com.semmle.util.trap.CompressedFileInputStream;
/**
 * Base class for the line-oriented text formats used to record sets of TRAP
 * files and related paths: a version header line, followed by named sets of
 * paths, one path per line. Concrete subclasses define which set labels are
 * recognised and how parse errors are reported.
 */
public abstract class TextFile {
    /** Label introducing the set of TRAP file paths, shared by all subclasses. */
    static final String TRAPS = "TRAPS";
    // Header line format: an arbitrary name followed by a "digit.digit" version.
    private static final Pattern HEADER = Pattern.compile("([^\r\n]+?) (\\d\\.\\d)");
    /** Format version, read from the header or supplied by the constructor. */
    protected String version;
    /** The TRAP file paths parsed under the {@link #TRAPS} label. */
    protected final Set<String> traps = new LinkedHashSet<String>();
    /**
     * Map a set label line to the set that should collect the following
     * paths, or return {@code null} if the line is not a recognised label.
     */
    protected abstract Set<String> getSet(Path path, String label);
    /** Report a corrupt file. Implementations must always throw. */
    protected abstract void parseError(Path path);
    /**
     * @param version the format version, or {@code null} when it will be
     *                read from a file via {@link #load(String, Path)}
     */
    public TextFile(String version) {
        this.version = version;
    }
    /**
     * Load the current text file, checking that it matches the expected header.
     *
     * <p>
     * This method is somewhat performance-sensitive, as at least our C++ extractors
     * can generate very large input files. The format is therefore parsed by hand.
     * </p>
     *
     * <p>
     * The accepted format consists of:
     * <ul>
     * <li>Zero or more EOL comments, marked with {@code //}.
     * <li>Precisely one header line, of the form {@code $HEADER $VERSION}; this is
     * checked against {@code expected_header}.
     * <li>Zero or more "file lists", each beginning with the name of a set (see
     * {@link #getSet(Path, String)}) on a line by itself, followed by file paths,
     * one per line.
     * </ul>
     *
     * <p>
     * Empty lines are permitted throughout.
     * </p>
     */
    protected void load(String expected_header, Path path) {
        try (InputStream is = CompressedFileInputStream.fromFile(path);
             BufferedReader lines = StreamUtil.newUTF8BufferedReader(is)) {
            boolean commentsPermitted = true;
            Set<String> currentSet = null;
            for (String line = lines.readLine(); line != null; line = lines.readLine()) {
                // Skip empty lines.
                if (line.isEmpty())
                    continue;
                // If comments are still permitted, skip comment lines.
                if (commentsPermitted && line.startsWith("//"))
                    continue;
                // If comments are still permitted, the first non-comment line is the header.
                // In addition, we allow no further comments.
                if (commentsPermitted) {
                    Matcher matcher = HEADER.matcher(line);
                    // parseError always throws, so it is safe to use the
                    // matcher's groups after a failed match check below.
                    if (!matcher.matches() || !matcher.group(1).equals(expected_header))
                        parseError(path);
                    commentsPermitted = false;
                    version = matcher.group(2);
                    continue;
                }
                // We have a non-blank line; this either names the new set, or is a line that
                // should be put into the current set.
                Set<String> newSet = getSet(path, line);
                if (newSet != null) {
                    currentSet = newSet;
                } else {
                    // A path line before any label is a format error.
                    if (currentSet == null)
                        parseError(path);
                    else
                        currentSet.add(line);
                }
            }
        } catch (IOException e) {
            throw new ResourceError("Couldn't read " + path, e);
        }
    }
    /**
     * @return the format version of the loaded file
     */
    public String version() {
        return version;
    }
    /**
     * Save this object to a file (or throw a ResourceError on failure)
     *
     * @param file the file in which to save this object
     */
    public void save(Path file) {
        new WholeIO().strictwrite(file, toString());
    }
    /** Append the {@code "$header $version"} line used by {@link #load}. */
    protected void appendHeaderString(StringBuilder sb, String header, String version) {
        sb.append(header).append(' ').append(version).append('\n');
    }
    /** Append a labelled set: a blank line, the label, then one path per line. */
    protected void appendSet(StringBuilder sb, String title, Set<String> set) {
        sb.append('\n').append(title).append('\n');
        for (String s : set)
            sb.append(s).append('\n');
    }
    /** Append a labelled single value: a blank line, the label, then the value. */
    protected void appendSingleton(StringBuilder sb, String title, String s) {
        sb.append('\n').append(title).append('\n');
        sb.append(s).append('\n');
    }
}

View File

@@ -0,0 +1,109 @@
package com.semmle.util.trap.dependencies;
import java.io.File;
import java.nio.file.Path;
import java.util.AbstractSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
import com.semmle.util.exception.ResourceError;
/**
 * The immediate dependencies of a particular TRAP file
 */
public class TrapDependencies extends TextFile
{
    /** Label introducing the (single) TRAP path this file describes. */
    static final String TRAP = "TRAP";
    // Path of the TRAP file whose dependencies are stored here.
    private String trap;
    /**
     * Create an empty dependencies node for a TRAP file
     */
    public TrapDependencies(String trap) {
        super(TrapSet.LATEST_VERSION);
        this.trap = trap;
    }
    /**
     * Load a TRAP dependencies (.dep) file
     *
     * @param file the file to load
     */
    public TrapDependencies(Path file) {
        super(null);
        load(TrapSet.HEADER, file);
        // The TRAP singleton is mandatory; reject files that lack it.
        if(trap == null)
            parseError(file);
    }
    @Override
    protected Set<String> getSet(final Path file, String label) {
        if(label.equals(TRAP)) {
            // Write-only pseudo-set used to capture the single TRAP path:
            // load() feeds path lines to it via add(). A second path under
            // this label means the file is corrupt.
            return new AbstractSet<String>() {
                @Override
                public Iterator<String> iterator() {
                    return null;
                }
                @Override
                public int size() {
                    return 0;
                }
                @Override
                public boolean add(String s) {
                    if(trap != null)
                        parseError(file);
                    trap = s;
                    return true;
                }
            };
        }
        if(label.equals(TRAPS)) return traps;
        return null;
    }
    @Override
    protected void parseError(Path file) {
        throw new ResourceError("Corrupt TRAP dependencies: " + file);
    }
    /**
     * @return the path of the TRAP with the dependencies stored in this object
     *         (relative to the source location)
     */
    public String trapFile() {
        return trap;
    }
    /**
     * @return the paths of the TRAP file dependencies
     *         (relative to the trap directory)
     *
     */
    public Set<String> dependencies() {
        return Collections.unmodifiableSet(traps);
    }
    /**
     * Add a path to a TRAP file (relative to the trap directory).
     *
     * @param trap the path to the trap file to add
     */
    public void addDependency(String trap) {
        traps.add(trap);
    }
    /*
     * (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        appendHeaderString(sb, TrapSet.HEADER, TrapSet.LATEST_VERSION);
        appendSingleton(sb, TRAP, trap);
        appendSet(sb, TRAPS, traps);
        return sb.toString();
    }
}

View File

@@ -0,0 +1,196 @@
package com.semmle.util.trap.dependencies;
import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import com.semmle.util.exception.ResourceError;
/**
 * A set of source files and the TRAP files that were generated when
 * compiling them.
 * <p>
 * The set of TRAP files is not necessarily sufficient to create a
 * consistent database, unless combined with inter-TRAP dependency
 * information from .dep files (see {@link TrapDependencies}).
 */
public class TrapSet extends TextFile
{
    static final String HEADER = "TRAP dependencies";
    static final String LATEST_VERSION = "1.2";
    // Labels of the sets stored in a .set file.
    static final String SOURCES = "SOURCES";
    static final String INCLUDES = "INCLUDES";
    static final String OBJECTS = "OBJECTS";
    static final String INPUT_OBJECTS = "INPUT_OBJECTS";
    // state
    private final Set<String> sources = new LinkedHashSet<String>();
    private final Set<String> includes = new LinkedHashSet<String>();
    private final Set<String> objects = new LinkedHashSet<String>();
    private final Set<String> inputObjects = new LinkedHashSet<String>();
    // Most recent file this set was loaded from or saved to; null if neither.
    private Path file;
    /**
     * Create an empty TRAP set
     */
    public TrapSet() {
        super(LATEST_VERSION);
    }
    @Override
    protected Set<String> getSet(Path file, String label) {
        if (label.equals(SOURCES)) return sources;
        if (label.equals(INCLUDES)) return includes;
        if (label.equals(OBJECTS)) return objects;
        if (label.equals(INPUT_OBJECTS)) return inputObjects;
        if (label.equals(TRAPS)) return traps;
        return null;
    }
    /**
     * Load a TRAP set (.set) file
     *
     * @param path the file to load
     */
    public TrapSet(Path path) {
        super(null);
        load(HEADER, path);
        this.file = path;
    }
    /**
     * Return the most recent file used when loading or saving this
     * trap set. If this set was constructed, rather than loaded, and
     * has not been saved then the result is <code>null</code>.
     *
     * @return the file or <code>null</code>
     */
    public Path getFile() {
        return file;
    }
    @Override
    protected void parseError(Path file) {
        throw new ResourceError("Corrupt TRAP set: " + file);
    }
    /**
     * @return the paths of the source files contained in this TRAP set
     */
    public Set<String> sourceFiles() {
        return Collections.unmodifiableSet(sources);
    }
    /**
     * @return the paths to the include files contained in this TRAP set
     */
    public Set<String> includeFiles() {
        return Collections.unmodifiableSet(includes);
    }
    /**
     * @return the paths of the TRAP files contained in this TRAP set
     *         (relative to the trap directory)
     */
    public Set<String> trapFiles() {
        return Collections.unmodifiableSet(traps);
    }
    /**
     * @return the object names in this TRAP set
     */
    public Set<String> objectNames() {
        return Collections.unmodifiableSet(objects);
    }
    /**
     * @return the input object names in this TRAP set
     */
    public Set<String> inputObjectNames() {
        return Collections.unmodifiableSet(inputObjects);
    }
    /**
     * Add a fully-qualified path to a source-file.
     *
     * @param source the path to the source file to add
     */
    public void addSource(String source) {
        sources.add(source);
    }
    /**
     * Add a fully-qualified path to an include-file.
     *
     * @param include the path to the include file to add
     */
    public void addInclude(String include) {
        includes.add(include);
    }
    /**
     * Add a path to a TRAP file (relative to the trap directory).
     *
     * @param trap the path to the trap file to add
     * @return true if the path was not already present
     */
    public boolean addTrap(String trap) {
        return traps.add(trap);
    }
    /**
     * Check if this set contains a TRAP path
     *
     * @param trap the path to check
     * @return true if this set contains the path
     */
    public boolean containsTrap(String trap) {
        // Fixed: previously this was `trap.contains(trap)`, which tests the
        // argument against itself and is true for any non-empty string. The
        // intent is membership in the `traps` set.
        return traps.contains(trap);
    }
    /**
     * Are the sources mentioned in this TRAP set disjoint from the given
     * set of paths?
     *
     * @param paths the set of paths to check disjointness with
     * @return true if and only if the paths are disjoint
     */
    public boolean sourcesDisjointFrom(Set<String> paths) {
        for (String source : sources)
            if (paths.contains(source))
                return false;
        return true;
    }
    /**
     * Save this TRAP set to a .set file (or throw a ResourceError on failure)
     *
     * @param file the file in which to save this set
     */
    @Override
    public void save(Path file) {
        super.save(file);
        this.file = file;
    }
    /*
     * (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        appendHeaderString(sb, HEADER, LATEST_VERSION);
        appendSet(sb, SOURCES, sources);
        appendSet(sb, INCLUDES, includes);
        appendSet(sb, OBJECTS, objects);
        appendSet(sb, INPUT_OBJECTS, inputObjects);
        appendSet(sb, TRAPS, traps);
        return sb.toString();
    }
}

View File

@@ -0,0 +1,8 @@
package com.semmle.util.trap.pathtransformers;
/** The identity {@link PathTransformer}: every path is returned unchanged. */
public class NoopTransformer extends PathTransformer {
    @Override
    public String transform(String path) {
        return path;
    }
}

View File

@@ -0,0 +1,46 @@
package com.semmle.util.trap.pathtransformers;
import java.io.File;
import com.semmle.util.files.FileUtil;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
/**
 * Transforms file paths before they are recorded in the database.
 * The process-wide default transformer is chosen once, at class-load time,
 * from the SEMMLE_PATH_TRANSFORMER environment variable; see {@link #std()}.
 */
public abstract class PathTransformer {
    /** Transform a normalised path; implementations may return it unchanged. */
    public abstract String transform(String input);
    /**
     * Convert a file to its path in the (code) database. Turns file paths into
     * canonical, absolute, strings and normalises away Unix/Windows differences.
     */
    public String fileAsDatabaseString(File file) {
        String path;
        // With SEMMLE_PRESERVE_SYMLINKS set, simplify the path without
        // resolving symlinks; otherwise canonicalise it.
        if (Boolean.valueOf(Env.systemEnv().get(Var.SEMMLE_PRESERVE_SYMLINKS)))
            path = FileUtil.simplifyPath(file);
        else
            path = FileUtil.tryMakeCanonical(file).getPath();
        return transform(FileUtil.normalisePath(path));
    }
    /**
     * Utility method for extractors: Canonicalise the given path as required
     * for the current extraction. Unlike {@link FileUtil#tryMakeCanonical(File)},
     * this method is consistent with {@link #fileAsDatabaseString(File)}.
     */
    public File canonicalFile(String path) {
        return new File(fileAsDatabaseString(new File(path)));
    }
    // Chosen once at class-initialisation time from the environment.
    private static final PathTransformer DEFAULT_TRANSFORMER;
    static {
        String layout = Env.systemEnv().get(Var.SEMMLE_PATH_TRANSFORMER);
        if (layout == null)
            DEFAULT_TRANSFORMER = new NoopTransformer();
        else
            DEFAULT_TRANSFORMER = new ProjectLayoutTransformer(new File(layout));
    }
    /** @return the process-wide default transformer */
    public static PathTransformer std() {
        return DEFAULT_TRANSFORMER;
    }
}

View File

@@ -0,0 +1,37 @@
package com.semmle.util.trap.pathtransformers;
import java.io.File;
import com.semmle.util.projectstructure.ProjectLayout;
/**
 * A {@link PathTransformer} that maps real paths to artificial paths using a
 * project-layout specification.
 */
public class ProjectLayoutTransformer extends PathTransformer {
    private final ProjectLayout layout;

    public ProjectLayoutTransformer(File file) {
        layout = new ProjectLayout(file);
    }

    @Override
    public String transform(String input) {
        if (!isWindowsPath(input, 0)) {
            String mapped = layout.artificialPath(input);
            return mapped != null ? mapped : input;
        }
        // Windows path such as "C:/...": prefix a '/' so the layout sees an
        // absolute POSIX-style path, then strip that '/' again if the result
        // still looks like a slash-prefixed Windows path.
        String mapped = layout.artificialPath('/' + input);
        if (mapped == null)
            return input;
        if (isWindowsPath(mapped, 1) && mapped.charAt(0) == '/')
            return mapped.substring(1);
        return mapped;
    }

    /**
     * Does {@code s}, starting at {@code startAt}, look like a Windows path,
     * i.e. a non-slash drive letter followed by ":/"?
     */
    private static boolean isWindowsPath(String s, int startAt) {
        return s.length() >= (3 + startAt)
                && s.charAt(startAt) != '/'
                && s.charAt(startAt + 1) == ':'
                && s.charAt(startAt + 2) == '/';
    }
}

View File

@@ -0,0 +1,52 @@
package com.semmle.util.unicode;
public class UTF8Util {
    /**
     * Get the length (in Unicode code units, not code points) of the longest prefix of
     * a string that can be UTF-8 encoded in no more than the given number of bytes.
     *
     * <p>
     * Unencodable characters (such as lone surrogate halves or low surrogates
     * that do not follow a high surrogate) are treated as being encoded in
     * three bytes. This is safe since on encoding they will be replaced by
     * a replacement character, which in turn will take at most three bytes to
     * encode.
     * </p>
     *
     * @param str string to encode
     * @param maxEncodedLength maximum number of bytes for the encoded prefix
     * @return length of the prefix
     */
    public static int encodablePrefixLength(String str, int maxEncodedLength) {
        final int n = str.length();
        // Fast path: no character takes more than three bytes to encode, so
        // sufficiently short strings always fit in their entirety.
        if (n <= maxEncodedLength / 3) {
            return n;
        }
        int bytes = 0;
        int i = 0;
        while (i < n) {
            final int start = i;
            final char c = str.charAt(i++);
            if (c <= 0x7f) {
                bytes += 1;                 // ASCII
            } else if (c <= 0x7ff) {
                bytes += 2;                 // two-byte sequence
            } else if (Character.isHighSurrogate(c)
                    && i < n && Character.isLowSurrogate(str.charAt(i))) {
                bytes += 4;                 // surrogate pair: four bytes
                i++;                        // consume the low surrogate too
            } else {
                // BMP character, or an unpaired surrogate half (encoded as a
                // replacement character): at most three bytes.
                bytes += 3;
            }
            if (bytes > maxEncodedLength) {
                // The character starting at `start` no longer fits.
                return start;
            }
        }
        return n;
    }
}

View File

@@ -0,0 +1,71 @@
package com.semmle.util.zip;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.zip.GZIPInputStream;
/**
 * A {@link GZIPInputStream} that transparently reads every member of a
 * multi-member gzip file. The standard GZIPInputStream stops after the first
 * member; this class detects trailing data after a member and chains a child
 * stream to continue decompressing.
 *
 * <p>It relies on the protected {@code inf}, {@code buf} and {@code len}
 * fields inherited from {@code InflaterInputStream} to recover bytes the
 * decompressor read past the end of the current member.</p>
 */
public class MultiMemberGZIPInputStream extends GZIPInputStream {
    /**
     * @param in   the raw (compressed) input
     * @param size buffer size, also used as pushback capacity
     */
    public MultiMemberGZIPInputStream(InputStream in, int size) throws IOException {
        // Wrap the stream in a PushbackInputStream...
        super(new PushbackInputStream(in, size), size);
        this.size = size;
    }
    /** As above, with the default buffer size (pushback capacity 1024). */
    public MultiMemberGZIPInputStream(InputStream in) throws IOException {
        // Wrap the stream in a PushbackInputStream...
        super(new PushbackInputStream(in, 1024));
        this.size = -1;
    }
    // Stream for the next gzip member, once this member is exhausted.
    private MultiMemberGZIPInputStream child;
    // Buffer size given to the constructor, or -1 for the default.
    private int size;
    // True once the end of the underlying data has been reached.
    private boolean eos;
    @Override
    public int read(byte[] inputBuffer, int inputBufferOffset, int inputBufferLen) throws IOException {
        if (eos) {
            return -1;
        }
        else if (child != null) {
            // The current member is finished; all reads go to the chain.
            return child.read(inputBuffer, inputBufferOffset, inputBufferLen);
        }
        int charsRead = super.read(inputBuffer, inputBufferOffset, inputBufferLen);
        if (charsRead == -1) {
            // Push any remaining buffered data back onto the stream
            // If the stream is then not empty, use it to construct
            // a new instance of this class and delegate this and any
            // future calls to it...
            int n = inf.getRemaining() - 8;
            if (n > 0) {
                // More than 8 bytes remaining in deflater
                // First 8 are gzip trailer. Add the rest to
                // any un-read data...
                ((PushbackInputStream) this.in).unread(buf, len - n, n);
            } else {
                // Nothing in the buffer. We need to know whether or not
                // there is unread data available in the underlying stream
                // since the base class will not handle an empty file.
                // Read a byte to see if there is data and if so,
                // push it back onto the stream...
                byte[] b = new byte[1];
                int ret = in.read(b, 0, 1);
                if (ret == -1) {
                    eos = true;
                    return -1;
                } else {
                    ((PushbackInputStream) this.in).unread(b, 0, 1);
                }
            }
            // Construct the child with the same buffer-size choice as this
            // stream, and delegate the current read to it.
            if(size == -1)
                child = new MultiMemberGZIPInputStream(in);
            else
                child = new MultiMemberGZIPInputStream(in, size);
            return child.read(inputBuffer, inputBufferOffset, inputBufferLen);
        } else {
            return charsRead;
        }
    }
}

View File

@@ -0,0 +1,117 @@
package com.github.codeql
import com.github.codeql.utils.isExternalDeclaration
import com.github.codeql.utils.isExternalFileClassMember
import com.semmle.extractor.java.OdasaOutput
import com.semmle.util.data.StringDigestor
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.util.isFileClass
import org.jetbrains.kotlin.ir.util.packageFqName
import org.jetbrains.kotlin.ir.util.parentClassOrNull
import java.io.File
import java.util.ArrayList
import java.util.HashSet
import java.util.zip.GZIPOutputStream
/**
 * Extracts external (dependency) declarations -- classes and top-level
 * declarations referenced by the code being compiled -- into their own TRAP
 * files, separate from the source file's TRAP.
 *
 * Declarations are queued with [extractLater] while a source file is being
 * extracted; the queue is drained by [extractExternalClasses].
 */
class ExternalDeclExtractor(val logger: FileLogger, val invocationTrapFile: String, val sourceFilePath: String, val primitiveTypeMapping: PrimitiveTypeMapping, val pluginContext: IrPluginContext, val globalExtensionState: KotlinExtractorGlobalState, val diagnosticTrapWriter: TrapWriter) {

    // Declarations already queued (or extracted); prevents duplicate work.
    val externalDeclsDone = HashSet<IrDeclaration>()
    // Work list of (declaration, signature) pairs still to be extracted.
    val externalDeclWorkList = ArrayList<Pair<IrDeclaration, String>>()

    /**
     * Queue [d] for later extraction under [signature].
     * Only classes and top-level (file-class member) declarations are
     * supported; anything else is rejected with a warning.
     *
     * @return true if the declaration was newly queued, false if it was seen
     *         before or is not a supported kind of declaration
     */
    fun extractLater(d: IrDeclaration, signature: String): Boolean {
        if (d !is IrClass && !isExternalFileClassMember(d)) {
            logger.warnElement("External declaration is neither a class, nor a top-level declaration", d)
            return false
        }
        // Deduplication is keyed on the declaration alone, not the signature.
        val ret = externalDeclsDone.add(d)
        if (ret) externalDeclWorkList.add(Pair(d, signature))
        return ret
    }

    // Marker signatures distinguishing a property from its backing field.
    val propertySignature = ";property"
    val fieldSignature = ";field"

    fun extractLater(p: IrProperty) = extractLater(p, propertySignature)
    fun extractLater(f: IrField) = extractLater(f, fieldSignature)
    fun extractLater(c: IrClass) = extractLater(c, "")

    /**
     * Drain the work list, extracting each queued declaration into its own
     * TRAP file. Extraction of one declaration may queue further ones, so
     * batches are processed until the list stays empty.
     */
    fun extractExternalClasses() {
        val output = OdasaOutput(false, logger)
        output.setCurrentSourceFile(File(sourceFilePath))
        do {
            // Snapshot the current batch; extracting it may grow the list.
            val nextBatch = ArrayList(externalDeclWorkList)
            externalDeclWorkList.clear()
            nextBatch.forEach { workPair ->
                val (irDecl, possiblyLongSignature) = workPair
                // In order to avoid excessively long signatures which can lead to trap file names longer than the filesystem
                // limit, we truncate and add a hash to preserve uniqueness if necessary.
                val signature = if (possiblyLongSignature.length > 100) {
                    possiblyLongSignature.substring(0, 92) + "#" + StringDigestor.digest(possiblyLongSignature).substring(0, 8)
                } else { possiblyLongSignature }
                output.getTrapLockerForDecl(irDecl, signature).useAC { locker ->
                    locker.trapFileManager.useAC { manager ->
                        val shortName = when(irDecl) {
                            is IrDeclarationWithName -> irDecl.name.asString()
                            else -> "(unknown name)"
                        }
                        // A null manager means another invocation already owns
                        // (or has produced) this declaration's TRAP file.
                        if(manager == null) {
                            logger.info("Skipping extracting external decl $shortName")
                        } else {
                            val trapFile = manager.file
                            // Write to a temp file, then rename, so a crash
                            // cannot leave a truncated TRAP file behind.
                            val trapTmpFile = File.createTempFile("${trapFile.nameWithoutExtension}.", ".${trapFile.extension}.tmp", trapFile.parentFile)
                            val containingClass = getContainingClassOrSelf(irDecl)
                            if (containingClass == null) {
                                logger.warnElement("Unable to get containing class", irDecl)
                                // NOTE(review): non-local return -- this exits
                                // extractExternalClasses entirely, skipping the
                                // rest of the work list and writeTrapSet();
                                // confirm that is intended rather than a skip
                                // of just this declaration.
                                return
                            }
                            val binaryPath = getIrClassBinaryPath(containingClass)
                            try {
                                GZIPOutputStream(trapTmpFile.outputStream()).bufferedWriter().use { trapFileBW ->
                                    // We want our comments to be the first thing in the file,
                                    // so start off with a mere TrapWriter
                                    val tw = TrapWriter(logger.loggerBase, TrapLabelManager(), trapFileBW, diagnosticTrapWriter)
                                    tw.writeComment("Generated by the CodeQL Kotlin extractor for external dependencies")
                                    tw.writeComment("Part of invocation $invocationTrapFile")
                                    if (signature != possiblyLongSignature) {
                                        tw.writeComment("Function signature abbreviated; full signature is: $possiblyLongSignature")
                                    }
                                    // Now elevate to a SourceFileTrapWriter, and populate the
                                    // file information if needed:
                                    val ftw = tw.makeFileTrapWriter(binaryPath, irDecl is IrClass)
                                    val fileExtractor = KotlinFileExtractor(logger, ftw, binaryPath, manager, this, primitiveTypeMapping, pluginContext, globalExtensionState)
                                    if (irDecl is IrClass) {
                                        // Populate a location and compilation-unit package for the file. This is similar to
                                        // the beginning of `KotlinFileExtractor.extractFileContents` but without an `IrFile`
                                        // to start from.
                                        val pkg = irDecl.packageFqName?.asString() ?: ""
                                        val pkgId = fileExtractor.extractPackage(pkg)
                                        ftw.writeHasLocation(ftw.fileId, ftw.getWholeFileLocation())
                                        ftw.writeCupackage(ftw.fileId, pkgId)
                                        fileExtractor.extractClassSource(irDecl, !irDecl.isFileClass, false)
                                    } else {
                                        fileExtractor.extractDeclaration(irDecl)
                                    }
                                }
                                // Atomically publish the finished TRAP file.
                                if (!trapTmpFile.renameTo(trapFile)) {
                                    logger.error("Failed to rename $trapTmpFile to $trapFile")
                                }
                            } catch (e: Exception) {
                                manager.setHasError()
                                logger.error("Failed to extract '$shortName'. Partial TRAP file location is $trapTmpFile", e)
                            }
                        }
                    }
                }
            }
        } while (externalDeclWorkList.isNotEmpty())
        output.writeTrapSet()
    }
}

View File

@@ -0,0 +1,74 @@
package com.github.codeql
import org.jetbrains.kotlin.compiler.plugin.AbstractCliOption
import org.jetbrains.kotlin.compiler.plugin.CliOption
import org.jetbrains.kotlin.compiler.plugin.CommandLineProcessor
import org.jetbrains.kotlin.config.CompilerConfiguration
import org.jetbrains.kotlin.config.CompilerConfigurationKey
/**
 * Declares and parses the command-line options of the `kotlin-extractor`
 * kotlinc plugin, storing parsed values into the [CompilerConfiguration]
 * under the corresponding `KEY_*` configuration keys.
 */
class KotlinExtractorCommandLineProcessor : CommandLineProcessor {
    override val pluginId = "kotlin-extractor"

    override val pluginOptions = listOf(
        CliOption(
            optionName = OPTION_INVOCATION_TRAP_FILE,
            valueDescription = "Invocation TRAP file",
            description = "Extractor will append invocation-related TRAP to this file",
            required = true,
            allowMultipleOccurrences = false
        ),
        // The optional flags all use the same text for value description and
        // description, so build them with a small factory.
        optionalOption(OPTION_CHECK_TRAP_IDENTICAL, "Check whether different invocations produce identical TRAP"),
        optionalOption(OPTION_COMPILATION_STARTTIME, "The start time of the compilation as a Unix timestamp"),
        optionalOption(OPTION_EXIT_AFTER_EXTRACTION, "Specify whether to call exitProcess after the extraction has completed")
    )

    /** Build a non-required, single-occurrence option described by [text]. */
    private fun optionalOption(name: String, text: String) =
        CliOption(
            optionName = name,
            valueDescription = text,
            description = text,
            required = false,
            allowMultipleOccurrences = false
        )

    override fun processOption(
        option: AbstractCliOption,
        value: String,
        configuration: CompilerConfiguration
    ) {
        when (option.optionName) {
            OPTION_INVOCATION_TRAP_FILE -> configuration.put(KEY_INVOCATION_TRAP_FILE, value)
            OPTION_CHECK_TRAP_IDENTICAL -> processBooleanOption(value, OPTION_CHECK_TRAP_IDENTICAL, KEY_CHECK_TRAP_IDENTICAL, configuration)
            OPTION_EXIT_AFTER_EXTRACTION -> processBooleanOption(value, OPTION_EXIT_AFTER_EXTRACTION, KEY_EXIT_AFTER_EXTRACTION, configuration)
            OPTION_COMPILATION_STARTTIME -> {
                val startTime = value.toLongOrNull()
                    ?: error("kotlin extractor: Bad argument $value for $OPTION_COMPILATION_STARTTIME")
                configuration.put(KEY_COMPILATION_STARTTIME, startTime)
            }
            else -> error("kotlin extractor: Bad option: ${option.optionName}")
        }
    }

    /** Parse a strict "true"/"false" value into [configKey], or fail. */
    private fun processBooleanOption(value: String, optionName: String, configKey: CompilerConfigurationKey<Boolean>, configuration: CompilerConfiguration) {
        val parsed = when (value) {
            "true" -> true
            "false" -> false
            else -> error("kotlin extractor: Bad argument $value for $optionName")
        }
        configuration.put(configKey, parsed)
    }
}
// Plugin option names (as written on the kotlinc command line) and the
// CompilerConfiguration keys under which their parsed values are stored.

// File to which invocation-wide TRAP is appended (required).
private val OPTION_INVOCATION_TRAP_FILE = "invocationTrapFile"
val KEY_INVOCATION_TRAP_FILE = CompilerConfigurationKey<String>(OPTION_INVOCATION_TRAP_FILE)
// Whether to re-extract existing TRAP files and warn when the output differs.
private val OPTION_CHECK_TRAP_IDENTICAL = "checkTrapIdentical"
val KEY_CHECK_TRAP_IDENTICAL = CompilerConfigurationKey<Boolean>(OPTION_CHECK_TRAP_IDENTICAL)
// Compilation start time in milliseconds since the Unix epoch.
private val OPTION_COMPILATION_STARTTIME = "compilationStartTime"
val KEY_COMPILATION_STARTTIME = CompilerConfigurationKey<Long>(OPTION_COMPILATION_STARTTIME)
// Whether to terminate the kotlinc process once extraction has completed.
private val OPTION_EXIT_AFTER_EXTRACTION = "exitAfterExtraction"
val KEY_EXIT_AFTER_EXTRACTION = CompilerConfigurationKey<Boolean>(OPTION_EXIT_AFTER_EXTRACTION)

View File

@@ -0,0 +1,23 @@
package com.github.codeql
import org.jetbrains.kotlin.backend.common.extensions.IrGenerationExtension
import com.intellij.mock.MockProject
import org.jetbrains.kotlin.compiler.plugin.ComponentRegistrar
import org.jetbrains.kotlin.config.CompilerConfiguration
class KotlinExtractorComponentRegistrar : ComponentRegistrar {
override fun registerProjectComponents(
project: MockProject,
configuration: CompilerConfiguration
) {
val invocationTrapFile = configuration[KEY_INVOCATION_TRAP_FILE]
if (invocationTrapFile == null) {
throw Exception("Required argument for TRAP invocation file not given")
}
IrGenerationExtension.registerExtension(project, KotlinExtractorExtension(
invocationTrapFile,
configuration[KEY_CHECK_TRAP_IDENTICAL] ?: false,
configuration[KEY_COMPILATION_STARTTIME],
configuration[KEY_EXIT_AFTER_EXTRACTION] ?: false))
}
}

View File

@@ -0,0 +1,303 @@
package com.github.codeql
import org.jetbrains.kotlin.backend.common.extensions.IrGenerationExtension
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.util.*
import org.jetbrains.kotlin.ir.IrElement
import java.io.File
import java.io.FileOutputStream
import java.nio.file.Files
import java.nio.file.Paths
import com.semmle.util.files.FileUtil
import kotlin.system.exitProcess
/*
* KotlinExtractorExtension is the main entry point of the CodeQL Kotlin
* extractor. When the jar is used as a kotlinc plugin, kotlinc will
* call the `generate` method.
*/
class KotlinExtractorExtension(
// The filepath for the invocation TRAP file.
// This TRAP file is for this invocation of the extractor as a
// whole, not tied to a particular source file. It contains
// information about which files this invocation compiled, and
// any warnings or errors encountered during the invocation.
private val invocationTrapFile: String,
// By default, if a TRAP file we want to generate for a source
// file already exists, then we will do nothing. If this is set,
// then we will instead generate the TRAP file, and give a
// warning if we would generate different TRAP to that which
// already exists.
private val checkTrapIdentical: Boolean,
// If non-null, then this is the number of milliseconds since
// midnight, January 1, 1970 UTC (as returned by Java's
// `System.currentTimeMillis()`. If this is given, then it is used
// to record the time taken to compile the source code, which is
// presumed to be the difference between this time and the time
// that this plugin is invoked.
private val compilationStartTime: Long?,
// Under normal conditions, the extractor runs during a build of
// the project, and kotlinc continues after the plugin has finished.
// If the plugin is being used independently of a build, then this
// can be set to true to make the plugin terminate the kotlinc
// invocation when it has finished. This means that kotlinc will not
// write any `.class` files etc.
private val exitAfterExtraction: Boolean)
: IrGenerationExtension {
// This is the main entry point to the extractor.
// It will be called by kotlinc with the IR for the files being
// compiled in `moduleFragment`, and `pluginContext` providing
// various utility functions.
/**
 * Main entry point, called by kotlinc with the IR for the files being
 * compiled in [moduleFragment] and utility functions in [pluginContext].
 * Never lets an exception escape to the compiler; as a last resort it
 * writes a `kotlin-extractor-top.*.log` file (or stderr) and carries on.
 */
override fun generate(moduleFragment: IrModuleFragment, pluginContext: IrPluginContext) {
    try {
        runExtractor(moduleFragment, pluginContext)
        // We catch Throwable rather than Exception, as we want to
        // continue trying to extract everything else even if we get a
        // stack overflow or an assertion failure in one file.
    } catch(e: Throwable) {
        // If we get an exception at the top level, then something's
        // gone very wrong. Don't try to be too fancy, but try to
        // log a simple message.
        val msg = "[ERROR] CodeQL Kotlin extractor: Top-level exception."
        // First, if we can find our log directory, then let's try
        // making a log file there:
        val extractorLogDir = System.getenv("CODEQL_EXTRACTOR_JAVA_LOG_DIR")
        // Fixed: this condition previously used `||`, which is also true when
        // the variable is null (since null != "" holds), so a missing
        // environment variable led to File(null) and a crash while reporting
        // the error. Both parts must hold before we can write a log file.
        if (extractorLogDir != null && extractorLogDir != "") {
            // We use a slightly different filename pattern compared
            // to normal logs. Just the existence of a `-top` log is
            // a sign that something's gone very wrong.
            val logFile = File.createTempFile("kotlin-extractor-top.", ".log", File(extractorLogDir))
            logFile.writeText(msg)
            // Now we've got that out, let's see if we can append a stack trace too
            logFile.appendText(e.stackTraceToString())
        } else {
            // We don't have much choice here except to print to
            // stderr and hope for the best.
            System.err.println(msg)
            e.printStackTrace(System.err)
        }
    }
    if (exitAfterExtraction) {
        exitProcess(0)
    }
}
// Performs the actual extraction for one kotlinc invocation: opens the
// invocation TRAP file, records compilation start/end facts, then
// extracts each source file in `moduleFragment` via `doFile`.
private fun runExtractor(moduleFragment: IrModuleFragment, pluginContext: IrPluginContext) {
    val startTimeMs = System.currentTimeMillis()
    // This default should be kept in sync with com.semmle.extractor.java.interceptors.KotlinInterceptor.initializeExtractionContext
    val trapDir = File(System.getenv("CODEQL_EXTRACTOR_JAVA_TRAP_DIR").takeUnless { it.isNullOrEmpty() } ?: "kotlin-extractor/trap")
    // The invocation TRAP file will already have been started
    // before the plugin is run, so we open it in append mode.
    FileOutputStream(File(invocationTrapFile), true).bufferedWriter().use { invocationTrapFileBW ->
        val invocationExtractionProblems = ExtractionProblems()
        val lm = TrapLabelManager()
        val logCounter = LogCounter()
        val loggerBase = LoggerBase(logCounter)
        val tw = TrapWriter(loggerBase, lm, invocationTrapFileBW, null)
        // The interceptor has already defined #compilation = *
        val compilation: Label<DbCompilation> = StringLabel("compilation")
        tw.writeCompilation_started(compilation)
        // If we were given the front-end start time, record the time
        // spent before this plugin was invoked as the compiler time.
        if (compilationStartTime != null) {
            tw.writeCompilation_compiler_times(compilation, -1.0, (System.currentTimeMillis()-compilationStartTime)/1000.0)
        }
        tw.flush()
        val logger = Logger(loggerBase, tw)
        logger.info("Extraction started")
        logger.flush()
        logger.info("Extraction for invocation TRAP file $invocationTrapFile")
        logger.flush()
        // Optional debugging aid: dump the whole IR tree to the log.
        if (System.getenv("CODEQL_EXTRACTOR_JAVA_KOTLIN_DUMP") == "true") {
            logger.info("moduleFragment:\n" + moduleFragment.dump())
        }
        val primitiveTypeMapping = PrimitiveTypeMapping(logger, pluginContext)
        // FIXME: FileUtil expects a static global logger
        // which should be provided by SLF4J's factory facility. For now we set it here.
        FileUtil.logger = logger
        val srcDir = File(System.getenv("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR").takeUnless { it.isNullOrEmpty() } ?: "kotlin-extractor/src")
        srcDir.mkdirs()
        val globalExtensionState = KotlinExtractorGlobalState()
        // Extract each file, recording per-file start/completion facts
        // and per-file problem status in the invocation TRAP file.
        moduleFragment.files.mapIndexed { index: Int, file: IrFile ->
            val fileExtractionProblems = FileExtractionProblems(invocationExtractionProblems)
            val fileTrapWriter = tw.makeSourceFileTrapWriter(file, true)
            loggerBase.setFileNumber(index)
            fileTrapWriter.writeCompilation_compiling_files(compilation, index, fileTrapWriter.fileId)
            doFile(fileExtractionProblems, invocationTrapFile, fileTrapWriter, checkTrapIdentical, loggerBase, trapDir, srcDir, file, primitiveTypeMapping, pluginContext, globalExtensionState)
            fileTrapWriter.writeCompilation_compiling_files_completed(compilation, index, fileExtractionProblems.extractionResult())
        }
        loggerBase.printLimitedDiagnosticCounts(tw)
        logger.info("Extraction completed")
        logger.flush()
        val compilationTimeMs = System.currentTimeMillis() - startTimeMs
        tw.writeCompilation_finished(compilation, -1.0, compilationTimeMs.toDouble() / 1000, invocationExtractionProblems.extractionResult())
        tw.flush()
        loggerBase.close()
    }
}
}
// Mutable state shared across the extraction of every file in a single
// kotlinc invocation.
class KotlinExtractorGlobalState {
    // Keys of generic specialisations already extracted in this
    // invocation — presumably used to avoid emitting duplicates across
    // files; TODO confirm at use sites.
    val genericSpecialisationsExtracted = HashSet<String>()
    // These three record mappings of classes, functions and fields that should be replaced wherever they are found.
    // As of now these are only used to fix IR generated by the Gradle Android Extensions plugin, hence e.g. IrProperty
    // doesn't have a map as that plugin doesn't generate them. If and when these are used more widely additional maps
    // should be added here.
    val syntheticToRealClassMap = HashMap<IrClass, IrClass?>()
    val syntheticToRealFunctionMap = HashMap<IrSimpleFunction, IrSimpleFunction?>()
    val syntheticToRealFieldMap = HashMap<IrField, IrField?>()
}
/*
The `ExtractionProblems` class is used to record whether this invocation
had any problems. It distinguishes 2 kinds of problem:
* Recoverable problems: e.g. if we check something that we expect to be
non-null and find that it is null.
* Non-recoverable problems: if we catch an exception.
*/
/*
 * Records whether this invocation encountered any problems, tracking
 * recoverable and non-recoverable problems separately. The result code
 * reports the worst problem seen: 2 for non-recoverable, 1 for
 * recoverable, 0 for none.
 */
open class ExtractionProblems {
    private var sawRecoverable = false
    private var sawNonRecoverable = false

    open fun setRecoverableProblem() {
        sawRecoverable = true
    }

    open fun setNonRecoverableProblem() {
        sawNonRecoverable = true
    }

    // Worst problem seen so far, as an exit-style code.
    fun extractionResult(): Int = when {
        sawNonRecoverable -> 2
        sawRecoverable -> 1
        else -> 0
    }
}
/*
The `FileExtractionProblems` class is analogous to `ExtractionProblems`,
except it records whether there were any problems while extracting a
particular source file.
*/
// Per-file problem tracker. Any problem recorded for a file is also
// propagated to the invocation-wide `invocationExtractionProblems`, so
// the invocation result reflects the worst problem in any file.
class FileExtractionProblems(val invocationExtractionProblems: ExtractionProblems): ExtractionProblems() {
    override fun setRecoverableProblem() {
        super.setRecoverableProblem()
        invocationExtractionProblems.setRecoverableProblem()
    }
    override fun setNonRecoverableProblem() {
        super.setNonRecoverableProblem()
        invocationExtractionProblems.setNonRecoverableProblem()
    }
}
/*
This function determines whether 2 TRAP files should be considered to be
equivalent. It returns `true` iff all of their non-comment lines are
identical.
*/
/*
 Determines whether two TRAP files should be considered equivalent.
 Returns `true` iff all of their non-comment lines are identical:
 lines are compared pairwise, and a pair is allowed to differ only
 when both lines are `//` comments.
 */
private fun equivalentTrap(f1: File, f2: File): Boolean {
    f1.bufferedReader().use { reader1 ->
        f2.bufferedReader().use { reader2 ->
            while (true) {
                val line1 = reader1.readLine()
                val line2 = reader2.readLine()
                when {
                    // Both files ended together: equivalent.
                    line1 == null && line2 == null -> return true
                    // One file is shorter than the other.
                    line1 == null || line2 == null -> return false
                    // Differing lines are tolerated only when both
                    // are comments.
                    line1 != line2 ->
                        if (!(line1.startsWith("//") && line2.startsWith("//"))) {
                            return false
                        }
                }
            }
        }
    }
}
// Extracts a single source file: copies the source into the source
// archive, then writes its TRAP to a temporary file which is renamed
// into place (or, when `checkTrapIdentical` is set and a TRAP file
// already exists, compared against it and reported if different).
// Any failure is recorded as a non-recoverable problem for this file.
private fun doFile(
    fileExtractionProblems: FileExtractionProblems,
    invocationTrapFile: String,
    fileTrapWriter: FileTrapWriter,
    checkTrapIdentical: Boolean,
    loggerBase: LoggerBase,
    dbTrapDir: File,
    dbSrcDir: File,
    srcFile: IrFile,
    primitiveTypeMapping: PrimitiveTypeMapping,
    pluginContext: IrPluginContext,
    globalExtensionState: KotlinExtractorGlobalState) {
    val srcFilePath = srcFile.path
    val logger = FileLogger(loggerBase, fileTrapWriter)
    logger.info("Extracting file $srcFilePath")
    logger.flush()
    // A non-empty context stack here means a previous file's extraction
    // didn't unwind cleanly; warn and reset before starting this file.
    val context = logger.loggerBase.extractorContextStack
    if (!context.empty()) {
        logger.warn("Extractor context was not empty. It thought:")
        context.clear()
    }
    // Copy the source file into the source archive, writing to a temp
    // file first and renaming into place.
    val dbSrcFilePath = Paths.get("$dbSrcDir/$srcFilePath")
    val dbSrcDirPath = dbSrcFilePath.parent
    Files.createDirectories(dbSrcDirPath)
    val srcTmpFile = File.createTempFile(dbSrcFilePath.fileName.toString() + ".", ".src.tmp", dbSrcDirPath.toFile())
    srcTmpFile.outputStream().use {
        Files.copy(Paths.get(srcFilePath), it)
    }
    srcTmpFile.renameTo(dbSrcFilePath.toFile())
    val trapFile = File("$dbTrapDir/$srcFilePath.trap")
    val trapFileDir = trapFile.parentFile
    trapFileDir.mkdirs()
    // Skip extraction entirely if the TRAP file already exists and we
    // are not checking for differences.
    if (checkTrapIdentical || !trapFile.exists()) {
        val trapTmpFile = File.createTempFile("$srcFilePath.", ".trap.tmp", trapFileDir)
        try {
            trapTmpFile.bufferedWriter().use { trapFileBW ->
                // We want our comments to be the first thing in the file,
                // so start off with a mere TrapWriter
                val tw = TrapWriter(loggerBase, TrapLabelManager(), trapFileBW, fileTrapWriter)
                tw.writeComment("Generated by the CodeQL Kotlin extractor for kotlin source code")
                tw.writeComment("Part of invocation $invocationTrapFile")
                // Now elevate to a SourceFileTrapWriter, and populate the
                // file information
                val sftw = tw.makeSourceFileTrapWriter(srcFile, true)
                val externalDeclExtractor = ExternalDeclExtractor(logger, invocationTrapFile, srcFilePath, primitiveTypeMapping, pluginContext, globalExtensionState, fileTrapWriter)
                val fileExtractor = KotlinFileExtractor(logger, sftw, srcFilePath, null, externalDeclExtractor, primitiveTypeMapping, pluginContext, globalExtensionState)
                fileExtractor.extractFileContents(srcFile, sftw.fileId)
                externalDeclExtractor.extractExternalClasses()
            }
            if (checkTrapIdentical && trapFile.exists()) {
                if (equivalentTrap(trapTmpFile, trapFile)) {
                    if (!trapTmpFile.delete()) {
                        logger.warn("Failed to delete $trapTmpFile")
                    }
                } else {
                    // Keep the differing TRAP around for inspection.
                    val trapDifferentFile = File.createTempFile("$srcFilePath.", ".trap.different", dbTrapDir)
                    if (trapTmpFile.renameTo(trapDifferentFile)) {
                        logger.warn("TRAP difference: $trapFile vs $trapDifferentFile")
                    } else {
                        logger.warn("Failed to rename $trapTmpFile to $trapFile")
                    }
                }
            } else {
                if (!trapTmpFile.renameTo(trapFile)) {
                    logger.warn("Failed to rename $trapTmpFile to $trapFile")
                }
            }
        // We catch Throwable rather than Exception, as we want to
        // continue trying to extract everything else even if we get a
        // stack overflow or an assertion failure in one file.
        } catch (e: Throwable) {
            logger.error("Failed to extract '$srcFilePath'. Partial TRAP file location is $trapTmpFile", e)
            context.clear()
            fileExtractionProblems.setNonRecoverableProblem()
        }
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,43 @@
package com.github.codeql
import java.io.PrintWriter
import java.io.StringWriter
/**
 * This represents a label (`#...`) in a TRAP file.
 */
interface Label<T> {
    /**
     * Reinterprets this label as labelling a different entity type.
     * The cast is unchecked; callers are responsible for ensuring the
     * reinterpretation is sound.
     */
    fun <U> cast(): Label<U> {
        @Suppress("UNCHECKED_CAST")
        return this as Label<U>
    }
}
/**
 * A numeric TRAP label of the form `#i` (e.g. `#123`). Most labels the
 * extractor generates take this form.
 */
class IntLabel<T>(val i: Int): Label<T> {
    override fun toString() = "#" + i
}
/**
 * A named TRAP label of the form `#name` (e.g. `#compilation`), used
 * for labels shared between components — for instance, when both the
 * interceptor and the extractor must refer to the same entity.
 */
class StringLabel<T>(val name: String): Label<T> {
    override fun toString() = "#" + name
}
// TODO: Remove this and all of its uses
/**
 * Returns a dummy label (`#0`), logging the call site's stack trace so
 * the remaining uses can be tracked down and removed.
 *
 * (Previously the logging sat in the `else` arm of an `if (false)`,
 * i.e. behind a dead branch; the constant-false branch has been
 * removed without changing behaviour.)
 */
fun <T> fakeLabel(): Label<T> {
    val sw = StringWriter()
    Exception().printStackTrace(PrintWriter(sw))
    println("Fake label from:\n$sw")
    return IntLabel(0)
}

View File

@@ -0,0 +1,84 @@
package com.github.codeql
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.ir.declarations.IrClass
import org.jetbrains.kotlin.ir.types.IrSimpleType
import org.jetbrains.kotlin.ir.types.IdSignatureValues
import org.jetbrains.kotlin.ir.util.IdSignature
import org.jetbrains.kotlin.name.FqName
/**
 * Maps Kotlin's built-in primitive-like types (Byte, Int, unsigned
 * variants, Boolean, Char, Unit, Nothing, ...) to the information the
 * extractor needs about them: the JVM primitive name (if any), the
 * corresponding Java class, and the Kotlin package/class names.
 */
class PrimitiveTypeMapping(val logger: Logger, val pluginContext: IrPluginContext) {

    // Returns the primitive-type information for `s`, or null if `s`
    // is not one of the mapped built-in types.
    fun getPrimitiveInfo(s: IrSimpleType) = mapping[s.classifier.signature]

    data class PrimitiveTypeInfo(
        // The JVM primitive name, e.g. "int"; null when there is no
        // primitive form (Nothing).
        val primitiveName: String?,
        // NOTE(review): semantics inferred from the name only — confirm
        // at use sites what "other" refers to.
        val otherIsPrimitive: Boolean,
        // The Java-world class for this type, e.g. java.lang.Integer.
        val javaClass: IrClass,
        val kotlinPackageName: String, val kotlinClassName: String
    )

    // Looks up the class `fqName`, falling back (with a warning) to
    // `fallback` when it cannot be resolved, so extraction can proceed.
    private fun findClass(fqName: String, fallback: IrClass): IrClass {
        val symbol = pluginContext.referenceClass(FqName(fqName))
        if(symbol == null) {
            logger.warn("Can't find $fqName")
            // Do the best we can
            return fallback
        } else {
            return symbol.owner
        }
    }

    // Built eagerly, once per instance. (Previously written as an
    // immediately-invoked lambda `{ ... }()`; `run { ... }` is the
    // idiomatic equivalent.)
    private val mapping = run {
        val kotlinByte = pluginContext.irBuiltIns.byteClass.owner
        val javaLangByte = findClass("java.lang.Byte", kotlinByte)
        val kotlinShort = pluginContext.irBuiltIns.shortClass.owner
        val javaLangShort = findClass("java.lang.Short", kotlinShort)
        val kotlinInt = pluginContext.irBuiltIns.intClass.owner
        val javaLangInteger = findClass("java.lang.Integer", kotlinInt)
        val kotlinLong = pluginContext.irBuiltIns.longClass.owner
        val javaLangLong = findClass("java.lang.Long", kotlinLong)

        val kotlinUByte = findClass("kotlin.UByte", kotlinByte)
        val kotlinUShort = findClass("kotlin.UShort", kotlinShort)
        val kotlinUInt = findClass("kotlin.UInt", kotlinInt)
        val kotlinULong = findClass("kotlin.ULong", kotlinLong)

        val kotlinDouble = pluginContext.irBuiltIns.doubleClass.owner
        val javaLangDouble = findClass("java.lang.Double", kotlinDouble)
        val kotlinFloat = pluginContext.irBuiltIns.floatClass.owner
        val javaLangFloat = findClass("java.lang.Float", kotlinFloat)

        val kotlinBoolean = pluginContext.irBuiltIns.booleanClass.owner
        val javaLangBoolean = findClass("java.lang.Boolean", kotlinBoolean)

        val kotlinChar = pluginContext.irBuiltIns.charClass.owner
        val javaLangCharacter = findClass("java.lang.Character", kotlinChar)

        val kotlinUnit = pluginContext.irBuiltIns.unitClass.owner
        val kotlinNothing = pluginContext.irBuiltIns.nothingClass.owner
        val javaLangVoid = findClass("java.lang.Void", kotlinNothing)

        mapOf(
            IdSignatureValues._byte to PrimitiveTypeInfo("byte", true, javaLangByte, "kotlin", "Byte"),
            IdSignatureValues._short to PrimitiveTypeInfo("short", true, javaLangShort, "kotlin", "Short"),
            IdSignatureValues._int to PrimitiveTypeInfo("int", true, javaLangInteger, "kotlin", "Int"),
            IdSignatureValues._long to PrimitiveTypeInfo("long", true, javaLangLong, "kotlin", "Long"),

            IdSignatureValues.uByte to PrimitiveTypeInfo("byte", true, kotlinUByte, "kotlin", "UByte"),
            IdSignatureValues.uShort to PrimitiveTypeInfo("short", true, kotlinUShort, "kotlin", "UShort"),
            IdSignatureValues.uInt to PrimitiveTypeInfo("int", true, kotlinUInt, "kotlin", "UInt"),
            IdSignatureValues.uLong to PrimitiveTypeInfo("long", true, kotlinULong, "kotlin", "ULong"),

            IdSignatureValues._double to PrimitiveTypeInfo("double", true, javaLangDouble, "kotlin", "Double"),
            IdSignatureValues._float to PrimitiveTypeInfo("float", true, javaLangFloat, "kotlin", "Float"),

            IdSignatureValues._boolean to PrimitiveTypeInfo("boolean", true, javaLangBoolean, "kotlin", "Boolean"),

            IdSignatureValues._char to PrimitiveTypeInfo("char", true, javaLangCharacter, "kotlin", "Char"),

            IdSignatureValues.unit to PrimitiveTypeInfo("void", false, kotlinUnit, "kotlin", "Unit"),
            IdSignatureValues.nothing to PrimitiveTypeInfo(null, true, javaLangVoid, "kotlin", "Nothing"),
        )
    }
}

View File

@@ -0,0 +1,368 @@
package com.github.codeql
import com.github.codeql.KotlinUsesExtractor.LocallyVisibleFunctionLabels
import com.github.codeql.KotlinUsesExtractor.TypeResults
import com.github.codeql.utils.versions.FileEntry
import java.io.BufferedWriter
import java.io.File
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.path
import org.jetbrains.kotlin.ir.declarations.IrClass
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.ir.declarations.IrFunction
import org.jetbrains.kotlin.ir.declarations.IrVariable
import org.jetbrains.kotlin.ir.UNDEFINED_OFFSET
import org.jetbrains.kotlin.ir.util.SYNTHETIC_OFFSET
import com.semmle.extractor.java.PopulateFile
import com.semmle.util.unicode.UTF8Util
/**
 * Each `.trap` file has a `TrapLabelManager` while we are writing it.
 * It provides fresh TRAP label names, and maintains a mapping from keys
 * (`@"..."`) to labels.
 */
class TrapLabelManager {
    /** The next integer to use as a label name. */
    private var nextInt: Int = 100

    /** Returns a fresh label. */
    fun <T> getFreshLabel(): Label<T> {
        return IntLabel(nextInt++)
    }

    /**
     * A mapping from a key (`@"..."`) to the label defined to be that
     * key, if any.
     */
    val labelMapping: MutableMap<String, Label<*>> = mutableMapOf<String, Label<*>>()

    // Labels/types already assigned to anonymous classes, which have no
    // stable key. NOTE(review): semantics inferred from the types only
    // — confirm at use sites.
    val anonymousTypeMapping: MutableMap<IrClass, TypeResults> = mutableMapOf()

    // Labels already assigned to locally visible functions (see
    // `LocallyVisibleFunctionLabels`).
    val locallyVisibleFunctionLabelMapping: MutableMap<IrFunction, LocallyVisibleFunctionLabels> = mutableMapOf()
}
/**
 * A `TrapWriter` is used to write TRAP to a particular TRAP file.
 * There may be multiple `TrapWriter`s for the same file, as different
 * instances will have different additional state, but they must all
 * share the same `TrapLabelManager` and `BufferedWriter`.
 *
 * `diagnosticTrapWriter`, when non-null, is the writer used when
 * logging warnings (we fall back to this writer when it is null).
 */
// TODO lm was `protected` before anonymousTypeMapping and locallyVisibleFunctionLabelMapping moved into it. Should we re-protect it and provide accessors?
open class TrapWriter (protected val loggerBase: LoggerBase, val lm: TrapLabelManager, private val bw: BufferedWriter, val diagnosticTrapWriter: TrapWriter?) {
    /**
     * Returns the label that is defined to be the given key, if such
     * a label exists, and `null` otherwise. Most users will want to use
     * `getLabelFor` instead, which allows non-existent labels to be
     * initialised.
     */
    fun <T> getExistingLabelFor(key: String): Label<T>? {
        return lm.labelMapping.get(key)?.cast<T>()
    }
    /**
     * Returns the label for the given key, if one exists.
     * Otherwise, a fresh label is bound to that key, `initialise`
     * is run on it, and it is returned.
     */
    @JvmOverloads // Needed so Java can call a method with an optional argument
    fun <T> getLabelFor(key: String, initialise: (Label<T>) -> Unit = {}): Label<T> {
        val maybeLabel: Label<T>? = getExistingLabelFor(key)
        if(maybeLabel == null) {
            val label: Label<T> = lm.getFreshLabel()
            lm.labelMapping.put(key, label)
            // The binding is written before `initialise` runs, so any
            // TRAP emitted by `initialise` can refer to the label.
            writeTrap("$label = $key\n")
            initialise(label)
            return label
        } else {
            return maybeLabel
        }
    }

    /**
     * Returns a label for a fresh ID (i.e. a new label bound to `*`).
     */
    fun <T> getFreshIdLabel(): Label<T> {
        val label: Label<T> = lm.getFreshLabel()
        writeTrap("$label = *\n")
        return label
    }

    /**
     * It is not easy to assign keys to local variables, so they get
     * given `*` IDs. However, the same variable may be referred to
     * from distant places in the IR, so we need a way to find out
     * which label is used for a given local variable. This information
     * is stored in this mapping.
     */
    private val variableLabelMapping: MutableMap<IrVariable, Label<out DbLocalvar>> = mutableMapOf<IrVariable, Label<out DbLocalvar>>()
    /**
     * This returns the label used for a local variable, creating one
     * if none currently exists.
     *
     * (The type parameter `T` appears to be unused here — TODO confirm
     * whether it can be removed.)
     */
    fun <T> getVariableLabelFor(v: IrVariable): Label<out DbLocalvar> {
        val maybeLabel = variableLabelMapping.get(v)
        if(maybeLabel == null) {
            val label = getFreshIdLabel<DbLocalvar>()
            variableLabelMapping.put(v, label)
            return label
        } else {
            return maybeLabel
        }
    }

    /**
     * This returns a label for the location described by its arguments.
     * Typically users will not want to call this directly, but instead
     * use `unknownLocation`, or overloads of this defined by subclasses.
     */
    fun getLocation(fileId: Label<DbFile>, startLine: Int, startColumn: Int, endLine: Int, endColumn: Int): Label<DbLocation> {
        return getLabelFor("@\"loc,{$fileId},$startLine,$startColumn,$endLine,$endColumn\"") {
            writeLocations_default(it, fileId, startLine, startColumn, endLine, endColumn)
        }
    }

    /**
     * The label for the 'unknown' file ID.
     * Users will want to use `unknownLocation` instead.
     * This is lazy, as we don't want to define it in a TRAP file unless
     * the TRAP file actually contains something in the 'unknown' file.
     */
    protected val unknownFileId: Label<DbFile> by lazy {
        val unknownFileLabel = "@\";sourcefile\""
        getLabelFor(unknownFileLabel, {
            writeFiles(it, "")
        })
    }

    /**
     * The label for the 'unknown' location.
     * This is lazy, as we don't want to define it in a TRAP file unless
     * the TRAP file actually contains something with an 'unknown'
     * location.
     */
    val unknownLocation: Label<DbLocation> by lazy {
        getWholeFileLocation(unknownFileId)
    }

    /**
     * Returns the label for the file `filePath`.
     * If `populateFileTables` is true, then this also adds rows to the
     * `files` and `folders` tables for this file.
     */
    fun mkFileId(filePath: String, populateFileTables: Boolean): Label<DbFile> {
        // If a file is in a jar, then the Kotlin compiler gives
        // `<jar file>!/<path within jar>` as its path. We need to split
        // it as appropriate, to make the right file ID.
        val populateFile = PopulateFile(this)
        val splitFilePath = filePath.split("!/")
        if(splitFilePath.size == 1) {
            return populateFile.getFileLabel(File(filePath), populateFileTables)
        } else {
            return populateFile.getFileInJarLabel(File(splitFilePath.get(0)), splitFilePath.get(1), populateFileTables)
        }
    }

    /**
     * If you have an ID for a file, then this gets a label for the
     * location representing the whole of that file.
     * (By convention, line/column 0,0,0,0 means the whole file.)
     */
    fun getWholeFileLocation(fileId: Label<DbFile>): Label<DbLocation> {
        return getLocation(fileId, 0, 0, 0, 0)
    }

    /**
     * Write a raw string into the TRAP file. Users should call one of
     * the wrapper functions instead.
     */
    fun writeTrap(trap: String) {
        bw.write(trap)
    }

    /**
     * Write a comment into the TRAP file.
     */
    fun writeComment(comment: String) {
        writeTrap("// ${comment.replace("\n", "\n// ")}\n")
    }

    /**
     * Flush the TRAP file.
     */
    fun flush() {
        bw.flush()
    }

    /**
     * Escape a string so that it can be used in a TRAP string literal,
     * i.e. with `"` escaped as `""`.
     */
    fun escapeTrapString(str: String) = str.replace("\"", "\"\"")

    /**
     * TRAP string literals are limited to 1 megabyte.
     */
    private val MAX_STRLEN = 1.shl(20)

    /**
     * Truncate a string, if necessary, so that it can be used as a TRAP
     * string literal. TRAP string literals are limited to 1 megabyte.
     * Truncation is logged as a warning, with a snippet of the start
     * and end of the affected string.
     */
    fun truncateString(str: String): String {
        val len = str.length
        // Truncate on a UTF-8-encodable boundary rather than at a raw
        // char index, so we never split a surrogate pair.
        val newLen = UTF8Util.encodablePrefixLength(str, MAX_STRLEN)
        if (newLen < len) {
            loggerBase.warn(diagnosticTrapWriter ?: this,
                "Truncated string of length $len",
                "Truncated string of length $len, starting '${str.take(100)}', ending '${str.takeLast(100)}'")
            return str.take(newLen)
        } else {
            return str
        }
    }

    /**
     * Gets a FileTrapWriter like this one (using the same label manager,
     * writer etc), but using the given `filePath` for locations.
     */
    fun makeFileTrapWriter(filePath: String, populateFileTables: Boolean) =
        FileTrapWriter(loggerBase, lm, bw, diagnosticTrapWriter, filePath, populateFileTables)

    /**
     * Gets a FileTrapWriter like this one (using the same label manager,
     * writer etc), but using the given `IrFile` for locations.
     */
    fun makeSourceFileTrapWriter(file: IrFile, populateFileTables: Boolean) =
        SourceFileTrapWriter(loggerBase, lm, bw, diagnosticTrapWriter, file, populateFileTables)
}
/**
 * A `FileTrapWriter` is used when we know which file we are extracting
 * entities from, so we can at least give the right file as a location.
 *
 * An ID for the file will be created, and if `populateFileTables` is
 * true then we will also add rows to the `files` and `folders` tables
 * for it.
 */
open class FileTrapWriter (
    loggerBase: LoggerBase,
    lm: TrapLabelManager,
    bw: BufferedWriter,
    diagnosticTrapWriter: TrapWriter?,
    val filePath: String,
    populateFileTables: Boolean
): TrapWriter (loggerBase, lm, bw, diagnosticTrapWriter) {
    /**
     * The ID for the file that we are extracting from.
     * Created eagerly on construction (which also populates the file
     * tables when `populateFileTables` is set).
     */
    val fileId = mkFileId(filePath, populateFileTables)

    /**
     * Gets a label for the location of `e`.
     */
    fun getLocation(e: IrElement): Label<DbLocation> {
        return getLocation(e.startOffset, e.endOffset)
    }
    /**
     * Gets a label for the location representing the whole of this file.
     */
    fun getWholeFileLocation(): Label<DbLocation> {
        return getWholeFileLocation(fileId)
    }
    /**
     * Gets a label for the location corresponding to `startOffset` and
     * `endOffset` within this file.
     * Overridden in `SourceFileTrapWriter`, which can resolve offsets
     * to line/column numbers.
     */
    open fun getLocation(startOffset: Int, endOffset: Int): Label<DbLocation> {
        // We don't have a FileEntry to look up the offsets in, so all
        // we can do is return a whole-file location.
        return getWholeFileLocation()
    }
    /**
     * Gets the location of `e` as a human-readable string. Only used in
     * log messages and exception messages.
     */
    open fun getLocationString(e: IrElement): String {
        // We don't have a FileEntry to look up the offsets in, so all
        // we can do is return a whole-file location. We omit the
        // `:0:0:0:0` so that it is easy to distinguish from a location
        // where we have actually determined the start/end lines/columns
        // to be 0.
        return "file://$filePath"
    }
}
/**
 * A `SourceFileTrapWriter` is used when not only do we know which file
 * we are extracting entities from, but we also have an `IrFileEntry`
 * (from an `IrFile`) which allows us to map byte offsets to line
 * and column numbers.
 *
 * An ID for the file will be created, and if `populateFileTables` is
 * true then we will also add rows to the `files` and `folders` tables
 * for it.
 */
class SourceFileTrapWriter (
    loggerBase: LoggerBase,
    lm: TrapLabelManager,
    bw: BufferedWriter,
    diagnosticTrapWriter: TrapWriter?,
    irFile: IrFile,
    populateFileTables: Boolean) :
    FileTrapWriter(loggerBase, lm, bw, diagnosticTrapWriter, irFile.path, populateFileTables) {

    /**
     * The file entry for the file that we are extracting from.
     * Used to map offsets to line/column numbers.
     */
    private val fileEntry = irFile.fileEntry

    override fun getLocation(startOffset: Int, endOffset: Int): Label<DbLocation> {
        // Undefined offsets (e.g. compiler-generated elements with no
        // source position) get a whole-file location. A mixed
        // undefined/defined pair is unexpected, so warn about it.
        if (startOffset == UNDEFINED_OFFSET || endOffset == UNDEFINED_OFFSET) {
            if (startOffset != endOffset) {
                loggerBase.warn(this, "Location with inconsistent offsets (start $startOffset, end $endOffset)", null)
            }
            return getWholeFileLocation()
        }

        // Likewise for synthetic offsets.
        if (startOffset == SYNTHETIC_OFFSET || endOffset == SYNTHETIC_OFFSET) {
            if (startOffset != endOffset) {
                loggerBase.warn(this, "Location with inconsistent offsets (start $startOffset, end $endOffset)", null)
            }
            return getWholeFileLocation()
        }

        // If this is the location for a compiler-generated element, then it will
        // be a zero-width location. QL doesn't support these, so we translate it
        // into a one-width location.
        val endColumnOffset = if (startOffset == endOffset) 1 else 0
        // Offsets are 0-based; TRAP locations are 1-based.
        return getLocation(
            fileId,
            fileEntry.getLineNumber(startOffset) + 1,
            fileEntry.getColumnNumber(startOffset) + 1,
            fileEntry.getLineNumber(endOffset) + 1,
            fileEntry.getColumnNumber(endOffset) + endColumnOffset)
    }

    // Mirrors the undefined/synthetic handling of `getLocation` above,
    // but produces a human-readable string for log messages.
    override fun getLocationString(e: IrElement): String {
        if (e.startOffset == UNDEFINED_OFFSET || e.endOffset == UNDEFINED_OFFSET) {
            if (e.startOffset != e.endOffset) {
                loggerBase.warn(this, "Location with inconsistent offsets (start ${e.startOffset}, end ${e.endOffset})", null)
            }
            return "<unknown location while processing $filePath>"
        }

        if (e.startOffset == SYNTHETIC_OFFSET || e.endOffset == SYNTHETIC_OFFSET) {
            if (e.startOffset != e.endOffset) {
                loggerBase.warn(this, "Location with inconsistent offsets (start ${e.startOffset}, end ${e.endOffset})", null)
            }
            return "<synthetic location while processing $filePath>"
        }

        val startLine =   fileEntry.getLineNumber(e.startOffset) + 1
        val startColumn = fileEntry.getColumnNumber(e.startOffset) + 1
        val endLine =     fileEntry.getLineNumber(e.endOffset) + 1
        val endColumn =   fileEntry.getColumnNumber(e.endOffset)
        return "file://$filePath:$startLine:$startColumn:$endLine:$endColumn"
    }
}

View File

@@ -0,0 +1,121 @@
package com.github.codeql.comments
import com.github.codeql.*
import com.github.codeql.utils.IrVisitorLookup
import com.github.codeql.utils.versions.Psi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.path
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.ir.declarations.IrValueParameter
import org.jetbrains.kotlin.kdoc.psi.api.KDoc
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.KtVisitor
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
// Extracts comments (including KDoc, with its sections and owners)
// from the PSI tree of `file` into the TRAP writer of `fileExtractor`.
// Comments only exist in the PSI tree, not in the IR, hence the PSI
// visitor below.
class CommentExtractor(private val fileExtractor: KotlinFileExtractor, private val file: IrFile, private val fileLabel: Label<out DbFile>) {
    private val tw = fileExtractor.tw
    private val logger = fileExtractor.logger
    // The PSI view of this file; may be null (handled in `extract`).
    private val ktFile = Psi2Ir().getKtFile(file)

    fun extract() {
        if (ktFile == null) {
            logger.warn("Comments are not being processed in ${file.path}.")
        } else {
            ktFile.accept(commentVisitor)
        }
    }

    private val commentVisitor =
        object : KtVisitor<Unit, Unit>() {
            override fun visitElement(element: PsiElement) {
                element.acceptChildren(this)

                // Slightly hacky, but `visitComment` doesn't seem to visit comments with `tokenType` `KtTokens.DOC_COMMENT`
                if (element is PsiComment){
                    visitCommentElement(element)
                }
            }

            private fun visitCommentElement(comment: PsiComment) {
                // Map the PSI token type to our TRAP comment-type code.
                val type: CommentType = when (comment.tokenType) {
                    KtTokens.EOL_COMMENT -> {
                        CommentType.SingleLine
                    }
                    KtTokens.BLOCK_COMMENT -> {
                        CommentType.Block
                    }
                    KtTokens.DOC_COMMENT -> {
                        CommentType.Doc
                    }
                    else -> {
                        logger.warn("Unhandled comment token type: ${comment.tokenType}")
                        return
                    }
                }
                val commentLabel = tw.getFreshIdLabel<DbKtcomment>()
                tw.writeKtComments(commentLabel, type.value, comment.text)
                val locId = tw.getLocation(comment.startOffset, comment.endOffset)
                tw.writeHasLocation(commentLabel, locId)

                // Everything below applies only to KDoc comments.
                if (comment.tokenType != KtTokens.DOC_COMMENT) {
                    return
                }

                if (comment !is KDoc) {
                    logger.warn("Unexpected comment type with DocComment token type.")
                    return
                }

                // Record each KDoc section, plus its name and subject
                // name when present.
                for (sec in comment.getAllSections()) {
                    val commentSectionLabel = tw.getFreshIdLabel<DbKtcommentsection>()
                    tw.writeKtCommentSections(commentSectionLabel, commentLabel, sec.getContent())
                    val name = sec.name
                    if (name != null) {
                        tw.writeKtCommentSectionNames(commentSectionLabel, name)
                    }
                    val subjectName = sec.getSubjectName()
                    if (subjectName != null) {
                        tw.writeKtCommentSectionSubjectNames(commentSectionLabel, subjectName)
                    }
                }

                // Only storing the owner of doc comments:
                val ownerPsi = getKDocOwner(comment) ?: return

                // Find the IR element(s) corresponding to the PSI owner
                // and attach the comment to each of their labels.
                val owners = mutableListOf<IrElement>()
                file.accept(IrVisitorLookup(ownerPsi, file), owners)

                for (ownerIr in owners) {
                    val ownerLabel =
                        if (ownerIr == file)
                            fileLabel
                        else {
                            if (ownerIr is IrValueParameter && ownerIr.index == -1) {
                                // Don't attribute comments to the implicit `this` parameter of a function.
                                continue
                            }
                            val label = fileExtractor.getLabel(ownerIr) ?: continue
                            val existingLabel = tw.getExistingLabelFor<DbTop>(label)
                            if (existingLabel == null) {
                                logger.warn("Couldn't get existing label for $label")
                                continue
                            }
                            existingLabel
                        }
                    tw.writeKtCommentOwners(commentLabel, ownerLabel)
                }
            }

            private fun getKDocOwner(comment: KDoc) : PsiElement? {
                val owner = comment.owner
                if (owner == null) {
                    logger.warn("Couldn't get owner of KDoc.")
                }
                return owner
            }
        }
}

View File

@@ -0,0 +1,5 @@
package com.github.codeql.comments
/**
 * The kinds of comment recorded in the TRAP `ktComments` table,
 * together with the integer code used to represent each kind.
 */
enum class CommentType(val value: Int) {
    /** An end-of-line `// ...` comment. */
    SingleLine(1),
    /** A `/* ... */` block comment. */
    Block(2),
    /** A `/** ... */` KDoc comment. */
    Doc(3)
}

View File

@@ -0,0 +1,43 @@
package com.github.codeql
// Functions copied from stdlib/jdk7/src/kotlin/AutoCloseable.kt, which is not available within kotlinc,
// but allows the `.use` pattern to be applied to JDK7 AutoCloseables:
/**
 * Executes the given [block] function on this resource and then closes it down correctly whether an exception
 * is thrown or not.
 *
 * In case if the resource is being closed due to an exception occurred in [block], and the closing also fails with an exception,
 * the latter is added to the [suppressed][java.lang.Throwable.addSuppressed] exceptions of the former.
 *
 * @param block a function to process this [AutoCloseable] resource.
 * @return the result of [block] function invoked on this resource.
 */
public inline fun <T : AutoCloseable?, R> T.useAC(block: (T) -> R): R {
    var exception: Throwable? = null
    try {
        return block(this)
    } catch (e: Throwable) {
        // Remember the primary exception so that any failure during
        // close() can be attached to it as a suppressed exception.
        exception = e
        throw e
    } finally {
        this.closeFinallyAC(exception)
    }
}
/**
 * Closes this [AutoCloseable], suppressing possible exception or error thrown by [AutoCloseable.close] function when
 * it's being closed due to some other [cause] exception occurred.
 *
 * The suppressed exception is added to the list of suppressed exceptions of [cause] exception.
 */
fun AutoCloseable?.closeFinallyAC(cause: Throwable?) {
    if (this == null) {
        // Nothing to close.
        return
    }
    if (cause == null) {
        // No primary exception: let any close() failure propagate.
        close()
        return
    }
    try {
        close()
    } catch (closeException: Throwable) {
        // Attach the close() failure to the primary exception rather
        // than masking it.
        cause.addSuppressed(closeException)
    }
}

View File

@@ -0,0 +1,87 @@
package com.github.codeql
import com.intellij.openapi.vfs.StandardFileSystems
import org.jetbrains.kotlin.load.java.sources.JavaSourceElement
import org.jetbrains.kotlin.load.java.structure.impl.classFiles.BinaryJavaClass
import org.jetbrains.kotlin.load.kotlin.VirtualFileKotlinClass
import org.jetbrains.kotlin.load.kotlin.KotlinJvmBinarySourceElement
import com.intellij.openapi.vfs.VirtualFile
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.util.parentClassOrNull
import org.jetbrains.kotlin.load.kotlin.JvmPackagePartSource
// Adapted from Kotlin's interpreter/Utils.kt function 'internalName'
// Translates class names into their JLS section 13.1 binary name,
// and declarations within them into the parent class' JLS 13.1 name as
// specified above, followed by a `$` separator and then the short name
// for `that`.
fun getIrDeclBinaryName(that: IrDeclaration): String {
    val shortName = when(that) {
        is IrDeclarationWithName -> that.name.asString()
        else -> "(unknown-name)"
    }
    // Walk up the parent chain, prepending `Outer$` for each enclosing
    // class and finally `package.` for the package fragment.
    // (Also dropped a stray trailing semicolon from the original.)
    val internalName = StringBuilder(shortName)
    generateSequence(that.parent) { (it as? IrDeclaration)?.parent }
        .forEach {
            when (it) {
                is IrClass -> internalName.insert(0, it.name.asString() + "$")
                is IrPackageFragment -> it.fqName.asString().takeIf { it.isNotEmpty() }?.let { internalName.insert(0, "$it.") }
            }
        }
    return internalName.toString()
}
/** The [VirtualFile] containing `irClass`'s binary form, if it can be determined. */
fun getIrClassVirtualFile(irClass: IrClass): VirtualFile? {
    // Don't emit a location for multi-file classes until we're sure we can cope with different declarations
    // inside a class disagreeing about their source file. In particular this currently causes problems when
    // a source-location for a declarations tries to refer to a file-id which is assumed to be declared in
    // the class trap file.
    if (irClass.origin == IrDeclarationOrigin.JVM_MULTIFILE_CLASS)
        return null
    return when (val src = irClass.source) {
        is JavaSourceElement -> (src.javaElement as? BinaryJavaClass)?.virtualFile
        is KotlinJvmBinarySourceElement -> (src.binaryClass as? VirtualFileKotlinClass)?.file
        is JvmPackagePartSource -> (src.knownJvmBinaryClass as? VirtualFileKotlinClass)?.file
        else -> null
    }
}
// Path of the class file backing `irClass`, or null when unknown.
// For JRT files, which we assume to be the JDK, hide the containing JAR path
// to match the Java extractor's behaviour.
fun getRawIrClassBinaryPath(irClass: IrClass) =
    getIrClassVirtualFile(irClass)?.let { vf ->
        if (vf.fileSystem.protocol == StandardFileSystems.JRT_PROTOCOL)
            "/${vf.path.split("!/", limit = 2)[1]}"
        else
            vf.path
    }
/**
 * Like [getRawIrClassBinaryPath], but falls back to a synthetic
 * "unknown binary location" path when no real class file can be found.
 */
fun getIrClassBinaryPath(irClass: IrClass): String =
    getRawIrClassBinaryPath(irClass)
        ?: "/!unknown-binary-location/${getIrDeclBinaryName(irClass).replace(".", "/")}.class"
/** `decl` itself if it is a class, otherwise its nearest enclosing class (or null). */
fun getContainingClassOrSelf(decl: IrDeclaration): IrClass? =
    decl as? IrClass ?: decl.parentClassOrNull

View File

@@ -0,0 +1,19 @@
package com.github.codeql.utils
import org.jetbrains.kotlin.ir.declarations.IrClass
import org.jetbrains.kotlin.ir.declarations.IrDeclaration
import org.jetbrains.kotlin.ir.declarations.IrDeclarationOrigin
import org.jetbrains.kotlin.ir.util.isFileClass
import org.jetbrains.kotlin.ir.util.parentClassOrNull
/** True when `d` is a stub for a declaration that comes from a dependency rather than source. */
fun isExternalDeclaration(d: IrDeclaration): Boolean =
    when (d.origin) {
        IrDeclarationOrigin.IR_EXTERNAL_DECLARATION_STUB,
        IrDeclarationOrigin.IR_EXTERNAL_JAVA_DECLARATION_STUB -> true
        // Treat kotlin.coroutines.* like ordinary library classes
        else -> d.origin.toString() == "FUNCTION_INTERFACE_CLASS"
    }
/**
 * Returns true if `d` is not itself a class, but is a member of an external file class.
 */
fun isExternalFileClassMember(d: IrDeclaration): Boolean {
    if (d is IrClass)
        return false
    return d.parentClassOrNull?.isFileClass == true
}

View File

@@ -0,0 +1,38 @@
package com.github.codeql.utils
import com.github.codeql.utils.versions.Psi2Ir
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.IrDeclaration
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.ir.util.isFakeOverride
import org.jetbrains.kotlin.ir.visitors.IrElementVisitor
/**
 * Visitor collecting every [IrElement] in [file] that maps back to the PSI
 * element [psi], into the mutable collection passed as visitor data.
 */
class IrVisitorLookup(private val psi: PsiElement, private val file: IrFile) :
    IrElementVisitor<Unit, MutableCollection<IrElement>> {

    private val psiLocation = psi.getLocation()

    override fun visitElement(element: IrElement, data: MutableCollection<IrElement>) {
        val elementLocation = element.getLocation()
        // Prune subtrees whose source range cannot overlap the PSI element's.
        if (!psiLocation.intersects(elementLocation))
            return
        // Fake overrides aren't extracted, so we don't expect anything to
        // exist to which we could ascribe a comment.
        if (element is IrDeclaration && element.isFakeOverride)
            return
        if (psiLocation.contains(elementLocation) &&
            Psi2Ir().findPsiElement(element, file) == psi) {
            // There can be multiple IrElements that match the same PSI element.
            data.add(element)
        }
        element.acceptChildren(this, data)
    }
}

View File

@@ -0,0 +1,24 @@
package com.github.codeql.utils
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
/** An inclusive source-offset range `[startOffset, endOffset]`. */
data class Location(val startOffset: Int, val endOffset: Int) {
    /** True when [location] lies entirely within this range. */
    fun contains(location: Location): Boolean =
        startOffset <= location.startOffset && endOffset >= location.endOffset

    /** True when [location] overlaps this range at any point. */
    fun intersects(location: Location): Boolean =
        endOffset >= location.startOffset && startOffset <= location.endOffset
}
/** This IR element's source-offset range as a [Location]. */
fun IrElement.getLocation(): Location = Location(startOffset, endOffset)
/** This PSI element's source-offset range as a [Location]. */
fun PsiElement.getLocation(): Location = Location(startOffset, endOffset)

View File

@@ -0,0 +1,246 @@
package com.github.codeql
import java.io.File
import java.io.FileWriter
import java.io.OutputStreamWriter
import java.io.Writer
import java.text.SimpleDateFormat
import java.util.Date
import java.util.Stack
import org.jetbrains.kotlin.ir.IrElement
/**
 * Shared diagnostic bookkeeping: per-call-site diagnostic counts, and the
 * per-site cap on how many diagnostics are reported.
 */
class LogCounter() {
    /** Number of diagnostics emitted so far, keyed by the emitting code location. */
    public val diagnosticCounts = mutableMapOf<String, Int>()

    /**
     * Per-site diagnostic cap, configurable via
     * CODEQL_EXTRACTOR_KOTLIN_DIAGNOSTIC_LIMIT (default 100); consumers treat
     * a value <= 0 as "no cap".
     */
    public val diagnosticLimit: Int =
        System.getenv("CODEQL_EXTRACTOR_KOTLIN_DIAGNOSTIC_LIMIT")?.toIntOrNull() ?: 100
}
/**
 * Diagnostic severity levels, ordered from least to most severe.
 * [sev] is the numeric severity written to the diagnostics relation.
 * NOTE: declaration order is significant — callers compare severities with
 * `<=` (anything up to [WarnHigh] is logged as "WARN", the rest as "ERROR").
 */
enum class Severity(val sev: Int) {
    /** Low-importance warnings. */
    WarnLow(1),
    /** Ordinary warnings. */
    Warn(2),
    /** High-importance warnings; the most severe level still logged as a warning. */
    WarnHigh(3),
    /** Minor extractor errors, with minimal impact on analysis. */
    ErrorLow(4),
    /** Most extractor errors, with local impact on analysis. */
    Error(5),
    /** Javac errors. */
    ErrorHigh(6),
    /** Severe extractor errors affecting a single source file. */
    ErrorSevere(7),
    /** Severe extractor errors likely to affect multiple source files. */
    ErrorGlobal(8)
}
/** A "currently extracting X" stack entry, echoed into diagnostic messages for context. */
data class ExtractorContext(val kind: String, val element: IrElement, val name: String, val loc: String)
/**
 * Process-wide logging backend shared by all [Logger] instances.
 * Writes log lines to a file in the extractor log directory (or stdout when
 * none is configured) and records diagnostics into TRAP via a [TrapWriter],
 * rate-limiting diagnostics per emitting code location using [logCounter].
 */
open class LoggerBase(val logCounter: LogCounter) {
    /** Stack of extraction contexts; unwound into "...while extracting" lines in diagnostics. */
    val extractorContextStack = Stack<ExtractorContext>()

    // Verbosity thresholds used below: error >= 1, warn >= 2, info >= 3,
    // trace/debug >= 4. Default 3.
    private val verbosity: Int
    init {
        verbosity = System.getenv("CODEQL_EXTRACTOR_KOTLIN_VERBOSITY")?.toIntOrNull() ?: 3
    }

    private val logStream: Writer
    init {
        // NOTE(review): reads the *JAVA* extractor's log-dir variable —
        // presumably because the Kotlin extractor runs as part of the Java
        // extractor; confirm this is intentional.
        val extractorLogDir = System.getenv("CODEQL_EXTRACTOR_JAVA_LOG_DIR")
        if (extractorLogDir == null || extractorLogDir == "") {
            logStream = OutputStreamWriter(System.out)
        } else {
            val logFile = File.createTempFile("kotlin-extractor.", ".log", File(extractorLogDir))
            logStream = FileWriter(logFile)
        }
    }

    /** Timestamp prefix for log lines; the trailing "K" marks Kotlin-extractor output. */
    private fun timestamp(): String {
        return "[${SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(Date())} K]"
    }

    /**
     * The first stack frame outside the logging classes, i.e. the code
     * location that emitted the current diagnostic; null if none found.
     */
    private fun getDiagnosticLocation(): String? {
        val st = Exception().stackTrace
        for(x in st) {
            when(x.className) {
                "com.github.codeql.LoggerBase",
                "com.github.codeql.Logger",
                "com.github.codeql.FileLogger" -> {}
                else -> {
                    return x.toString()
                }
            }
        }
        return null
    }

    // Index of the source file currently being extracted, and a per-file
    // sequence number for its diagnostics.
    private var file_number = -1
    private var file_number_diagnostic_number = 0

    /** Switches diagnostic attribution to source file `index`, resetting its sequence number. */
    fun setFileNumber(index: Int) {
        file_number = index
        file_number_diagnostic_number = 0
    }

    /**
     * Emits a diagnostic into the TRAP file and the log stream.
     * Diagnostics are counted per emitting code location; once a location
     * exceeds [LogCounter.diagnosticLimit], further ones are silently dropped
     * (a limit <= 0 disables capping). [mkLocationId] is deliberately a
     * lambda so no location is written for dropped diagnostics.
     */
    fun diagnostic(tw: TrapWriter, severity: Severity, msg: String, extraInfo: String?, locationString: String? = null, mkLocationId: () -> Label<DbLocation> = { tw.unknownLocation }) {
        val diagnosticLoc = getDiagnosticLocation()
        val diagnosticLocStr = if(diagnosticLoc == null) "<unknown location>" else diagnosticLoc
        val extraInfoStr = if (extraInfo == null) "" else (extraInfo + "\n")
        val suffix =
            if(diagnosticLoc == null) {
                " Missing caller information.\n"
            } else {
                // Count this diagnostic against its emitting location and
                // enforce the per-location cap.
                val count = logCounter.diagnosticCounts.getOrDefault(diagnosticLoc, 0) + 1
                logCounter.diagnosticCounts[diagnosticLoc] = count
                when {
                    logCounter.diagnosticLimit <= 0 -> ""
                    count == logCounter.diagnosticLimit -> " Limit reached for diagnostics from $diagnosticLoc.\n"
                    count > logCounter.diagnosticLimit -> return
                    else -> ""
                }
            }
        val fullMsgBuilder = StringBuilder()
        fullMsgBuilder.append(msg)
        fullMsgBuilder.append('\n')
        fullMsgBuilder.append(extraInfoStr)
        // Unwind the extraction-context stack, innermost first.
        val iter = extractorContextStack.listIterator(extractorContextStack.size)
        while (iter.hasPrevious()) {
            val x = iter.previous()
            fullMsgBuilder.append(" ...while extracting a ${x.kind} (${x.name}) at ${x.loc}\n")
        }
        fullMsgBuilder.append(suffix)
        val fullMsg = fullMsgBuilder.toString()
        val ts = timestamp()
        // We don't actually make the location until after the `return` above
        val locationId = mkLocationId()
        val diagLabel = tw.getFreshIdLabel<DbDiagnostic>()
        tw.writeDiagnostics(diagLabel, "CodeQL Kotlin extractor", severity.sev, "", msg, "$ts $fullMsg", locationId)
        tw.writeDiagnostic_for(diagLabel, StringLabel("compilation"), file_number, file_number_diagnostic_number++)
        val locStr = if (locationString == null) "" else "At " + locationString + ": "
        // Severity values up to WarnHigh are warnings; the rest are errors.
        val kind = if (severity <= Severity.WarnHigh) "WARN" else "ERROR"
        logStream.write("$ts [$kind] Diagnostic($diagnosticLocStr): $locStr$fullMsg")
    }

    /** Logs a TRACE line (verbosity >= 4), both as a TRAP comment and to the log stream. */
    fun trace(tw: TrapWriter, msg: String) {
        if (verbosity >= 4) {
            val fullMsg = "${timestamp()} [TRACE] $msg"
            tw.writeComment(fullMsg)
            logStream.write(fullMsg + "\n")
        }
    }

    /** Logs a DEBUG line; note it shares the trace threshold (verbosity >= 4). */
    fun debug(tw: TrapWriter, msg: String) {
        if (verbosity >= 4) {
            val fullMsg = "${timestamp()} [DEBUG] $msg"
            tw.writeComment(fullMsg)
            logStream.write(fullMsg + "\n")
        }
    }

    /** Logs an INFO line (verbosity >= 3). */
    fun info(tw: TrapWriter, msg: String) {
        if (verbosity >= 3) {
            val fullMsg = "${timestamp()} [INFO] $msg"
            tw.writeComment(fullMsg)
            logStream.write(fullMsg + "\n")
        }
    }

    /** Emits a warning diagnostic (verbosity >= 2). */
    fun warn(tw: TrapWriter, msg: String, extraInfo: String?) {
        if (verbosity >= 2) {
            diagnostic(tw, Severity.Warn, msg, extraInfo)
        }
    }

    /** Emits an error diagnostic (verbosity >= 1). */
    fun error(tw: TrapWriter, msg: String, extraInfo: String?) {
        if (verbosity >= 1) {
            diagnostic(tw, Severity.Error, msg, extraInfo)
        }
    }

    /** Reports, for each call site that hit the diagnostic cap, how many diagnostics it produced in total. */
    fun printLimitedDiagnosticCounts(tw: TrapWriter) {
        for((caller, count) in logCounter.diagnosticCounts) {
            if(count >= logCounter.diagnosticLimit) {
                val msg = "Total of $count diagnostics from $caller.\n"
                tw.writeComment(msg)
                logStream.write(msg)
            }
        }
    }

    fun flush() {
        logStream.flush()
    }

    fun close() {
        logStream.close()
    }
}
/**
 * Logger bound to a [TrapWriter]; formatting, rate limiting and output are
 * delegated to the shared [LoggerBase].
 *
 * Fix: removed the private `getDiagnosticLocation()` helper that duplicated
 * [LoggerBase]'s — it was never called within this class, and being private
 * it was unreachable from anywhere else, i.e. dead code.
 */
open class Logger(val loggerBase: LoggerBase, open val tw: TrapWriter) {
    /** Flushes both the trap writer and the underlying log stream. */
    fun flush() {
        tw.flush()
        loggerBase.flush()
    }

    fun trace(msg: String) {
        loggerBase.trace(tw, msg)
    }

    /** Traces `msg` with `exn`'s stack trace appended. */
    fun trace(msg: String, exn: Throwable) {
        trace(msg + "\n" + exn.stackTraceToString())
    }

    fun debug(msg: String) {
        loggerBase.debug(tw, msg)
    }

    fun info(msg: String) {
        loggerBase.info(tw, msg)
    }

    fun warn(msg: String, extraInfo: String?) {
        loggerBase.warn(tw, msg, extraInfo)
    }

    /** Warns with `exn`'s stack trace as the extra detail. */
    fun warn(msg: String, exn: Throwable) {
        warn(msg, exn.stackTraceToString())
    }

    fun warn(msg: String) {
        warn(msg, null)
    }

    fun error(msg: String, extraInfo: String?) {
        loggerBase.error(tw, msg, extraInfo)
    }

    fun error(msg: String) {
        error(msg, null)
    }

    /** Errors with `exn`'s stack trace as the extra detail. */
    fun error(msg: String, exn: Throwable) {
        error(msg, exn.stackTraceToString())
    }
}
/**
 * A [Logger] bound to a [FileTrapWriter], adding element-aware variants that
 * attach the element's source location to the diagnostic.
 */
class FileLogger(loggerBase: LoggerBase, override val tw: FileTrapWriter): Logger(loggerBase, tw) {
    /** Reports a warning annotated with `element`'s source location. */
    fun warnElement(msg: String, element: IrElement) {
        loggerBase.diagnostic(tw, Severity.Warn, msg, null, tw.getLocationString(element)) {
            tw.getLocation(element)
        }
    }

    /** Reports an error annotated with `element`'s source location. */
    fun errorElement(msg: String, element: IrElement) {
        loggerBase.diagnostic(tw, Severity.Error, msg, null, tw.getLocationString(element)) {
            tw.getLocation(element)
        }
    }
}

View File

@@ -0,0 +1,221 @@
package com.github.codeql.utils
import com.github.codeql.KotlinUsesExtractor
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.backend.common.ir.createImplicitParameterDeclarationWithWrappedDescriptor
import org.jetbrains.kotlin.descriptors.ClassKind
import org.jetbrains.kotlin.ir.builders.declarations.addConstructor
import org.jetbrains.kotlin.ir.builders.declarations.buildClass
import org.jetbrains.kotlin.ir.declarations.IrClass
import org.jetbrains.kotlin.ir.declarations.IrTypeParameter
import org.jetbrains.kotlin.ir.declarations.IrTypeParametersContainer
import org.jetbrains.kotlin.ir.declarations.impl.IrExternalPackageFragmentImpl
import org.jetbrains.kotlin.ir.declarations.impl.IrFactoryImpl
import org.jetbrains.kotlin.ir.expressions.IrConstructorCall
import org.jetbrains.kotlin.ir.expressions.impl.IrConstructorCallImpl
import org.jetbrains.kotlin.ir.symbols.IrTypeParameterSymbol
import org.jetbrains.kotlin.ir.symbols.impl.DescriptorlessExternalPackageFragmentSymbol
import org.jetbrains.kotlin.ir.types.*
import org.jetbrains.kotlin.ir.types.impl.IrSimpleTypeImpl
import org.jetbrains.kotlin.ir.types.impl.IrStarProjectionImpl
import org.jetbrains.kotlin.ir.types.impl.makeTypeProjection
import org.jetbrains.kotlin.ir.util.constructedClassType
import org.jetbrains.kotlin.ir.util.constructors
import org.jetbrains.kotlin.ir.util.parentAsClass
import org.jetbrains.kotlin.name.FqName
import org.jetbrains.kotlin.name.Name
import org.jetbrains.kotlin.types.Variance
/**
 * Substitutes [params] ↦ [arguments] within this type. Types that aren't
 * [IrSimpleType] carry no type arguments and are returned unchanged.
 */
fun IrType.substituteTypeArguments(params: List<IrTypeParameter>, arguments: List<IrTypeArgument>): IrType {
    if (this !is IrSimpleType)
        return this
    return substituteTypeArguments(params.map { it.symbol }.zip(arguments).toMap())
}
/**
 * Returns true if substituting `innerVariance T` into the context `outerVariance []` discards all knowledge about
 * what T could be.
 *
 * Note this throws away slightly more information than it could: for example, the projection "in (out List)" can refer to
 * any superclass of anything that implements List, which specifically excludes e.g. String, but can't be represented as
 * a type projection. The projection "out (in List)" on the other hand really is equivalent to "out Any?", which is to
 * say no bound at all.
 */
private fun conflictingVariance(outerVariance: Variance, innerVariance: Variance): Boolean =
    when (outerVariance) {
        Variance.IN_VARIANCE -> innerVariance == Variance.OUT_VARIANCE
        Variance.OUT_VARIANCE -> innerVariance == Variance.IN_VARIANCE
        else -> false
    }
/**
 * When substituting `innerVariance T` into the context `outerVariance []`, returns the variance part of the result
 * `resultVariance T`. We already know they don't conflict, so the outer
 * variance wins when present, otherwise the inner one (which may itself be
 * invariant) is used.
 */
private fun combineVariance(outerVariance: Variance, innerVariance: Variance): Variance =
    if (outerVariance != Variance.INVARIANT) outerVariance else innerVariance
/**
 * Substitutes the simple type `t` appearing in a hole of variance
 * [outerVariance]. If `t` is itself a mapped type parameter, the mapped
 * argument is used (collapsing to a star projection when its variance
 * conflicts with the hole's, and combining variances/nullability otherwise);
 * if not, substitution recurses into `t`'s own type arguments.
 */
private fun subProjectedType(substitutionMap: Map<IrTypeParameterSymbol, IrTypeArgument>, t: IrSimpleType, outerVariance: Variance): IrTypeArgument =
    substitutionMap[t.classifier]?.let { substitutedTypeArg ->
        if (substitutedTypeArg is IrTypeProjection) {
            if (conflictingVariance(outerVariance, substitutedTypeArg.variance))
                IrStarProjectionImpl
            else {
                // Preserve `t`'s nullability on the substituted type.
                val newProjectedType = substitutedTypeArg.type.let { if (t.hasQuestionMark) it.withHasQuestionMark(true) else it }
                val newVariance = combineVariance(outerVariance, substitutedTypeArg.variance)
                makeTypeProjection(newProjectedType, newVariance)
            }
        } else {
            // A star projection (or other non-projection argument) passes through as-is.
            substitutedTypeArg
        }
    } ?: makeTypeProjection(t.substituteTypeArguments(substitutionMap), outerVariance)
/**
 * Applies [substitutionMap] to each type argument of this type, preserving
 * the classifier, nullability and annotations. A no-op for an empty map.
 */
fun IrSimpleType.substituteTypeArguments(substitutionMap: Map<IrTypeParameterSymbol, IrTypeArgument>): IrSimpleType {
    if (substitutionMap.isEmpty()) return this
    val substitutedArgs = arguments.map { arg ->
        if (arg !is IrTypeProjection)
            arg
        else when (val projected = arg.type) {
            is IrSimpleType -> subProjectedType(substitutionMap, projected, arg.variance)
            else -> arg
        }
    }
    return IrSimpleTypeImpl(
        classifier,
        hasQuestionMark,
        substitutedArgs,
        annotations
    )
}
/** The upper bound this type argument guarantees (star projections and `in` projections bound only to Any/Any?). */
fun IrTypeArgument.upperBound(context: IrPluginContext): IrType =
    when (this) {
        is IrTypeProjection ->
            if (this.variance == Variance.IN_VARIANCE)
                // An in-projection only constrains from below.
                if (this.type.isNullable()) context.irBuiltIns.anyNType else context.irBuiltIns.anyType
            else
                this.type
        // Star projections (and anything unrecognised) give no upper bound.
        else -> context.irBuiltIns.anyNType
    }
/** The lower bound this type argument guarantees (star projections and `out` projections bound only to Nothing/Nothing?). */
fun IrTypeArgument.lowerBound(context: IrPluginContext): IrType =
    when (this) {
        is IrTypeProjection ->
            if (this.variance == Variance.OUT_VARIANCE)
                // An out-projection only constrains from above.
                if (this.type.isNullable()) context.irBuiltIns.nothingNType else context.irBuiltIns.nothingType
            else
                this.type
        // Star projections (and anything unrecognised) give no lower bound.
        else -> context.irBuiltIns.nothingType
    }
/**
 * Substitutes this type as a whole: if the type itself is a mapped type
 * parameter, it is replaced by the mapped argument's upper bound (in RETURN
 * context) or lower bound (otherwise); if not, substitution is applied to
 * its type arguments. A null [substitutionMap] leaves the type unchanged.
 */
fun IrType.substituteTypeAndArguments(substitutionMap: Map<IrTypeParameterSymbol, IrTypeArgument>?, useContext: KotlinUsesExtractor.TypeContext, pluginContext: IrPluginContext): IrType =
    substitutionMap?.let { substMap ->
        this.classifierOrNull?.let { typeClassifier ->
            substMap[typeClassifier]?.let {
                when(useContext) {
                    KotlinUsesExtractor.TypeContext.RETURN -> it.upperBound(pluginContext)
                    else -> it.lowerBound(pluginContext)
                }
            // NOTE(review): unchecked assumption that any type with a
            // classifier is an IrSimpleType — confirm this cast can't fail.
            } ?: (this as IrSimpleType).substituteTypeArguments(substMap)
        } ?: this
    } ?: this
/**
 * Lazily-built constructor call for the compiler-internal `kotlin.internal.ir.RawType`
 * annotation, used to mark types as raw (Java-style unparameterised generics).
 */
object RawTypeAnnotation {
    // Much of this is taken from JvmGeneratorExtensionsImpl.kt, which is not easily accessible in plugin context.
    // The constants "kotlin.internal.ir" and "RawType" could be referred to symbolically, but they move package
    // between different versions of the Kotlin compiler.
    val annotationConstructor: IrConstructorCall by lazy {
        val irInternalPackage = FqName("kotlin.internal.ir")
        // Synthetic package fragment to host the hand-built annotation class.
        val parent = IrExternalPackageFragmentImpl(
            DescriptorlessExternalPackageFragmentSymbol(),
            irInternalPackage
        )
        val annoClass = IrFactoryImpl.buildClass {
            kind = ClassKind.ANNOTATION_CLASS
            name = irInternalPackage.child(Name.identifier("RawType")).shortName()
        }.apply {
            createImplicitParameterDeclarationWithWrappedDescriptor()
            this.parent = parent
            addConstructor {
                isPrimary = true
            }
        }
        val constructor = annoClass.constructors.single()
        IrConstructorCallImpl.fromSymbolOwner(
            constructor.constructedClassType,
            constructor.symbol
        )
    }
}
/**
 * Converts this type to its raw form: class types with type arguments gain
 * the internal RawType annotation; type parameters decay to the raw form of
 * their first supertype; anything else is returned unchanged.
 */
fun IrType.toRawType(): IrType {
    if (this !is IrSimpleType)
        return this
    return when (val owner = this.classifier.owner) {
        is IrClass ->
            if (this.arguments.isEmpty())
                this
            else
                this.addAnnotations(listOf(RawTypeAnnotation.annotationConstructor))
        is IrTypeParameter -> owner.superTypes[0].toRawType()
        else -> this
    }
}
/** The raw type of this class: the class with no arguments, annotated as raw when it is generic. */
fun IrClass.toRawType(): IrType {
    val bare = typeWith(listOf())
    if (typeParameters.isEmpty())
        return bare
    return bare.addAnnotations(listOf(RawTypeAnnotation.annotationConstructor))
}
/**
 * Returns this projection with the projected simple type's nullability set
 * to [b]; star projections and non-simple projected types pass through
 * unchanged, as does a projection whose nullability already matches.
 */
fun IrTypeArgument.withQuestionMark(b: Boolean): IrTypeArgument {
    if (this !is IrTypeProjection)
        return this
    val projected = this.type
    if (projected !is IrSimpleType || projected.hasQuestionMark == b)
        return this
    return makeTypeProjection(projected.withHasQuestionMark(b), this.variance)
}
/** A type-rewriting hook: maps a type, given its use context and the plugin context, to a replacement type. */
typealias TypeSubstitution = (IrType, KotlinUsesExtractor.TypeContext, IrPluginContext) -> IrType
// Returns true if type is C<T1, T2, ...> where C is declared `class C<T1, T2, ...> { ... }`
// — i.e. every argument is an invariant reference to the corresponding own
// type parameter, recursively including any enclosing class' parameters
// (which occupy the tail of `args`).
fun isUnspecialised(paramsContainer: IrTypeParametersContainer, args: List<IrTypeArgument>): Boolean {
    // Each of this container's own parameters must be matched by an
    // invariant projection of that very parameter.
    val unspecialisedHere = paramsContainer.typeParameters.zip(args).all { paramAndArg ->
        (paramAndArg.second as? IrTypeProjection)?.let {
            // Type arg refers to the class' own type parameter?
            it.variance == Variance.INVARIANT &&
            it.type.classifierOrNull?.owner === paramAndArg.first
        } ?: false
    }
    // Arguments beyond this container's own parameters belong to the
    // enclosing class, which must itself be unspecialised.
    val remainingArgs = args.drop(paramsContainer.typeParameters.size)
    val parent = paramsContainer.parent as? IrTypeParametersContainer
    val parentUnspecialised = when {
        remainingArgs.isEmpty() -> true
        parent == null -> false
        parent !is IrClass -> false
        else -> isUnspecialised(paramsContainer.parentAsClass, remainingArgs)
    }
    return unspecialisedHere && parentUnspecialised
}
// Returns true if type is C<T1, T2, ...> where C is declared `class C<T1, T2, ...> { ... }`
fun isUnspecialised(type: IrSimpleType): Boolean {
    val cls = type.classifier.owner as? IrClass ?: return false
    return isUnspecialised(cls, type.arguments)
}

View File

@@ -0,0 +1,11 @@
package com.github.codeql.utils.versions
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.psi.KtFile
/**
 * Version-independent facade over the compiler's PSI<->IR source mapping;
 * implemented per compiler version since the underlying API has moved.
 */
interface Psi2IrFacade {
    /** The [KtFile] that `irFile` was produced from, if available. */
    fun getKtFile(irFile: IrFile): KtFile?
    /** The PSI element `irElement` (within `irFile`) was produced from, if available. */
    fun findPsiElement(irElement: IrElement, irFile: IrFile): PsiElement?
}

View File

@@ -0,0 +1,17 @@
package com.github.codeql.utils.versions
import com.github.codeql.KotlinUsesExtractor
import com.github.codeql.Severity
import org.jetbrains.kotlin.ir.ObsoleteDescriptorBasedAPI
import org.jetbrains.kotlin.ir.util.DeclarationStubGenerator
import org.jetbrains.kotlin.ir.util.SymbolTable
/**
 * Builds an IR stub via [generateStub] using the descriptor-based
 * [DeclarationStubGenerator], or logs an error and returns null when the
 * plugin context's symbol table isn't the expected [SymbolTable] implementation.
 */
@OptIn(ObsoleteDescriptorBasedAPI::class)
fun <TIrStub> KotlinUsesExtractor.getIrStubFromDescriptor(generateStub: (DeclarationStubGenerator) -> TIrStub) : TIrStub? =
    (pluginContext.symbolTable as? SymbolTable) ?.let {
        val stubGenerator = DeclarationStubGenerator(pluginContext.moduleDescriptor, it, pluginContext.languageVersionSettings)
        generateStub(stubGenerator)
    } ?: run {
        logger.error("Plugin context has no symbol table, couldn't get IR stub")
        null
    }

View File

@@ -0,0 +1,5 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.ir.SourceManager
/** Version alias: in this compiler version, file entries are [SourceManager.FileEntry]. */
typealias FileEntry = SourceManager.FileEntry

View File

@@ -0,0 +1,8 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.ir.declarations.IrClass
/** Returns a function mapping arity to the builtin `FunctionN` class, via this version's function factory. */
fun functionN(pluginContext: IrPluginContext): (Int) -> IrClass =
    pluginContext.irBuiltIns.functionFactory::functionN

View File

@@ -0,0 +1,21 @@
package com.github.codeql.utils.versions
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.psi.KtFile
import org.jetbrains.kotlin.psi2ir.PsiSourceManager
/** [Psi2IrFacade] backed by a shared [PsiSourceManager] instance (this compiler version's API). */
class Psi2Ir : Psi2IrFacade {
    companion object {
        // One manager shared by all Psi2Ir instances.
        val psiManager = PsiSourceManager()
    }

    override fun getKtFile(irFile: IrFile): KtFile? = psiManager.getKtFile(irFile)

    override fun findPsiElement(irElement: IrElement, irFile: IrFile): PsiElement? =
        psiManager.findPsiElement(irElement, irFile)
}

View File

@@ -0,0 +1,7 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.jvm.codegen.isRawType
import org.jetbrains.kotlin.ir.types.IrSimpleType
// Version shim delegating to the compiler's own `isRawType`.
// NOTE(review): `this.isRawType()` is intended to resolve to the explicitly
// imported org.jetbrains.kotlin.backend.jvm.codegen.isRawType (explicit
// imports taking precedence over this same-file declaration), making this
// delegation rather than recursion — confirm if the import changes.
fun IrSimpleType.isRawType() = this.isRawType()

View File

@@ -0,0 +1,18 @@
package com.github.codeql.utils.versions
import com.github.codeql.KotlinUsesExtractor
import com.github.codeql.Severity
import org.jetbrains.kotlin.ir.ObsoleteDescriptorBasedAPI
import org.jetbrains.kotlin.ir.util.DeclarationStubGenerator
import org.jetbrains.kotlin.ir.util.SymbolTable
import org.jetbrains.kotlin.psi2ir.generators.DeclarationStubGeneratorImpl
/**
 * Builds an IR stub via [generateStub] using this compiler version's
 * [DeclarationStubGeneratorImpl] (constructed with language-version settings),
 * or logs an error and returns null when the plugin context's symbol table
 * isn't the expected [SymbolTable] implementation.
 */
@OptIn(ObsoleteDescriptorBasedAPI::class)
fun <TIrStub> KotlinUsesExtractor.getIrStubFromDescriptor(generateStub: (DeclarationStubGenerator) -> TIrStub) : TIrStub? =
    (pluginContext.symbolTable as? SymbolTable) ?.let {
        val stubGenerator = DeclarationStubGeneratorImpl(pluginContext.moduleDescriptor, it, pluginContext.languageVersionSettings)
        generateStub(stubGenerator)
    } ?: run {
        logger.error("Plugin context has no symbol table, couldn't get IR stub")
        null
    }

View File

@@ -0,0 +1,5 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.ir.IrFileEntry
/** Version alias: in this compiler version, file entries are [IrFileEntry]. */
typealias FileEntry = IrFileEntry

View File

@@ -0,0 +1,8 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.ir.declarations.IrClass
/** Returns a function mapping arity to the builtin `FunctionN` class, via this version's function factory. */
fun functionN(pluginContext: IrPluginContext): (Int) -> IrClass =
    pluginContext.irBuiltIns.functionFactory::functionN

View File

@@ -0,0 +1,18 @@
package com.github.codeql.utils.versions
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.backend.common.psi.PsiSourceManager
import org.jetbrains.kotlin.backend.jvm.ir.getKtFile
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.psi.KtFile
/** [Psi2IrFacade] using this compiler version's static [PsiSourceManager] helpers. */
class Psi2Ir : Psi2IrFacade {
    override fun getKtFile(irFile: IrFile): KtFile? = irFile.getKtFile()

    override fun findPsiElement(irElement: IrElement, irFile: IrFile): PsiElement? =
        PsiSourceManager.findPsiElement(irElement, irFile)
}

View File

@@ -0,0 +1,7 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.jvm.codegen.isRawType
import org.jetbrains.kotlin.ir.types.IrSimpleType
// Version shim delegating to the compiler's own `isRawType`.
// NOTE(review): `this.isRawType()` is intended to resolve to the explicitly
// imported org.jetbrains.kotlin.backend.jvm.codegen.isRawType (explicit
// imports taking precedence over this same-file declaration), making this
// delegation rather than recursion — confirm if the import changes.
fun IrSimpleType.isRawType() = this.isRawType()

View File

@@ -0,0 +1,18 @@
package com.github.codeql.utils.versions
import com.github.codeql.KotlinUsesExtractor
import com.github.codeql.Severity
import org.jetbrains.kotlin.ir.ObsoleteDescriptorBasedAPI
import org.jetbrains.kotlin.ir.util.DeclarationStubGenerator
import org.jetbrains.kotlin.ir.util.SymbolTable
import org.jetbrains.kotlin.psi2ir.generators.DeclarationStubGeneratorImpl
/**
 * Builds an IR stub via [generateStub] using this compiler version's
 * [DeclarationStubGeneratorImpl] (constructed with `irBuiltIns`), or logs an
 * error and returns null when the plugin context's symbol table isn't the
 * expected [SymbolTable] implementation.
 */
@OptIn(ObsoleteDescriptorBasedAPI::class)
fun <TIrStub> KotlinUsesExtractor.getIrStubFromDescriptor(generateStub: (DeclarationStubGenerator) -> TIrStub) : TIrStub? =
    (pluginContext.symbolTable as? SymbolTable) ?.let {
        val stubGenerator = DeclarationStubGeneratorImpl(pluginContext.moduleDescriptor, it, pluginContext.irBuiltIns)
        generateStub(stubGenerator)
    } ?: run {
        logger.error("Plugin context has no symbol table, couldn't get IR stub")
        null
    }

View File

@@ -0,0 +1,5 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.ir.IrFileEntry
/** Version alias: in this compiler version, file entries are [IrFileEntry]. */
typealias FileEntry = IrFileEntry

View File

@@ -0,0 +1,5 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
/** Returns a function mapping arity to the builtin `FunctionN` class; this compiler version exposes it directly on `irBuiltIns`. */
fun functionN(pluginContext: IrPluginContext) = pluginContext.irBuiltIns::functionN

View File

@@ -0,0 +1,18 @@
package com.github.codeql.utils.versions
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.backend.common.psi.PsiSourceManager
import org.jetbrains.kotlin.backend.jvm.ir.getKtFile
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.psi.KtFile
/** [Psi2IrFacade] using this compiler version's static [PsiSourceManager] helpers. */
class Psi2Ir : Psi2IrFacade {
    override fun getKtFile(irFile: IrFile): KtFile? = irFile.getKtFile()

    override fun findPsiElement(irElement: IrElement, irFile: IrFile): PsiElement? =
        PsiSourceManager.findPsiElement(irElement, irFile)
}

View File

@@ -0,0 +1,7 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.jvm.codegen.isRawType
import org.jetbrains.kotlin.ir.types.IrSimpleType
// Version shim delegating to the compiler's own `isRawType`.
// NOTE(review): `this.isRawType()` is intended to resolve to the explicitly
// imported org.jetbrains.kotlin.backend.jvm.codegen.isRawType (explicit
// imports taking precedence over this same-file declaration), making this
// delegation rather than recursion — confirm if the import changes.
fun IrSimpleType.isRawType() = this.isRawType()

View File

@@ -0,0 +1,18 @@
package com.github.codeql.utils.versions
import com.github.codeql.KotlinUsesExtractor
import com.github.codeql.Severity
import org.jetbrains.kotlin.ir.ObsoleteDescriptorBasedAPI
import org.jetbrains.kotlin.ir.util.DeclarationStubGenerator
import org.jetbrains.kotlin.ir.util.SymbolTable
import org.jetbrains.kotlin.psi2ir.generators.DeclarationStubGeneratorImpl
/**
 * Builds an IR stub via [generateStub] using this compiler version's
 * [DeclarationStubGeneratorImpl] (constructed with `irBuiltIns`), or logs an
 * error and returns null when the plugin context's symbol table isn't the
 * expected [SymbolTable] implementation.
 */
@OptIn(ObsoleteDescriptorBasedAPI::class)
fun <TIrStub> KotlinUsesExtractor.getIrStubFromDescriptor(generateStub: (DeclarationStubGenerator) -> TIrStub) : TIrStub? =
    (pluginContext.symbolTable as? SymbolTable) ?.let {
        val stubGenerator = DeclarationStubGeneratorImpl(pluginContext.moduleDescriptor, it, pluginContext.irBuiltIns)
        generateStub(stubGenerator)
    } ?: run {
        logger.error("Plugin context has no symbol table, couldn't get IR stub")
        null
    }

View File

@@ -0,0 +1,5 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.ir.IrFileEntry
/** Version alias: in this compiler version, file entries are [IrFileEntry]. */
typealias FileEntry = IrFileEntry

View File

@@ -0,0 +1,5 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
/** Returns a function mapping arity to the builtin `FunctionN` class; this compiler version exposes it directly on `irBuiltIns`. */
fun functionN(pluginContext: IrPluginContext) = pluginContext.irBuiltIns::functionN

View File

@@ -0,0 +1,18 @@
package com.github.codeql.utils.versions
import com.intellij.psi.PsiElement
import org.jetbrains.kotlin.backend.common.psi.PsiSourceManager
import org.jetbrains.kotlin.backend.jvm.ir.getKtFile
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.IrFile
import org.jetbrains.kotlin.psi.KtFile
/** [Psi2IrFacade] using this compiler version's static [PsiSourceManager] helpers. */
class Psi2Ir : Psi2IrFacade {
    override fun getKtFile(irFile: IrFile): KtFile? = irFile.getKtFile()

    override fun findPsiElement(irElement: IrElement, irFile: IrFile): PsiElement? =
        PsiSourceManager.findPsiElement(irElement, irFile)
}

View File

@@ -0,0 +1,7 @@
package com.github.codeql.utils.versions
import org.jetbrains.kotlin.backend.jvm.ir.isRawType
import org.jetbrains.kotlin.ir.types.IrSimpleType
// Version shim delegating to the compiler's own `isRawType`.
// NOTE(review): `this.isRawType()` is intended to resolve to the explicitly
// imported org.jetbrains.kotlin.backend.jvm.ir.isRawType (explicit imports
// taking precedence over this same-file declaration), making this delegation
// rather than recursion — confirm if the import changes.
fun IrSimpleType.isRawType() = this.isRawType()

View File

@@ -0,0 +1 @@
com.github.codeql.KotlinExtractorCommandLineProcessor

Some files were not shown because too many files have changed in this diff Show More