Merge pull request #10110 from igfoo/igfoo/compression

Kotlin: Add support for TRAP compression
This commit is contained in:
Ian Lynagh
2022-08-24 10:37:20 +01:00
committed by GitHub
3 changed files with 195 additions and 28 deletions

View File

@@ -5,11 +5,20 @@ import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.ir.util.*
import org.jetbrains.kotlin.ir.IrElement
import java.io.BufferedReader
import java.io.BufferedWriter
import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.InputStreamReader
import java.io.OutputStreamWriter
import java.lang.management.*
import java.nio.file.Files
import java.nio.file.Paths
import java.util.zip.GZIPInputStream
import java.util.zip.GZIPOutputStream
import com.semmle.util.files.FileUtil
import kotlin.system.exitProcess
@@ -89,8 +98,29 @@ class KotlinExtractorExtension(
val startTimeMs = System.currentTimeMillis()
// This default should be kept in sync with com.semmle.extractor.java.interceptors.KotlinInterceptor.initializeExtractionContext
val trapDir = File(System.getenv("CODEQL_EXTRACTOR_JAVA_TRAP_DIR").takeUnless { it.isNullOrEmpty() } ?: "kotlin-extractor/trap")
val compression_env_var = "CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"
val compression_option = System.getenv(compression_env_var)
val defaultCompression = Compression.GZIP
val (compression, compressionWarning) =
if (compression_option == null) {
Pair(defaultCompression, null)
} else {
try {
@OptIn(kotlin.ExperimentalStdlibApi::class) // Annotation required by kotlin versions < 1.5
val requested_compression = Compression.valueOf(compression_option.uppercase())
if (requested_compression == Compression.BROTLI) {
Pair(Compression.GZIP, "Kotlin extractor doesn't support Brotli compression. Using GZip instead.")
} else {
Pair(requested_compression, null)
}
} catch (e: IllegalArgumentException) {
Pair(defaultCompression,
"Unsupported compression type (\$$compression_env_var) \"$compression_option\". Supported values are ${Compression.values().joinToString()}")
}
}
// The invocation TRAP file will already have been started
// before the plugin is run, so we open it in append mode.
// before the plugin is run, so we always use no compression
// and we open it in append mode.
FileOutputStream(File(invocationTrapFile), true).bufferedWriter().use { invocationTrapFileBW ->
val invocationExtractionProblems = ExtractionProblems()
val lm = TrapLabelManager()
@@ -113,6 +143,10 @@ class KotlinExtractorExtension(
if (System.getenv("CODEQL_EXTRACTOR_JAVA_KOTLIN_DUMP") == "true") {
logger.info("moduleFragment:\n" + moduleFragment.dump())
}
if (compressionWarning != null) {
logger.warn(compressionWarning)
}
val primitiveTypeMapping = PrimitiveTypeMapping(logger, pluginContext)
// FIXME: FileUtil expects a static global logger
// which should be provided by SLF4J's factory facility. For now we set it here.
@@ -125,7 +159,7 @@ class KotlinExtractorExtension(
val fileTrapWriter = tw.makeSourceFileTrapWriter(file, true)
loggerBase.setFileNumber(index)
fileTrapWriter.writeCompilation_compiling_files(compilation, index, fileTrapWriter.fileId)
doFile(fileExtractionProblems, invocationTrapFile, fileTrapWriter, checkTrapIdentical, loggerBase, trapDir, srcDir, file, primitiveTypeMapping, pluginContext, globalExtensionState)
doFile(compression, fileExtractionProblems, invocationTrapFile, fileTrapWriter, checkTrapIdentical, loggerBase, trapDir, srcDir, file, primitiveTypeMapping, pluginContext, globalExtensionState)
fileTrapWriter.writeCompilation_compiling_files_completed(compilation, index, fileExtractionProblems.extractionResult())
}
loggerBase.printLimitedDiagnosticCounts(tw)
@@ -218,12 +252,12 @@ This function determines whether 2 TRAP files should be considered to be
equivalent. It returns `true` iff all of their non-comment lines are
identical.
*/
private fun equivalentTrap(f1: File, f2: File): Boolean {
f1.bufferedReader().use { bw1 ->
f2.bufferedReader().use { bw2 ->
private fun equivalentTrap(r1: BufferedReader, r2: BufferedReader): Boolean {
r1.use { br1 ->
r2.use { br2 ->
while(true) {
val l1 = bw1.readLine()
val l2 = bw2.readLine()
val l1 = br1.readLine()
val l2 = br2.readLine()
if (l1 == null && l2 == null) {
return true
} else if (l1 == null || l2 == null) {
@@ -239,6 +273,7 @@ private fun equivalentTrap(f1: File, f2: File): Boolean {
}
private fun doFile(
compression: Compression,
fileExtractionProblems: FileExtractionProblems,
invocationTrapFile: String,
fileTrapWriter: FileTrapWriter,
@@ -270,15 +305,14 @@ private fun doFile(
}
srcTmpFile.renameTo(dbSrcFilePath.toFile())
val trapFile = File("$dbTrapDir/$srcFilePath.trap")
val trapFileDir = trapFile.parentFile
trapFileDir.mkdirs()
val trapFileName = "$dbTrapDir/$srcFilePath.trap"
val trapFileWriter = getTrapFileWriter(compression, logger, trapFileName)
if (checkTrapIdentical || !trapFile.exists()) {
val trapTmpFile = File.createTempFile("$srcFilePath.", ".trap.tmp", trapFileDir)
if (checkTrapIdentical || !trapFileWriter.exists()) {
trapFileWriter.makeParentDirectory()
try {
trapTmpFile.bufferedWriter().use { trapFileBW ->
trapFileWriter.getTempWriter().use { trapFileBW ->
// We want our comments to be the first thing in the file,
// so start off with a mere TrapWriter
val tw = TrapWriter(loggerBase, TrapLabelManager(), trapFileBW, fileTrapWriter)
@@ -294,31 +328,114 @@ private fun doFile(
externalDeclExtractor.extractExternalClasses()
}
if (checkTrapIdentical && trapFile.exists()) {
if (equivalentTrap(trapTmpFile, trapFile)) {
if (!trapTmpFile.delete()) {
logger.warn("Failed to delete $trapTmpFile")
if (checkTrapIdentical && trapFileWriter.exists()) {
if (equivalentTrap(trapFileWriter.getTempReader(), trapFileWriter.getRealReader())) {
trapFileWriter.deleteTemp()
} else {
trapFileWriter.renameTempToDifferent()
}
} else {
val trapDifferentFile = File.createTempFile("$srcFilePath.", ".trap.different", dbTrapDir)
if (trapTmpFile.renameTo(trapDifferentFile)) {
logger.warn("TRAP difference: $trapFile vs $trapDifferentFile")
} else {
logger.warn("Failed to rename $trapTmpFile to $trapFile")
}
}
} else {
if (!trapTmpFile.renameTo(trapFile)) {
logger.warn("Failed to rename $trapTmpFile to $trapFile")
}
trapFileWriter.renameTempToReal()
}
// We catch Throwable rather than Exception, as we want to
// continue trying to extract everything else even if we get a
// stack overflow or an assertion failure in one file.
} catch (e: Throwable) {
logger.error("Failed to extract '$srcFilePath'. Partial TRAP file location is $trapTmpFile", e)
logger.error("Failed to extract '$srcFilePath'. " + trapFileWriter.debugInfo(), e)
context.clear()
fileExtractionProblems.setNonRecoverableProblem()
}
}
}
enum class Compression { NONE, GZIP, BROTLI }
private fun getTrapFileWriter(compression: Compression, logger: FileLogger, trapFileName: String): TrapFileWriter {
return when (compression) {
Compression.NONE -> NonCompressedTrapFileWriter(logger, trapFileName)
Compression.GZIP -> GZipCompressedTrapFileWriter(logger, trapFileName)
Compression.BROTLI -> throw Exception("Brotli compression is not supported by the Kotlin extractor")
}
}
private abstract class TrapFileWriter(val logger: FileLogger, trapName: String, val extension: String) {
private val realFile = File(trapName + extension)
private val parentDir = realFile.parentFile
lateinit private var tempFile: File
fun debugInfo(): String {
if (this::tempFile.isInitialized) {
return "Partial TRAP file location is $tempFile"
} else {
return "Temporary file not yet created."
}
}
fun makeParentDirectory() {
parentDir.mkdirs()
}
fun exists(): Boolean {
return realFile.exists()
}
abstract protected fun getReader(file: File): BufferedReader
abstract protected fun getWriter(file: File): BufferedWriter
fun getRealReader(): BufferedReader {
return getReader(realFile)
}
fun getTempReader(): BufferedReader {
return getReader(tempFile)
}
fun getTempWriter(): BufferedWriter {
if (this::tempFile.isInitialized) {
logger.error("Temp writer reinitiailised for $realFile")
}
tempFile = File.createTempFile(realFile.getName() + ".", ".trap.tmp" + extension, parentDir)
return getWriter(tempFile)
}
fun deleteTemp() {
if (!tempFile.delete()) {
logger.warn("Failed to delete $tempFile")
}
}
fun renameTempToDifferent() {
val trapDifferentFile = File.createTempFile(realFile.getName() + ".", ".trap.different" + extension, parentDir)
if (tempFile.renameTo(trapDifferentFile)) {
logger.warn("TRAP difference: $realFile vs $trapDifferentFile")
} else {
logger.warn("Failed to rename $tempFile to $realFile")
}
}
fun renameTempToReal() {
if (!tempFile.renameTo(realFile)) {
logger.warn("Failed to rename $tempFile to $realFile")
}
}
}
private class NonCompressedTrapFileWriter(logger: FileLogger, trapName: String): TrapFileWriter(logger, trapName, "") {
override protected fun getReader(file: File): BufferedReader {
return file.bufferedReader()
}
override protected fun getWriter(file: File): BufferedWriter {
return file.bufferedWriter()
}
}
private class GZipCompressedTrapFileWriter(logger: FileLogger, trapName: String): TrapFileWriter(logger, trapName, ".gz") {
override protected fun getReader(file: File): BufferedReader {
return BufferedReader(InputStreamReader(GZIPInputStream(BufferedInputStream(FileInputStream(file)))))
}
override protected fun getWriter(file: File): BufferedWriter {
return BufferedWriter(OutputStreamWriter(GZIPOutputStream(BufferedOutputStream(FileOutputStream(file)))))
}
}

View File

@@ -0,0 +1,2 @@
class Test {
}

View File

@@ -0,0 +1,48 @@
from create_database_utils import *
def check_extension(directory, expected_extension):
if expected_extension == '.trap':
# We start TRAP files with a comment
expected_start = b'//'
elif expected_extension == '.trap.gz':
# The GZip magic numbers
expected_start = b'\x1f\x8b'
else:
raise Exception('Unknown expected extension ' + expected_extension)
count = check_extension_worker(directory, expected_extension, expected_start)
if count != 1:
raise Exception('Expected 1 relevant file, but found ' + str(count) + ' in ' + directory)
def check_extension_worker(directory, expected_extension, expected_start):
count = 0
for f in os.listdir(directory):
x = os.path.join(directory, f)
if os.path.isdir(x):
count += check_extension_worker(x, expected_extension, expected_start)
else:
if f.startswith('test.kt') and not f.endswith('.set'):
if f.endswith(expected_extension):
with open(x, 'rb') as f_in:
content = f_in.read()
if content.startswith(expected_start):
count += 1
else:
raise Exception('Unexpected start to content of ' + x)
else:
raise Exception('Expected test.kt TRAP file to have extension ' + expected_extension + ', but found ' + x)
return count
run_codeql_database_create(['kotlinc test.kt'], test_db="default-db", db=None, lang="java")
check_extension('default-db/trap', '.trap.gz')
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "nOnE"
run_codeql_database_create(['kotlinc test.kt'], test_db="none-db", db=None, lang="java")
check_extension('none-db/trap', '.trap')
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "gzip"
run_codeql_database_create(['kotlinc test.kt'], test_db="gzip-db", db=None, lang="java")
check_extension('gzip-db/trap', '.trap.gz')
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "brotli"
run_codeql_database_create(['kotlinc test.kt'], test_db="brotli-db", db=None, lang="java")
check_extension('brotli-db/trap', '.trap.gz')
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "invalidValue"
run_codeql_database_create(['kotlinc test.kt'], test_db="invalid-db", db=None, lang="java")
check_extension('invalid-db/trap', '.trap.gz')