From 62779944e85240732f3f50e99e53c96cd6441efa Mon Sep 17 00:00:00 2001 From: Ian Lynagh Date: Wed, 17 Aug 2022 16:14:35 +0100 Subject: [PATCH 1/2] Kotlin: Compress TRAP files The Kotlin extractor can now be told to write TRAP with no compression, gzip compression or Brotli compression - although Brotli is not yet supported and it will fall back to gzip. The invocation TRAP file is a bit more complicated, as it's already been started before the extractor starts. For now that continues to always be uncompressed. --- .../main/kotlin/KotlinExtractorExtension.kt | 173 +++++++++++++++--- 1 file changed, 145 insertions(+), 28 deletions(-) diff --git a/java/kotlin-extractor/src/main/kotlin/KotlinExtractorExtension.kt b/java/kotlin-extractor/src/main/kotlin/KotlinExtractorExtension.kt index 55e94135fee..d6f25d73efa 100644 --- a/java/kotlin-extractor/src/main/kotlin/KotlinExtractorExtension.kt +++ b/java/kotlin-extractor/src/main/kotlin/KotlinExtractorExtension.kt @@ -5,11 +5,20 @@ import org.jetbrains.kotlin.backend.common.extensions.IrPluginContext import org.jetbrains.kotlin.ir.declarations.* import org.jetbrains.kotlin.ir.util.* import org.jetbrains.kotlin.ir.IrElement +import java.io.BufferedReader +import java.io.BufferedWriter +import java.io.BufferedInputStream +import java.io.BufferedOutputStream import java.io.File +import java.io.FileInputStream import java.io.FileOutputStream +import java.io.InputStreamReader +import java.io.OutputStreamWriter import java.lang.management.* import java.nio.file.Files import java.nio.file.Paths +import java.util.zip.GZIPInputStream +import java.util.zip.GZIPOutputStream import com.semmle.util.files.FileUtil import kotlin.system.exitProcess @@ -89,8 +98,29 @@ class KotlinExtractorExtension( val startTimeMs = System.currentTimeMillis() // This default should be kept in sync with com.semmle.extractor.java.interceptors.KotlinInterceptor.initializeExtractionContext val trapDir = 
File(System.getenv("CODEQL_EXTRACTOR_JAVA_TRAP_DIR").takeUnless { it.isNullOrEmpty() } ?: "kotlin-extractor/trap") + val compression_env_var = "CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION" + val compression_option = System.getenv(compression_env_var) + val defaultCompression = Compression.GZIP + val (compression, compressionWarning) = + if (compression_option == null) { + Pair(defaultCompression, null) + } else { + try { + @OptIn(kotlin.ExperimentalStdlibApi::class) // Annotation required by kotlin versions < 1.5 + val requested_compression = Compression.valueOf(compression_option.uppercase()) + if (requested_compression == Compression.BROTLI) { + Pair(Compression.GZIP, "Kotlin extractor doesn't support Brotli compression. Using GZip instead.") + } else { + Pair(requested_compression, null) + } + } catch (e: IllegalArgumentException) { + Pair(defaultCompression, + "Unsupported compression type (\$$compression_env_var) \"$compression_option\". Supported values are ${Compression.values().joinToString()}") + } + } // The invocation TRAP file will already have been started - // before the plugin is run, so we open it in append mode. + // before the plugin is run, so we always use no compression + // and we open it in append mode. FileOutputStream(File(invocationTrapFile), true).bufferedWriter().use { invocationTrapFileBW -> val invocationExtractionProblems = ExtractionProblems() val lm = TrapLabelManager() @@ -113,6 +143,10 @@ class KotlinExtractorExtension( if (System.getenv("CODEQL_EXTRACTOR_JAVA_KOTLIN_DUMP") == "true") { logger.info("moduleFragment:\n" + moduleFragment.dump()) } + if (compressionWarning != null) { + logger.warn(compressionWarning) + } + val primitiveTypeMapping = PrimitiveTypeMapping(logger, pluginContext) // FIXME: FileUtil expects a static global logger // which should be provided by SLF4J's factory facility. For now we set it here. 
@@ -125,7 +159,7 @@ class KotlinExtractorExtension( val fileTrapWriter = tw.makeSourceFileTrapWriter(file, true) loggerBase.setFileNumber(index) fileTrapWriter.writeCompilation_compiling_files(compilation, index, fileTrapWriter.fileId) - doFile(fileExtractionProblems, invocationTrapFile, fileTrapWriter, checkTrapIdentical, loggerBase, trapDir, srcDir, file, primitiveTypeMapping, pluginContext, globalExtensionState) + doFile(compression, fileExtractionProblems, invocationTrapFile, fileTrapWriter, checkTrapIdentical, loggerBase, trapDir, srcDir, file, primitiveTypeMapping, pluginContext, globalExtensionState) fileTrapWriter.writeCompilation_compiling_files_completed(compilation, index, fileExtractionProblems.extractionResult()) } loggerBase.printLimitedDiagnosticCounts(tw) @@ -218,12 +252,12 @@ This function determines whether 2 TRAP files should be considered to be equivalent. It returns `true` iff all of their non-comment lines are identical. */ -private fun equivalentTrap(f1: File, f2: File): Boolean { - f1.bufferedReader().use { bw1 -> - f2.bufferedReader().use { bw2 -> +private fun equivalentTrap(r1: BufferedReader, r2: BufferedReader): Boolean { + r1.use { br1 -> + r2.use { br2 -> while(true) { - val l1 = bw1.readLine() - val l2 = bw2.readLine() + val l1 = br1.readLine() + val l2 = br2.readLine() if (l1 == null && l2 == null) { return true } else if (l1 == null || l2 == null) { @@ -239,6 +273,7 @@ private fun equivalentTrap(f1: File, f2: File): Boolean { } private fun doFile( + compression: Compression, fileExtractionProblems: FileExtractionProblems, invocationTrapFile: String, fileTrapWriter: FileTrapWriter, @@ -270,15 +305,14 @@ private fun doFile( } srcTmpFile.renameTo(dbSrcFilePath.toFile()) - val trapFile = File("$dbTrapDir/$srcFilePath.trap") - val trapFileDir = trapFile.parentFile - trapFileDir.mkdirs() + val trapFileName = "$dbTrapDir/$srcFilePath.trap" + val trapFileWriter = getTrapFileWriter(compression, logger, trapFileName) - if (checkTrapIdentical 
|| !trapFile.exists()) { - val trapTmpFile = File.createTempFile("$srcFilePath.", ".trap.tmp", trapFileDir) + if (checkTrapIdentical || !trapFileWriter.exists()) { + trapFileWriter.makeParentDirectory() try { - trapTmpFile.bufferedWriter().use { trapFileBW -> + trapFileWriter.getTempWriter().use { trapFileBW -> // We want our comments to be the first thing in the file, // so start off with a mere TrapWriter val tw = TrapWriter(loggerBase, TrapLabelManager(), trapFileBW, fileTrapWriter) @@ -294,31 +328,114 @@ private fun doFile( externalDeclExtractor.extractExternalClasses() } - if (checkTrapIdentical && trapFile.exists()) { - if (equivalentTrap(trapTmpFile, trapFile)) { - if (!trapTmpFile.delete()) { - logger.warn("Failed to delete $trapTmpFile") - } + if (checkTrapIdentical && trapFileWriter.exists()) { + if (equivalentTrap(trapFileWriter.getTempReader(), trapFileWriter.getRealReader())) { + trapFileWriter.deleteTemp() } else { - val trapDifferentFile = File.createTempFile("$srcFilePath.", ".trap.different", dbTrapDir) - if (trapTmpFile.renameTo(trapDifferentFile)) { - logger.warn("TRAP difference: $trapFile vs $trapDifferentFile") - } else { - logger.warn("Failed to rename $trapTmpFile to $trapFile") - } + trapFileWriter.renameTempToDifferent() } } else { - if (!trapTmpFile.renameTo(trapFile)) { - logger.warn("Failed to rename $trapTmpFile to $trapFile") - } + trapFileWriter.renameTempToReal() } // We catch Throwable rather than Exception, as we want to // continue trying to extract everything else even if we get a // stack overflow or an assertion failure in one file. } catch (e: Throwable) { - logger.error("Failed to extract '$srcFilePath'. Partial TRAP file location is $trapTmpFile", e) + logger.error("Failed to extract '$srcFilePath'. 
" + trapFileWriter.debugInfo(), e) context.clear() fileExtractionProblems.setNonRecoverableProblem() } } } + +enum class Compression { NONE, GZIP, BROTLI } + +private fun getTrapFileWriter(compression: Compression, logger: FileLogger, trapFileName: String): TrapFileWriter { + return when (compression) { + Compression.NONE -> NonCompressedTrapFileWriter(logger, trapFileName) + Compression.GZIP -> GZipCompressedTrapFileWriter(logger, trapFileName) + Compression.BROTLI -> throw Exception("Brotli compression is not supported by the Kotlin extractor") + } +} + +private abstract class TrapFileWriter(val logger: FileLogger, trapName: String, val extension: String) { + private val realFile = File(trapName + extension) + private val parentDir = realFile.parentFile + lateinit private var tempFile: File + + fun debugInfo(): String { + if (this::tempFile.isInitialized) { + return "Partial TRAP file location is $tempFile" + } else { + return "Temporary file not yet created." + } + } + + fun makeParentDirectory() { + parentDir.mkdirs() + } + + fun exists(): Boolean { + return realFile.exists() + } + + abstract protected fun getReader(file: File): BufferedReader + abstract protected fun getWriter(file: File): BufferedWriter + + fun getRealReader(): BufferedReader { + return getReader(realFile) + } + + fun getTempReader(): BufferedReader { + return getReader(tempFile) + } + + fun getTempWriter(): BufferedWriter { + if (this::tempFile.isInitialized) { + logger.error("Temp writer reinitiailised for $realFile") + } + tempFile = File.createTempFile(realFile.getName() + ".", ".trap.tmp" + extension, parentDir) + return getWriter(tempFile) + } + + fun deleteTemp() { + if (!tempFile.delete()) { + logger.warn("Failed to delete $tempFile") + } + } + + fun renameTempToDifferent() { + val trapDifferentFile = File.createTempFile(realFile.getName() + ".", ".trap.different" + extension, parentDir) + if (tempFile.renameTo(trapDifferentFile)) { + logger.warn("TRAP difference: $realFile vs 
$trapDifferentFile") + } else { + logger.warn("Failed to rename $tempFile to $trapDifferentFile") + } + } + + fun renameTempToReal() { + if (!tempFile.renameTo(realFile)) { + logger.warn("Failed to rename $tempFile to $realFile") + } + } +} + +private class NonCompressedTrapFileWriter(logger: FileLogger, trapName: String): TrapFileWriter(logger, trapName, "") { + override protected fun getReader(file: File): BufferedReader { + return file.bufferedReader() + } + + override protected fun getWriter(file: File): BufferedWriter { + return file.bufferedWriter() + } +} + +private class GZipCompressedTrapFileWriter(logger: FileLogger, trapName: String): TrapFileWriter(logger, trapName, ".gz") { + override protected fun getReader(file: File): BufferedReader { + return BufferedReader(InputStreamReader(GZIPInputStream(BufferedInputStream(FileInputStream(file))), Charsets.UTF_8)) // explicit UTF-8, matching the default of File.bufferedReader() + } + + override protected fun getWriter(file: File): BufferedWriter { + return BufferedWriter(OutputStreamWriter(GZIPOutputStream(BufferedOutputStream(FileOutputStream(file))), Charsets.UTF_8)) // explicit UTF-8, matching the default of File.bufferedWriter() + } +} From a6cee9edf9d1d0b9d46be22ef574315ac7e7f30e Mon Sep 17 00:00:00 2001 From: Ian Lynagh Date: Fri, 19 Aug 2022 13:20:20 +0100 Subject: [PATCH 2/2] Kotlin: Add an integration test for TRAP compression --- .../kotlin/trap_compression/test.kt | 2 + .../kotlin/trap_compression/test.py | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 java/ql/integration-tests/posix-only/kotlin/trap_compression/test.kt create mode 100644 java/ql/integration-tests/posix-only/kotlin/trap_compression/test.py diff --git a/java/ql/integration-tests/posix-only/kotlin/trap_compression/test.kt b/java/ql/integration-tests/posix-only/kotlin/trap_compression/test.kt new file mode 100644 index 00000000000..2fc18b1217a --- /dev/null +++ b/java/ql/integration-tests/posix-only/kotlin/trap_compression/test.kt @@ -0,0 +1,2 @@ +class Test { +} diff --git a/java/ql/integration-tests/posix-only/kotlin/trap_compression/test.py 
b/java/ql/integration-tests/posix-only/kotlin/trap_compression/test.py new file mode 100644 index 00000000000..5dbe1d4e970 --- /dev/null +++ b/java/ql/integration-tests/posix-only/kotlin/trap_compression/test.py @@ -0,0 +1,48 @@ +from create_database_utils import * + +def check_extension(directory, expected_extension): + if expected_extension == '.trap': + # We start TRAP files with a comment + expected_start = b'//' + elif expected_extension == '.trap.gz': + # The GZip magic numbers + expected_start = b'\x1f\x8b' + else: + raise Exception('Unknown expected extension ' + expected_extension) + count = check_extension_worker(directory, expected_extension, expected_start) + if count != 1: + raise Exception('Expected 1 relevant file, but found ' + str(count) + ' in ' + directory) + +def check_extension_worker(directory, expected_extension, expected_start): + count = 0 + for f in os.listdir(directory): + x = os.path.join(directory, f) + if os.path.isdir(x): + count += check_extension_worker(x, expected_extension, expected_start) + else: + if f.startswith('test.kt') and not f.endswith('.set'): + if f.endswith(expected_extension): + with open(x, 'rb') as f_in: + content = f_in.read() + if content.startswith(expected_start): + count += 1 + else: + raise Exception('Unexpected start to content of ' + x) + else: + raise Exception('Expected test.kt TRAP file to have extension ' + expected_extension + ', but found ' + x) + return count + +run_codeql_database_create(['kotlinc test.kt'], test_db="default-db", db=None, lang="java") +check_extension('default-db/trap', '.trap.gz') +os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "nOnE" +run_codeql_database_create(['kotlinc test.kt'], test_db="none-db", db=None, lang="java") +check_extension('none-db/trap', '.trap') +os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "gzip" +run_codeql_database_create(['kotlinc test.kt'], test_db="gzip-db", db=None, lang="java") +check_extension('gzip-db/trap', '.trap.gz') 
+os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "brotli" +run_codeql_database_create(['kotlinc test.kt'], test_db="brotli-db", db=None, lang="java") +check_extension('brotli-db/trap', '.trap.gz') +os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "invalidValue" +run_codeql_database_create(['kotlinc test.kt'], test_db="invalid-db", db=None, lang="java") +check_extension('invalid-db/trap', '.trap.gz')