From 03d38ca54b7859942720244dd2bc515cccd0b55f Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 5 Sep 2019 08:16:18 +0100 Subject: [PATCH] JS: simplify cache interaction --- .../js/extractor/ExtractionMetrics.java | 74 +++++++------------ .../semmle/js/extractor/FileExtractor.java | 8 +- .../src/com/semmle/js/extractor/Main.java | 2 +- .../semmle/js/extractor/test/TrapTests.java | 10 +++ .../javascript/meta/ExtractionMetrics.qll | 27 ++++--- 5 files changed, 56 insertions(+), 65 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractionMetrics.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractionMetrics.java index 6460daae2f6..3e9844faf25 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractionMetrics.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractionMetrics.java @@ -1,19 +1,11 @@ package com.semmle.js.extractor; -import com.semmle.util.exception.Exceptions; -import com.semmle.util.files.FileUtil; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; -import com.semmle.util.trap.pathtransformers.PathTransformer; -import java.io.BufferedWriter; import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStreamWriter; import java.lang.management.ManagementFactory; import java.lang.management.ThreadMXBean; -import java.nio.charset.Charset; import java.util.Stack; -import java.util.zip.GZIPOutputStream; /** Metrics for the (single-threaded) extraction of a single file. */ public class ExtractionMetrics { @@ -79,55 +71,39 @@ public class ExtractionMetrics { private boolean timingsFailed; /** - * Appends these metrics to a trap file. Note that this makes the resulting trap file content + * Writes the data metrics to a trap file. Note that this makes the resulting trap file content * non-deterministic. */ - public void appendToTrapFile(File trapFileToAppendTo) { - if (trapFileToAppendTo == null) { - return; + public void writeDataToTrap(TrapWriter trapwriter) { + trapwriter.addTuple( + "extraction_data", + fileLabel, + cacheFile != null ? cacheFile.getAbsolutePath() : "", + canReuseCacheFile, + length); + } + + /** + * Writes the timing metrics to a trap file. Note that this makes the resulting trap file content + * non-deterministic. + */ + public void writeTimingsToTrap(TrapWriter trapwriter) { + if (!stack.isEmpty()) { + failTimings( + String.format( + "Could not properly record extraction times for %s. (stack = %s)%n", + fileLabel, stack.toString())); } - - BufferedWriter out = null; - FileOutputStream fos = null; - GZIPOutputStream gzip = null; - TrapWriter trapwriter = null; - try { - fos = new FileOutputStream(trapFileToAppendTo, true); - gzip = new GZIPOutputStream(fos); - out = new BufferedWriter(new OutputStreamWriter(gzip, Charset.forName("UTF-8"))); - - trapwriter = new TrapWriter(out, PathTransformer.std()); - trapwriter.addTuple( - "extraction_data", - fileLabel, - cacheFile != null ? cacheFile.getAbsolutePath() : "", - canReuseCacheFile, - length); - - if (!stack.isEmpty()) { - failTimings( - String.format( - "Could not properly record extraction times for %s. (stack = %s)%n", - fileLabel, stack.toString())); + if (!timingsFailed) { + for (int i = 0; i < ExtractionPhase.values().length; i++) { + trapwriter.addTuple("extraction_time", fileLabel, i, 0, (float) cpuTimes[i]); + trapwriter.addTuple("extraction_time", fileLabel, i, 1, (float) wallclockTimes[i]); } - if (!timingsFailed) { - for (int i = 0; i < ExtractionPhase.values().length; i++) { - trapwriter.addTuple("extraction_time", fileLabel, i, 0, (float) cpuTimes[i]); - trapwriter.addTuple("extraction_time", fileLabel, i, 1, (float) wallclockTimes[i]); - } - } - FileUtil.close(trapwriter); - } catch (Exception e) { - FileUtil.close(fos); - FileUtil.close(gzip); - FileUtil.close(out); - FileUtil.close(trapwriter); - Exceptions.ignore(e, "Ignoring exception for extraction metrics writing"); } } private void failTimings(String msg) { - System.err.printf(msg); + System.err.println(msg); System.err.flush(); this.timingsFailed = true; } diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 1db12b79004..d80ddeaf303 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -450,14 +450,12 @@ public class FileExtractor { metrics.setCacheFile(cacheFile); metrics.setCanReuseCacheFile(canReuseCacheFile); - + metrics.writeDataToTrap(trapwriter); if (canUseCacheFile) { FileUtil.close(trapwriter); if (canReuseCacheFile) { FileUtil.append(cacheFile, resultFile); - metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents); - metrics.appendToTrapFile(resultFile); return null; } @@ -481,14 +479,14 @@ public class FileExtractor { int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments(); trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments); trapwriter.addTuple("filetype", fileLabel, fileType.toString()); + metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents); + metrics.writeTimingsToTrap(trapwriter); successful = true; return linesOfCode; } finally { if (!successful && trapwriter instanceof CachingTrapWriter) ((CachingTrapWriter) trapwriter).discard(); FileUtil.close(trapwriter); - metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents); - metrics.appendToTrapFile(resultFile); } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index 5a33840a4fd..3f4ba8e5a18 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -37,7 +37,7 @@ public class Main { * A version identifier that should be updated every time the extractor changes in such a way that * it may produce different tuples for the same file under the same {@link ExtractorConfig}. */ - public static final String EXTRACTOR_VERSION = "2019-09-03"; + public static final String EXTRACTOR_VERSION = "2019-09-04"; public static final Pattern NEWLINE = Pattern.compile("\n"); diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/TrapTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/TrapTests.java index 29831a0e369..6f3f7e1e445 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/test/TrapTests.java +++ b/javascript/extractor/src/com/semmle/js/extractor/test/TrapTests.java @@ -166,6 +166,16 @@ public class TrapTests { String expected = new WholeIO().strictreadText(trap); expectedVsActual.add(Pair.make(expected, actual)); } + + @Override + public void addTuple(String tableName, Object... values) { + if ("extraction_data".equals(tableName) + || "extraction_time".equals(tableName)) { + // ignore non-deterministic tables + return; + } + super.addTuple(tableName, values); + } }; } diff --git a/javascript/ql/src/semmle/javascript/meta/ExtractionMetrics.qll b/javascript/ql/src/semmle/javascript/meta/ExtractionMetrics.qll index d3386f3af60..4e2c39ff92a 100644 --- a/javascript/ql/src/semmle/javascript/meta/ExtractionMetrics.qll +++ b/javascript/ql/src/semmle/javascript/meta/ExtractionMetrics.qll @@ -10,8 +10,9 @@ module ExtractionMetrics { * A file with extraction metrics. */ class FileWithExtractionMetrics extends File { - - FileWithExtractionMetrics() { extraction_data(this, _, _, _) and extraction_time(this, _, _, _)} + FileWithExtractionMetrics() { + extraction_data(this, _, _, _) and extraction_time(this, _, _, _) + } /** * Gets the CPU time in nanoseconds it took to extract this file. @@ -49,13 +50,18 @@ module ExtractionMetrics { int getLength() { extraction_data(this, _, _, result) } private float getTime(PhaseName phaseName, int timerKind) { - // note that we use strictsum to make it clear if data is missing because it comes from an upgraded database. - strictsum(int phaseId, float r | - phaseName = getExtractionPhaseName(phaseId) and - extraction_time(this, phaseId, timerKind, r) + exists(float time | + // note that we use strictsum to make it clear if data is missing because it comes from an upgraded database. + strictsum(int phaseId, float r | + phaseName = getExtractionPhaseName(phaseId) and + extraction_time(this, phaseId, timerKind, r) + | + r + ) = time | - r - ) = result + // assume the cache-lookup was for free + if isFromCache() then result = 0 else result = time + ) } } @@ -84,7 +90,6 @@ module ExtractionMetrics { "TypeScriptParser_talkToParserWrapper" = result and 8 = phaseId } - /** * The name of a phase of the extraction. */ @@ -105,7 +110,9 @@ module ExtractionMetrics { /** * Gets the total wallclock time spent on extraction. */ - float getWallclockTime() { result = strictsum(any(FileWithExtractionMetrics f).getWallclockTime()) } + float getWallclockTime() { + result = strictsum(any(FileWithExtractionMetrics f).getWallclockTime()) + } /** * Gets the total CPU time spent in phase `phaseName` of the extraction.