JS: record metrics during extraction

This commit is contained in:
Esben Sparre Andreasen
2019-09-03 10:36:32 +02:00
parent 7fcde4c130
commit 5665cf9328
13 changed files with 283 additions and 21 deletions

View File

@@ -98,6 +98,7 @@ import com.semmle.js.ast.jsx.JSXMemberExpression;
import com.semmle.js.ast.jsx.JSXNamespacedName;
import com.semmle.js.ast.jsx.JSXOpeningElement;
import com.semmle.js.ast.jsx.JSXSpreadAttribute;
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
import com.semmle.js.extractor.ExtractorConfig.Platform;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import com.semmle.js.extractor.ScopeManager.DeclKind;
@@ -192,6 +193,10 @@ public class ASTExtractor {
return scopeManager;
}
/** Returns the extraction metrics held by the underlying lexical extractor. */
public ExtractionMetrics getMetrics() {
return lexicalExtractor.getMetrics();
}
/**
* The binding semantics for an identifier.
*
@@ -1945,9 +1950,11 @@ public class ASTExtractor {
}
/**
 * Emits the {@code toplevels} tuple and the node location for {@code root}, then visits {@code
 * root} with a fresh {@code V} visitor. The whole operation is timed as the {@link
 * ExtractionPhase#ASTExtractor_extract} phase.
 *
 * @param root the root node to extract
 * @param platform the platform passed on to the visitor
 * @param sourceType the source type passed on to the visitor
 * @param toplevelKind the kind recorded in the {@code toplevels} tuple
 */
public void extract(Node root, Platform platform, SourceType sourceType, int toplevelKind) {
  ExtractionMetrics metrics = lexicalExtractor.getMetrics();
  metrics.startPhase(ExtractionPhase.ASTExtractor_extract);
  try {
    trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind);
    locationManager.emitNodeLocation(root, toplevelLabel);
    root.accept(new V(platform, sourceType), null);
  } finally {
    // Stop the phase even when extraction throws; otherwise it stays on the metrics stack and
    // every enclosing stopPhase call reports an inconsistent timer.
    metrics.stopPhase(ExtractionPhase.ASTExtractor_extract);
  }
}
}

View File

@@ -93,6 +93,7 @@ import com.semmle.js.ast.jsx.JSXMemberExpression;
import com.semmle.js.ast.jsx.JSXNamespacedName;
import com.semmle.js.ast.jsx.JSXOpeningElement;
import com.semmle.js.ast.jsx.JSXSpreadAttribute;
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
import com.semmle.ts.ast.DecoratorList;
import com.semmle.ts.ast.EnumDeclaration;
import com.semmle.ts.ast.EnumMember;
@@ -171,11 +172,13 @@ public class CFGExtractor {
private final TrapWriter trapwriter;
private final Label toplevelLabel;
private final LocationManager locationManager;
private final ExtractionMetrics metrics;
/**
* Creates a CFG extractor that uses the trap writer, top-level label, location manager and
* extraction metrics of the given AST extractor.
*
* @param astExtractor the AST extractor to take these shared objects from
*/
public CFGExtractor(ASTExtractor astExtractor) {
this.trapwriter = astExtractor.getTrapwriter();
this.toplevelLabel = astExtractor.getToplevelLabel();
this.locationManager = astExtractor.getLocationManager();
this.metrics = astExtractor.getMetrics();
}
@SuppressWarnings("unchecked")
@@ -1955,6 +1958,8 @@ public class CFGExtractor {
}
/**
 * Visits {@code nd} with a fresh {@code V} visitor, starting from a {@code SimpleSuccessorInfo}
 * without a successor. The visit is timed as the {@link ExtractionPhase#CFGExtractor_extract}
 * phase.
 *
 * @param nd the node to extract
 */
public void extract(Node nd) {
  metrics.startPhase(ExtractionPhase.CFGExtractor_extract);
  try {
    nd.accept(new V(), new SimpleSuccessorInfo(null));
  } finally {
    // Stop the phase even when the visitor throws; otherwise it stays on the metrics stack and
    // every enclosing stopPhase call reports an inconsistent timer.
    metrics.stopPhase(ExtractionPhase.CFGExtractor_extract);
  }
}
}

View File

@@ -0,0 +1,191 @@
package com.semmle.js.extractor;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.files.FileUtil;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
import com.semmle.util.trap.pathtransformers.PathTransformer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
import java.nio.charset.Charset;
import java.util.Stack;
import java.util.zip.GZIPOutputStream;
/** Metrics for the (single-threaded) extraction of a single file. */
public class ExtractionMetrics {
  /**
   * A phase of the extraction whose duration is measured.
   *
   * <p>Convention: the enum names have the format <code>{ClassName}_{MethodName}</code>, and should
   * identify the methods they correspond to.
   */
  public enum ExtractionPhase {
    ASTExtractor_extract(0),
    CFGExtractor_extract(1),
    FileExtractor_extractContents(2),
    JSExtractor_extract(3),
    JSParser_parse(4),
    LexicalExtractor_extractLines(5),
    LexicalExtractor_extractTokens(6),
    TypeScriptASTConverter_convertAST(7),
    TypeScriptParser_talkToParserWrapper(8);

    /**
     * The id used in the database for the time spent performing this phase of the extraction. It
     * doubles as the index into the per-phase timing arrays, so ids must stay in the range {@code
     * 0 .. values().length - 1}.
     */
    final int dbschemeId;

    ExtractionPhase(int dbschemeId) {
      this.dbschemeId = dbschemeId;
    }
  }

  /** The cache file, if any. */
  private File cacheFile;

  /** True iff the extraction of this file reuses an existing trap cache file. */
  private boolean canReuseCacheFile;

  /** The cumulative CPU-time (in nanoseconds) spent in each extraction phase so far. */
  private final long[] cpuTimes = new long[ExtractionPhase.values().length];

  /** The label for the file that is being extracted. */
  private Label fileLabel;

  /** The number of characters in the file that is being extracted. */
  private int length;

  /** The previous time a CPU-time measure was performed. */
  private long previousCpuTime;

  /** The previous time a wallclock-time measure was performed. */
  private long previousWallclockTime;

  /** The extraction phase stack. */
  private final Stack<ExtractionPhase> stack = new Stack<>();

  /** The thread management bean, used for measuring the CPU-time of the current thread. */
  private final ThreadMXBean thread = ManagementFactory.getThreadMXBean();

  /**
   * Whether the JVM can measure CPU-time for the current thread. When it cannot, {@link
   * ThreadMXBean#getCurrentThreadCpuTime()} throws, so CPU-times are recorded as zero instead of
   * letting the metrics abort the extraction.
   */
  private final boolean cpuTimeSupported = thread.isCurrentThreadCpuTimeSupported();

  /** The cumulative wallclock-time (in nanoseconds) spent in each extraction phase so far. */
  private final long[] wallclockTimes = new long[ExtractionPhase.values().length];

  /**
   * True iff extraction metrics could not be obtained for this file (due to an unforeseen error
   * that should not prevent the ordinary extraction from succeeding).
   */
  private boolean timingsFailed;

  /**
   * Appends these metrics to a trap file. Note that this makes the resulting trap file content
   * non-deterministic.
   *
   * <p>Any exception raised while writing or closing is ignored: failing to record metrics must
   * not fail the extraction.
   *
   * @param trapFileToAppendTo the trap file to append to; nothing happens if it is {@code null}
   */
  public void appendToTrapFile(File trapFileToAppendTo) {
    if (trapFileToAppendTo == null) {
      return;
    }

    BufferedWriter out = null;
    FileOutputStream fos = null;
    GZIPOutputStream gzip = null;
    TrapWriter trapwriter = null;
    try {
      try {
        fos = new FileOutputStream(trapFileToAppendTo, true);
        gzip = new GZIPOutputStream(fos);
        out = new BufferedWriter(new OutputStreamWriter(gzip, Charset.forName("UTF-8")));
        trapwriter = new TrapWriter(out, PathTransformer.std());
        trapwriter.addTuple(
            "extraction_data",
            fileLabel,
            cacheFile != null ? cacheFile.getAbsolutePath() : "",
            canReuseCacheFile,
            length);

        // A non-empty stack means some phase was started but never stopped.
        if (!stack.isEmpty()) {
          failTimings(
              String.format(
                  "Could not properly record extraction times for %s. (stack = %s)",
                  fileLabel, stack.toString()));
        }

        if (!timingsFailed) {
          for (ExtractionPhase phase : ExtractionPhase.values()) {
            int id = phase.dbschemeId;
            trapwriter.addTuple("extraction_time", fileLabel, id, 0, (float) cpuTimes[id]);
            trapwriter.addTuple("extraction_time", fileLabel, id, 1, (float) wallclockTimes[id]);
          }
        }
      } finally {
        // Close from the outermost wrapper inwards so that buffered text and the gzip trailer are
        // flushed before the underlying file stream is closed. Closing an already-closed stream
        // is a no-op, so the redundant closes are harmless.
        FileUtil.close(trapwriter);
        FileUtil.close(out);
        FileUtil.close(gzip);
        FileUtil.close(fos);
      }
    } catch (Exception e) {
      Exceptions.ignore(e, "Ignoring exception for extraction metrics writing");
    }
  }

  /**
   * Reports a timing problem on standard error and marks the timings of this file as unusable.
   *
   * @param msg the already formatted message to report
   */
  private void failTimings(String msg) {
    // println rather than printf: msg is already formatted and must not be interpreted as a
    // format string again.
    System.err.println(msg);
    System.err.flush();
    this.timingsFailed = true;
  }

  /** Returns the CPU-time of the current thread, or zero if the JVM cannot measure it. */
  private long currentCpuTime() {
    return cpuTimeSupported ? thread.getCurrentThreadCpuTime() : 0L;
  }

  /**
   * Attributes the time elapsed since the previous measurement to the phase on top of the stack
   * (if any), and restarts the running clocks.
   */
  private void incrementCurrentTimer() {
    long nowWallclock = System.nanoTime();
    long nowCpu = currentCpuTime();

    if (!stack.isEmpty()) {
      // increment by the time elapsed
      int current = stack.peek().dbschemeId;
      wallclockTimes[current] += nowWallclock - previousWallclockTime;
      cpuTimes[current] += nowCpu - previousCpuTime;
    }

    // update the running clock
    previousWallclockTime = nowWallclock;
    previousCpuTime = nowCpu;
  }

  public void setCacheFile(File cacheFile) {
    this.cacheFile = cacheFile;
  }

  public void setCanReuseCacheFile(boolean canReuseCacheFile) {
    this.canReuseCacheFile = canReuseCacheFile;
  }

  public void setFileLabel(Label fileLabel) {
    this.fileLabel = fileLabel;
  }

  public void setLength(int length) {
    this.length = length;
  }

  /**
   * Starts timing {@code event}. Time elapsed so far is attributed to the enclosing phase, which
   * is suspended until {@code event} is stopped.
   */
  public void startPhase(ExtractionPhase event) {
    incrementCurrentTimer();
    stack.push(event);
  }

  /**
   * Stops timing {@code event}, which must be the most recently started phase that has not been
   * stopped yet. On a mismatch the timings are marked as failed and the stack is left untouched.
   *
   * @param event technically not needed, but useful for documentation and sanity checking
   */
  public void stopPhase(ExtractionPhase event) {
    if (stack.isEmpty()) {
      failTimings(
          String.format(
              "Inconsistent extraction time recording: trying to stop timer %s, but no timer is running",
              event));
      return;
    }
    if (stack.peek() != event) {
      failTimings(
          String.format(
              "Inconsistent extraction time recording: trying to stop timer %s, but current timer is: %s",
              event, stack.peek()));
      return;
    }
    incrementCurrentTimer();
    stack.pop();
  }
}

View File

@@ -1,5 +1,6 @@
package com.semmle.js.extractor;
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
import com.semmle.js.extractor.trapcache.CachingTrapWriter;
import com.semmle.js.extractor.trapcache.ITrapCache;
import com.semmle.util.data.StringUtil;
@@ -386,6 +387,7 @@ public class FileExtractor {
/** @return the number of lines of code extracted, or {@code null} if the file was cached */
public Integer extract(File f, ExtractorState state) throws IOException {
// populate source archive
String source = new WholeIO(config.getDefaultEncoding()).strictread(f);
outputConfig.getSourceArchive().add(f, source);
@@ -393,6 +395,7 @@ public class FileExtractor {
// extract language-independent bits
TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(f);
Label fileLabel = trapwriter.populateFile(f);
LocationManager locationManager = new LocationManager(f, trapwriter, fileLabel);
locationManager.emitFileLocation(fileLabel, 0, 0, 0, 0);
@@ -424,23 +427,37 @@ public class FileExtractor {
private Integer extractContents(
File f, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
throws IOException {
ExtractionMetrics metrics = new ExtractionMetrics();
metrics.startPhase(ExtractionPhase.FileExtractor_extractContents);
metrics.setLength(source.length());
metrics.setFileLabel(fileLabel);
TrapWriter trapwriter = locationManager.getTrapWriter();
FileType fileType = getFileType(f);
File cacheFile = null, // the cache file for this extraction
resultFile = null; // the final result TRAP file for this extraction
// check whether we can perform caching
if (bumpIdCounter(trapwriter) && fileType.isTrapCachingAllowed()) {
if (bumpIdCounter(trapwriter)) {
resultFile = outputConfig.getTrapWriterFactory().getTrapFileFor(f);
if (resultFile != null) cacheFile = trapCache.lookup(source, config, fileType);
}
// check whether we can perform caching
if (resultFile != null && fileType.isTrapCachingAllowed()) {
cacheFile = trapCache.lookup(source, config, fileType);
}
if (cacheFile != null) {
boolean canUseCacheFile = cacheFile != null;
boolean canReuseCacheFile = canUseCacheFile && cacheFile.exists();
metrics.setCacheFile(cacheFile);
metrics.setCanReuseCacheFile(canReuseCacheFile);
if (canUseCacheFile) {
FileUtil.close(trapwriter);
if (cacheFile.exists()) {
if (canReuseCacheFile) {
FileUtil.append(cacheFile, resultFile);
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
metrics.appendToTrapFile(resultFile);
return null;
}
@@ -457,7 +474,8 @@ public class FileExtractor {
try {
IExtractor extractor = fileType.mkExtractor(config, state);
TextualExtractor textualExtractor =
new TextualExtractor(trapwriter, locationManager, source, config.getExtractLines());
new TextualExtractor(
trapwriter, locationManager, source, config.getExtractLines(), metrics);
LoCInfo loc = extractor.extract(textualExtractor);
int numLines = textualExtractor.getNumLines();
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
@@ -468,8 +486,9 @@ public class FileExtractor {
} finally {
if (!successful && trapwriter instanceof CachingTrapWriter)
((CachingTrapWriter) trapwriter).discard();
FileUtil.close(trapwriter);
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
metrics.appendToTrapFile(resultFile);
}
}

View File

@@ -191,7 +191,12 @@ public class HTMLExtractor implements IExtractor {
JSExtractor extractor = new JSExtractor(config);
try {
TextualExtractor tx =
new TextualExtractor(trapwriter, scriptLocationManager, source, config.getExtractLines());
new TextualExtractor(
trapwriter,
scriptLocationManager,
source,
config.getExtractLines(),
textualExtractor.getMetrics());
return extractor.extract(tx, source, toplevelKind, scopeManager).snd();
} catch (ParseError e) {
e.setPosition(scriptLocationManager.translatePosition(e.getPosition()));

View File

@@ -3,6 +3,7 @@ package com.semmle.js.extractor;
import com.semmle.js.ast.Comment;
import com.semmle.js.ast.Node;
import com.semmle.js.ast.Token;
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
import com.semmle.js.extractor.ExtractorConfig.ECMAVersion;
import com.semmle.js.extractor.ExtractorConfig.Platform;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
@@ -52,7 +53,8 @@ public class JSExtractor {
SourceType sourceType = establishSourceType(source, true);
JSParser.Result parserRes = JSParser.parse(config, sourceType, source);
JSParser.Result parserRes =
JSParser.parse(config, sourceType, source, textualExtractor.getMetrics());
return extract(textualExtractor, source, toplevelKind, scopeManager, sourceType, parserRes);
}
@@ -87,6 +89,7 @@ public class JSExtractor {
SourceType sourceType,
JSParser.Result parserRes)
throws ParseError {
textualExtractor.getMetrics().startPhase(ExtractionPhase.JSExtractor_extract);
Label toplevelLabel;
TrapWriter trapwriter = textualExtractor.getTrapwriter();
LocationManager locationManager = textualExtractor.getLocationManager();
@@ -104,7 +107,6 @@ public class JSExtractor {
new LexicalExtractor(textualExtractor, parserRes.getTokens(), parserRes.getComments());
ASTExtractor scriptExtractor = new ASTExtractor(lexicalExtractor, scopeManager);
toplevelLabel = scriptExtractor.getToplevelLabel();
lexicalExtractor.extractComments(toplevelLabel);
loc = lexicalExtractor.extractLines(parserRes.getSource(), toplevelLabel);
lexicalExtractor.extractTokens(toplevelLabel);
@@ -126,7 +128,6 @@ public class JSExtractor {
for (ParseError parseError : parserRes.getErrors()) {
if (!config.isTolerateParseErrors()) throw parseError;
Label key = trapwriter.freshLabel();
String errorLine = textualExtractor.getLine(parseError.getPosition().getLine());
trapwriter.addTuple("jsParseErrors", key, toplevelLabel, "Error: " + parseError, errorLine);
@@ -139,6 +140,8 @@ public class JSExtractor {
if (platform == Platform.NODE && sourceType == SourceType.COMMONJS_MODULE)
textualExtractor.getTrapwriter().addTuple("isNodejs", toplevelLabel);
textualExtractor.getMetrics().stopPhase(ExtractionPhase.JSExtractor_extract);
return Pair.make(toplevelLabel, loc);
}

View File

@@ -4,6 +4,7 @@ import com.semmle.js.ast.Comment;
import com.semmle.js.ast.Position;
import com.semmle.js.ast.SourceElement;
import com.semmle.js.ast.Token;
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
import java.util.List;
@@ -40,9 +41,16 @@ public class LexicalExtractor {
return comments;
}
/** Returns the extraction metrics held by the underlying textual extractor. */
public ExtractionMetrics getMetrics() {
return textualExtractor.getMetrics();
}
public LoCInfo extractLines(String src, Label toplevelKey) {
textualExtractor.getMetrics().startPhase(ExtractionPhase.LexicalExtractor_extractLines);
Position end = textualExtractor.extractLines(src, toplevelKey);
return emitNumlines(toplevelKey, new Position(1, 0, 0), end);
LoCInfo info = emitNumlines(toplevelKey, new Position(1, 0, 0), end);
textualExtractor.getMetrics().stopPhase(ExtractionPhase.LexicalExtractor_extractLines);
return info;
}
/**
@@ -112,11 +120,11 @@ public class LexicalExtractor {
}
public void extractTokens(Label toplevelKey) {
textualExtractor.getMetrics().startPhase(ExtractionPhase.LexicalExtractor_extractTokens);
int j = 0;
for (int i = 0, n = tokens.size(), idx = 0; i < n; ++i) {
Token token = tokens.get(i);
if (token == null) continue;
Label key = trapwriter.freshLabel();
int kind = -1;
switch (token.getType()) {
@@ -164,6 +172,7 @@ public class LexicalExtractor {
if (token.getLoc().equals(next.getLoc())) tokens.set(i + 1, null);
}
}
textualExtractor.getMetrics().stopPhase(ExtractionPhase.LexicalExtractor_extractTokens);
}
public void extractComments(Label toplevelKey) {

View File

@@ -37,7 +37,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2019-09-02";
public static final String EXTRACTOR_VERSION = "2019-09-03";
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -20,14 +20,20 @@ public class TextualExtractor {
private final LocationManager locationManager;
private final Label fileLabel;
private final boolean extractLines;
private final ExtractionMetrics metrics;
public TextualExtractor(
TrapWriter trapwriter, LocationManager locationManager, String source, boolean extractLines) {
TrapWriter trapwriter,
LocationManager locationManager,
String source,
boolean extractLines,
ExtractionMetrics metrics) {
this.trapwriter = trapwriter;
this.locationManager = locationManager;
this.source = source;
this.fileLabel = locationManager.getFileLabel();
this.extractLines = extractLines;
this.metrics = metrics;
}
public TrapWriter getTrapwriter() {
@@ -42,6 +48,10 @@ public class TextualExtractor {
return source;
}
/** Returns the extraction metrics object that was passed to the constructor. */
public ExtractionMetrics getMetrics() {
return metrics;
}
/**
* Passes the source text recorded in the location of {@code nd} through {@code sanitiseToString}
* and returns the result.
*/
public String mkToString(SourceElement nd) {
return sanitiseToString(nd.getLoc().getSource());
}

View File

@@ -21,7 +21,7 @@ public class TypeScriptExtractor implements IExtractor {
LocationManager locationManager = textualExtractor.getLocationManager();
String source = textualExtractor.getSource();
File sourceFile = locationManager.getSourceFile();
Result res = parser.parse(sourceFile, source);
Result res = parser.parse(sourceFile, source, textualExtractor.getMetrics());
ScopeManager scopeManager =
new ScopeManager(textualExtractor.getTrapwriter(), ECMAVersion.ECMA2017);
try {

View File

@@ -1,6 +1,7 @@
package com.semmle.js.extractor.test;
import com.semmle.js.ast.Node;
import com.semmle.js.extractor.ExtractionMetrics;
import com.semmle.js.extractor.ExtractorConfig;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import com.semmle.js.extractor.NodeJSDetector;
@@ -13,7 +14,7 @@ public class NodeJSDetectorTests {
private static final ExtractorConfig CONFIG = new ExtractorConfig(false);
private void isNodeJS(String src, boolean expected) {
Result res = JSParser.parse(CONFIG, SourceType.SCRIPT, src);
Result res = JSParser.parse(CONFIG, SourceType.SCRIPT, src, new ExtractionMetrics());
Node ast = res.getAST();
Assert.assertNotNull(ast);
Assert.assertTrue(NodeJSDetector.looksLikeNodeJS(ast) == expected);

View File

@@ -3,6 +3,8 @@ package com.semmle.js.parser;
import com.semmle.js.ast.Comment;
import com.semmle.js.ast.Node;
import com.semmle.js.ast.Token;
import com.semmle.js.extractor.ExtractionMetrics;
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
import com.semmle.js.extractor.ExtractorConfig;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import java.util.List;
@@ -65,7 +67,11 @@ public class JSParser {
}
}
public static Result parse(ExtractorConfig config, SourceType sourceType, String source) {
return JcornWrapper.parse(config, sourceType, source);
public static Result parse(
ExtractorConfig config, SourceType sourceType, String source, ExtractionMetrics metrics) {
metrics.startPhase(ExtractionPhase.JSParser_parse);
Result result = JcornWrapper.parse(config, sourceType, source);
metrics.stopPhase(ExtractionPhase.JSParser_parse);
return result;
}
}

View File

@@ -7,6 +7,7 @@ import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;
import com.semmle.js.extractor.ExtractionMetrics;
import com.semmle.js.parser.JSParser.Result;
import com.semmle.ts.extractor.TypeTable;
import com.semmle.util.data.StringUtil;
@@ -362,17 +363,22 @@ public class TypeScriptParser {
*
* <p>If the file is not part of a project, only syntactic information will be extracted.
*/
public Result parse(File sourceFile, String source) {
public Result parse(File sourceFile, String source, ExtractionMetrics metrics) {
JsonObject request = new JsonObject();
request.add("command", new JsonPrimitive("parse"));
request.add("filename", new JsonPrimitive(sourceFile.getAbsolutePath()));
metrics.startPhase(ExtractionMetrics.ExtractionPhase.TypeScriptParser_talkToParserWrapper);
JsonObject response = talkToParserWrapper(request);
metrics.stopPhase(ExtractionMetrics.ExtractionPhase.TypeScriptParser_talkToParserWrapper);
try {
checkResponseType(response, "ast");
JsonObject nodeFlags = response.get("nodeFlags").getAsJsonObject();
JsonObject syntaxKinds = response.get("syntaxKinds").getAsJsonObject();
JsonObject ast = response.get("ast").getAsJsonObject();
return new TypeScriptASTConverter(nodeFlags, syntaxKinds).convertAST(ast, source);
metrics.startPhase(ExtractionMetrics.ExtractionPhase.TypeScriptASTConverter_convertAST);
Result converted = new TypeScriptASTConverter(nodeFlags, syntaxKinds).convertAST(ast, source);
metrics.stopPhase(ExtractionMetrics.ExtractionPhase.TypeScriptASTConverter_convertAST);
return converted;
} catch (IllegalStateException e) {
throw new CatastrophicError(
"TypeScript parser wrapper sent unexpected response: " + response, e);