From 6013b918fc80311e4ef407e6bc365388bc31b275 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Tue, 29 Jan 2019 13:00:17 +0000 Subject: [PATCH] JavaScript: Extract tokens and comments before AST. This allows us to discard token/comment information sooner, thereby reducing heap pressure for very large files. --- .../src/com/semmle/js/extractor/JSExtractor.java | 6 ++++-- .../src/com/semmle/js/extractor/LexicalExtractor.java | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java index 1d5a14183cd..9b252011264 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java @@ -94,12 +94,14 @@ public class JSExtractor { ASTExtractor scriptExtractor = new ASTExtractor(lexicalExtractor, scopeManager); toplevelLabel = scriptExtractor.getToplevelLabel(); - scriptExtractor.extract(ast, platform, sourceType, toplevelKind); lexicalExtractor.extractComments(toplevelLabel); loc = lexicalExtractor.extractLines(parserRes.getSource(), toplevelLabel); lexicalExtractor.extractTokens(toplevelLabel); - new CFGExtractor(scriptExtractor).extract(ast); new JSDocExtractor(textualExtractor).extract(lexicalExtractor.getComments()); + lexicalExtractor.purge(); + + scriptExtractor.extract(ast, platform, sourceType, toplevelKind); + new CFGExtractor(scriptExtractor).extract(ast); } else { lexicalExtractor = new LexicalExtractor(textualExtractor, new ArrayList(), new ArrayList()); ASTExtractor scriptExtractor = new ASTExtractor(lexicalExtractor, null); diff --git a/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java index c7c9d017cff..8716dde5b91 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java +++ 
b/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java @@ -173,4 +173,12 @@ public class LexicalExtractor { public String mkToString(SourceElement nd) { return textualExtractor.mkToString(nd); } + + /** + * Purge token and comment information to reduce heap pressure. + */ + public void purge() { + this.tokens.clear(); + this.comments.clear(); + } }