diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index bf5b4e8cb03..0aa3d532e5d 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -1,6 +1,7 @@ package com.semmle.js.extractor; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.Reader; import java.lang.ProcessBuilder.Redirect; @@ -17,6 +18,7 @@ import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -27,6 +29,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -41,11 +44,15 @@ import com.semmle.js.extractor.FileExtractor.FileType; import com.semmle.js.extractor.trapcache.DefaultTrapCache; import com.semmle.js.extractor.trapcache.DummyTrapCache; import com.semmle.js.extractor.trapcache.ITrapCache; +import com.semmle.js.parser.ParseError; import com.semmle.js.parser.ParsedProject; import com.semmle.ts.extractor.TypeExtractor; import com.semmle.ts.extractor.TypeScriptParser; +import com.semmle.ts.extractor.TypeScriptWrapperOOMError; import com.semmle.ts.extractor.TypeTable; import com.semmle.util.data.StringUtil; +import com.semmle.util.diagnostics.DiagnosticLevel; +import com.semmle.util.diagnostics.DiagnosticWriter; import com.semmle.util.exception.CatastrophicError; import com.semmle.util.exception.Exceptions; import com.semmle.util.exception.ResourceError; @@ -444,35 +451,129 @@ public 
class AutoBuild { /** Perform extraction. */ public int run() throws IOException { - startThreadPool(); - try { - CompletableFuture sourceFuture = extractSource(); - sourceFuture.join(); // wait for source extraction to complete - if (hasSeenCode()) { // don't bother with the externs if no code was seen - extractExterns(); + startThreadPool(); + try { + CompletableFuture sourceFuture = extractSource(); + sourceFuture.join(); // wait for source extraction to complete + if (hasSeenCode()) { // don't bother with the externs if no code was seen + extractExterns(); + } + extractXml(); + } catch (OutOfMemoryError oom) { + System.err.println("Out of memory while extracting the project."); + return 137; // the CodeQL CLI will interpret this as an out-of-memory error + // purposely not doing anything else (printing stack, etc.), as the JVM + // basically guarantees nothing after an OOM + } catch (TypeScriptWrapperOOMError oom) { + System.err.println("Out of memory while extracting the project."); + System.err.println(oom.getMessage()); + oom.printStackTrace(System.err); + return 137; + } catch (RuntimeException | IOException e) { + writeDiagnostics("Internal error: " + e, JSDiagnosticKind.INTERNAL_ERROR); + e.printStackTrace(System.err); + return 1; + } finally { + shutdownThreadPool(); + diagnosticsToClose.forEach(DiagnosticWriter::close); } - extractXml(); - } finally { - shutdownThreadPool(); - } - if (!hasSeenCode()) { - if (seenFiles) { - warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors."); - } else { - warn("No JavaScript or TypeScript code found."); + + if (!hasSeenCode()) { + if (seenFiles) { + warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors."); + } else { + warn("No JavaScript or TypeScript code found."); + } + // ensuring that the finalize steps detects that no code was seen. 
+ Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src"); + // check that the srcFolder is empty + if (Files.list(srcFolder).count() == 0) { + // Non-recursive delete because "src/" should be empty. + FileUtil8.delete(srcFolder); + } + return 0; } - // ensuring that the finalize steps detects that no code was seen. - Path srcFolder = Paths.get(EnvironmentVariables.getWipDatabase(), "src"); - // check that the srcFolder is empty - if (Files.list(srcFolder).count() == 0) { - // Non-recursive delete because "src/" should be empty. - FileUtil8.delete(srcFolder); - } - return 0; - } return 0; } + /** + * A kind of error that can happen during extraction of JavaScript or TypeScript + * code. + * For use with the {@link #writeDiagnostics(String, JSDiagnosticKind)} method. + */ + public static enum JSDiagnosticKind { + PARSE_ERROR("parse-error", "Parse error", DiagnosticLevel.Warning), + INTERNAL_ERROR("internal-error", "Internal error", DiagnosticLevel.Debug); + + private final String id; + private final String name; + private final DiagnosticLevel level; + + private JSDiagnosticKind(String id, String name, DiagnosticLevel level) { + this.id = id; + this.name = name; + this.level = level; + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + + public DiagnosticLevel getLevel() { + return level; + } + } + + private AtomicInteger diagnosticCount = new AtomicInteger(0); + private List diagnosticsToClose = Collections.synchronizedList(new ArrayList<>()); + private ThreadLocal diagnostics = new ThreadLocal(){ + @Override protected DiagnosticWriter initialValue() { + DiagnosticWriter result = initDiagnosticsWriter(diagnosticCount.incrementAndGet()); + diagnosticsToClose.add(result); + return result; + } + }; + + /** + * Persist a diagnostic message to a file in the diagnostics directory. 
+ * See {@link JSDiagnosticKind} for the kinds of errors that can be reported, + * and see + * {@link DiagnosticWriter} for more details. + */ + public void writeDiagnostics(String message, JSDiagnosticKind error) throws IOException { + if (diagnostics.get() == null) { + warn("No diagnostics directory, so not writing diagnostic: " + message); + return; + } + + // DiagnosticLevel level, String extractorName, String sourceId, String sourceName, String markdown + diagnostics.get().writeMarkdown(error.getLevel(), "javascript", "javascript/" + error.getId(), error.getName(), + message); + } + + private DiagnosticWriter initDiagnosticsWriter(int count) { + String diagnosticsDir = System.getenv("CODEQL_EXTRACTOR_JAVASCRIPT_DIAGNOSTIC_DIR"); + + if (diagnosticsDir != null) { + File diagnosticsDirFile = new File(diagnosticsDir); + if (!diagnosticsDirFile.isDirectory()) { + warn("Diagnostics directory " + diagnosticsDir + " does not exist"); + } else { + File diagnosticsFile = new File(diagnosticsDirFile, "autobuilder-" + count + ".jsonl"); + try { + return new DiagnosticWriter(diagnosticsFile); + } catch (FileNotFoundException e) { + warn("Failed to open diagnostics file " + diagnosticsFile); + } + } + } + return null; + } + private void startThreadPool() { int defaultNumThreads = 1; int numThreads = Env.systemEnv().getInt("LGTM_THREADS", defaultNumThreads); @@ -1113,13 +1214,26 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set try { long start = logBeginProcess("Extracting " + file); - Integer loc = extractor.extract(f, state); - if (!extractor.getConfig().isExterns() && (loc == null || loc != 0)) seenCode = true; + ParseResultInfo loc = extractor.extract(f, state); + if (!extractor.getConfig().isExterns() && (loc == null || loc.getLinesOfCode() != 0)) seenCode = true; if (!extractor.getConfig().isExterns()) seenFiles = true; + for (ParseError err : loc.getParseErrors()) { + String msg = "A parse error occurred: " + err.getMessage() + ". 
Check the syntax of the file. If the file is invalid, correct the error or exclude the file from analysis."; + writeDiagnostics(msg, JSDiagnosticKind.PARSE_ERROR); + } logEndProcess(start, "Done extracting " + file); + } catch (OutOfMemoryError oom) { + System.err.println("Out of memory while extracting " + file + "."); + oom.printStackTrace(System.err); + System.exit(137); // caught by the CodeQL CLI } catch (Throwable t) { System.err.println("Exception while extracting " + file + "."); t.printStackTrace(System.err); + try { + writeDiagnostics("Internal error: " + t, JSDiagnosticKind.INTERNAL_ERROR); + } catch (IOException ignored) { + // ignore - we are already crashing + } System.exit(1); } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 9c880f7490f..5ac4ac5ea44 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -5,7 +5,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.LinkedHashSet; @@ -434,7 +433,7 @@ public class FileExtractor { } /** @return the number of lines of code extracted, or {@code null} if the file was cached */ - public Integer extract(File f, ExtractorState state) throws IOException { + public ParseResultInfo extract(File f, ExtractorState state) throws IOException { FileSnippet snippet = state.getSnippets().get(f.toPath()); if (snippet != null) { return this.extractSnippet(f.toPath(), snippet, state); @@ -461,7 +460,7 @@ public class FileExtractor { *

A trap file will be derived from the snippet file, but its file label, source locations, and * source archive entry are based on the original file. */ - private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException { + private ParseResultInfo extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException { TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(file.toFile()); File originalFile = origin.getOriginalFile().toFile(); @@ -495,7 +494,7 @@ public class FileExtractor { *

Also note that we support extraction with TRAP writer factories that are not file-backed; * obviously, no caching is done in that scenario. */ - private Integer extractContents( + private ParseResultInfo extractContents( File extractedFile, Label fileLabel, String source, LocationManager locationManager, ExtractorState state) throws IOException { ExtractionMetrics metrics = new ExtractionMetrics(); @@ -545,7 +544,7 @@ public class FileExtractor { TextualExtractor textualExtractor = new TextualExtractor( trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile); - LoCInfo loc = extractor.extract(textualExtractor); + ParseResultInfo loc = extractor.extract(textualExtractor); int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines(); int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments(); trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments); @@ -553,7 +552,7 @@ public class FileExtractor { metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents); metrics.writeTimingsToTrap(trapwriter); successful = true; - return linesOfCode; + return loc; } finally { if (!successful && trapwriter instanceof CachingTrapWriter) ((CachingTrapWriter) trapwriter).discard(); diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java index dfce800af76..83fd3236b2e 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java @@ -3,6 +3,7 @@ package com.semmle.js.extractor; import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.util.Collections; import java.util.List; import java.util.function.Supplier; import java.util.regex.Matcher; @@ -29,7 +30,7 @@ import net.htmlparser.jericho.Source; /** Extractor for handling HTML and XHTML files. 
*/ public class HTMLExtractor implements IExtractor { - private LoCInfo locInfo = new LoCInfo(0, 0); + private ParseResultInfo locInfo = new ParseResultInfo(0, 0, Collections.emptyList()); private class JavaScriptHTMLElementHandler implements HtmlPopulator.ElementHandler { private final ScopeManager scopeManager; @@ -212,11 +213,11 @@ public class HTMLExtractor implements IExtractor { } @Override - public LoCInfo extract(TextualExtractor textualExtractor) throws IOException { + public ParseResultInfo extract(TextualExtractor textualExtractor) throws IOException { return extractEx(textualExtractor).snd(); } - public Pair, LoCInfo> extractEx(TextualExtractor textualExtractor) { + public Pair, ParseResultInfo> extractEx(TextualExtractor textualExtractor) { // Angular templates contain attribute names that are not valid HTML/XML, such // as [foo], (foo), [(foo)], and *foo. // Allow a large number of errors in attribute names, so the Jericho parser does @@ -369,7 +370,7 @@ public class HTMLExtractor implements IExtractor { config.getExtractLines(), textualExtractor.getMetrics(), textualExtractor.getExtractedFile()); - Pair result = extractor.extract(tx, source, toplevelKind, scopeManager); + Pair result = extractor.extract(tx, source, toplevelKind, scopeManager); Label toplevelLabel = result.fst(); if (toplevelLabel != null) { // can be null when script ends up being parsed as JSON emitTopLevelXmlNodeBinding(parentLabel, toplevelLabel, trapWriter); diff --git a/javascript/extractor/src/com/semmle/js/extractor/IExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/IExtractor.java index ff81a6bafa4..baa67b9b078 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/IExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/IExtractor.java @@ -1,6 +1,7 @@ package com.semmle.js.extractor; import java.io.IOException; +import com.semmle.js.parser.ParseError; /** Generic extractor interface. 
*/ public interface IExtractor { @@ -9,5 +10,5 @@ public interface IExtractor { * TextualExtractor}, and return information about the number of lines of code and the number of * lines of comments extracted. */ - public LoCInfo extract(TextualExtractor textualExtractor) throws IOException; + public ParseResultInfo extract(TextualExtractor textualExtractor) throws IOException; } diff --git a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java index 07c62e1baa3..6b4b05fcf61 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java @@ -1,6 +1,7 @@ package com.semmle.js.extractor; import java.util.ArrayList; +import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -18,6 +19,7 @@ import com.semmle.util.exception.Exceptions; import com.semmle.util.exception.UserError; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; +import com.semmle.js.extractor.ParseResultInfo; /** * Extractor for populating JavaScript source code, including AST information, lexical information @@ -36,14 +38,14 @@ public class JSExtractor { private static final Pattern containsModuleIndicator = Pattern.compile("(?m)^([ \t]*)(import|export|goog\\.module)\\b"); - public Pair extract( + public Pair extract( TextualExtractor textualExtractor, String source, TopLevelKind toplevelKind, ScopeManager scopeManager) throws ParseError { // if the file starts with `{ "":` it won't parse as JavaScript; try parsing as JSON // instead if (FileExtractor.JSON_OBJECT_START.matcher(textualExtractor.getSource()).matches()) { try { - LoCInfo loc = + ParseResultInfo loc = new JSONExtractor(config.withTolerateParseErrors(false)).extract(textualExtractor); return Pair.make(null, loc); } catch (UserError ue) { @@ -82,7 +84,7 @@ public class JSExtractor { return 
SourceType.SCRIPT; } - public Pair extract( + public Pair extract( TextualExtractor textualExtractor, String source, TopLevelKind toplevelKind, @@ -97,7 +99,7 @@ public class JSExtractor { Platform platform = config.getPlatform(); Node ast = parserRes.getAST(); LexicalExtractor lexicalExtractor; - LoCInfo loc; + ParseResultInfo loc; if (ast != null) { platform = getPlatform(platform, ast); if (sourceType == SourceType.SCRIPT && platform == Platform.NODE) { @@ -124,9 +126,10 @@ public class JSExtractor { trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind.getValue()); locationManager.emitSnippetLocation(toplevelLabel, 1, 1, 1, 1); - loc = new LoCInfo(0, 0); + loc = new ParseResultInfo(0, 0, Collections.emptyList()); } + loc.addParseErrors(parserRes.getErrors()); for (ParseError parseError : parserRes.getErrors()) { if (!config.isTolerateParseErrors()) throw parseError; Label key = trapwriter.freshLabel(); diff --git a/javascript/extractor/src/com/semmle/js/extractor/JSONExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/JSONExtractor.java index fcec30a3434..c2cad57c3c9 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/JSONExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/JSONExtractor.java @@ -10,6 +10,8 @@ import com.semmle.js.parser.ParseError; import com.semmle.util.data.Pair; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; +import com.semmle.js.extractor.ParseResultInfo; +import java.util.Collections; import java.util.List; /** Extractor for populating JSON files. 
*/ @@ -31,12 +33,12 @@ public class JSONExtractor implements IExtractor { } @Override - public LoCInfo extract(final TextualExtractor textualExtractor) { + public ParseResultInfo extract(final TextualExtractor textualExtractor) { final TrapWriter trapwriter = textualExtractor.getTrapwriter(); final LocationManager locationManager = textualExtractor.getLocationManager(); try { String source = textualExtractor.getSource(); - Pair> res = new JSONParser().parseValue(source); + Pair> res = JSONParser.parseValue(source); JSONValue v = res.fst(); List recoverableErrors = res.snd(); if (!recoverableErrors.isEmpty() && !tolerateParseErrors) @@ -90,13 +92,14 @@ public class JSONExtractor implements IExtractor { for (ParseError e : recoverableErrors) populateError(textualExtractor, trapwriter, locationManager, e); + + return new ParseResultInfo(0, 0, recoverableErrors); } catch (ParseError e) { if (!this.tolerateParseErrors) throw e.asUserError(); populateError(textualExtractor, trapwriter, locationManager, e); + return new ParseResultInfo(0, 0, Collections.emptyList()); } - - return new LoCInfo(0, 0); } private void populateError( diff --git a/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java index 4e40160b1e7..3ad52ead5ad 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/LexicalExtractor.java @@ -1,5 +1,6 @@ package com.semmle.js.extractor; +import java.util.Collections; import java.util.List; import com.semmle.js.ast.Comment; @@ -50,10 +51,10 @@ public class LexicalExtractor { return textualExtractor.getMetrics(); } - public LoCInfo extractLines(String src, Label toplevelKey) { + public ParseResultInfo extractLines(String src, Label toplevelKey) { textualExtractor.getMetrics().startPhase(ExtractionPhase.LexicalExtractor_extractLines); Position end = textualExtractor.extractLines(src, 
toplevelKey); - LoCInfo info = emitNumlines(toplevelKey, new Position(1, 0, 0), end); + ParseResultInfo info = emitNumlines(toplevelKey, new Position(1, 0, 0), end); textualExtractor.getMetrics().stopPhase(ExtractionPhase.LexicalExtractor_extractLines); return info; } @@ -65,7 +66,7 @@ public class LexicalExtractor { * @param start the start position of the node * @param end the end position of the node */ - public LoCInfo emitNumlines(Label key, Position start, Position end) { + public ParseResultInfo emitNumlines(Label key, Position start, Position end) { int num_code = 0, num_comment = 0, num_lines = end.getLine() - start.getLine() + 1; if (tokens != null && comments != null) { @@ -104,7 +105,7 @@ public class LexicalExtractor { } trapwriter.addTuple("numlines", key, num_lines, num_code, num_comment); - return new LoCInfo(num_code, num_comment); + return new ParseResultInfo(num_code, num_comment, Collections.emptyList()); } private int findNode(List ts, Position start) { diff --git a/javascript/extractor/src/com/semmle/js/extractor/LoCInfo.java b/javascript/extractor/src/com/semmle/js/extractor/LoCInfo.java deleted file mode 100644 index 56e9b36d522..00000000000 --- a/javascript/extractor/src/com/semmle/js/extractor/LoCInfo.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.semmle.js.extractor; - -/** - * Utility class for representing LoC information; really just a glorified - * Pair<Integer, Integer>. 
- */ -public class LoCInfo { - private int linesOfCode, linesOfComments; - - public LoCInfo(int linesOfCode, int linesOfComments) { - this.linesOfCode = linesOfCode; - this.linesOfComments = linesOfComments; - } - - public void add(LoCInfo that) { - this.linesOfCode += that.linesOfCode; - this.linesOfComments += that.linesOfComments; - } - - public int getLinesOfCode() { - return linesOfCode; - } - - public int getLinesOfComments() { - return linesOfComments; - } -} diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index a90711545a5..ef712e171ce 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -41,7 +41,7 @@ public class Main { * A version identifier that should be updated every time the extractor changes in such a way that * it may produce different tuples for the same file under the same {@link ExtractorConfig}. */ - public static final String EXTRACTOR_VERSION = "2023-02-15"; + public static final String EXTRACTOR_VERSION = "2023-03-03"; public static final Pattern NEWLINE = Pattern.compile("\n"); diff --git a/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java b/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java new file mode 100644 index 00000000000..6a1b14447ce --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java @@ -0,0 +1,44 @@ +package com.semmle.js.extractor; +import com.semmle.js.parser.ParseError; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility class for representing LoC information and parse errors from running a parser. + * Just a glorified 3-tuple for lines of code, lines of comments, and parse errors. 
+ */ +public class ParseResultInfo { + private int linesOfCode, linesOfComments; + private List parseErrors; + + public ParseResultInfo(int linesOfCode, int linesOfComments, List parseErrors) { + this.linesOfCode = linesOfCode; + this.linesOfComments = linesOfComments; + this.parseErrors = new ArrayList<>(parseErrors); + } + + public void add(ParseResultInfo that) { + this.linesOfCode += that.linesOfCode; + this.linesOfComments += that.linesOfComments; + } + + public void addParseError(ParseError err) { + this.parseErrors.add(err); + } + + public void addParseErrors(List errs) { + this.parseErrors.addAll(errs); + } + + public int getLinesOfCode() { + return linesOfCode; + } + + public int getLinesOfComments() { + return linesOfComments; + } + + public List getParseErrors() { + return parseErrors; + } +} diff --git a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java index b4b47a786f9..7c539d70e63 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java @@ -39,7 +39,7 @@ public class ScriptExtractor implements IExtractor { } @Override - public LoCInfo extract(TextualExtractor textualExtractor) { + public ParseResultInfo extract(TextualExtractor textualExtractor) { LocationManager locationManager = textualExtractor.getLocationManager(); String source = textualExtractor.getSource(); String shebangLine = null, shebangLineTerm = null; @@ -79,9 +79,9 @@ public class ScriptExtractor implements IExtractor { ScopeManager scopeManager = new ScopeManager(textualExtractor.getTrapwriter(), config.getEcmaVersion(), ScopeManager.FileKind.PLAIN); Label toplevelLabel = null; - LoCInfo loc; + ParseResultInfo loc; try { - Pair res = + Pair res = new JSExtractor(config).extract(textualExtractor, source, TopLevelKind.SCRIPT, scopeManager); toplevelLabel = res.fst(); loc = res.snd(); diff --git 
a/javascript/extractor/src/com/semmle/js/extractor/TypeScriptExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/TypeScriptExtractor.java index 623c6ec7fc8..f8ef8af2642 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/TypeScriptExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/TypeScriptExtractor.java @@ -17,7 +17,7 @@ public class TypeScriptExtractor implements IExtractor { } @Override - public LoCInfo extract(TextualExtractor textualExtractor) { + public ParseResultInfo extract(TextualExtractor textualExtractor) { LocationManager locationManager = textualExtractor.getLocationManager(); String source = textualExtractor.getSource(); File sourceFile = textualExtractor.getExtractedFile(); diff --git a/javascript/extractor/src/com/semmle/js/extractor/YAMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/YAMLExtractor.java index 2ae0448db4a..2f98edfe964 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/YAMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/YAMLExtractor.java @@ -7,6 +7,9 @@ import com.semmle.util.locations.LineTable; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; import com.semmle.util.trap.TrapWriter.Table; + +import java.util.Collections; + import org.yaml.snakeyaml.composer.Composer; import org.yaml.snakeyaml.error.Mark; import org.yaml.snakeyaml.error.MarkedYAMLException; @@ -104,7 +107,7 @@ public class YAMLExtractor implements IExtractor { } @Override - public LoCInfo extract(TextualExtractor textualExtractor) { + public ParseResultInfo extract(TextualExtractor textualExtractor) { this.textualExtractor = textualExtractor; locationManager = textualExtractor.getLocationManager(); trapWriter = textualExtractor.getTrapwriter(); @@ -137,7 +140,7 @@ public class YAMLExtractor implements IExtractor { // ReaderExceptions } - return new LoCInfo(0, 0); + return new ParseResultInfo(0, 0, Collections.emptyList()); } /** Check 
whether the parser has encountered the end of the YAML input stream. */ diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/NodeJSDetectorTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/NodeJSDetectorTests.java index 14d5b323e7c..fe709033cbe 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/test/NodeJSDetectorTests.java +++ b/javascript/extractor/src/com/semmle/js/extractor/test/NodeJSDetectorTests.java @@ -1,5 +1,7 @@ package com.semmle.js.extractor.test; +import java.io.File; + import com.semmle.js.ast.Node; import com.semmle.js.extractor.ExtractionMetrics; import com.semmle.js.extractor.ExtractorConfig; diff --git a/javascript/extractor/src/com/semmle/js/parser/JSONParser.java b/javascript/extractor/src/com/semmle/js/parser/JSONParser.java index 8ff774ec3f7..2e7f7a96b5f 100644 --- a/javascript/extractor/src/com/semmle/js/parser/JSONParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/JSONParser.java @@ -34,21 +34,25 @@ public class JSONParser { private String src; private List recoverableErrors; - public Pair> parseValue(String json) throws ParseError { - line = 1; - column = 0; - offset = 0; - recoverableErrors = new ArrayList(); + public static Pair> parseValue(String json) throws ParseError { + JSONParser parser = new JSONParser(json); + + JSONValue value = parser.readValue(); + parser.consumeWhitespace(); + if (parser.offset < parser.length) parser.raise("Expected end of input"); + + return Pair.make(value, parser.recoverableErrors); + } + + private JSONParser(String json) throws ParseError { + this.line = 1; + this.column = 0; + this.offset = 0; + this.recoverableErrors = new ArrayList(); if (json == null) raise("Input string may not be null"); - length = json.length(); - src = json; - - JSONValue value = readValue(); - consumeWhitespace(); - if (offset < length) raise("Expected end of input"); - - return Pair.make(value, recoverableErrors); + this.length = json.length(); + this.src = json; } private 
T raise(String msg) throws ParseError { @@ -385,7 +389,6 @@ public class JSONParser { } public static void main(String[] args) throws ParseError { - JSONParser parser = new JSONParser(); - System.out.println(parser.parseValue(new WholeIO().strictread(new File(args[0]))).fst()); + System.out.println(JSONParser.parseValue(new WholeIO().strictread(new File(args[0]))).fst()); } } diff --git a/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptParser.java b/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptParser.java index 70a06656df8..f526b4f1b1a 100644 --- a/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptParser.java +++ b/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptParser.java @@ -409,7 +409,8 @@ public class TypeScriptParser { exitCode = parserWrapperProcess.waitFor(); } if (exitCode != null && (exitCode == NODEJS_EXIT_CODE_FATAL_ERROR || exitCode == NODEJS_EXIT_CODE_SIG_ABORT)) { - return new ResourceError("The TypeScript parser wrapper crashed, possibly from running out of memory.", e); + // This is caught in the auto-builder and handled as an OOM. Check the handling there if the message is changed. + return new TypeScriptWrapperOOMError("The TypeScript parser wrapper crashed, possibly from running out of memory.", e); } if (exitCode != null) { return new CatastrophicError("The TypeScript parser wrapper crashed with exit code " + exitCode); diff --git a/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptWrapperOOMError.java b/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptWrapperOOMError.java new file mode 100644 index 00000000000..67f2d5a0763 --- /dev/null +++ b/javascript/extractor/src/com/semmle/ts/extractor/TypeScriptWrapperOOMError.java @@ -0,0 +1,9 @@ +package com.semmle.ts.extractor; + +import com.semmle.util.exception.ResourceError; + +public class TypeScriptWrapperOOMError extends ResourceError { + public TypeScriptWrapperOOMError(String message, Throwable throwable) { + super(message,throwable); + } +}