/* * Based on org.mozilla.javascript.json.JsonParser from Rhino. * * Original licensing information: * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package com.semmle.js.parser; import com.semmle.js.ast.Position; import com.semmle.js.ast.SourceLocation; import com.semmle.js.ast.json.JSONArray; import com.semmle.js.ast.json.JSONLiteral; import com.semmle.js.ast.json.JSONObject; import com.semmle.js.ast.json.JSONValue; import com.semmle.util.data.Pair; import com.semmle.util.exception.Exceptions; import com.semmle.util.io.WholeIO; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class JSONParser { public static final Pattern JSON_LINE_ENDING = Pattern.compile("(\r\n|\n|\r)"); private int line, column; private int offset; private int length; private String src; private List recoverableErrors; public static Pair> parseValue(String json) throws ParseError { JSONParser parser = new JSONParser(json); JSONValue value = parser.readValue(); parser.consumeWhitespace(); if (parser.offset < parser.length) parser.raise("Expected end of input"); return Pair.make(value, parser.recoverableErrors); } private JSONParser(String json) throws ParseError { this.line = 1; this.column = 0; this.offset = 0; this.recoverableErrors = new ArrayList(); if (json == null) raise("Input string may not be null"); this.length = json.length(); this.src = json; } private T raise(String msg) throws ParseError { throw new ParseError(msg, line, column - 1, offset); } private char next() throws ParseError { if (offset >= length) raise("Unexpected end of input"); char c = src.charAt(offset++); if (c == '\r') { if (offset < length && src.charAt(offset) == '\n') { ++column; } else { ++line; column = 0; } } else if (c == '\n') { ++line; column = 0; } else { ++column; } return c; } private char peek() { return offset < length ? src.charAt(offset) : (char) -1; } private JSONValue readValue() throws ParseError { consumeWhitespace(); while (offset < length) { int startoff = offset; Position start = getCurPos(); char c = next(); switch (c) { case '{': return readObject(startoff, start); case '[': return readArray(startoff, start); case 't': consume("rue"); return mkLiteral(startoff, start, true); case 'f': consume("alse"); return mkLiteral(startoff, start, false); case '"': return mkLiteral(startoff, start, readString()); case 'n': consume("ull"); return mkLiteral(startoff, start, null); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': case '-': return mkLiteral(startoff, start, readNumber()); default: raise("Unexpected token"); } } return raise("Unexpected token"); } private Position getCurPos() { return new Position(line, column, offset); } private JSONLiteral mkLiteral(int startoff, Position start, Object value) { int endoff = offset; Position end = getCurPos(); return new JSONLiteral(new SourceLocation(src.substring(startoff, endoff), start, end), value); } private JSONObject readObject(int startoff, Position start) throws ParseError { List> properties = new ArrayList>(); int endoff; Position end; consumeWhitespace(); // handle empty object literal case early out: if (peek() == '}') { next(); } else { String id; JSONValue value; boolean needsComma = false; while (offset < length) { char c = next(); switch (c) { case '}': if (!needsComma) { raise("Trailing commas are not allowed in JSON."); } break out; case ',': if (!needsComma) { raise("Unexpected comma in object literal"); } needsComma = false; break; case '"': if (needsComma) { raise("Missing comma in object literal"); } id = readString(); consumeWhitespace(); consume(':'); value = readValue(); properties.add(Pair.make(id, value)); needsComma = true; break; default: raise("JSON object property keys must be string literals."); } consumeWhitespace(); } ++column; raise("Unexpected token"); } endoff = offset; end = getCurPos(); return new JSONObject( new SourceLocation(src.substring(startoff, endoff), start, end), properties); } private JSONArray readArray(int startoff, Position start) throws ParseError { List elements = new ArrayList(); int endoff; Position end; consumeWhitespace(); // handle empty array literal case early out: if (peek() == ']') { next(); } else { boolean needsComma = false; while (offset < length) { char c = peek(); switch (c) { case ']': if (!needsComma) { raise("Omitted elements are not allowed in JSON."); } next(); break out; case ',': if (!needsComma) { next(); raise("Omitted elements are not allowed in JSON."); } needsComma = false; next(); break; default: if (needsComma) { raise("Missing comma in array literal"); } elements.add(readValue()); needsComma = true; } consumeWhitespace(); } raise("Unterminated array literal"); } endoff = offset; end = getCurPos(); return new JSONArray(new SourceLocation(src.substring(startoff, endoff), start, end), elements); } private static final String ESCAPES = "\"\"\\\\//b\bn\nf\fr\rt\t"; private String readString() throws ParseError { /* * Optimization: if the source contains no escaped characters, create the * string directly from the source text. */ int stringStart = offset; while (offset < length) { char c = next(); if (c <= '\u001F') { raise("String contains control character"); } else if (c == '\\') { break; } else if (c == '"') { return src.substring(stringStart, offset - 1); } } /* * Slow case: string contains escaped characters. Copy a maximal sequence * of unescaped characters into a temporary buffer, then an escaped * character, and repeat until the entire string is consumed. */ StringBuilder b = new StringBuilder(); while (offset < length) { b.append(src, stringStart, offset - 1); char c = next(); int i = ESCAPES.indexOf(c); if (i >= 0) { b.append(ESCAPES.charAt(i + 1)); } else if (c == 'u') { try { String esc = src.substring(offset, offset + 4); int code = Integer.parseInt(esc, 16); if (code < 0) throw new NumberFormatException(); b.append((char) code); offset += 4; column += 4; } catch (NumberFormatException nfe) { raise("Invalid character escape"); } catch (IndexOutOfBoundsException ioobe) { Exceptions.ignore(ioobe, "Raise semantically more meaningful exception instead."); raise("Invalid character escape"); } } else { raise("Unexpected character in string literal"); } stringStart = offset; while (offset < length) { c = next(); if (c <= '\u001F') { raise("String contains control character"); } else if (c == '\\') { break; } else if (c == '"') { b.append(src, stringStart, offset - 1); return b.toString(); } } } return raise("Unterminated string literal"); } private static final Pattern NUMBER = Pattern.compile("-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?"); private Number readNumber() throws ParseError { Matcher m = NUMBER.matcher(src); if (m.find(offset - 1)) { try { String matched = m.group(); // -1 because offset is already one past the start of the number int l = matched.length() - 1; Double d = Double.valueOf(matched); offset += l; column += l; if (d.longValue() == d) return d.longValue(); return d; } catch (NumberFormatException nfe) { Exceptions.ignore(nfe, "A corresponding exception is raised below."); } } return raise("Invalid number literal"); } private void consumeWhitespace() throws ParseError { while (offset < length) { char c = peek(); switch (c) { case ' ': case '\t': case '\r': case '\n': next(); break; case '/': if (offset + 1 < length) { switch (src.charAt(offset + 1)) { case '*': skipBlockComment(); continue; case '/': skipLineComment(); continue; } } default: return; } } } /** Skips the line comment starting at the current position and records a recoverable error. */ private void skipLineComment() throws ParseError { Position pos = new Position(line, column, offset); char c; next(); next(); while ((c = peek()) != '\r' && c != '\n' && c != -1) next(); recoverableErrors.add(new ParseError("Comments are not legal in JSON.", pos)); } /** Skips the block comment starting at the current position and records a recoverable error. */ private void skipBlockComment() throws ParseError { Position pos = new Position(line, column, offset); char c; next(); next(); do { c = peek(); if (c < 0) raise("Unterminated comment."); next(); if (c == '*' && peek() == '/') { next(); break; } } while (true); recoverableErrors.add(new ParseError("Comments are not legal in JSON.", pos)); } private void consume(char token) throws ParseError { char c = next(); if (c != token) raise("Expected " + token + " found " + c); } private void consume(String chars) throws ParseError { for (int i = 0; i < chars.length(); ++i) consume(chars.charAt(i)); } public static void main(String[] args) throws ParseError { System.out.println(JSONParser.parseValue(new WholeIO().strictread(new File(args[0]))).fst()); } }