Files
codeql/javascript/extractor/src/com/semmle/js/parser/JSONParser.java
2023-03-02 14:54:54 +01:00

395 lines
11 KiB
Java

/*
* Based on org.mozilla.javascript.json.JsonParser from Rhino.
*
* Original licensing information:
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
package com.semmle.js.parser;
import com.semmle.js.ast.Position;
import com.semmle.js.ast.SourceLocation;
import com.semmle.js.ast.json.JSONArray;
import com.semmle.js.ast.json.JSONLiteral;
import com.semmle.js.ast.json.JSONObject;
import com.semmle.js.ast.json.JSONValue;
import com.semmle.util.data.Pair;
import com.semmle.util.exception.Exceptions;
import com.semmle.util.io.WholeIO;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class JSONParser {
public static final Pattern JSON_LINE_ENDING = Pattern.compile("(\r\n|\n|\r)");
private int line, column;
private int offset;
private int length;
private String src;
private List<ParseError> recoverableErrors;
public static Pair<JSONValue, List<ParseError>> parseValue(String json) throws ParseError {
JSONParser parser = new JSONParser(json);
JSONValue value = parser.readValue();
parser.consumeWhitespace();
if (parser.offset < parser.length) parser.raise("Expected end of input");
return Pair.make(value, parser.recoverableErrors);
}
private JSONParser(String json) throws ParseError {
this.line = 1;
this.column = 0;
this.offset = 0;
this.recoverableErrors = new ArrayList<ParseError>();
if (json == null) raise("Input string may not be null");
this.length = json.length();
this.src = json;
}
private <T> T raise(String msg) throws ParseError {
throw new ParseError(msg, line, column - 1, offset);
}
private char next() throws ParseError {
if (offset >= length) raise("Unexpected end of input");
char c = src.charAt(offset++);
if (c == '\r') {
if (offset < length && src.charAt(offset) == '\n') {
++column;
} else {
++line;
column = 0;
}
} else if (c == '\n') {
++line;
column = 0;
} else {
++column;
}
return c;
}
private char peek() {
return offset < length ? src.charAt(offset) : (char) -1;
}
private JSONValue readValue() throws ParseError {
consumeWhitespace();
while (offset < length) {
int startoff = offset;
Position start = getCurPos();
char c = next();
switch (c) {
case '{':
return readObject(startoff, start);
case '[':
return readArray(startoff, start);
case 't':
consume("rue");
return mkLiteral(startoff, start, true);
case 'f':
consume("alse");
return mkLiteral(startoff, start, false);
case '"':
return mkLiteral(startoff, start, readString());
case 'n':
consume("ull");
return mkLiteral(startoff, start, null);
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '0':
case '-':
return mkLiteral(startoff, start, readNumber());
default:
raise("Unexpected token");
}
}
return raise("Unexpected token");
}
private Position getCurPos() {
return new Position(line, column, offset);
}
private JSONLiteral mkLiteral(int startoff, Position start, Object value) {
int endoff = offset;
Position end = getCurPos();
return new JSONLiteral(new SourceLocation(src.substring(startoff, endoff), start, end), value);
}
private JSONObject readObject(int startoff, Position start) throws ParseError {
List<Pair<String, JSONValue>> properties = new ArrayList<Pair<String, JSONValue>>();
int endoff;
Position end;
consumeWhitespace();
// handle empty object literal case early
out:
if (peek() == '}') {
next();
} else {
String id;
JSONValue value;
boolean needsComma = false;
while (offset < length) {
char c = next();
switch (c) {
case '}':
if (!needsComma) {
raise("Trailing commas are not allowed in JSON.");
}
break out;
case ',':
if (!needsComma) {
raise("Unexpected comma in object literal");
}
needsComma = false;
break;
case '"':
if (needsComma) {
raise("Missing comma in object literal");
}
id = readString();
consumeWhitespace();
consume(':');
value = readValue();
properties.add(Pair.make(id, value));
needsComma = true;
break;
default:
raise("JSON object property keys must be string literals.");
}
consumeWhitespace();
}
++column;
raise("Unexpected token");
}
endoff = offset;
end = getCurPos();
return new JSONObject(
new SourceLocation(src.substring(startoff, endoff), start, end), properties);
}
private JSONArray readArray(int startoff, Position start) throws ParseError {
List<JSONValue> elements = new ArrayList<JSONValue>();
int endoff;
Position end;
consumeWhitespace();
// handle empty array literal case early
out:
if (peek() == ']') {
next();
} else {
boolean needsComma = false;
while (offset < length) {
char c = peek();
switch (c) {
case ']':
if (!needsComma) {
raise("Omitted elements are not allowed in JSON.");
}
next();
break out;
case ',':
if (!needsComma) {
next();
raise("Omitted elements are not allowed in JSON.");
}
needsComma = false;
next();
break;
default:
if (needsComma) {
raise("Missing comma in array literal");
}
elements.add(readValue());
needsComma = true;
}
consumeWhitespace();
}
raise("Unterminated array literal");
}
endoff = offset;
end = getCurPos();
return new JSONArray(new SourceLocation(src.substring(startoff, endoff), start, end), elements);
}
private static final String ESCAPES = "\"\"\\\\//b\bn\nf\fr\rt\t";
private String readString() throws ParseError {
/*
* Optimization: if the source contains no escaped characters, create the
* string directly from the source text.
*/
int stringStart = offset;
while (offset < length) {
char c = next();
if (c <= '\u001F') {
raise("String contains control character");
} else if (c == '\\') {
break;
} else if (c == '"') {
return src.substring(stringStart, offset - 1);
}
}
/*
* Slow case: string contains escaped characters. Copy a maximal sequence
* of unescaped characters into a temporary buffer, then an escaped
* character, and repeat until the entire string is consumed.
*/
StringBuilder b = new StringBuilder();
while (offset < length) {
b.append(src, stringStart, offset - 1);
char c = next();
int i = ESCAPES.indexOf(c);
if (i >= 0) {
b.append(ESCAPES.charAt(i + 1));
} else if (c == 'u') {
try {
String esc = src.substring(offset, offset + 4);
int code = Integer.parseInt(esc, 16);
if (code < 0) throw new NumberFormatException();
b.append((char) code);
offset += 4;
column += 4;
} catch (NumberFormatException nfe) {
raise("Invalid character escape");
} catch (IndexOutOfBoundsException ioobe) {
Exceptions.ignore(ioobe, "Raise semantically more meaningful exception instead.");
raise("Invalid character escape");
}
} else {
raise("Unexpected character in string literal");
}
stringStart = offset;
while (offset < length) {
c = next();
if (c <= '\u001F') {
raise("String contains control character");
} else if (c == '\\') {
break;
} else if (c == '"') {
b.append(src, stringStart, offset - 1);
return b.toString();
}
}
}
return raise("Unterminated string literal");
}
private static final Pattern NUMBER =
Pattern.compile("-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?");
private Number readNumber() throws ParseError {
Matcher m = NUMBER.matcher(src);
if (m.find(offset - 1)) {
try {
String matched = m.group();
// -1 because offset is already one past the start of the number
int l = matched.length() - 1;
Double d = Double.valueOf(matched);
offset += l;
column += l;
if (d.longValue() == d) return d.longValue();
return d;
} catch (NumberFormatException nfe) {
Exceptions.ignore(nfe, "A corresponding exception is raised below.");
}
}
return raise("Invalid number literal");
}
private void consumeWhitespace() throws ParseError {
while (offset < length) {
char c = peek();
switch (c) {
case ' ':
case '\t':
case '\r':
case '\n':
next();
break;
case '/':
if (offset + 1 < length) {
switch (src.charAt(offset + 1)) {
case '*':
skipBlockComment();
continue;
case '/':
skipLineComment();
continue;
}
}
default:
return;
}
}
}
/** Skips the line comment starting at the current position and records a recoverable error. */
private void skipLineComment() throws ParseError {
Position pos = new Position(line, column, offset);
char c;
next();
next();
while ((c = peek()) != '\r' && c != '\n' && c != -1) next();
recoverableErrors.add(new ParseError("Comments are not legal in JSON.", pos));
}
/** Skips the block comment starting at the current position and records a recoverable error. */
private void skipBlockComment() throws ParseError {
Position pos = new Position(line, column, offset);
char c;
next();
next();
do {
c = peek();
if (c < 0) raise("Unterminated comment.");
next();
if (c == '*' && peek() == '/') {
next();
break;
}
} while (true);
recoverableErrors.add(new ParseError("Comments are not legal in JSON.", pos));
}
private void consume(char token) throws ParseError {
char c = next();
if (c != token) raise("Expected " + token + " found " + c);
}
private void consume(String chars) throws ParseError {
for (int i = 0; i < chars.length(); ++i) consume(chars.charAt(i));
}
public static void main(String[] args) throws ParseError {
System.out.println(JSONParser.parseValue(new WholeIO().strictread(new File(args[0]))).fst());
}
}