mirror of
https://github.com/github/codeql.git
synced 2026-04-30 03:05:15 +02:00
JavaScript: Port regular expression parser to Java.
This commit is contained in:
@@ -7,6 +7,20 @@ import com.semmle.js.ast.SourceLocation;
|
||||
* An error encountered while parsing a regular expression.
|
||||
*/
|
||||
public class Error extends SourceElement {
|
||||
public static final int UNEXPECTED_EOS = 0;
|
||||
public static final int UNEXPECTED_CHARACTER = 1;
|
||||
public static final int EXPECTED_DIGIT = 2;
|
||||
public static final int EXPECTED_HEX_DIGIT = 3;
|
||||
public static final int EXPECTED_CONTROL_LETTER = 4;
|
||||
public static final int EXPECTED_CLOSING_PAREN = 5;
|
||||
public static final int EXPECTED_CLOSING_BRACE = 6;
|
||||
public static final int EXPECTED_EOS = 7;
|
||||
public static final int OCTAL_ESCAPE = 8;
|
||||
public static final int INVALID_BACKREF = 9;
|
||||
public static final int EXPECTED_RBRACKET = 10;
|
||||
public static final int EXPECTED_IDENTIFIER = 11;
|
||||
public static final int EXPECTED_CLOSING_ANGLE = 12;
|
||||
|
||||
private final int code;
|
||||
|
||||
public Error(SourceLocation loc, Number code) {
|
||||
|
||||
@@ -1,17 +1,9 @@
|
||||
package com.semmle.js.parser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.mozilla.javascript.Function;
|
||||
import org.mozilla.javascript.NativeArray;
|
||||
import org.mozilla.javascript.NativeObject;
|
||||
import org.mozilla.javascript.ScriptableObject;
|
||||
|
||||
import com.semmle.js.ast.Position;
|
||||
import com.semmle.js.ast.SourceLocation;
|
||||
import com.semmle.js.ast.regexp.BackReference;
|
||||
import com.semmle.js.ast.regexp.Caret;
|
||||
@@ -47,54 +39,9 @@ import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
|
||||
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
|
||||
|
||||
/**
|
||||
* Wrapper for invoking esregex through Rhino.
|
||||
* A parser for ECMAScript 2018 regular expressions.
|
||||
*/
|
||||
public class RegExpParser extends ScriptLoader {
|
||||
/**
|
||||
* Specification for esregex AST types.
|
||||
*/
|
||||
private static final Map<Class<? extends RegExpTerm>, List<String>> spec = new LinkedHashMap<Class<? extends RegExpTerm>, List<String>>();
|
||||
static {
|
||||
spec.put(BackReference.class, Arrays.asList("value", "raw"));
|
||||
spec.put(Caret.class, Collections.<String>emptyList());
|
||||
spec.put(CharacterClass.class, Arrays.asList("elements", "inverted"));
|
||||
spec.put(CharacterClassEscape.class, Arrays.asList("class", "raw"));
|
||||
spec.put(CharacterClassRange.class, Arrays.asList("left", "right"));
|
||||
spec.put(Constant.class, Arrays.asList("value"));
|
||||
spec.put(ControlEscape.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(ControlLetter.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(DecimalEscape.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(Disjunction.class, Arrays.asList("disjuncts"));
|
||||
spec.put(Dollar.class, Collections.<String>emptyList());
|
||||
spec.put(Dot.class, Collections.<String>emptyList());
|
||||
spec.put(Group.class, Arrays.asList("capture", "number", "name", "operand"));
|
||||
spec.put(HexEscapeSequence.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(IdentityEscape.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(NamedBackReference.class, Arrays.asList("name", "raw"));
|
||||
spec.put(NonWordBoundary.class, Collections.<String>emptyList());
|
||||
spec.put(OctalEscape.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(Opt.class, Arrays.asList("operand", "greedy"));
|
||||
spec.put(Plus.class, Arrays.asList("operand", "greedy"));
|
||||
spec.put(Range.class, Arrays.asList("operand", "greedy", "lo", "hi"));
|
||||
spec.put(Sequence.class, Arrays.asList("elements"));
|
||||
spec.put(Star.class, Arrays.asList("operand", "greedy"));
|
||||
spec.put(UnicodeEscapeSequence.class, Arrays.asList("value", "codepoint", "raw"));
|
||||
spec.put(WordBoundary.class, Collections.<String>emptyList());
|
||||
spec.put(ZeroWidthNegativeLookahead.class, Arrays.asList("operand"));
|
||||
spec.put(ZeroWidthPositiveLookahead.class, Arrays.asList("operand"));
|
||||
spec.put(ZeroWidthNegativeLookbehind.class, Arrays.asList("operand"));
|
||||
spec.put(ZeroWidthPositiveLookbehind.class, Arrays.asList("operand"));
|
||||
spec.put(UnicodePropertyEscape.class, Arrays.asList("name", "value", "raw"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Specification for esregex parse errors.
|
||||
*/
|
||||
private static final Map<Class<? extends Error>, List<String>> errspec = new LinkedHashMap<Class<? extends Error>, List<String>>();
|
||||
static {
|
||||
errspec.put(Error.class, Arrays.asList("code"));
|
||||
}
|
||||
|
||||
public class RegExpParser {
|
||||
/**
|
||||
* The result of a parse.
|
||||
*/
|
||||
@@ -102,12 +49,12 @@ public class RegExpParser extends ScriptLoader {
|
||||
/**
|
||||
* The root of the parsed AST.
|
||||
*/
|
||||
private final RegExpTerm ast;
|
||||
public final RegExpTerm ast;
|
||||
|
||||
/**
|
||||
* A list of errors encountered during parsing.
|
||||
*/
|
||||
private final List<Error> errors;
|
||||
public final List<Error> errors;
|
||||
|
||||
public Result(RegExpTerm ast, List<Error> errors) {
|
||||
this.ast = ast;
|
||||
@@ -123,28 +70,450 @@ public class RegExpParser extends ScriptLoader {
|
||||
}
|
||||
}
|
||||
|
||||
public RegExpParser() {
|
||||
super("/regexparser.js");
|
||||
}
|
||||
private String src;
|
||||
private int pos;
|
||||
private List<Error> errors;
|
||||
private List<BackReference> backrefs;
|
||||
private int maxbackref;
|
||||
|
||||
/**
|
||||
* Parse the given string as a regular expression.
|
||||
*/
|
||||
public Result parse(String src) {
|
||||
Function ctor = (Function)readGlobal("RegExpParser");
|
||||
ScriptableObject parser = construct(ctor, src);
|
||||
NativeObject ast = (NativeObject)callMethod(parser, "Pattern");
|
||||
NativeArray errors = (NativeArray)readProperty(parser, "errors");
|
||||
JSObjectDecoder<RegExpTerm> decoder = new JSObjectDecoder<RegExpTerm>(src, this, "com.semmle.js.ast.regexp", spec);
|
||||
List<Error> errs = null;
|
||||
RegExpTerm term = null;
|
||||
try {
|
||||
term = decoder.decodeObject(ast);
|
||||
errs = new JSObjectDecoder<Error>(src, this, "com.semmle.js.ast.regexp", errspec).decodeObjects(errors);
|
||||
} catch (ParseError e) {
|
||||
errs = new ArrayList<Error>();
|
||||
errs.add(new Error(new SourceLocation("", e.getPosition(), e.getPosition()), 1));
|
||||
this.src = src;
|
||||
this.pos = 0;
|
||||
this.errors = new ArrayList<>();
|
||||
this.backrefs = new ArrayList<>();
|
||||
this.maxbackref = 0;
|
||||
RegExpTerm root = parsePattern();
|
||||
for (BackReference backref : backrefs)
|
||||
if (backref.getValue() > maxbackref)
|
||||
errors.add(new Error(backref.getLoc(), Error.INVALID_BACKREF));
|
||||
return new Result(root, errors);
|
||||
}
|
||||
|
||||
private static String fromCodePoint(int codepoint) {
|
||||
if (Character.isValidCodePoint(codepoint))
|
||||
return new String(Character.toChars(codepoint));
|
||||
// replacement character
|
||||
return "\ufffd";
|
||||
}
|
||||
|
||||
private Position pos() {
|
||||
return new Position(1, pos, pos);
|
||||
}
|
||||
|
||||
private void error(int code, int start, int end) {
|
||||
Position startPos, endPos;
|
||||
startPos = new Position(1, start, start);
|
||||
endPos = new Position(1, end, end);
|
||||
this.errors.add(new Error(new SourceLocation(inputSubstring(start, end), startPos, endPos), code));
|
||||
}
|
||||
|
||||
private void error(int code, int start) {
|
||||
error(code, start, start+1);
|
||||
}
|
||||
|
||||
private void error(int code) {
|
||||
error(code, this.pos);
|
||||
}
|
||||
|
||||
private boolean atEOS() {
|
||||
return pos >= src.length();
|
||||
}
|
||||
|
||||
private char peekChar(boolean opt) {
|
||||
if (this.atEOS()) {
|
||||
if (!opt)
|
||||
this.error(Error.UNEXPECTED_EOS);
|
||||
return '\0';
|
||||
} else {
|
||||
return this.src.charAt(this.pos);
|
||||
}
|
||||
return new Result(term, errs);
|
||||
}
|
||||
|
||||
private char nextChar() {
|
||||
char c = peekChar(false);
|
||||
if (this.pos < src.length())
|
||||
++this.pos;
|
||||
return c;
|
||||
}
|
||||
|
||||
private String readHexDigit() {
|
||||
char c = this.peekChar(false);
|
||||
if (c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F') {
|
||||
++this.pos;
|
||||
return String.valueOf(c);
|
||||
}
|
||||
if (c != '\0')
|
||||
this.error(Error.EXPECTED_HEX_DIGIT, this.pos);
|
||||
return "";
|
||||
}
|
||||
|
||||
private String readHexDigits(int n) {
|
||||
StringBuilder res = new StringBuilder();
|
||||
while (n-->0) {
|
||||
res.append(readHexDigit());
|
||||
}
|
||||
if (res.length() == 0)
|
||||
return "0";
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
private String readDigits(boolean opt) {
|
||||
StringBuilder res = new StringBuilder();
|
||||
for (char c=peekChar(true); c >= '0' && c <= '9'; nextChar(), c=peekChar(true))
|
||||
res.append(c);
|
||||
if (res.length() == 0 && !opt)
|
||||
this.error(Error.EXPECTED_DIGIT);
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
private Double toNumber(String s) {
|
||||
if (s.isEmpty())
|
||||
return 0.0;
|
||||
return Double.valueOf(s);
|
||||
}
|
||||
|
||||
private String readIdentifier() {
|
||||
StringBuilder res = new StringBuilder();
|
||||
for (char c=peekChar(true);
|
||||
c != '\0' && Character.isJavaIdentifierPart(c);
|
||||
nextChar(), c=peekChar(true))
|
||||
res.append(c);
|
||||
if (res.length() == 0)
|
||||
this.error(Error.EXPECTED_IDENTIFIER);
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
private void expectRParen() {
|
||||
if (!this.match(")"))
|
||||
this.error(Error.EXPECTED_CLOSING_PAREN, this.pos-1);
|
||||
}
|
||||
|
||||
private void expectRBrace() {
|
||||
if (!this.match("}"))
|
||||
this.error(Error.EXPECTED_CLOSING_BRACE, this.pos-1);
|
||||
}
|
||||
|
||||
private void expectRAngle() {
|
||||
if (!this.match(">"))
|
||||
this.error(Error.EXPECTED_CLOSING_ANGLE, this.pos-1);
|
||||
}
|
||||
|
||||
private boolean lookahead(String... arguments) {
|
||||
for (String prefix : arguments) {
|
||||
if (prefix == null) {
|
||||
if (atEOS())
|
||||
return true;
|
||||
} else if (inputSubstring(pos, pos+prefix.length()).equals(prefix)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean match(String... arguments) {
|
||||
for (String prefix : arguments) {
|
||||
if (this.lookahead(prefix)) {
|
||||
if (prefix == null)
|
||||
prefix = "";
|
||||
this.pos += prefix.length();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private RegExpTerm parsePattern() {
|
||||
RegExpTerm res = parseDisjunction();
|
||||
if (!this.atEOS())
|
||||
this.error(Error.EXPECTED_EOS);
|
||||
return res;
|
||||
}
|
||||
|
||||
protected String inputSubstring(int start, int end) {
|
||||
if (start >= src.length())
|
||||
return "";
|
||||
if (end > src.length())
|
||||
end = src.length();
|
||||
return src.substring(start, end);
|
||||
}
|
||||
|
||||
private <T extends RegExpTerm> T finishTerm(T term) {
|
||||
SourceLocation loc = term.getLoc();
|
||||
Position end = pos();
|
||||
loc.setSource(inputSubstring(loc.getStart().getOffset(), end.getOffset()));
|
||||
loc.setEnd(end);
|
||||
return term;
|
||||
}
|
||||
|
||||
private RegExpTerm parseDisjunction() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
List<RegExpTerm> disjuncts = new ArrayList<>();
|
||||
disjuncts.add(this.parseAlternative());
|
||||
while (this.match("|"))
|
||||
disjuncts.add(this.parseAlternative());
|
||||
if (disjuncts.size() == 1)
|
||||
return disjuncts.get(0);
|
||||
return this.finishTerm(new Disjunction(loc, disjuncts));
|
||||
}
|
||||
|
||||
private RegExpTerm parseAlternative() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
List<RegExpTerm> elements = new ArrayList<>();
|
||||
while (!this.lookahead(null, "|", ")"))
|
||||
elements.add(this.parseTerm());
|
||||
if (elements.size() == 1)
|
||||
return elements.get(0);
|
||||
return this.finishTerm(new Sequence(loc, elements));
|
||||
}
|
||||
|
||||
private RegExpTerm parseTerm() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
|
||||
if (this.match("^"))
|
||||
return this.finishTerm(new Caret(loc));
|
||||
|
||||
if (this.match("$"))
|
||||
return this.finishTerm(new Dollar(loc));
|
||||
|
||||
if (this.match("\\b"))
|
||||
return this.finishTerm(new WordBoundary(loc));
|
||||
|
||||
if (this.match("\\B"))
|
||||
return this.finishTerm(new NonWordBoundary(loc));
|
||||
|
||||
if (this.match("(?=")) {
|
||||
RegExpTerm dis = this.parseDisjunction();
|
||||
this.expectRParen();
|
||||
return this.finishTerm(new ZeroWidthPositiveLookahead(loc, dis));
|
||||
}
|
||||
|
||||
if (this.match("(?!")) {
|
||||
RegExpTerm dis = this.parseDisjunction();
|
||||
this.expectRParen();
|
||||
return this.finishTerm(new ZeroWidthNegativeLookahead(loc, dis));
|
||||
}
|
||||
|
||||
if (this.match("(?<=")) {
|
||||
RegExpTerm dis = this.parseDisjunction();
|
||||
this.expectRParen();
|
||||
return this.finishTerm(new ZeroWidthPositiveLookbehind(loc, dis));
|
||||
}
|
||||
|
||||
if (this.match("(?<!")) {
|
||||
RegExpTerm dis = this.parseDisjunction();
|
||||
this.expectRParen();
|
||||
return this.finishTerm(new ZeroWidthNegativeLookbehind(loc, dis));
|
||||
}
|
||||
|
||||
return this.finishTerm(this.parseQuantifierOpt(loc, this.parseAtom()));
|
||||
}
|
||||
|
||||
private RegExpTerm parseQuantifierOpt(SourceLocation loc, RegExpTerm atom) {
|
||||
if (this.match("*"))
|
||||
return this.finishTerm(new Star(loc, atom, !this.match("?")));
|
||||
if (this.match("+"))
|
||||
return this.finishTerm(new Plus(loc, atom, !this.match("?")));
|
||||
if (this.match("?"))
|
||||
return this.finishTerm(new Opt(loc, atom, !this.match("?")));
|
||||
if (this.match("{")) {
|
||||
Double lo = toNumber(this.readDigits(false)),
|
||||
hi = null;
|
||||
if (this.match(",") && !this.lookahead("}"))
|
||||
hi = toNumber(this.readDigits(false));
|
||||
this.expectRBrace();
|
||||
return this.finishTerm(new Range(loc, atom, !this.match("?"), lo, hi));
|
||||
}
|
||||
return atom;
|
||||
}
|
||||
|
||||
private RegExpTerm parseAtom() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
|
||||
if (this.match("."))
|
||||
return this.finishTerm(new Dot(loc));
|
||||
|
||||
if (this.match("\\"))
|
||||
return this.parseAtomEscape(loc, false);
|
||||
|
||||
if (this.lookahead("["))
|
||||
return this.parseCharacterClass();
|
||||
|
||||
if (this.match("(")) {
|
||||
boolean capture = !this.match("?:");
|
||||
String name = null;
|
||||
|
||||
if (this.match("?<")) {
|
||||
name = this.readIdentifier();
|
||||
this.expectRAngle();
|
||||
}
|
||||
|
||||
if (capture)
|
||||
++this.maxbackref;
|
||||
int number = this.maxbackref;
|
||||
RegExpTerm dis = this.parseDisjunction();
|
||||
this.expectRParen();
|
||||
return this.finishTerm(new Group(loc, capture, number, name, dis));
|
||||
}
|
||||
|
||||
char c = this.nextChar();
|
||||
if ("^$\\.*+?()[]{}|".indexOf(c) != -1)
|
||||
this.error(Error.UNEXPECTED_CHARACTER, this.pos-1);
|
||||
return this.finishTerm(new Constant(loc, String.valueOf(c)));
|
||||
}
|
||||
|
||||
private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) {
|
||||
String raw, value;
|
||||
double codepoint;
|
||||
|
||||
if (this.match("x")) {
|
||||
raw = this.readHexDigits(2);
|
||||
codepoint = Integer.parseInt(raw, 16);
|
||||
value = fromCodePoint((int) codepoint);
|
||||
return this.finishTerm(new HexEscapeSequence(loc, value, (double)codepoint, "\\x" + raw));
|
||||
}
|
||||
|
||||
if (this.match("u")) {
|
||||
if (this.match("{")) {
|
||||
int closePos = this.src.indexOf("}", this.pos);
|
||||
int n;
|
||||
if (closePos == -1) {
|
||||
// don't attempt to read any digits, but
|
||||
// report missing `}`
|
||||
n = 0;
|
||||
} else if (closePos == this.pos) {
|
||||
// empty escape sequence, trigger an error
|
||||
n = 1;
|
||||
} else {
|
||||
n = closePos - this.pos;
|
||||
}
|
||||
raw = this.readHexDigits(n);
|
||||
this.expectRBrace();
|
||||
try {
|
||||
codepoint = Long.parseLong(raw, 16);
|
||||
} catch (NumberFormatException nfe) {
|
||||
codepoint = 0;
|
||||
}
|
||||
raw = "{" + raw + "}";
|
||||
} else {
|
||||
raw = this.readHexDigits(4);
|
||||
codepoint = Integer.parseInt(raw, 16);
|
||||
}
|
||||
value = fromCodePoint((int) codepoint);
|
||||
return this.finishTerm(new UnicodeEscapeSequence(loc, value, (double)codepoint, "\\u" + raw));
|
||||
}
|
||||
|
||||
if (this.match("k<")) {
|
||||
String name = this.readIdentifier();
|
||||
this.expectRAngle();
|
||||
return this.finishTerm(new NamedBackReference(loc, name, "\\k<" + name + ">"));
|
||||
}
|
||||
|
||||
if (this.match("p{", "P{")) {
|
||||
String name = this.readIdentifier();
|
||||
if (this.match("=")) {
|
||||
value = this.readIdentifier();
|
||||
raw = "\\p{" + name + "=" + value + "}";
|
||||
} else {
|
||||
value = null;
|
||||
raw = "\\p{" + name + "}";
|
||||
}
|
||||
this.expectRBrace();
|
||||
return this.finishTerm(new UnicodePropertyEscape(loc, name, value, raw));
|
||||
}
|
||||
|
||||
int startpos = this.pos-1;
|
||||
char c = this.nextChar();
|
||||
|
||||
if (c >= '0' && c <= '9') {
|
||||
raw = c + this.readDigits(true);
|
||||
if (c == '0' || inCharClass) {
|
||||
int base = c == '0' && raw.length() > 1 ? 8 : 10;
|
||||
try {
|
||||
codepoint = Long.parseLong(raw, base);
|
||||
value = fromCodePoint((int) codepoint);
|
||||
} catch (NumberFormatException nfe) {
|
||||
codepoint = 0;
|
||||
value = "\0";
|
||||
}
|
||||
if (base == 8) {
|
||||
this.error(Error.OCTAL_ESCAPE, startpos, this.pos);
|
||||
return this.finishTerm(new OctalEscape(loc, value, (double)codepoint, "\\" + raw));
|
||||
} else {
|
||||
return this.finishTerm(new DecimalEscape(loc, value, (double)codepoint, "\\" + raw));
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
codepoint = Long.parseLong(raw, 10);
|
||||
} catch (NumberFormatException nfe) {
|
||||
codepoint = 0;
|
||||
}
|
||||
BackReference br = this.finishTerm(new BackReference(loc, (double)codepoint, "\\" + raw));
|
||||
this.backrefs.add(br);
|
||||
return br;
|
||||
}
|
||||
}
|
||||
|
||||
String ctrltab = "f\fn\nr\rt\tv\u000b";
|
||||
int idx;
|
||||
if ((idx=ctrltab.indexOf(c)) % 2 == 0) {
|
||||
codepoint = ctrltab.charAt(idx+1);
|
||||
value = String.valueOf((char)codepoint);
|
||||
return this.finishTerm(new ControlEscape(loc, value, codepoint, "\\" + c));
|
||||
}
|
||||
|
||||
if (c == 'c') {
|
||||
c = this.nextChar();
|
||||
if (!(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'))
|
||||
this.error(Error.EXPECTED_CONTROL_LETTER, this.pos-1);
|
||||
codepoint = c % 32;
|
||||
value = String.valueOf((char)codepoint);
|
||||
return this.finishTerm(new ControlLetter(loc, value, codepoint, "\\c" + c));
|
||||
}
|
||||
|
||||
if ("dDsSwW".indexOf(c) >= 0) {
|
||||
return this.finishTerm(new CharacterClassEscape(loc, String.valueOf(c), "\\" + c));
|
||||
}
|
||||
|
||||
codepoint = c;
|
||||
value = String.valueOf((char)codepoint);
|
||||
return this.finishTerm(new IdentityEscape(loc, value, codepoint, "\\" + c));
|
||||
}
|
||||
|
||||
private RegExpTerm parseCharacterClass() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
List<RegExpTerm> elements = new ArrayList<>();
|
||||
|
||||
this.match("[");
|
||||
boolean inverted = this.match("^");
|
||||
while (!this.match("]")) {
|
||||
if (this.atEOS()) {
|
||||
this.error(Error.EXPECTED_RBRACKET);
|
||||
break;
|
||||
}
|
||||
elements.add(this.parseCharacterClassElement());
|
||||
}
|
||||
return this.finishTerm(new CharacterClass(loc, elements, inverted));
|
||||
}
|
||||
|
||||
private RegExpTerm parseCharacterClassElement() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
RegExpTerm atom = this.parseCharacterClassAtom();
|
||||
if (!this.lookahead("-]") && this.match("-"))
|
||||
return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
|
||||
return atom;
|
||||
}
|
||||
|
||||
private RegExpTerm parseCharacterClassAtom() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
char c = this.nextChar();
|
||||
if (c == '\\') {
|
||||
if (this.match("b"))
|
||||
return this.finishTerm(new ControlEscape(loc, "\b", 8, "\\b"));
|
||||
return this.finishTerm(this.parseAtomEscape(loc, true));
|
||||
}
|
||||
return this.finishTerm(new Constant(loc, String.valueOf(c)));
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,3 +3,4 @@
|
||||
/(?<!.)/;
|
||||
/\p{Number}/u;
|
||||
/\P{Script=Greek}/u;
|
||||
/\k</;
|
||||
|
||||
2
javascript/extractor/tests/regexp/input/tst.js
Normal file
2
javascript/extractor/tests/regexp/input/tst.js
Normal file
@@ -0,0 +1,2 @@
|
||||
/\u{10400}/;
|
||||
/\c/;
|
||||
@@ -10,8 +10,8 @@ hasLocation(#10000,#10002)
|
||||
scopes(#20000,0)
|
||||
#20001=@"script;{#10000},1,1"
|
||||
toplevels(#20001,0)
|
||||
#20002=@"loc,{#10000},1,1,6,0"
|
||||
locations_default(#20002,#10000,1,1,6,0)
|
||||
#20002=@"loc,{#10000},1,1,7,0"
|
||||
locations_default(#20002,#10000,1,1,7,0)
|
||||
hasLocation(#20001,#20002)
|
||||
#20003=*
|
||||
stmts(#20003,2,#20001,0,"/^(?<ws ... <ws>$/;")
|
||||
@@ -169,81 +169,125 @@ hasLocation(#20051,#20052)
|
||||
unicodePropertyEscapeName(#20051,"Script")
|
||||
unicodePropertyEscapeValue(#20051,"Greek")
|
||||
#20053=*
|
||||
lines(#20053,#20001,"/^(?<ws>\s+)\w+\k<ws>$/;","
|
||||
")
|
||||
hasLocation(#20053,#20004)
|
||||
#20054=*
|
||||
lines(#20054,#20001,"/(?<=.)/;","
|
||||
")
|
||||
hasLocation(#20054,#20026)
|
||||
stmts(#20053,2,#20001,5,"/\k</;")
|
||||
#20054=@"loc,{#10000},6,1,6,6"
|
||||
locations_default(#20054,#10000,6,1,6,6)
|
||||
hasLocation(#20053,#20054)
|
||||
stmtContainers(#20053,#20001)
|
||||
#20055=*
|
||||
lines(#20055,#20001,"/(?<!.)/;","
|
||||
")
|
||||
hasLocation(#20055,#20034)
|
||||
#20056=*
|
||||
lines(#20056,#20001,"/\p{Number}/u;","
|
||||
")
|
||||
hasLocation(#20056,#20042)
|
||||
exprs(#20055,5,#20053,0,"/\k</")
|
||||
#20056=@"loc,{#10000},6,1,6,5"
|
||||
locations_default(#20056,#10000,6,1,6,5)
|
||||
hasLocation(#20055,#20056)
|
||||
enclosingStmt(#20055,#20053)
|
||||
exprContainers(#20055,#20001)
|
||||
literals("/\k</","/\k</",#20055)
|
||||
#20057=*
|
||||
lines(#20057,#20001,"/\P{Script=Greek}/u;","
|
||||
")
|
||||
hasLocation(#20057,#20048)
|
||||
numlines(#20001,5,5,0)
|
||||
#20058=*
|
||||
tokeninfo(#20058,5,#20001,0,"/^(?<ws>\s+)\w+\k<ws>$/")
|
||||
hasLocation(#20058,#20006)
|
||||
regexpterm(#20057,22,#20055,0,"\k<")
|
||||
#20058=@"loc,{#10000},6,2,6,4"
|
||||
locations_default(#20058,#10000,6,2,6,4)
|
||||
hasLocation(#20057,#20058)
|
||||
namedBackref(#20057,"")
|
||||
#20059=*
|
||||
tokeninfo(#20059,8,#20001,1,";")
|
||||
#20060=@"loc,{#10000},1,24,1,24"
|
||||
locations_default(#20060,#10000,1,24,1,24)
|
||||
regexpParseErrors(#20059,#20057,"expected identifier")
|
||||
#20060=@"loc,{#10000},6,5,6,5"
|
||||
locations_default(#20060,#10000,6,5,6,5)
|
||||
hasLocation(#20059,#20060)
|
||||
#20061=*
|
||||
tokeninfo(#20061,5,#20001,2,"/(?<=.)/")
|
||||
hasLocation(#20061,#20028)
|
||||
#20062=*
|
||||
tokeninfo(#20062,8,#20001,3,";")
|
||||
#20063=@"loc,{#10000},2,9,2,9"
|
||||
locations_default(#20063,#10000,2,9,2,9)
|
||||
hasLocation(#20062,#20063)
|
||||
regexpParseErrors(#20061,#20057,"expected '>'")
|
||||
#20062=@"loc,{#10000},6,4,6,4"
|
||||
locations_default(#20062,#10000,6,4,6,4)
|
||||
hasLocation(#20061,#20062)
|
||||
#20063=*
|
||||
lines(#20063,#20001,"/^(?<ws>\s+)\w+\k<ws>$/;","
|
||||
")
|
||||
hasLocation(#20063,#20004)
|
||||
#20064=*
|
||||
tokeninfo(#20064,5,#20001,4,"/(?<!.)/")
|
||||
hasLocation(#20064,#20036)
|
||||
lines(#20064,#20001,"/(?<=.)/;","
|
||||
")
|
||||
hasLocation(#20064,#20026)
|
||||
#20065=*
|
||||
tokeninfo(#20065,8,#20001,5,";")
|
||||
#20066=@"loc,{#10000},3,9,3,9"
|
||||
locations_default(#20066,#10000,3,9,3,9)
|
||||
hasLocation(#20065,#20066)
|
||||
lines(#20065,#20001,"/(?<!.)/;","
|
||||
")
|
||||
hasLocation(#20065,#20034)
|
||||
#20066=*
|
||||
lines(#20066,#20001,"/\p{Number}/u;","
|
||||
")
|
||||
hasLocation(#20066,#20042)
|
||||
#20067=*
|
||||
tokeninfo(#20067,5,#20001,6,"/\p{Number}/u")
|
||||
hasLocation(#20067,#20044)
|
||||
lines(#20067,#20001,"/\P{Script=Greek}/u;","
|
||||
")
|
||||
hasLocation(#20067,#20048)
|
||||
#20068=*
|
||||
tokeninfo(#20068,8,#20001,7,";")
|
||||
#20069=@"loc,{#10000},4,14,4,14"
|
||||
locations_default(#20069,#10000,4,14,4,14)
|
||||
hasLocation(#20068,#20069)
|
||||
lines(#20068,#20001,"/\k</;","
|
||||
")
|
||||
hasLocation(#20068,#20054)
|
||||
numlines(#20001,6,6,0)
|
||||
#20069=*
|
||||
tokeninfo(#20069,5,#20001,0,"/^(?<ws>\s+)\w+\k<ws>$/")
|
||||
hasLocation(#20069,#20006)
|
||||
#20070=*
|
||||
tokeninfo(#20070,5,#20001,8,"/\P{Script=Greek}/u")
|
||||
hasLocation(#20070,#20050)
|
||||
#20071=*
|
||||
tokeninfo(#20071,8,#20001,9,";")
|
||||
#20072=@"loc,{#10000},5,20,5,20"
|
||||
locations_default(#20072,#10000,5,20,5,20)
|
||||
hasLocation(#20071,#20072)
|
||||
tokeninfo(#20070,8,#20001,1,";")
|
||||
#20071=@"loc,{#10000},1,24,1,24"
|
||||
locations_default(#20071,#10000,1,24,1,24)
|
||||
hasLocation(#20070,#20071)
|
||||
#20072=*
|
||||
tokeninfo(#20072,5,#20001,2,"/(?<=.)/")
|
||||
hasLocation(#20072,#20028)
|
||||
#20073=*
|
||||
tokeninfo(#20073,0,#20001,10,"")
|
||||
#20074=@"loc,{#10000},6,1,6,0"
|
||||
locations_default(#20074,#10000,6,1,6,0)
|
||||
tokeninfo(#20073,8,#20001,3,";")
|
||||
#20074=@"loc,{#10000},2,9,2,9"
|
||||
locations_default(#20074,#10000,2,9,2,9)
|
||||
hasLocation(#20073,#20074)
|
||||
#20075=*
|
||||
entry_cfg_node(#20075,#20001)
|
||||
#20076=@"loc,{#10000},1,1,1,0"
|
||||
locations_default(#20076,#10000,1,1,1,0)
|
||||
hasLocation(#20075,#20076)
|
||||
#20077=*
|
||||
exit_cfg_node(#20077,#20001)
|
||||
hasLocation(#20077,#20074)
|
||||
tokeninfo(#20075,5,#20001,4,"/(?<!.)/")
|
||||
hasLocation(#20075,#20036)
|
||||
#20076=*
|
||||
tokeninfo(#20076,8,#20001,5,";")
|
||||
#20077=@"loc,{#10000},3,9,3,9"
|
||||
locations_default(#20077,#10000,3,9,3,9)
|
||||
hasLocation(#20076,#20077)
|
||||
#20078=*
|
||||
tokeninfo(#20078,5,#20001,6,"/\p{Number}/u")
|
||||
hasLocation(#20078,#20044)
|
||||
#20079=*
|
||||
tokeninfo(#20079,8,#20001,7,";")
|
||||
#20080=@"loc,{#10000},4,14,4,14"
|
||||
locations_default(#20080,#10000,4,14,4,14)
|
||||
hasLocation(#20079,#20080)
|
||||
#20081=*
|
||||
tokeninfo(#20081,5,#20001,8,"/\P{Script=Greek}/u")
|
||||
hasLocation(#20081,#20050)
|
||||
#20082=*
|
||||
tokeninfo(#20082,8,#20001,9,";")
|
||||
#20083=@"loc,{#10000},5,20,5,20"
|
||||
locations_default(#20083,#10000,5,20,5,20)
|
||||
hasLocation(#20082,#20083)
|
||||
#20084=*
|
||||
tokeninfo(#20084,5,#20001,10,"/\k</")
|
||||
hasLocation(#20084,#20056)
|
||||
#20085=*
|
||||
tokeninfo(#20085,8,#20001,11,";")
|
||||
#20086=@"loc,{#10000},6,6,6,6"
|
||||
locations_default(#20086,#10000,6,6,6,6)
|
||||
hasLocation(#20085,#20086)
|
||||
#20087=*
|
||||
tokeninfo(#20087,0,#20001,12,"")
|
||||
#20088=@"loc,{#10000},7,1,7,0"
|
||||
locations_default(#20088,#10000,7,1,7,0)
|
||||
hasLocation(#20087,#20088)
|
||||
#20089=*
|
||||
entry_cfg_node(#20089,#20001)
|
||||
#20090=@"loc,{#10000},1,1,1,0"
|
||||
locations_default(#20090,#10000,1,1,1,0)
|
||||
hasLocation(#20089,#20090)
|
||||
#20091=*
|
||||
exit_cfg_node(#20091,#20001)
|
||||
hasLocation(#20091,#20088)
|
||||
successor(#20053,#20055)
|
||||
successor(#20055,#20091)
|
||||
successor(#20047,#20049)
|
||||
successor(#20049,#20077)
|
||||
successor(#20049,#20053)
|
||||
successor(#20041,#20043)
|
||||
successor(#20043,#20047)
|
||||
successor(#20033,#20035)
|
||||
@@ -252,6 +296,6 @@ successor(#20025,#20027)
|
||||
successor(#20027,#20033)
|
||||
successor(#20003,#20005)
|
||||
successor(#20005,#20025)
|
||||
successor(#20075,#20003)
|
||||
numlines(#10000,5,5,0)
|
||||
successor(#20089,#20003)
|
||||
numlines(#10000,6,6,0)
|
||||
filetype(#10000,"javascript")
|
||||
|
||||
BIN
javascript/extractor/tests/regexp/output/trap/tst.js.trap
Normal file
BIN
javascript/extractor/tests/regexp/output/trap/tst.js.trap
Normal file
Binary file not shown.
Reference in New Issue
Block a user