mirror of
https://github.com/github/codeql.git
synced 2026-05-01 03:35:13 +02:00
Add support for '\q{}' escape sequence in regular expressions.
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
package com.semmle.js.ast.regexp;
|
||||
|
||||
import com.semmle.js.ast.SourceLocation;
|
||||
|
||||
/**
|
||||
* A '\q{}' escape sequence in a regular expression, which is a special extension
|
||||
* to standard regular expressions.
|
||||
*/
|
||||
public class CharacterClassQuotedString extends RegExpTerm {
|
||||
private final RegExpTerm term;
|
||||
|
||||
public CharacterClassQuotedString(SourceLocation loc, RegExpTerm term) {
|
||||
super(loc, "CharacterClassQuotedString");
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
public RegExpTerm getTerm() {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void accept(Visitor v) {
|
||||
v.visit(this);
|
||||
}
|
||||
}
|
||||
@@ -61,4 +61,6 @@ public interface Visitor {
|
||||
public void visit(ZeroWidthNegativeLookbehind nd);
|
||||
|
||||
public void visit(UnicodePropertyEscape nd);
|
||||
|
||||
public void visit(CharacterClassQuotedString nd);
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import com.semmle.js.ast.regexp.BackReference;
|
||||
import com.semmle.js.ast.regexp.Caret;
|
||||
import com.semmle.js.ast.regexp.CharacterClass;
|
||||
import com.semmle.js.ast.regexp.CharacterClassEscape;
|
||||
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
|
||||
import com.semmle.js.ast.regexp.CharacterClassRange;
|
||||
import com.semmle.js.ast.regexp.Constant;
|
||||
import com.semmle.js.ast.regexp.ControlEscape;
|
||||
@@ -92,6 +93,7 @@ public class RegExpExtractor {
|
||||
termkinds.put("ZeroWidthPositiveLookbehind", 25);
|
||||
termkinds.put("ZeroWidthNegativeLookbehind", 26);
|
||||
termkinds.put("UnicodePropertyEscape", 27);
|
||||
termkinds.put("CharacterClassQuotedString", 28);
|
||||
}
|
||||
|
||||
private static final String[] errmsgs =
|
||||
@@ -344,6 +346,12 @@ public class RegExpExtractor {
|
||||
visit(nd.getLeft(), lbl, 0);
|
||||
visit(nd.getRight(), lbl, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(CharacterClassQuotedString nd) {
|
||||
Label lbl = extractTerm(nd, parent, idx);
|
||||
visit(nd.getTerm(), lbl, 0);
|
||||
}
|
||||
}
|
||||
|
||||
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.semmle.js.ast.regexp.BackReference;
|
||||
import com.semmle.js.ast.regexp.Caret;
|
||||
import com.semmle.js.ast.regexp.CharacterClass;
|
||||
import com.semmle.js.ast.regexp.CharacterClassEscape;
|
||||
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
|
||||
import com.semmle.js.ast.regexp.CharacterClassRange;
|
||||
import com.semmle.js.ast.regexp.Constant;
|
||||
import com.semmle.js.ast.regexp.ControlEscape;
|
||||
@@ -283,6 +284,45 @@ public class RegExpParser {
|
||||
return this.finishTerm(this.parseQuantifierOpt(loc, this.parseAtom()));
|
||||
}
|
||||
|
||||
private RegExpTerm parseDisjunctionInsideQuotedString() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
List<RegExpTerm> disjuncts = new ArrayList<>();
|
||||
disjuncts.add(this.parseAlternativeInsideQuotedString());
|
||||
while (this.match("|")) {
|
||||
disjuncts.add(this.parseAlternativeInsideQuotedString());
|
||||
}
|
||||
if (disjuncts.size() == 1) return disjuncts.get(0);
|
||||
return this.finishTerm(new Disjunction(loc, disjuncts));
|
||||
}
|
||||
|
||||
private RegExpTerm parseAlternativeInsideQuotedString() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
StringBuilder sb = new StringBuilder();
|
||||
boolean escaped = false;
|
||||
while (true) {
|
||||
// If we're at the end of the string, something went wrong.
|
||||
if (this.atEOS()) {
|
||||
this.error(Error.UNEXPECTED_EOS);
|
||||
break;
|
||||
}
|
||||
// We can end parsing if we're not escaped and we see a `|` which would mean Alternation
|
||||
// or `}` which would mean the end of the Quoted String.
|
||||
if(!escaped && this.lookahead(null, "|", "}")){
|
||||
break;
|
||||
}
|
||||
char c = this.nextChar();
|
||||
// Track whether the character is an escape character.
|
||||
escaped = !escaped && (c == '\\');
|
||||
sb.append(c);
|
||||
}
|
||||
|
||||
String literal = sb.toString();
|
||||
loc.setEnd(pos());
|
||||
loc.setSource(literal);
|
||||
|
||||
return new Constant(loc, literal);
|
||||
}
|
||||
|
||||
private RegExpTerm parseQuantifierOpt(SourceLocation loc, RegExpTerm atom) {
|
||||
if (this.match("*")) return this.finishTerm(new Star(loc, atom, !this.match("?")));
|
||||
if (this.match("+")) return this.finishTerm(new Plus(loc, atom, !this.match("?")));
|
||||
@@ -427,6 +467,12 @@ public class RegExpParser {
|
||||
return this.finishTerm(new NamedBackReference(loc, name, "\\k<" + name + ">"));
|
||||
}
|
||||
|
||||
if (this.match("q{")) {
|
||||
RegExpTerm term = parseDisjunctionInsideQuotedString();
|
||||
this.expectRBrace();
|
||||
return this.finishTerm(new CharacterClassQuotedString(loc, term));
|
||||
}
|
||||
|
||||
if (this.match("p{", "P{")) {
|
||||
String name = this.readIdentifier();
|
||||
if (this.match("=")) {
|
||||
|
||||
Reference in New Issue
Block a user