mirror of
https://github.com/github/codeql.git
synced 2026-04-27 17:55:19 +02:00
Added intersection support
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
package com.semmle.js.ast.regexp;
|
||||
|
||||
import com.semmle.js.ast.SourceLocation;
|
||||
import java.util.List;
|
||||
|
||||
public class CharacterClassIntersection extends RegExpTerm {
|
||||
private final List<RegExpTerm> intersections;
|
||||
|
||||
public CharacterClassIntersection(SourceLocation loc, List<RegExpTerm> intersections) {
|
||||
super(loc, "CharacterClassIntersection");
|
||||
this.intersections = intersections;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void accept(Visitor v) {
|
||||
v.visit(this);
|
||||
}
|
||||
|
||||
public List<RegExpTerm> getIntersections() {
|
||||
return intersections;
|
||||
}
|
||||
}
|
||||
@@ -63,4 +63,6 @@ public interface Visitor {
|
||||
public void visit(UnicodePropertyEscape nd);
|
||||
|
||||
public void visit(CharacterClassQuotedString nd);
|
||||
|
||||
public void visit(CharacterClassIntersection nd);
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import com.semmle.js.ast.regexp.Error;
|
||||
import com.semmle.js.ast.regexp.Group;
|
||||
import com.semmle.js.ast.regexp.HexEscapeSequence;
|
||||
import com.semmle.js.ast.regexp.IdentityEscape;
|
||||
import com.semmle.js.ast.regexp.CharacterClassIntersection;
|
||||
import com.semmle.js.ast.regexp.Literal;
|
||||
import com.semmle.js.ast.regexp.NamedBackReference;
|
||||
import com.semmle.js.ast.regexp.NonWordBoundary;
|
||||
@@ -94,6 +95,7 @@ public class RegExpExtractor {
|
||||
termkinds.put("ZeroWidthNegativeLookbehind", 26);
|
||||
termkinds.put("UnicodePropertyEscape", 27);
|
||||
termkinds.put("CharacterClassQuotedString", 28);
|
||||
termkinds.put("CharacterClassIntersection", 29);
|
||||
}
|
||||
|
||||
private static final String[] errmsgs =
|
||||
@@ -352,6 +354,14 @@ public class RegExpExtractor {
|
||||
Label lbl = extractTerm(nd, parent, idx);
|
||||
visit(nd.getTerm(), lbl, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(CharacterClassIntersection nd) {
|
||||
Label lbl = extractTerm(nd, parent, idx);
|
||||
int i = 0;
|
||||
for (RegExpTerm element : nd.getIntersections())
|
||||
visit(element, lbl, i++);
|
||||
}
|
||||
}
|
||||
|
||||
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {
|
||||
|
||||
@@ -19,6 +19,7 @@ import com.semmle.js.ast.regexp.Error;
|
||||
import com.semmle.js.ast.regexp.Group;
|
||||
import com.semmle.js.ast.regexp.HexEscapeSequence;
|
||||
import com.semmle.js.ast.regexp.IdentityEscape;
|
||||
import com.semmle.js.ast.regexp.CharacterClassIntersection;
|
||||
import com.semmle.js.ast.regexp.NamedBackReference;
|
||||
import com.semmle.js.ast.regexp.NonWordBoundary;
|
||||
import com.semmle.js.ast.regexp.OctalEscape;
|
||||
@@ -37,6 +38,7 @@ import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
|
||||
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/** A parser for ECMAScript 2018 regular expressions. */
|
||||
@@ -561,10 +563,16 @@ public class RegExpParser {
|
||||
return this.finishTerm(new CharacterClass(loc, elements, inverted));
|
||||
}
|
||||
|
||||
private enum CharacterClassType {
|
||||
STANDARD,
|
||||
INTERSECTION,
|
||||
}
|
||||
|
||||
// The ECMAScript 2024 `v` flag allows nested character classes.
|
||||
private RegExpTerm parseNestedCharacterClass() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
List<RegExpTerm> elements = new ArrayList<>();
|
||||
CharacterClassType classType = CharacterClassType.STANDARD;
|
||||
|
||||
this.match("[");
|
||||
boolean inverted = this.match("^");
|
||||
@@ -576,11 +584,23 @@ public class RegExpParser {
|
||||
if (lookahead("[")) {
|
||||
elements.add(parseNestedCharacterClass());
|
||||
}
|
||||
else if (lookahead("&&")) {
|
||||
this.match("&&");
|
||||
classType = CharacterClassType.INTERSECTION;
|
||||
}
|
||||
else {
|
||||
elements.add(this.parseCharacterClassElement());
|
||||
}
|
||||
}
|
||||
return this.finishTerm(new CharacterClass(loc, elements, inverted));
|
||||
|
||||
// Create appropriate RegExpTerm based on the detected class type
|
||||
switch (classType) {
|
||||
case INTERSECTION:
|
||||
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted));
|
||||
case STANDARD:
|
||||
default:
|
||||
return this.finishTerm(new CharacterClass(loc, elements, inverted));
|
||||
}
|
||||
}
|
||||
|
||||
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
|
||||
|
||||
Reference in New Issue
Block a user