Added support for the `--` subtraction operator.

This commit is contained in:
Napalys
2025-03-02 19:46:56 +01:00
parent ee83c42b71
commit 3664d50772
5 changed files with 180 additions and 198 deletions

View File

@@ -0,0 +1,22 @@
package com.semmle.js.ast.regexp;
import com.semmle.js.ast.SourceLocation;
import java.util.List;
/**
 * A regular expression term representing a character-class subtraction.
 *
 * <p>The node is registered under the term type name {@code "CharacterClassSubtraction"} and
 * simply carries the list of terms it was constructed with.
 */
public class CharacterClassSubtraction extends RegExpTerm {
  /** The terms that make up this subtraction, exactly as supplied to the constructor. */
  private final List<RegExpTerm> subtraction;

  /**
   * Creates a subtraction node.
   *
   * @param loc the source location, forwarded to the {@link RegExpTerm} constructor
   * @param subtraction the terms of the subtraction; the list is stored as-is, not copied
   */
  public CharacterClassSubtraction(SourceLocation loc, List<RegExpTerm> subtraction) {
    super(loc, "CharacterClassSubtraction");
    this.subtraction = subtraction;
  }

  /**
   * Returns the terms of this subtraction.
   *
   * @return the same list instance that was passed to the constructor
   */
  public List<RegExpTerm> getSubtraction() {
    return this.subtraction;
  }

  /** Dispatches to the visitor overload that accepts this node type. */
  @Override
  public void accept(Visitor v) {
    v.visit(this);
  }
}

View File

@@ -65,4 +65,6 @@ public interface Visitor {
/** Visits a {@link CharacterClassQuotedString} node. */
public void visit(CharacterClassQuotedString nd);
/** Visits a {@link CharacterClassIntersection} node. */
public void visit(CharacterClassIntersection nd);
/** Visits a {@link CharacterClassSubtraction} node. */
public void visit(CharacterClassSubtraction nd);
}

View File

@@ -12,6 +12,7 @@ import com.semmle.js.ast.regexp.CharacterClass;
import com.semmle.js.ast.regexp.CharacterClassEscape;
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
import com.semmle.js.ast.regexp.CharacterClassRange;
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
import com.semmle.js.ast.regexp.Constant;
import com.semmle.js.ast.regexp.ControlEscape;
import com.semmle.js.ast.regexp.ControlLetter;
@@ -96,6 +97,7 @@ public class RegExpExtractor {
termkinds.put("UnicodePropertyEscape", 27);
termkinds.put("CharacterClassQuotedString", 28);
termkinds.put("CharacterClassIntersection", 29);
termkinds.put("CharacterClassSubtraction", 30);
}
private static final String[] errmsgs =
@@ -362,6 +364,14 @@ public class RegExpExtractor {
for (RegExpTerm element : nd.getIntersections())
visit(element, lbl, i++);
}
/**
 * Extracts a character-class subtraction term and then visits each of its operand terms.
 *
 * <p>Every operand is visited with the label returned by {@code extractTerm} for this node
 * and a zero-based position that increases by one per operand, in list order.
 */
@Override
public void visit(CharacterClassSubtraction nd) {
  Label termLabel = extractTerm(nd, parent, idx);
  int childIndex = 0;
  for (RegExpTerm operand : nd.getSubtraction()) {
    visit(operand, termLabel, childIndex);
    childIndex++;
  }
}
}
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {

View File

@@ -8,6 +8,7 @@ import com.semmle.js.ast.regexp.CharacterClass;
import com.semmle.js.ast.regexp.CharacterClassEscape;
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
import com.semmle.js.ast.regexp.CharacterClassRange;
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
import com.semmle.js.ast.regexp.Constant;
import com.semmle.js.ast.regexp.ControlEscape;
import com.semmle.js.ast.regexp.ControlLetter;
@@ -566,6 +567,7 @@ public class RegExpParser {
/**
 * The kind of character class being parsed; it selects how the collected elements are wrapped
 * when the class is finished.
 */
private enum CharacterClassType {
// Plain class: the collected elements become the class members directly.
STANDARD,
// Selected when a "&&" operator is matched; elements are wrapped in a CharacterClassIntersection.
INTERSECTION,
// Selected when a "--" operator is matched; elements are wrapped in a CharacterClassSubtraction.
SUBTRACTION,
}
// ECMA 2024 `v` flag allows nested character classes.
@@ -588,6 +590,10 @@ public class RegExpParser {
this.match("&&");
classType = CharacterClassType.INTERSECTION;
}
else if (lookahead("--")) {
this.match("--");
classType = CharacterClassType.SUBTRACTION;
}
else {
elements.add(this.parseCharacterClassElement());
}
@@ -597,6 +603,8 @@ public class RegExpParser {
switch (classType) {
case INTERSECTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted));
case SUBTRACTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted));
case STANDARD:
default:
return this.finishTerm(new CharacterClass(loc, elements, inverted));
@@ -614,7 +622,7 @@ public class RegExpParser {
return atom;
}
}
if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape))
if (!this.lookahead("-]") && !this.lookahead("--") && this.match("-") && !(atom instanceof CharacterClassEscape))
return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
return atom;
}