Removed Union, as a standard character class is already a union.

This commit is contained in:
Napalys
2025-03-05 10:07:20 +01:00
parent 8099423b6d
commit 8086c25abe
8 changed files with 518 additions and 602 deletions

View File

@@ -1,22 +0,0 @@
package com.semmle.js.ast.regexp;
import com.semmle.js.ast.SourceLocation;
import java.util.List;
/**
 * A {@link RegExpTerm} that carries a list of child terms and identifies itself to its
 * superclass with the string {@code "CharacterClassUnion"}.
 */
public class CharacterClassUnion extends RegExpTerm {
  /** Child terms, kept as the exact list instance handed to the constructor. */
  private final List<RegExpTerm> elements;

  /**
   * Creates the term.
   *
   * @param loc source location, forwarded unchanged to the {@link RegExpTerm} constructor
   * @param elements child terms; the reference is stored as-is (no copy is made)
   */
  public CharacterClassUnion(SourceLocation loc, List<RegExpTerm> elements) {
    super(loc, "CharacterClassUnion");
    this.elements = elements;
  }

  /**
   * Returns the child terms.
   *
   * @return the same list reference that was supplied at construction time
   */
  public List<RegExpTerm> getElements() {
    return this.elements;
  }

  /**
   * Hands this term to the given visitor by calling {@code v.visit(this)}.
   *
   * @param v the visitor to dispatch to
   */
  @Override
  public void accept(Visitor v) {
    v.visit(this);
  }
}

View File

@@ -67,6 +67,4 @@ public interface Visitor {
/** Visit callback invoked for a CharacterClassIntersection term. */
public void visit(CharacterClassIntersection nd);
/** Visit callback invoked for a CharacterClassSubtraction term. */
public void visit(CharacterClassSubtraction nd);
/** Visit callback invoked for a CharacterClassUnion term. */
public void visit(CharacterClassUnion nd);
}

View File

@@ -13,7 +13,6 @@ import com.semmle.js.ast.regexp.CharacterClassEscape;
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
import com.semmle.js.ast.regexp.CharacterClassRange;
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
import com.semmle.js.ast.regexp.CharacterClassUnion;
import com.semmle.js.ast.regexp.Constant;
import com.semmle.js.ast.regexp.ControlEscape;
import com.semmle.js.ast.regexp.ControlLetter;
@@ -99,7 +98,6 @@ public class RegExpExtractor {
termkinds.put("CharacterClassQuotedString", 28);
termkinds.put("CharacterClassIntersection", 29);
termkinds.put("CharacterClassSubtraction", 30);
termkinds.put("CharacterClassUnion", 31);
}
private static final String[] errmsgs =
@@ -374,14 +372,6 @@ public class RegExpExtractor {
for (RegExpTerm element : nd.getElements())
visit(element, lbl, i++);
}
/**
 * Handles a {@link CharacterClassUnion}: calls {@code extractTerm} for the term itself,
 * then visits every element under the returned label, passing each element's zero-based
 * position in the list as its index.
 *
 * @param nd the union term to process
 */
@Override
public void visit(CharacterClassUnion nd) {
  // `parent` and `idx` are supplied by the enclosing scope (declared outside this method).
  Label unionLabel = extractTerm(nd, parent, idx);
  int childIndex = 0;
  for (RegExpTerm child : nd.getElements()) {
    visit(child, unionLabel, childIndex);
    childIndex++;
  }
}
}
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {

View File

@@ -9,7 +9,6 @@ import com.semmle.js.ast.regexp.CharacterClassEscape;
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
import com.semmle.js.ast.regexp.CharacterClassRange;
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
import com.semmle.js.ast.regexp.CharacterClassUnion;
import com.semmle.js.ast.regexp.Constant;
import com.semmle.js.ast.regexp.ControlEscape;
import com.semmle.js.ast.regexp.ControlLetter;
@@ -568,8 +567,7 @@ public class RegExpParser {
private enum CharacterClassType {
STANDARD,
INTERSECTION,
SUBTRACTION,
UNION
SUBTRACTION
}
// ECMA 2024 `v` flag allows nested character classes.
@@ -601,26 +599,12 @@ public class RegExpParser {
}
}
boolean containsComplex = elements.stream().anyMatch(term -> term instanceof UnicodePropertyEscape ||
term instanceof CharacterClassQuotedString ||
term instanceof CharacterClass);
// Set type to UNION only if:
// 1. We haven't already determined a specific type (intersection/subtraction)
// 2. We have more than one element
// 3. We have at least one complex element (i.e. a nested character class or a UnicodePropertyEscape)
if (containsComplex && classType == CharacterClassType.STANDARD && elements.size() > 1) {
classType = CharacterClassType.UNION;
}
// Create appropriate RegExpTerm based on the detected class type
switch (classType) {
case INTERSECTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted));
case SUBTRACTION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted));
case UNION:
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassUnion(loc, elements)), inverted));
case STANDARD:
default:
return this.finishTerm(new CharacterClass(loc, elements, inverted));