mirror of
https://github.com/github/codeql.git
synced 2026-02-12 21:21:16 +01:00
Added support for character class union in regex processing
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
package com.semmle.js.ast.regexp;
|
||||
|
||||
import com.semmle.js.ast.SourceLocation;
|
||||
import java.util.List;
|
||||
|
||||
public class CharacterClassUnion extends RegExpTerm {
|
||||
private final List<RegExpTerm> union;
|
||||
|
||||
public CharacterClassUnion(SourceLocation loc, List<RegExpTerm> union) {
|
||||
super(loc, "CharacterClassUnion");
|
||||
this.union = union;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void accept(Visitor v) {
|
||||
v.visit(this);
|
||||
}
|
||||
|
||||
public List<RegExpTerm> getUnion() {
|
||||
return union;
|
||||
}
|
||||
}
|
||||
@@ -67,4 +67,6 @@ public interface Visitor {
|
||||
/** Visits a {@link CharacterClassIntersection} term. */
public void visit(CharacterClassIntersection nd);
|
||||
|
||||
/** Visits a {@link CharacterClassSubtraction} term. */
public void visit(CharacterClassSubtraction nd);
|
||||
|
||||
/** Visits a {@link CharacterClassUnion} term. */
public void visit(CharacterClassUnion nd);
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import com.semmle.js.ast.regexp.CharacterClassEscape;
|
||||
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
|
||||
import com.semmle.js.ast.regexp.CharacterClassRange;
|
||||
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
|
||||
import com.semmle.js.ast.regexp.CharacterClassUnion;
|
||||
import com.semmle.js.ast.regexp.Constant;
|
||||
import com.semmle.js.ast.regexp.ControlEscape;
|
||||
import com.semmle.js.ast.regexp.ControlLetter;
|
||||
@@ -98,6 +99,7 @@ public class RegExpExtractor {
|
||||
termkinds.put("CharacterClassQuotedString", 28);
|
||||
termkinds.put("CharacterClassIntersection", 29);
|
||||
termkinds.put("CharacterClassSubtraction", 30);
|
||||
termkinds.put("CharacterClassUnion", 31);
|
||||
}
|
||||
|
||||
private static final String[] errmsgs =
|
||||
@@ -372,6 +374,14 @@ public class RegExpExtractor {
|
||||
for (RegExpTerm element : nd.getSubtraction())
|
||||
visit(element, lbl, i++);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(CharacterClassUnion nd) {
|
||||
Label lbl = extractTerm(nd, parent, idx);
|
||||
int i = 0;
|
||||
for (RegExpTerm element : nd.getUnion())
|
||||
visit(element, lbl, i++);
|
||||
}
|
||||
}
|
||||
|
||||
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {
|
||||
|
||||
@@ -9,6 +9,7 @@ import com.semmle.js.ast.regexp.CharacterClassEscape;
|
||||
import com.semmle.js.ast.regexp.CharacterClassQuotedString;
|
||||
import com.semmle.js.ast.regexp.CharacterClassRange;
|
||||
import com.semmle.js.ast.regexp.CharacterClassSubtraction;
|
||||
import com.semmle.js.ast.regexp.CharacterClassUnion;
|
||||
import com.semmle.js.ast.regexp.Constant;
|
||||
import com.semmle.js.ast.regexp.ControlEscape;
|
||||
import com.semmle.js.ast.regexp.ControlLetter;
|
||||
@@ -568,6 +569,7 @@ public class RegExpParser {
|
||||
STANDARD,
|
||||
INTERSECTION,
|
||||
SUBTRACTION,
|
||||
UNION
|
||||
}
|
||||
|
||||
// ECMA 2024 `v` flag allows nested character classes.
|
||||
@@ -599,12 +601,26 @@ public class RegExpParser {
|
||||
}
|
||||
}
|
||||
|
||||
boolean containsComplex = elements.stream().anyMatch(term -> term instanceof UnicodePropertyEscape ||
|
||||
term instanceof CharacterClassQuotedString ||
|
||||
term instanceof CharacterClass);
|
||||
|
||||
// Set type to UNION only if:
|
||||
// 1. We haven't already determined a specific type (intersection/subtraction)
|
||||
// 2. We have more than one element
|
||||
// 3. We have at least one complex element (i.e. a nested character class, a CharacterClassQuotedString, or a UnicodePropertyEscape)
|
||||
if (containsComplex && classType == CharacterClassType.STANDARD && elements.size() > 1) {
|
||||
classType = CharacterClassType.UNION;
|
||||
}
|
||||
|
||||
// Create appropriate RegExpTerm based on the detected class type
|
||||
switch (classType) {
|
||||
case INTERSECTION:
|
||||
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted));
|
||||
case SUBTRACTION:
|
||||
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted));
|
||||
case UNION:
|
||||
return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassUnion(loc, elements)), inverted));
|
||||
case STANDARD:
|
||||
default:
|
||||
return this.finishTerm(new CharacterClass(loc, elements, inverted));
|
||||
|
||||
Reference in New Issue
Block a user