Merge pull request #18899 from Napalys/js/ecma-2024-regex

JS: Add ECMAScript 2024 `v` Flag Operators for Regex Parsing
This commit is contained in:
Napalys Klicius
2025-03-11 12:50:44 +01:00
committed by GitHub
44 changed files with 8106 additions and 6 deletions

View File

@@ -0,0 +1,7 @@
---
category: feature
---
* Extraction now supports regular expressions with the `v` flag, including its new character class syntax:
    - Intersection `&&`
    - Subtraction `--`
    - `\q` quoted string

View File

@@ -301,6 +301,51 @@ class RegExpAlt extends RegExpTerm, @regexp_alt {
override string getAPrimaryQlClass() { result = "RegExpAlt" }
}
/**
 * An intersection term, that is, a term of the form `[[a]&&[ab]]`.
 *
 * The term matches only what every one of its operands matches.
 *
 * Example:
 *
 * ```
 * /[[abc]&&[bcd]]/v - which matches 'b' and 'c' only.
 * ```
 */
class RegExpIntersection extends RegExpTerm, @regexp_intersection {
  /** Gets an intersected term (that is, an operand) of this term. */
  RegExpTerm getAnElement() { result = this.getAChild() }

  /** Gets the number of intersected terms of this term. */
  int getNumIntersectedTerm() { result = this.getNumChild() }

  /**
   * Holds if this intersection can match the empty string.
   *
   * An intersection only matches input accepted by all of its operands, so it
   * is nullable only if every operand is nullable. `forex` (rather than
   * `forall`) is used so that a term without any operands is not vacuously
   * considered nullable.
   */
  override predicate isNullable() {
    forex(RegExpTerm operand | operand = this.getAnElement() | operand.isNullable())
  }

  override string getAPrimaryQlClass() { result = "RegExpIntersection" }
}
/**
 * A subtraction term, that is, a term of the form `[[a]--[ab]]`.
 *
 * The first child is the minuend; every following child is a subtrahend.
 *
 * Example:
 *
 * ```
 * /[[abc]--[bc]]/v - which matches 'a' only.
 * ```
 */
class RegExpSubtraction extends RegExpTerm, @regexp_subtraction {
  /** Gets the minuend of this subtraction, that is, its left-most operand (child `0`). */
  RegExpTerm getFirstTerm() { this.getChild(0) = result }

  /** Gets a subtrahend of this subtraction, that is, any operand after the first one. */
  RegExpTerm getASubtractedTerm() {
    exists(int idx | idx >= 1 | result = this.getChild(idx))
  }

  /** Gets the number of subtracted terms of this term, that is, the number of children minus one. */
  int getNumSubtractedTerm() {
    exists(int total | total = this.getNumChild() | result = total - 1)
  }

  override string getAPrimaryQlClass() { result = "RegExpSubtraction" }

  override predicate isNullable() { none() }
}
/**
* A sequence term.
*
@@ -1142,6 +1187,28 @@ private class StringConcatRegExpPatternSource extends RegExpPatternSource {
override RegExpTerm getRegExpTerm() { result = this.asExpr().(AddExpr).asRegExp() }
}
/**
 * A quoted string escape in a regular expression, written with the `\q{...}` syntax.
 * Inside the braces, alternation with `|` is the only supported operation.
 *
 * Example:
 *
 * ```
 * \q{foo}
 * \q{a|b|c}
 * ```
 */
class RegExpQuotedString extends RegExpTerm, @regexp_quoted_string {
  /** Gets the term representing the contents of this quoted string, that is, a child of this term. */
  RegExpTerm getTerm() { this.getAChild() = result }

  override string getAPrimaryQlClass() { result = "RegExpQuotedString" }

  /** Gets a string matched by this quoted string, as reported by its contents term. */
  override string getAMatchedString() {
    exists(RegExpTerm contents | contents = this.getTerm() |
      result = contents.getAMatchedString()
    )
  }

  override predicate isNullable() { none() }
}
module RegExp {
/** Gets the string `"?"` used to represent a regular expression whose flags are unknown. */
string unknownFlag() { result = "?" }

View File

@@ -859,7 +859,10 @@ case @regexpterm.kind of
| 24 = @regexp_char_range
| 25 = @regexp_positive_lookbehind
| 26 = @regexp_negative_lookbehind
| 27 = @regexp_unicode_property_escape;
| 27 = @regexp_unicode_property_escape
| 28 = @regexp_quoted_string
| 29 = @regexp_intersection
| 30 = @regexp_subtraction;
regexp_parse_errors (unique int id: @regexp_parse_error,
int regexp: @regexpterm ref,

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
description: Add support for quoted string, intersection and subtraction
compatibility: backwards