mirror of
https://github.com/github/codeql.git
synced 2026-04-23 15:55:18 +02:00
Merge pull request #15244 from Marcono1234/marcono1234/regex-flags
Java: Improve Regex flag parsing
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: fix
|
||||
---
|
||||
* Fixed regular expressions containing flags not being parsed correctly in some cases.
|
||||
@@ -479,7 +479,7 @@ abstract class RegexString extends StringLiteral {
|
||||
private predicate flagGroupStartNoModes(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and
|
||||
this.getChar(start + 2) in ["-", "i", "d", "m", "s", "u", "x", "U"] and
|
||||
end = start + 2
|
||||
}
|
||||
|
||||
@@ -491,7 +491,7 @@ abstract class RegexString extends StringLiteral {
|
||||
this.flagGroupStartNoModes(start, pos)
|
||||
or
|
||||
this.modeCharacter(start, pos - 1) and
|
||||
this.getChar(pos) in ["i", "m", "s", "u", "x", "U"]
|
||||
this.getChar(pos) in ["-", "i", "d", "m", "s", "u", "x", "U"]
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -499,7 +499,10 @@ abstract class RegexString extends StringLiteral {
|
||||
*/
|
||||
private predicate flagGroupStart(int start, int end) {
|
||||
this.flagGroupStartNoModes(start, _) and
|
||||
end = max(int i | this.modeCharacter(start, i) | i + 1)
|
||||
// Check if this is a non-capturing group with flags, e.g. `(?i:a)`; if so the `:` is part of the group start and excluded from the group's contents
|
||||
exists(int maybeEnd | maybeEnd = max(int i | this.modeCharacter(start, i) | i + 1) |
|
||||
if this.getChar(maybeEnd) = ":" then end = maybeEnd + 1 else end = maybeEnd
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -510,9 +513,15 @@ abstract class RegexString extends StringLiteral {
|
||||
* ```
|
||||
*/
|
||||
private predicate flag(string c) {
|
||||
exists(int pos |
|
||||
this.modeCharacter(_, pos) and
|
||||
this.getChar(pos) = c
|
||||
exists(int start, int pos |
|
||||
this.modeCharacter(start, pos) and
|
||||
this.getChar(pos) = c and
|
||||
// Ignore if flag is disabled; use `<=` to also exclude `-` itself
|
||||
// This does not properly handle the (contrived) case where a flag is both enabled and
|
||||
// disabled, e.g. `(?i-i)a+`, in which case the flag seems to act as if it was disabled
|
||||
not exists(int minusPos |
|
||||
this.modeCharacter(start, minusPos) and this.getChar(minusPos) = "-" and minusPos <= pos
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -524,6 +533,8 @@ abstract class RegexString extends StringLiteral {
|
||||
exists(string c | this.flag(c) |
|
||||
c = "i" and result = "IGNORECASE"
|
||||
or
|
||||
c = "d" and result = "UNIXLINES"
|
||||
or
|
||||
c = "m" and result = "MULTILINE"
|
||||
or
|
||||
c = "s" and result = "DOTALL"
|
||||
@@ -930,13 +941,13 @@ class Regex extends RegexString {
|
||||
|
||||
/**
|
||||
* Gets a mode (if any) of this regular expression. Can be any of:
|
||||
* DEBUG
|
||||
* IGNORECASE
|
||||
* MULTILINE
|
||||
* DOTALL
|
||||
* UNICODE
|
||||
* VERBOSE
|
||||
* UNICODECLASS
|
||||
* - IGNORECASE
|
||||
* - UNIXLINES
|
||||
* - MULTILINE
|
||||
* - DOTALL
|
||||
* - UNICODE
|
||||
* - VERBOSE
|
||||
* - UNICODECLASS
|
||||
*/
|
||||
string getAMode() {
|
||||
result != "None" and
|
||||
@@ -946,7 +957,7 @@ class Regex extends RegexString {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this regex is used to match against a full string,
|
||||
* Holds if this regex is used to match against a full string,
|
||||
* as though it was implicitly surrounded by ^ and $.
|
||||
*/
|
||||
predicate matchesFullString() { matches_full_string = true }
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
parseFailures
|
||||
modes
|
||||
| Test.java:17:9:17:37 | "(?i)(?=a)(?!b)(?<=c)(?<!d)+" | IGNORECASE |
|
||||
| Test.java:22:9:22:85 | "(?idmsuxU-idmsuxU)a+(?-idmsuxU)b+(?idmsuxU:c)d+(?-idmsuxU:e)f+(?idmsuxU:)g+" | DOTALL,IGNORECASE,MULTILINE,UNICODE,UNICODECLASS,UNIXLINES,VERBOSE |
|
||||
| Test.java:23:9:23:24 | "(?idms-iuxU)a+" | DOTALL,IGNORECASE,MULTILINE,UNIXLINES |
|
||||
#select
|
||||
| Test.java:5:10:5:17 | [A-Z\\d] | [RegExpCharacterClass] |
|
||||
| Test.java:5:10:5:19 | [A-Z\\d]++ | [RegExpPlus] |
|
||||
@@ -205,3 +209,25 @@ parseFailures
|
||||
| Test.java:21:62:21:62 | b | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:21:64:21:64 | b | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:21:66:21:66 | b | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:10:22:27 | (?idmsuxU-idmsuxU) | [RegExpZeroWidthMatch] |
|
||||
| Test.java:22:10:22:84 | (?idmsuxU-idmsuxU)a+(?-idmsuxU)b+(?idmsuxU:c)d+(?-idmsuxU:e)f+(?idmsuxU:)g+ | [RegExpSequence] |
|
||||
| Test.java:22:28:22:28 | a | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:28:22:29 | a+ | [RegExpPlus] |
|
||||
| Test.java:22:30:22:40 | (?-idmsuxU) | [RegExpZeroWidthMatch] |
|
||||
| Test.java:22:41:22:41 | b | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:41:22:42 | b+ | [RegExpPlus] |
|
||||
| Test.java:22:43:22:54 | (?idmsuxU:c) | [RegExpGroup] |
|
||||
| Test.java:22:53:22:53 | c | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:55:22:55 | d | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:55:22:56 | d+ | [RegExpPlus] |
|
||||
| Test.java:22:57:22:69 | (?-idmsuxU:e) | [RegExpGroup] |
|
||||
| Test.java:22:68:22:68 | e | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:70:22:70 | f | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:70:22:71 | f+ | [RegExpPlus] |
|
||||
| Test.java:22:72:22:82 | (?idmsuxU:) | [RegExpZeroWidthMatch] |
|
||||
| Test.java:22:83:22:83 | g | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:22:83:22:84 | g+ | [RegExpPlus] |
|
||||
| Test.java:23:10:23:21 | (?idms-iuxU) | [RegExpZeroWidthMatch] |
|
||||
| Test.java:23:10:23:23 | (?idms-iuxU)a+ | [RegExpSequence] |
|
||||
| Test.java:23:22:23:22 | a | [RegExpConstant,RegExpNormalChar] |
|
||||
| Test.java:23:22:23:23 | a+ | [RegExpPlus] |
|
||||
|
||||
@@ -8,5 +8,7 @@ string getQLClases(RegexTreeView::RegExpTerm t) {
|
||||
|
||||
query predicate parseFailures(Regex::Regex r, int i) { r.failedToParse(i) }
|
||||
|
||||
query predicate modes(Regex::Regex r, string modes) { modes = strictconcat(r.getAMode(), ",") }
|
||||
|
||||
from RegexTreeView::RegExpTerm t
|
||||
select t, getQLClases(t)
|
||||
|
||||
@@ -18,7 +18,9 @@ class Test {
|
||||
"a||b|c(d|e|)f|g+",
|
||||
"\\018\\033\\0377\\0777\u1337+",
|
||||
"[|]+",
|
||||
"(a(a(a(a(a(a((((c))))a))))))((((((b(((((d)))))b)b)b)b)b)b)+"
|
||||
"(a(a(a(a(a(a((((c))))a))))))((((((b(((((d)))))b)b)b)b)b)b)+",
|
||||
"(?idmsuxU-idmsuxU)a+(?-idmsuxU)b+(?idmsuxU:c)d+(?-idmsuxU:e)f+(?idmsuxU:)g+",
|
||||
"(?idms-iuxU)a+",
|
||||
};
|
||||
|
||||
void test() {
|
||||
|
||||
Reference in New Issue
Block a user