mirror of
https://github.com/github/codeql.git
synced 2026-02-02 16:23:02 +01:00
Merge pull request #8489 from aibaars/regex-refactor
Ruby: refactor regex libraries
This commit is contained in:
@@ -552,7 +552,7 @@ class RegExpWordBoundary extends RegExpSpecialChar {
|
||||
|
||||
/**
|
||||
* A character class escape in a regular expression.
|
||||
* That is, an escaped charachter that denotes multiple characters.
|
||||
* That is, an escaped character that denotes multiple characters.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
|
||||
@@ -188,7 +188,7 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/** Hold is a character set starts between `start` and `end`. */
|
||||
/** Holds if a character set starts between `start` and `end`. */
|
||||
predicate char_set_start(int start, int end) {
|
||||
this.char_set_start(start) = true and
|
||||
(
|
||||
@@ -316,8 +316,10 @@ abstract class RegexString extends Expr {
|
||||
result = this.(Bytes).getS()
|
||||
}
|
||||
|
||||
/** Gets the `i`th character of this regex, i.e. the result of `getText().charAt(i)`. */
|
||||
string getChar(int i) { result = this.getText().charAt(i) }
|
||||
|
||||
/** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
|
||||
string nonEscapedCharAt(int i) {
|
||||
result = this.getText().charAt(i) and
|
||||
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
|
||||
@@ -329,6 +331,9 @@ abstract class RegexString extends Expr {
|
||||
|
||||
/** Holds if `nonEscapedCharAt(i)` is `(` and `inCharSet(i)` does not hold. */
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
|
||||
|
||||
/**
|
||||
* Holds if the `i`th character could not be parsed.
|
||||
*/
|
||||
predicate failedToParse(int i) {
|
||||
exists(this.getChar(i)) and
|
||||
not exists(int start, int end |
|
||||
@@ -417,6 +422,9 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a simple or escaped character is found between `start` and `end`.
|
||||
*/
|
||||
predicate character(int start, int end) {
|
||||
(
|
||||
this.simpleCharacter(start, end) and
|
||||
@@ -428,12 +436,18 @@ abstract class RegexString extends Expr {
|
||||
not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a normal character is found between `start` and `end`.
* That is, `end = start + 1`, `character(start, end)` holds, and
* `specialCharacter(start, end, _)` does not hold for that range.
|
||||
*/
|
||||
predicate normalCharacter(int start, int end) {
|
||||
end = start + 1 and
|
||||
this.character(start, end) and
|
||||
not this.specialCharacter(start, end, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a special character is found between `start` and `end`.
|
||||
*/
|
||||
predicate specialCharacter(int start, int end, string char) {
|
||||
not this.inCharSet(start) and
|
||||
this.character(start, end) and
|
||||
@@ -492,7 +506,7 @@ abstract class RegexString extends Expr {
|
||||
this.specialCharacter(start, end, _)
|
||||
}
|
||||
|
||||
/** Whether the text in the range start,end is a group */
|
||||
/** Whether the text in the range `start,end` is a group */
|
||||
predicate group(int start, int end) {
|
||||
this.groupContents(start, end, _, _)
|
||||
or
|
||||
@@ -611,6 +625,7 @@ abstract class RegexString extends Expr {
|
||||
this.simple_group_start(start, end)
|
||||
}
|
||||
|
||||
/** Matches the start of a non-capturing group, e.g. `(?:` */
|
||||
private predicate non_capturing_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -618,12 +633,18 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the start of a simple group, e.g. `(a+)`: a group start at `start` whose next character is not `?`; `end` is `start + 1`. */
|
||||
private predicate simple_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
// NOTE(review): `!=` presumably only holds when a character exists at `start + 1`,
// so a group start that is the last character of the text would not match; confirm.
this.getChar(start + 1) != "?" and
|
||||
end = start + 1
|
||||
}
|
||||
|
||||
/**
|
||||
* Matches the start of a named group, such as:
|
||||
* - `(?<name>\w+)`
|
||||
* - `(?'name'\w+)`
|
||||
*/
|
||||
private predicate named_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -675,6 +696,7 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/** Matches the start of a positive lookahead assertion, i.e. `(?=`. */
|
||||
private predicate lookahead_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -682,6 +704,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the start of a negative lookahead assertion, i.e. `(?!`. */
|
||||
private predicate negative_lookahead_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -689,6 +712,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */
|
||||
private predicate lookbehind_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -697,6 +721,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 4
|
||||
}
|
||||
|
||||
/** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
|
||||
private predicate negative_lookbehind_assertion_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -705,6 +730,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 4
|
||||
}
|
||||
|
||||
/** Matches the start of a comment group, i.e. `(?#`. */
|
||||
private predicate comment_group_start(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
@@ -712,6 +738,7 @@ abstract class RegexString extends Expr {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
/** Matches the contents of a group. */
|
||||
predicate groupContents(int start, int end, int in_start, int in_end) {
|
||||
this.group_start(start, in_start) and
|
||||
end = in_end + 1 and
|
||||
@@ -719,12 +746,14 @@ abstract class RegexString extends Expr {
|
||||
this.isGroupEnd(in_end)
|
||||
}
|
||||
|
||||
/** Matches a named backreference, e.g. `\k<foo>`. */
|
||||
private predicate named_backreference(int start, int end, string name) {
|
||||
this.named_backreference_start(start, start + 4) and
|
||||
// NOTE(review): `end` is one past the first `)` found after `start + 4`, whereas the
// documented example `\k<foo>` is terminated by `>`. Confirm the closing delimiter and
// the 4-character prefix against `named_backreference_start` (not visible here).
end = min(int i | i > start + 4 and this.getChar(i) = ")") + 1 and
|
||||
name = this.getText().substring(start + 4, end - 2)
|
||||
}
|
||||
|
||||
/** Matches a numbered backreference, e.g. `\1`. */
|
||||
private predicate numbered_backreference(int start, int end, int value) {
|
||||
this.escapingChar(start) and
|
||||
// starting with 0 makes it an octal escape
|
||||
@@ -749,7 +778,7 @@ abstract class RegexString extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/** Whether the text in the range start,end is a back reference */
|
||||
/** Whether the text in the range `start,end` is a back reference */
|
||||
predicate backreference(int start, int end) {
|
||||
this.numbered_backreference(start, end, _)
|
||||
or
|
||||
|
||||
Reference in New Issue
Block a user