mirror of
https://github.com/github/codeql.git
synced 2025-12-21 03:06:31 +01:00
Support quote sequences
This commit is contained in:
@@ -40,6 +40,8 @@ newtype TRegExpParent =
|
|||||||
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
|
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
|
||||||
/** A normal character */
|
/** A normal character */
|
||||||
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
|
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
|
||||||
|
/** A quoted sequence */
|
||||||
|
TRegExpQuote(Regex re, int start, int end) { re.quote(start, end) } or
|
||||||
/** A back reference */
|
/** A back reference */
|
||||||
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
|
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
|
||||||
|
|
||||||
@@ -107,6 +109,8 @@ class RegExpTerm extends RegExpParent {
|
|||||||
or
|
or
|
||||||
this = TRegExpNormalChar(re, start, end)
|
this = TRegExpNormalChar(re, start, end)
|
||||||
or
|
or
|
||||||
|
this = TRegExpQuote(re, start, end)
|
||||||
|
or
|
||||||
this = TRegExpGroup(re, start, end)
|
this = TRegExpGroup(re, start, end)
|
||||||
or
|
or
|
||||||
this = TRegExpQuantifier(re, start, end)
|
this = TRegExpQuantifier(re, start, end)
|
||||||
@@ -675,9 +679,34 @@ class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
|
|||||||
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
|
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A quoted sequence.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
* ```
|
||||||
|
* \Qabc\E
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
class RegExpQuote extends RegExpTerm, TRegExpQuote {
|
||||||
|
string value;
|
||||||
|
|
||||||
|
RegExpQuote() {
|
||||||
|
exists(int inner_start, int inner_end |
|
||||||
|
this = TRegExpQuote(re, start, end) and
|
||||||
|
re.quote(start, end, inner_start, inner_end) and
|
||||||
|
value = re.getText().substring(inner_start, inner_end)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Gets the string matched by this quote term. */
|
||||||
|
string getValue() { result = value }
|
||||||
|
|
||||||
|
override string getPrimaryQLClass() { result = "RegExpQuote" }
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A constant regular expression term, that is, a regular expression
|
* A constant regular expression term, that is, a regular expression
|
||||||
* term matching a single string. Currently, this will always be a single character.
|
* term matching a single string. This can be a single character or a quoted sequence.
|
||||||
*
|
*
|
||||||
* Example:
|
* Example:
|
||||||
*
|
*
|
||||||
@@ -689,14 +718,14 @@ class RegExpConstant extends RegExpTerm {
|
|||||||
string value;
|
string value;
|
||||||
|
|
||||||
RegExpConstant() {
|
RegExpConstant() {
|
||||||
this = TRegExpNormalChar(re, start, end) and
|
(this = TRegExpNormalChar(re, start, end) or this = TRegExpQuote(re, start, end)) and
|
||||||
not this instanceof RegExpCharacterClassEscape and
|
not this instanceof RegExpCharacterClassEscape and
|
||||||
// exclude chars in qualifiers
|
// exclude chars in qualifiers
|
||||||
// TODO: push this into regex library
|
// TODO: push this into regex library
|
||||||
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
|
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
|
||||||
qstart <= start and end <= qend
|
qstart <= start and end <= qend
|
||||||
) and
|
) and
|
||||||
value = this.(RegExpNormalChar).getValue()
|
(value = this.(RegExpNormalChar).getValue() or value = this.(RegExpQuote).getValue())
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -189,13 +189,17 @@ abstract class RegexString extends Expr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
|
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
|
||||||
predicate escapingChar(int pos) { this.escaping(pos) = true }
|
predicate escapingChar(int pos) {
|
||||||
|
this.escaping(pos) = true and
|
||||||
|
not exists(int x, int y | this.quote(x, y) and pos in [x .. y - 1])
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper predicate for `escapingChar`.
|
* Helper predicate for `escapingChar`.
|
||||||
* In order to avoid negative recursion, we return a boolean.
|
* In order to avoid negative recursion, we return a boolean.
|
||||||
* This way, we can refer to `escaping(pos - 1).booleanNot()`
|
* This way, we can refer to `escaping(pos - 1).booleanNot()`
|
||||||
* rather than to a negated version of `escaping(pos)`.
|
* rather than to a negated version of `escaping(pos)`.
|
||||||
|
* Does not take into account escape characters inside quote sequences.
|
||||||
*/
|
*/
|
||||||
private boolean escaping(int pos) {
|
private boolean escaping(int pos) {
|
||||||
pos = -1 and result = false
|
pos = -1 and result = false
|
||||||
@@ -205,6 +209,53 @@ abstract class RegexString extends Expr {
|
|||||||
this.getChar(pos) != "\\" and result = false
|
this.getChar(pos) != "\\" and result = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper predicate for `quote`.
|
||||||
|
* Holds if the char at `pos` could be the beginning of a quote delimiter, i.e. `\Q` (non-escaped) or `\E` (escaping not checked, as quote sequences turn off escapes).
|
||||||
|
* Result is `true` for `\Q` and `false` for `\E`.
|
||||||
|
*/
|
||||||
|
private boolean quote_delimiter(int pos) {
|
||||||
|
result = true and
|
||||||
|
this.escaping(pos) = true and
|
||||||
|
this.getChar(pos + 1) = "Q"
|
||||||
|
or
|
||||||
|
result = false and
|
||||||
|
this.getChar(pos) = "\\" and
|
||||||
|
this.getChar(pos + 1) = "E"
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper predicate for `quote`.
|
||||||
|
* Holds if the char at `pos` is the one-based `index`th occurrence of a quote delimiter (`\Q` or `\E`).
|
||||||
|
* Result is `true` for `\Q` and `false` for `\E`.
|
||||||
|
*/
|
||||||
|
private boolean quote_delimiter(int index, int pos) {
|
||||||
|
result = this.quote_delimiter(pos) and
|
||||||
|
pos = rank[index](int p | this.quote_delimiter(p) = [true, false])
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Holds if a quoted sequence is found between `start` and `end` */
|
||||||
|
predicate quote(int start, int end) { this.quote(start, end, _, _) }
|
||||||
|
|
||||||
|
/** Holds if a quoted sequence is found between `start` and `end`, with content found between `inner_start` and `inner_end`. */
|
||||||
|
predicate quote(int start, int end, int inner_start, int inner_end) {
|
||||||
|
exists(int index |
|
||||||
|
this.quote_delimiter(index, start) = true and
|
||||||
|
(
|
||||||
|
index = 1
|
||||||
|
or
|
||||||
|
this.quote_delimiter(index - 1, _) = false
|
||||||
|
) and
|
||||||
|
inner_start = start + 2 and
|
||||||
|
inner_end = end - 2 and
|
||||||
|
inner_end > inner_start and
|
||||||
|
this.quote_delimiter(inner_end) = false and
|
||||||
|
not exists(int mid |
|
||||||
|
this.quote_delimiter(mid) = false and mid in [inner_start .. inner_end - 1]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/** Gets the text of this regex */
|
/** Gets the text of this regex */
|
||||||
string getText() { result = this.(StringLiteral).getValue() }
|
string getText() { result = this.(StringLiteral).getValue() }
|
||||||
|
|
||||||
@@ -212,7 +263,8 @@ abstract class RegexString extends Expr {
|
|||||||
|
|
||||||
string nonEscapedCharAt(int i) {
|
string nonEscapedCharAt(int i) {
|
||||||
result = this.getText().charAt(i) and
|
result = this.getText().charAt(i) and
|
||||||
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
|
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) and
|
||||||
|
not exists(int x, int y | this.quote(x, y) and i in [x .. y - 1])
|
||||||
}
|
}
|
||||||
|
|
||||||
private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" }
|
private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" }
|
||||||
@@ -728,7 +780,8 @@ abstract class RegexString extends Expr {
|
|||||||
this.character(start, _) or
|
this.character(start, _) or
|
||||||
this.isGroupStart(start) or
|
this.isGroupStart(start) or
|
||||||
this.charSet(start, _) or
|
this.charSet(start, _) or
|
||||||
this.backreference(start, _)
|
this.backreference(start, _) or
|
||||||
|
this.quote(start, _)
|
||||||
}
|
}
|
||||||
|
|
||||||
private predicate item_end(int end) {
|
private predicate item_end(int end) {
|
||||||
@@ -739,6 +792,8 @@ abstract class RegexString extends Expr {
|
|||||||
this.charSet(_, end)
|
this.charSet(_, end)
|
||||||
or
|
or
|
||||||
this.qualifier(_, end, _, _)
|
this.qualifier(_, end, _, _)
|
||||||
|
or
|
||||||
|
this.quote(_, end)
|
||||||
}
|
}
|
||||||
|
|
||||||
private predicate top_level(int start, int end) {
|
private predicate top_level(int start, int end) {
|
||||||
@@ -846,6 +901,8 @@ abstract class RegexString extends Expr {
|
|||||||
this.qualifiedItem(start, end, _, _)
|
this.qualifiedItem(start, end, _, _)
|
||||||
or
|
or
|
||||||
this.charSet(start, end)
|
this.charSet(start, end)
|
||||||
|
or
|
||||||
|
this.quote(start, end)
|
||||||
) and
|
) and
|
||||||
this.firstPart(start, end)
|
this.firstPart(start, end)
|
||||||
}
|
}
|
||||||
@@ -861,6 +918,8 @@ abstract class RegexString extends Expr {
|
|||||||
this.qualifiedItem(start, end, _, _)
|
this.qualifiedItem(start, end, _, _)
|
||||||
or
|
or
|
||||||
this.charSet(start, end)
|
this.charSet(start, end)
|
||||||
|
or
|
||||||
|
this.quote(start, end)
|
||||||
) and
|
) and
|
||||||
this.lastPart(start, end)
|
this.lastPart(start, end)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user