mirror of
https://github.com/github/codeql.git
synced 2026-04-29 18:55:14 +02:00
Improve calculation of locations of regex terms
This commit is contained in:
@@ -188,12 +188,18 @@ class RegExpTerm extends RegExpParent {
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
// This currently gives incorrect results for string literals including backslashes. TODO: fix that.
|
||||
// There are also more complex cases where it fails. Handling all of them would be difficult for not much gain.
|
||||
exists(int re_start, int re_end |
|
||||
/*
|
||||
* This is an approximation that handles the simple and common case of a single,
|
||||
* normal string literal written in the source, but does not give correct results in more complex cases
|
||||
* such as compile-time concatenation, or multi-line string literals.
|
||||
*/
|
||||
|
||||
exists(int re_start, int re_end, int src_start, int src_end |
|
||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
|
||||
startcolumn = re_start + start + 1 and
|
||||
endcolumn = re_start + end
|
||||
re.sourceCharacter(start, src_start, _) and
|
||||
re.sourceCharacter(end - 1, _, src_end) and
|
||||
startcolumn = re_start + src_start and
|
||||
endcolumn = re_start + src_end - 1
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ abstract class RegexString extends StringLiteral {
|
||||
* In order to avoid negative recursion, we return a boolean.
|
||||
* This way, we can refer to `escaping(pos - 1).booleanNot()`
|
||||
* rather than to a negated version of `escaping(pos)`.
|
||||
* Does not take into account escape characters inside quote sequences.
|
||||
*/
|
||||
private boolean escaping(int pos) {
|
||||
pos = -1 and result = false
|
||||
@@ -104,11 +103,10 @@ abstract class RegexString extends StringLiteral {
|
||||
end = start + 3
|
||||
}
|
||||
|
||||
string nonEscapedCharAt(int i) {
|
||||
result = this.getText().charAt(i) and
|
||||
private string nonEscapedCharAt(int i) {
|
||||
result = this.getChar(i) and
|
||||
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) and
|
||||
not exists(int x, int y | this.quote(x, y) and i in [x .. y - 1]) and
|
||||
not exists(int x, int y | this.controlEscape(x, y) and i in [x .. y - 1])
|
||||
not exists(int x, int y | this.quote(x, y) and i in [x .. y - 1])
|
||||
}
|
||||
|
||||
/** Holds if a character set starts between `start` and `end`, including any negation character (`^`). */
|
||||
@@ -822,6 +820,66 @@ abstract class RegexString extends StringLiteral {
|
||||
this.alternation(start, end) and
|
||||
this.subalternation(start, part_end, part_start)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `i`th character of this literal as it was written in the source code.
|
||||
*/
|
||||
string getSourceChar(int i) { result = this.(StringLiteral).getLiteral().charAt(i) } // NOTE(review): presumably `getLiteral()` includes the surrounding quotes, so index 0 is the opening quote - confirm.
|
||||
|
||||
/**
|
||||
* Helper predicate for `sourceEscapingChar` that
|
||||
* results in a boolean in order to avoid negative recursion.
|
||||
*/
|
||||
private boolean sourceEscaping(int pos) {
|
||||
// Base case: position -1 (just before the first source character) is never escaping.
pos = -1 and result = false
|
||||
or
|
||||
// A backslash is escaping exactly when the position before it is not itself escaping,
// so in a run of backslashes the result alternates true/false.
this.getSourceChar(pos) = "\\" and
|
||||
result = this.sourceEscaping(pos - 1).booleanNot()
|
||||
or
|
||||
// Any character other than a backslash is never escaping.
this.getSourceChar(pos) != "\\" and result = false
|
||||
}
|
||||
|
||||
/**
|
||||
* Equivalent of `escapingChar` for the literal source rather than the string value.
|
||||
* Holds if the character at position `pos` in the source literal is a '\' that is
|
||||
* actually escaping what comes after it.
|
||||
*/
|
||||
private predicate sourceEcapingChar(int pos) { this.sourceEscaping(pos) = true } // NOTE(review): name is missing an "s" ("Ecaping"); the doc comment on `sourceEscaping` spells it `sourceEscapingChar`.
|
||||
|
||||
/**
|
||||
* Holds if an escaped character exists between `start` and `end` in the source literal.
|
||||
*/
|
||||
private predicate sourceEscapedCharacter(int start, int end) {
|
||||
// `start` is the position of the escaping backslash; `end` is exclusive.
this.sourceEcapingChar(start) and
|
||||
// A backslash followed by `u` is taken to span 6 source characters (backslash, `u`, 4 more);
// every other escape is taken to span 2 (backslash plus one character).
// NOTE(review): escapes of other lengths (e.g. octal escapes such as `\123`, or a Unicode
// escape written with several `u`s) would not be measured correctly here - confirm whether that matters.
(if this.getSourceChar(start + 1) = "u" then end = start + 6 else end = start + 2)
|
||||
}
|
||||
|
||||
/**
 * Holds if position `i` exists in the source literal and does not lie inside
 * any escape sequence identified by `sourceEscapedCharacter`.
 */
private predicate sourceNonEscapedCharacter(int i) {
|
||||
// `i` must be a valid index into the source literal.
exists(this.getSourceChar(i)) and
|
||||
// Exclude every position covered by an escape sequence `[x, y)`.
not exists(int x, int y | this.sourceEscapedCharacter(x, y) and i in [x .. y - 1])
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a character is represented between `start` and `end` in the source literal.
|
||||
*/
|
||||
private predicate sourceCharacter(int start, int end) {
|
||||
// An escape sequence counts as a single character spanning `[start, end)`.
sourceEscapedCharacter(start, end)
|
||||
or
|
||||
// Any position outside an escape sequence is a character of width one.
sourceNonEscapedCharacter(start) and
|
||||
end = start + 1
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the `i`th character of the string is represented between offsets
|
||||
* `start` (inclusive) and `end` (exclusive) in the source code of this literal.
|
||||
* This only gives correct results if the literal is written as a normal single-line string literal,
|
||||
* without compile-time concatenation involved.
|
||||
*/
|
||||
predicate sourceCharacter(int pos, int start, int end) {
|
||||
// Only defined for positions that exist in the string value.
exists(this.getChar(pos)) and
|
||||
sourceCharacter(start, end) and
|
||||
// Source characters are ordered by their start offset. `rank` is 1-based, so string index
// `pos` is matched with the source character of rank `pos + 2`, i.e. the first source
// character is skipped.
// NOTE(review): presumably the skipped rank-1 character is the opening quote of the literal - confirm.
start = rank[pos + 2](int s | sourceCharacter(s, _))
|
||||
}
|
||||
}
|
||||
|
||||
/** A string literal used as a regular expression */
|
||||
|
||||
Reference in New Issue
Block a user