mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
Correctly account for length of string literal prefix when computing locations for RegExpTerms.
This commit is contained in:
4
python/ql/lib/change-notes/2023-09-22-regex-prefix.md
Normal file
4
python/ql/lib/change-notes/2023-09-22-regex-prefix.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: fix
|
||||
---
|
||||
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.
|
||||
@@ -227,10 +227,11 @@ module Impl implements RegexTreeViewSig {
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
exists(int re_start |
|
||||
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
|
||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
|
||||
startcolumn = re_start + start + 4 and
|
||||
endcolumn = re_start + end + 3
|
||||
startcolumn = re_start + start + prefix_len and
|
||||
endcolumn = re_start + end + prefix_len - 1
|
||||
/* the `- 1` converts the exclusive `end` offset into an inclusive end column */
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ private module FindRegexMode {
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `Regex` instead.
|
||||
* DEPRECATED: Use `RegExp` instead.
|
||||
*/
|
||||
deprecated class Regex = RegExp;
|
||||
|
||||
@@ -327,6 +327,17 @@ class RegExp extends Expr instanceof StrConst {
|
||||
/** Gets the text of this regex */
|
||||
string getText() { result = super.getText() }
|
||||
|
||||
/**
|
||||
* Gets the prefix of this regex, that is, any string-literal prefix characters (such as `r`) together with the opening quote(s).
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* - The prefix of `'x*y'` is `'`.
|
||||
* - The prefix of `r''` is `r'`.
|
||||
* - The prefix of `r"""x*y"""` is `r"""`.
|
||||
*/
|
||||
string getPrefix() { result = super.getPrefix() }
|
||||
|
||||
/** Gets the `i`th character of this regex */
|
||||
string getChar(int i) { result = this.getText().charAt(i) }
|
||||
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
| locations.py | 14 | 5 |
|
||||
| locations.py | 19 | 5 |
|
||||
| locations.py | 24 | 5 |
|
||||
| locations.py | 29 | 5 |
|
||||
| locations.py | 34 | 5 |
|
||||
| locations.py | 14 | 2 |
|
||||
| locations.py | 19 | 3 |
|
||||
| locations.py | 24 | 3 |
|
||||
| locations.py | 29 | 4 |
|
||||
| locations.py | 34 | 4 |
|
||||
| locations.py | 39 | 5 |
|
||||
| locations.py | 44 | 5 |
|
||||
| locations.py | 49 | 5 |
|
||||
| locations.py | 54 | 5 |
|
||||
| locations.py | 54 | 26 |
|
||||
| locations.py | 59 | 5 |
|
||||
| locations.py | 59 | 26 |
|
||||
| locations.py | 65 | 5 |
|
||||
| locations.py | 65 | 26 |
|
||||
| locations.py | 49 | 6 |
|
||||
| locations.py | 54 | 2 |
|
||||
| locations.py | 54 | 23 |
|
||||
| locations.py | 59 | 2 |
|
||||
| locations.py | 59 | 23 |
|
||||
| locations.py | 65 | 2 |
|
||||
| locations.py | 65 | 23 |
|
||||
| locations.py | 72 | 6 |
|
||||
| locations.py | 72 | 27 |
|
||||
| locations.py | 80 | 6 |
|
||||
| locations.py | 85 | 7 |
|
||||
| locations.py | 90 | 5 |
|
||||
| locations.py | 90 | 26 |
|
||||
| locations.py | 80 | 3 |
|
||||
| locations.py | 85 | 5 |
|
||||
| locations.py | 90 | 2 |
|
||||
| locations.py | 90 | 23 |
|
||||
|
||||
@@ -6,8 +6,8 @@ import re
|
||||
# regexp term `[this]`, appearing in various kinds of regexps.
|
||||
#
|
||||
# To make the location information easier to understand, we generally put each
|
||||
# regexp on its own line, even though this is not the way one would normally
|
||||
# write regexps in Python.
|
||||
# regexp on its own line, even though this is not idiomatic Python.
|
||||
# Comments indicate cases we currently do not handle correctly.
|
||||
|
||||
# plain string
|
||||
re.compile(
|
||||
@@ -49,25 +49,25 @@ re.compile(
|
||||
br'''[this] is a test'''
|
||||
)
|
||||
|
||||
# plain string with multiple parts
|
||||
# plain string with multiple parts (second [this] gets wrong column: 23 instead of 26)
|
||||
re.compile(
|
||||
'[this] is a test' ' and [this] is another test'
|
||||
)
|
||||
|
||||
# plain string with multiple parts across lines
|
||||
# plain string with multiple parts across lines (second [this] gets wrong location: 59:23 instead of 60:7)
|
||||
re.compile(
|
||||
'[this] is a test'
|
||||
' and [this] is another test'
|
||||
)
|
||||
|
||||
# plain string with multiple parts across lines and comments
|
||||
# plain string with multiple parts across lines and comments (second [this] gets wrong location: 65:23 instead of 67:7)
|
||||
re.compile(
|
||||
'[this] is a test'
|
||||
# comment
|
||||
' and [this] is another test'
|
||||
)
|
||||
|
||||
# actual multiline string
|
||||
# actual multiline string (both [this]s get wrong location: 72:6 and 72:27 instead of 73:1 and 74:5)
|
||||
re.compile(
|
||||
r'''
|
||||
[this] is a test
|
||||
@@ -75,7 +75,7 @@ and [this] is another test
|
||||
'''
|
||||
)
|
||||
|
||||
# plain string with escape sequences
|
||||
# plain string with escape sequences ([this] gets wrong location: 80:3 instead of 80:4)
|
||||
re.compile(
|
||||
'\t[this] is a test'
|
||||
)
|
||||
@@ -85,7 +85,7 @@ re.compile(
|
||||
r'\A[this] is a test'
|
||||
)
|
||||
|
||||
# plain string with escaped newline
|
||||
# plain string with escaped newline (second [this] gets wrong location: 90:23 instead of 91:6)
|
||||
re.compile(
|
||||
'[this] is a test\
|
||||
and [this] is another test'
|
||||
|
||||
Reference in New Issue
Block a user