Correctly account for length of string literal prefix when computing locations for RegExpTerms.

This commit is contained in:
Max Schaefer
2023-09-22 10:22:12 +01:00
parent d4ff9c8ed1
commit 6f67055852
5 changed files with 44 additions and 28 deletions

View File

@@ -0,0 +1,4 @@
---
category: fix
---
* Subterms of regular expressions encoded as single-line string literals now have more accurate source-location information.

View File

@@ -227,10 +227,11 @@ module Impl implements RegexTreeViewSig {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int re_start |
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
startcolumn = re_start + start + 4 and
endcolumn = re_start + end + 3
startcolumn = re_start + start + prefix_len and
endcolumn = re_start + end + prefix_len - 1
/* `- 1` because `end` is an exclusive offset, whereas `endcolumn` is inclusive */
)
}

View File

@@ -101,7 +101,7 @@ private module FindRegexMode {
}
/**
* DEPRECATED: Use `Regex` instead.
* DEPRECATED: Use `RegExp` instead.
*/
deprecated class Regex = RegExp;
@@ -327,6 +327,17 @@ class RegExp extends Expr instanceof StrConst {
/** Gets the text of this regex */
string getText() { result = super.getText() }
/**
* Gets the prefix of this regex, that is, any string-literal prefix characters (such as `r`) followed by the opening quote(s).
*
* Examples:
*
* - The prefix of `'x*y'` is `'`.
* - The prefix of `r''` is `r'`.
* - The prefix of `r"""x*y"""` is `r"""`.
*/
string getPrefix() { result = super.getPrefix() }
/** Gets the `i`th character of this regex */
string getChar(int i) { result = this.getText().charAt(i) }

View File

@@ -1,20 +1,20 @@
| locations.py | 14 | 5 |
| locations.py | 19 | 5 |
| locations.py | 24 | 5 |
| locations.py | 29 | 5 |
| locations.py | 34 | 5 |
| locations.py | 14 | 2 |
| locations.py | 19 | 3 |
| locations.py | 24 | 3 |
| locations.py | 29 | 4 |
| locations.py | 34 | 4 |
| locations.py | 39 | 5 |
| locations.py | 44 | 5 |
| locations.py | 49 | 5 |
| locations.py | 54 | 5 |
| locations.py | 54 | 26 |
| locations.py | 59 | 5 |
| locations.py | 59 | 26 |
| locations.py | 65 | 5 |
| locations.py | 65 | 26 |
| locations.py | 49 | 6 |
| locations.py | 54 | 2 |
| locations.py | 54 | 23 |
| locations.py | 59 | 2 |
| locations.py | 59 | 23 |
| locations.py | 65 | 2 |
| locations.py | 65 | 23 |
| locations.py | 72 | 6 |
| locations.py | 72 | 27 |
| locations.py | 80 | 6 |
| locations.py | 85 | 7 |
| locations.py | 90 | 5 |
| locations.py | 90 | 26 |
| locations.py | 80 | 3 |
| locations.py | 85 | 5 |
| locations.py | 90 | 2 |
| locations.py | 90 | 23 |

View File

@@ -6,8 +6,8 @@ import re
# regexp term `[this]`, appearing in various kinds of regexps.
#
# To make the location information easier to understand, we generally put each
# regexp on its own line, even though this is not the way one would normally
# write regexps in Python.
# regexp on its own line, even though this is not idiomatic Python.
# Comments indicate cases we currently do not handle correctly.
# plain string
re.compile(
@@ -49,25 +49,25 @@ re.compile(
br'''[this] is a test'''
)
# plain string with multiple parts
# plain string with multiple parts (second [this] gets wrong column: 23 instead of 26)
re.compile(
'[this] is a test' ' and [this] is another test'
)
# plain string with multiple parts across lines
# plain string with multiple parts across lines (second [this] gets wrong location: 59:23 instead of 60:7)
re.compile(
'[this] is a test'
' and [this] is another test'
)
# plain string with multiple parts across lines and comments
# plain string with multiple parts across lines and comments (second [this] gets wrong location: 65:23 instead of 67:7)
re.compile(
'[this] is a test'
# comment
' and [this] is another test'
)
# actual multiline string
# actual multiline string (both [this]s get wrong location: 72:6 and 72:27 instead of 73:1 and 74:5)
re.compile(
r'''
[this] is a test
@@ -75,7 +75,7 @@ and [this] is another test
'''
)
# plain string with escape sequences
# plain string with escape sequences ([this] gets wrong location: 80:3 instead of 80:4)
re.compile(
'\t[this] is a test'
)
@@ -85,7 +85,7 @@ re.compile(
r'\A[this] is a test'
)
# plain string with escaped newline
# plain string with escaped newline (second [this] gets wrong location: 90:23 instead of 91:6)
re.compile(
'[this] is a test\
and [this] is another test'