mirror of
https://github.com/github/codeql.git
synced 2025-12-21 19:26:31 +01:00
Merge pull request #14317 from yoff/python/fix-regex-string-part-locations
Python: Improve computation of regex fragments inside string parts
This commit is contained in:
@@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
category: minorAnalysis
|
||||||
|
---
|
||||||
|
* Regular expression fragments residing inside implicitly concatenated strings now have better location information.
|
||||||
@@ -154,6 +154,28 @@ class StringPart extends StringPart_, AstNode {
|
|||||||
override string toString() { result = StringPart_.super.toString() }
|
override string toString() { result = StringPart_.super.toString() }
|
||||||
|
|
||||||
override Location getLocation() { result = StringPart_.super.getLocation() }
|
override Location getLocation() { result = StringPart_.super.getLocation() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the content of string `StringPart` is surrounded by
|
||||||
|
* a prefix (including a quote) of length `prefixLength` and
|
||||||
|
* a quote of length `quoteLength`.
|
||||||
|
*/
|
||||||
|
predicate contextSize(int prefixLength, int quoteLength) {
|
||||||
|
exists(int occurrenceOffset |
|
||||||
|
quoteLength = this.getText().regexpFind("\"{3}|\"{1}|'{3}|'{1}", 0, occurrenceOffset).length() and
|
||||||
|
prefixLength = occurrenceOffset + quoteLength
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the length of the content, that is the text between the prefix and the quote.
|
||||||
|
* See `contextSize` for obtaining the lengths of the prefix and the quote.
|
||||||
|
*/
|
||||||
|
int getContentLength() {
|
||||||
|
exists(int prefixLength, int quoteLength | this.contextSize(prefixLength, quoteLength) |
|
||||||
|
result = this.getText().length() - prefixLength - quoteLength
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class StringPartList extends StringPartList_ { }
|
class StringPartList extends StringPartList_ { }
|
||||||
|
|||||||
@@ -223,16 +223,55 @@ module Impl implements RegexTreeViewSig {
|
|||||||
*/
|
*/
|
||||||
Location getLocation() { result = re.getLocation() }
|
Location getLocation() { result = re.getLocation() }
|
||||||
|
|
||||||
|
/** Gets the accumulated length of string parts with lower index than `index`, if any. */
|
||||||
|
private int getPartOffset(int index) {
|
||||||
|
index = 0 and result = 0
|
||||||
|
or
|
||||||
|
index > 0 and
|
||||||
|
exists(int previousOffset | previousOffset = this.getPartOffset(index - 1) |
|
||||||
|
result =
|
||||||
|
previousOffset + re.(StrConst).getImplicitlyConcatenatedPart(index - 1).getContentLength()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the `StringPart` in which this `RegExpTerm` resides, if any.
|
||||||
|
* `localOffset` will be the offset of this `RegExpTerm` inside `result`.
|
||||||
|
*/
|
||||||
|
StringPart getPart(int localOffset) {
|
||||||
|
exists(int index, int prefixLength | index = max(int i | this.getPartOffset(i) <= start) |
|
||||||
|
result = re.(StrConst).getImplicitlyConcatenatedPart(index) and
|
||||||
|
result.contextSize(prefixLength, _) and
|
||||||
|
// Example:
|
||||||
|
// re.compile('...' r"""...this..""")
|
||||||
|
// - `start` is the offset from `(` to `this` as counted after concatenating all parts.
|
||||||
|
// - we subtract the length of the previous `StringPart`s, `'...'`, to know how far into this `StringPart` we go.
|
||||||
|
// - as the prefix 'r"""' is part of the `StringPart`, `this` is found that much further in.
|
||||||
|
localOffset = start - this.getPartOffset(index) + prefixLength
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/** Holds if this term is found at the specified location offsets. */
|
/** Holds if this term is found at the specified location offsets. */
|
||||||
predicate hasLocationInfo(
|
predicate hasLocationInfo(
|
||||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||||
) {
|
) {
|
||||||
|
not exists(this.getPart(_)) and
|
||||||
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
|
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
|
||||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
|
re.getLocation().hasLocationInfo(filepath, startline, re_start, _, _) and
|
||||||
startcolumn = re_start + start + prefix_len and
|
startcolumn = re_start + start + prefix_len and
|
||||||
|
endline = startline and
|
||||||
endcolumn = re_start + end + prefix_len - 1
|
endcolumn = re_start + end + prefix_len - 1
|
||||||
/* inclusive vs exclusive */
|
/* inclusive vs exclusive */
|
||||||
)
|
)
|
||||||
|
or
|
||||||
|
exists(StringPart part, int localOffset, int partStartColumn |
|
||||||
|
part = this.getPart(localOffset)
|
||||||
|
|
|
||||||
|
part.getLocation().hasLocationInfo(filepath, startline, partStartColumn, _, _) and
|
||||||
|
startcolumn = partStartColumn + localOffset and
|
||||||
|
endline = startline and
|
||||||
|
endcolumn = (end - start) + startcolumn
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Gets the file in which this term is found. */
|
/** Gets the file in which this term is found. */
|
||||||
|
|||||||
@@ -50,23 +50,31 @@ br'''[this] is a test'''
|
|||||||
)
|
)
|
||||||
|
|
||||||
# plain string with multiple parts
|
# plain string with multiple parts
|
||||||
re.compile( # $ location=1:2 SPURIOUS:location=1:23 MISSING:location=1:26
|
re.compile( # $ location=1:2 location=1:26
|
||||||
'[this] is a test' ' and [this] is another test'
|
'[this] is a test' ' and [this] is another test'
|
||||||
)
|
)
|
||||||
|
|
||||||
# plain string with multiple parts across lines
|
# plain string with multiple parts across lines
|
||||||
re.compile( # $ location=1:2 SPURIOUS:location=1:23 MISSING:location=2:7
|
re.compile( # $ location=1:2 location=2:7 location=3:2
|
||||||
'[this] is a test'
|
'[this] is a test'
|
||||||
' and [this] is another test'
|
' and [this] is another test'
|
||||||
|
'[this] comes right at the start of a part'
|
||||||
)
|
)
|
||||||
|
|
||||||
# plain string with multiple parts across lines and comments
|
# plain string with multiple parts across lines and comments
|
||||||
re.compile( # $ location=1:2 SPURIOUS:location=1:23 MISSING:location=3:7
|
re.compile( # $ location=1:2 location=3:7
|
||||||
'[this] is a test'
|
'[this] is a test'
|
||||||
# comment
|
# comment
|
||||||
' and [this] is another test'
|
' and [this] is another test'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# multiple parts of different kinds
|
||||||
|
re.compile( # $ location=1:2 location=1:28 location=2:11 location=3:8
|
||||||
|
'[this] is a test' ''' and [this] is another test'''
|
||||||
|
br""" and [this] is yet another test"""
|
||||||
|
r' and [this] is one more'
|
||||||
|
)
|
||||||
|
|
||||||
# actual multiline string
|
# actual multiline string
|
||||||
re.compile( # $ SPURIOUS:location=1:6 location=1:27 MISSING:location=2:1 location=3:5
|
re.compile( # $ SPURIOUS:location=1:6 location=1:27 MISSING:location=2:1 location=3:5
|
||||||
r'''
|
r'''
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
| KnownCVEs.py:15:20:15:22 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
| KnownCVEs.py:15:20:15:22 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||||
| KnownCVEs.py:30:21:31:22 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
|
| KnownCVEs.py:30:21:30:23 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
|
||||||
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
|
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
|
||||||
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
|
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
|
||||||
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
|
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
|
||||||
|
|||||||
Reference in New Issue
Block a user