Merge pull request #20495 from github/tausbn/python-fix-unmatchable-dollar-in-lookahead

Python: Fix false positive for unmatchable dollar/caret
This commit is contained in:
Taus
2025-09-25 15:27:32 +02:00
committed by GitHub
5 changed files with 66 additions and 29 deletions

View File

@@ -964,7 +964,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
}
@@ -979,7 +979,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
additional class RegExpNegativeLookahead extends RegExpLookahead {
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
}
@@ -1006,7 +1006,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
class RegExpPositiveLookbehind extends RegExpLookbehind {
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
}
@@ -1021,7 +1021,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
additional class RegExpNegativeLookbehind extends RegExpLookbehind {
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
}

View File

@@ -554,9 +554,9 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.negativeAssertionGroup(start, end)
or
this.positiveLookaheadAssertionGroup(start, end)
this.positiveLookaheadAssertionGroup(start, end, _, _)
or
this.positiveLookbehindAssertionGroup(start, end)
this.positiveLookbehindAssertionGroup(start, end, _, _)
}
/** Holds if an empty group is found between `start` and `end`. */
@@ -572,7 +572,7 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.negativeAssertionGroup(start, end)
or
this.positiveLookaheadAssertionGroup(start, end)
this.positiveLookaheadAssertionGroup(start, end, _, _)
}
private predicate emptyMatchAtEndGroup(int start, int end) {
@@ -580,7 +580,7 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.negativeAssertionGroup(start, end)
or
this.positiveLookbehindAssertionGroup(start, end)
this.positiveLookbehindAssertionGroup(start, end, _, _)
}
private predicate negativeAssertionGroup(int start, int end) {
@@ -593,32 +593,40 @@ class RegExp extends Expr instanceof StringLiteral {
)
}
/** Holds if a negative lookahead is found between `start` and `end` */
predicate negativeLookaheadAssertionGroup(int start, int end) {
exists(int in_start | this.negative_lookahead_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a negative lookahead is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate negativeLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
this.negative_lookahead_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}
/** Holds if a negative lookbehind is found between `start` and `end` */
predicate negativeLookbehindAssertionGroup(int start, int end) {
exists(int in_start | this.negative_lookbehind_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a negative lookbehind is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate negativeLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
this.negative_lookbehind_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}
/** Holds if a positive lookahead is found between `start` and `end` */
predicate positiveLookaheadAssertionGroup(int start, int end) {
exists(int in_start | this.lookahead_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a positive lookahead is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate positiveLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
this.lookahead_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}
/** Holds if a positive lookbehind is found between `start` and `end` */
predicate positiveLookbehindAssertionGroup(int start, int end) {
exists(int in_start | this.lookbehind_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a positive lookbehind is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate positiveLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
this.lookbehind_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}
private predicate group_start(int start, int end) {
@@ -1049,6 +1057,13 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.alternationOption(x, y, start, end)
)
or
// Lookbehind assertions can potentially match the start of the string
(
this.positiveLookbehindAssertionGroup(_, _, start, _) or
this.negativeLookbehindAssertionGroup(_, _, start, _)
) and
this.item(start, end)
}
/** A part of the regex that may match the end of the string. */
@@ -1074,6 +1089,13 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.alternationOption(x, y, start, end)
)
or
// Lookahead assertions can potentially match the end of the string
(
this.positiveLookaheadAssertionGroup(_, _, _, end) or
this.negativeLookaheadAssertionGroup(_, _, _, end)
) and
this.item(start, end)
}
/**

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
- The queries that check for unmatchable `$` and `^` in regular expressions did not account correctly for occurrences inside lookahead and lookbehind assertions. These occurrences are now handled correctly, eliminating this source of false positives.

View File

@@ -4,6 +4,7 @@
| (?!not-this)^[A-Z_]+$ | first | 12 | 13 |
| (?!not-this)^[A-Z_]+$ | first | 13 | 19 |
| (?!not-this)^[A-Z_]+$ | first | 13 | 20 |
| (?!not-this)^[A-Z_]+$ | last | 3 | 11 |
| (?!not-this)^[A-Z_]+$ | last | 13 | 19 |
| (?!not-this)^[A-Z_]+$ | last | 13 | 20 |
| (?!not-this)^[A-Z_]+$ | last | 20 | 21 |
@@ -101,6 +102,7 @@
| ^[A-Z_]+$(?<!not-this) | first | 0 | 1 |
| ^[A-Z_]+$(?<!not-this) | first | 1 | 7 |
| ^[A-Z_]+$(?<!not-this) | first | 1 | 8 |
| ^[A-Z_]+$(?<!not-this) | first | 13 | 21 |
| ^[A-Z_]+$(?<!not-this) | last | 1 | 7 |
| ^[A-Z_]+$(?<!not-this) | last | 1 | 8 |
| ^[A-Z_]+$(?<!not-this) | last | 8 | 9 |

View File

@@ -150,4 +150,12 @@ re.compile(r"[\U00010000-\U0010FFFF]")
re.compile(r"[\u0000-\uFFFF]")
#Allow unicode names
re.compile(r"[\N{degree sign}\N{EM DASH}]")
re.compile(r"[\N{degree sign}\N{EM DASH}]")
#Lookahead assertions. None of these are unmatchable dollars:
re.compile(r"^(?=a$)[ab]")
re.compile(r"^(?!a$)[ab]")
#Lookbehind assertions. None of these are unmatchable carets:
re.compile(r"(?<=^a)a")
re.compile(r"(?<!^a)a")