Merge pull request #20495 from github/tausbn/python-fix-unmatchable-dollar-in-lookahead

Python: Fix false positive for unmatchable dollar/caret
This commit is contained in:
Taus
2025-09-25 15:27:32 +02:00
committed by GitHub
5 changed files with 66 additions and 29 deletions

View File

@@ -964,7 +964,7 @@ module Impl implements RegexTreeViewSig {
* ``` * ```
*/ */
class RegExpPositiveLookahead extends RegExpLookahead { class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) } RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" } override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
} }
@@ -979,7 +979,7 @@ module Impl implements RegexTreeViewSig {
* ``` * ```
*/ */
additional class RegExpNegativeLookahead extends RegExpLookahead { additional class RegExpNegativeLookahead extends RegExpLookahead {
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) } RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" } override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
} }
@@ -1006,7 +1006,7 @@ module Impl implements RegexTreeViewSig {
* ``` * ```
*/ */
class RegExpPositiveLookbehind extends RegExpLookbehind { class RegExpPositiveLookbehind extends RegExpLookbehind {
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) } RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" } override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
} }
@@ -1021,7 +1021,7 @@ module Impl implements RegexTreeViewSig {
* ``` * ```
*/ */
additional class RegExpNegativeLookbehind extends RegExpLookbehind { additional class RegExpNegativeLookbehind extends RegExpLookbehind {
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) } RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end, _, _) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" } override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
} }

View File

@@ -554,9 +554,9 @@ class RegExp extends Expr instanceof StringLiteral {
or or
this.negativeAssertionGroup(start, end) this.negativeAssertionGroup(start, end)
or or
this.positiveLookaheadAssertionGroup(start, end) this.positiveLookaheadAssertionGroup(start, end, _, _)
or or
this.positiveLookbehindAssertionGroup(start, end) this.positiveLookbehindAssertionGroup(start, end, _, _)
} }
/** Holds if an empty group is found between `start` and `end`. */ /** Holds if an empty group is found between `start` and `end`. */
@@ -572,7 +572,7 @@ class RegExp extends Expr instanceof StringLiteral {
or or
this.negativeAssertionGroup(start, end) this.negativeAssertionGroup(start, end)
or or
this.positiveLookaheadAssertionGroup(start, end) this.positiveLookaheadAssertionGroup(start, end, _, _)
} }
private predicate emptyMatchAtEndGroup(int start, int end) { private predicate emptyMatchAtEndGroup(int start, int end) {
@@ -580,7 +580,7 @@ class RegExp extends Expr instanceof StringLiteral {
or or
this.negativeAssertionGroup(start, end) this.negativeAssertionGroup(start, end)
or or
this.positiveLookbehindAssertionGroup(start, end) this.positiveLookbehindAssertionGroup(start, end, _, _)
} }
private predicate negativeAssertionGroup(int start, int end) { private predicate negativeAssertionGroup(int start, int end) {
@@ -593,32 +593,40 @@ class RegExp extends Expr instanceof StringLiteral {
) )
} }
/** Holds if a negative lookahead is found between `start` and `end` */ /**
predicate negativeLookaheadAssertionGroup(int start, int end) { * Holds if a negative lookahead is found between `start` and `end`, with contents
exists(int in_start | this.negative_lookahead_assertion_start(start, in_start) | * between `in_start` and `in_end`.
this.groupContents(start, end, in_start, _) */
) predicate negativeLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
this.negative_lookahead_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
} }
/** Holds if a negative lookbehind is found between `start` and `end` */ /**
predicate negativeLookbehindAssertionGroup(int start, int end) { * Holds if a negative lookbehind is found between `start` and `end`, with contents
exists(int in_start | this.negative_lookbehind_assertion_start(start, in_start) | * between `in_start` and `in_end`.
this.groupContents(start, end, in_start, _) */
) predicate negativeLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
this.negative_lookbehind_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
} }
/** Holds if a positive lookahead is found between `start` and `end` */ /**
predicate positiveLookaheadAssertionGroup(int start, int end) { * Holds if a positive lookahead is found between `start` and `end`, with contents
exists(int in_start | this.lookahead_assertion_start(start, in_start) | * between `in_start` and `in_end`.
this.groupContents(start, end, in_start, _) */
) predicate positiveLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
this.lookahead_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
} }
/** Holds if a positive lookbehind is found between `start` and `end` */ /**
predicate positiveLookbehindAssertionGroup(int start, int end) { * Holds if a positive lookbehind is found between `start` and `end`, with contents
exists(int in_start | this.lookbehind_assertion_start(start, in_start) | * between `in_start` and `in_end`.
this.groupContents(start, end, in_start, _) */
) predicate positiveLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
this.lookbehind_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
} }
private predicate group_start(int start, int end) { private predicate group_start(int start, int end) {
@@ -1049,6 +1057,13 @@ class RegExp extends Expr instanceof StringLiteral {
or or
this.alternationOption(x, y, start, end) this.alternationOption(x, y, start, end)
) )
or
// The first item inside a lookbehind assertion can potentially match the start of the string
(
this.positiveLookbehindAssertionGroup(_, _, start, _) or
this.negativeLookbehindAssertionGroup(_, _, start, _)
) and
this.item(start, end)
} }
/** A part of the regex that may match the end of the string. */ /** A part of the regex that may match the end of the string. */
@@ -1074,6 +1089,13 @@ class RegExp extends Expr instanceof StringLiteral {
or or
this.alternationOption(x, y, start, end) this.alternationOption(x, y, start, end)
) )
or
// The last item inside a lookahead assertion can potentially match the end of the string
(
this.positiveLookaheadAssertionGroup(_, _, _, end) or
this.negativeLookaheadAssertionGroup(_, _, _, end)
) and
this.item(start, end)
} }
/** /**

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
- The queries that check for unmatchable `$` and `^` in regular expressions did not account correctly for occurrences inside lookahead and lookbehind assertions. These occurrences are now handled correctly, eliminating this source of false positives.

View File

@@ -4,6 +4,7 @@
| (?!not-this)^[A-Z_]+$ | first | 12 | 13 | | (?!not-this)^[A-Z_]+$ | first | 12 | 13 |
| (?!not-this)^[A-Z_]+$ | first | 13 | 19 | | (?!not-this)^[A-Z_]+$ | first | 13 | 19 |
| (?!not-this)^[A-Z_]+$ | first | 13 | 20 | | (?!not-this)^[A-Z_]+$ | first | 13 | 20 |
| (?!not-this)^[A-Z_]+$ | last | 3 | 11 |
| (?!not-this)^[A-Z_]+$ | last | 13 | 19 | | (?!not-this)^[A-Z_]+$ | last | 13 | 19 |
| (?!not-this)^[A-Z_]+$ | last | 13 | 20 | | (?!not-this)^[A-Z_]+$ | last | 13 | 20 |
| (?!not-this)^[A-Z_]+$ | last | 20 | 21 | | (?!not-this)^[A-Z_]+$ | last | 20 | 21 |
@@ -101,6 +102,7 @@
| ^[A-Z_]+$(?<!not-this) | first | 0 | 1 | | ^[A-Z_]+$(?<!not-this) | first | 0 | 1 |
| ^[A-Z_]+$(?<!not-this) | first | 1 | 7 | | ^[A-Z_]+$(?<!not-this) | first | 1 | 7 |
| ^[A-Z_]+$(?<!not-this) | first | 1 | 8 | | ^[A-Z_]+$(?<!not-this) | first | 1 | 8 |
| ^[A-Z_]+$(?<!not-this) | first | 13 | 21 |
| ^[A-Z_]+$(?<!not-this) | last | 1 | 7 | | ^[A-Z_]+$(?<!not-this) | last | 1 | 7 |
| ^[A-Z_]+$(?<!not-this) | last | 1 | 8 | | ^[A-Z_]+$(?<!not-this) | last | 1 | 8 |
| ^[A-Z_]+$(?<!not-this) | last | 8 | 9 | | ^[A-Z_]+$(?<!not-this) | last | 8 | 9 |

View File

@@ -150,4 +150,12 @@ re.compile(r"[\U00010000-\U0010FFFF]")
re.compile(r"[\u0000-\uFFFF]") re.compile(r"[\u0000-\uFFFF]")
#Allow unicode names #Allow unicode names
re.compile(r"[\N{degree sign}\N{EM DASH}]") re.compile(r"[\N{degree sign}\N{EM DASH}]")
#Lookahead assertions. None of these are unmatchable dollars:
re.compile(r"^(?=a$)[ab]")
re.compile(r"^(?!a$)[ab]")
#Lookbehind assertions. None of these are unmatchable carets:
re.compile(r"(?<=^a)a")
re.compile(r"(?<!^a)a")