mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: Fix false positive for unmatchable dollar/caret
Our previous modelling did not account for the fact that a lookahead can potentially extend all the way to the end of the input (and similarly, that a lookbehind can extend all the way to the beginning). To fix this, I extended `firstPart` and `lastPart` to handle lookbehinds and lookaheads correctly, and added some test cases (all of which yield no new results). Fixes #20429.
This commit is contained in:
@@ -964,7 +964,7 @@ module Impl implements RegexTreeViewSig {
|
||||
* ```
|
||||
*/
|
||||
class RegExpPositiveLookahead extends RegExpLookahead {
|
||||
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
|
||||
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end, _, _) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
|
||||
}
|
||||
@@ -979,7 +979,7 @@ module Impl implements RegexTreeViewSig {
|
||||
* ```
|
||||
*/
|
||||
additional class RegExpNegativeLookahead extends RegExpLookahead {
|
||||
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
|
||||
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end, _, _) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
|
||||
}
|
||||
@@ -1006,7 +1006,7 @@ module Impl implements RegexTreeViewSig {
|
||||
* ```
|
||||
*/
|
||||
class RegExpPositiveLookbehind extends RegExpLookbehind {
|
||||
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
|
||||
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end, _, _) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
|
||||
}
|
||||
@@ -1021,7 +1021,7 @@ module Impl implements RegexTreeViewSig {
|
||||
* ```
|
||||
*/
|
||||
additional class RegExpNegativeLookbehind extends RegExpLookbehind {
|
||||
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
|
||||
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end, _, _) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
|
||||
}
|
||||
|
||||
@@ -554,9 +554,9 @@ class RegExp extends Expr instanceof StringLiteral {
|
||||
or
|
||||
this.negativeAssertionGroup(start, end)
|
||||
or
|
||||
this.positiveLookaheadAssertionGroup(start, end)
|
||||
this.positiveLookaheadAssertionGroup(start, end, _, _)
|
||||
or
|
||||
this.positiveLookbehindAssertionGroup(start, end)
|
||||
this.positiveLookbehindAssertionGroup(start, end, _, _)
|
||||
}
|
||||
|
||||
/** Holds if an empty group is found between `start` and `end`. */
|
||||
@@ -572,7 +572,7 @@ class RegExp extends Expr instanceof StringLiteral {
|
||||
or
|
||||
this.negativeAssertionGroup(start, end)
|
||||
or
|
||||
this.positiveLookaheadAssertionGroup(start, end)
|
||||
this.positiveLookaheadAssertionGroup(start, end, _, _)
|
||||
}
|
||||
|
||||
private predicate emptyMatchAtEndGroup(int start, int end) {
|
||||
@@ -580,7 +580,7 @@ class RegExp extends Expr instanceof StringLiteral {
|
||||
or
|
||||
this.negativeAssertionGroup(start, end)
|
||||
or
|
||||
this.positiveLookbehindAssertionGroup(start, end)
|
||||
this.positiveLookbehindAssertionGroup(start, end, _, _)
|
||||
}
|
||||
|
||||
private predicate negativeAssertionGroup(int start, int end) {
|
||||
@@ -593,32 +593,40 @@ class RegExp extends Expr instanceof StringLiteral {
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if a negative lookahead is found between `start` and `end` */
|
||||
predicate negativeLookaheadAssertionGroup(int start, int end) {
|
||||
exists(int in_start | this.negative_lookahead_assertion_start(start, in_start) |
|
||||
this.groupContents(start, end, in_start, _)
|
||||
)
|
||||
/**
|
||||
* Holds if a negative lookahead is found between `start` and `end`, with contents
|
||||
* between `in_start` and `in_end`.
|
||||
*/
|
||||
predicate negativeLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
|
||||
this.negative_lookahead_assertion_start(start, in_start) and
|
||||
this.groupContents(start, end, in_start, in_end)
|
||||
}
|
||||
|
||||
/** Holds if a negative lookbehind is found between `start` and `end` */
|
||||
predicate negativeLookbehindAssertionGroup(int start, int end) {
|
||||
exists(int in_start | this.negative_lookbehind_assertion_start(start, in_start) |
|
||||
this.groupContents(start, end, in_start, _)
|
||||
)
|
||||
/**
|
||||
* Holds if a negative lookbehind is found between `start` and `end`, with contents
|
||||
* between `in_start` and `in_end`.
|
||||
*/
|
||||
predicate negativeLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
|
||||
this.negative_lookbehind_assertion_start(start, in_start) and
|
||||
this.groupContents(start, end, in_start, in_end)
|
||||
}
|
||||
|
||||
/** Holds if a positive lookahead is found between `start` and `end` */
|
||||
predicate positiveLookaheadAssertionGroup(int start, int end) {
|
||||
exists(int in_start | this.lookahead_assertion_start(start, in_start) |
|
||||
this.groupContents(start, end, in_start, _)
|
||||
)
|
||||
/**
|
||||
* Holds if a positive lookahead is found between `start` and `end`, with contents
|
||||
* between `in_start` and `in_end`.
|
||||
*/
|
||||
predicate positiveLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
|
||||
this.lookahead_assertion_start(start, in_start) and
|
||||
this.groupContents(start, end, in_start, in_end)
|
||||
}
|
||||
|
||||
/** Holds if a positive lookbehind is found between `start` and `end` */
|
||||
predicate positiveLookbehindAssertionGroup(int start, int end) {
|
||||
exists(int in_start | this.lookbehind_assertion_start(start, in_start) |
|
||||
this.groupContents(start, end, in_start, _)
|
||||
)
|
||||
/**
|
||||
* Holds if a positive lookbehind is found between `start` and `end`, with contents
|
||||
* between `in_start` and `in_end`.
|
||||
*/
|
||||
predicate positiveLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
|
||||
this.lookbehind_assertion_start(start, in_start) and
|
||||
this.groupContents(start, end, in_start, in_end)
|
||||
}
|
||||
|
||||
private predicate group_start(int start, int end) {
|
||||
@@ -1049,6 +1057,13 @@ class RegExp extends Expr instanceof StringLiteral {
|
||||
or
|
||||
this.alternationOption(x, y, start, end)
|
||||
)
|
||||
or
|
||||
// Lookbehind assertions can potentially match the start of the string
|
||||
(
|
||||
this.positiveLookbehindAssertionGroup(_, _, start, _) or
|
||||
this.negativeLookbehindAssertionGroup(_, _, start, _)
|
||||
) and
|
||||
this.item(start, end)
|
||||
}
|
||||
|
||||
/** A part of the regex that may match the end of the string. */
|
||||
@@ -1074,6 +1089,13 @@ class RegExp extends Expr instanceof StringLiteral {
|
||||
or
|
||||
this.alternationOption(x, y, start, end)
|
||||
)
|
||||
or
|
||||
// Lookahead assertions can potentially match the end of the string
|
||||
(
|
||||
this.positiveLookaheadAssertionGroup(_, _, _, end) or
|
||||
this.negativeLookaheadAssertionGroup(_, _, _, end)
|
||||
) and
|
||||
this.item(start, end)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
|
||||
- The queries that check for unmatchable `$` and `^` in regular expressions did not account correctly for occurrences inside lookahead and lookbehind assertions. These occurrences are now handled correctly, eliminating this source of false positives.
|
||||
@@ -150,4 +150,12 @@ re.compile(r"[\U00010000-\U0010FFFF]")
|
||||
re.compile(r"[\u0000-\uFFFF]")
|
||||
|
||||
#Allow unicode names
|
||||
re.compile(r"[\N{degree sign}\N{EM DASH}]")
|
||||
re.compile(r"[\N{degree sign}\N{EM DASH}]")
|
||||
|
||||
#Lookahead assertions. None of these are unmatchable dollars:
|
||||
re.compile(r"^(?=a$)[ab]")
|
||||
re.compile(r"^(?!a$)[ab]")
|
||||
|
||||
#Lookbehind assertions. None of these are unmatchable carets:
|
||||
re.compile(r"(?<=^a)a")
|
||||
re.compile(r"(?<!^a)a")
|
||||
|
||||
Reference in New Issue
Block a user