mirror of
https://github.com/github/codeql.git
synced 2025-12-23 20:26:32 +01:00
Make the character search skip unencodable characters
This commit is contained in:
@@ -122,6 +122,13 @@ class RegExpRepetition extends RegExpParent {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * A constant in a regular expression that represents one or more valid Unicode characters.
|
||||||
|
*/
|
||||||
|
class RegexpCharacterConstant extends RegExpConstant {
|
||||||
|
RegexpCharacterConstant() { this.isCharacter() }
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the root containing the given term, that is, the root of the literal,
|
* Gets the root containing the given term, that is, the root of the literal,
|
||||||
* or a branch of the root disjunction.
|
* or a branch of the root disjunction.
|
||||||
@@ -136,7 +143,9 @@ RegExpRoot getRoot(RegExpTerm term) {
|
|||||||
*/
|
*/
|
||||||
newtype TInputSymbol =
|
newtype TInputSymbol =
|
||||||
/** An input symbol corresponding to character `c`. */
|
/** An input symbol corresponding to character `c`. */
|
||||||
Char(string c) { c = any(RegExpConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_) } or
|
Char(string c) {
|
||||||
|
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
|
||||||
|
} or
|
||||||
/**
|
/**
|
||||||
* An input symbol representing all characters matched by
|
* An input symbol representing all characters matched by
|
||||||
* (non-universal) character class `recc`.
|
* (non-universal) character class `recc`.
|
||||||
@@ -173,7 +182,7 @@ private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
|
|||||||
*/
|
*/
|
||||||
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
|
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
|
||||||
exists(RegExpTerm term | getRoot(term) = root |
|
exists(RegExpTerm term | getRoot(term) = root |
|
||||||
a = Char(term.(RegExpConstant).getValue().charAt(_))
|
a = Char(term.(RegexpCharacterConstant).getValue().charAt(_))
|
||||||
or
|
or
|
||||||
a = CharClass(term)
|
a = CharClass(term)
|
||||||
)
|
)
|
||||||
@@ -236,7 +245,7 @@ private module CharacterClasses {
|
|||||||
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
|
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
|
||||||
exists(CharClass(cc)) and
|
exists(CharClass(cc)) and
|
||||||
exists(RegExpTerm child | child = cc.getAChild() |
|
exists(RegExpTerm child | child = cc.getAChild() |
|
||||||
char = child.(RegExpConstant).getValue()
|
char = child.(RegexpCharacterConstant).getValue()
|
||||||
or
|
or
|
||||||
rangeMatchesOnLetterOrDigits(child, char)
|
rangeMatchesOnLetterOrDigits(child, char)
|
||||||
or
|
or
|
||||||
@@ -300,7 +309,7 @@ private module CharacterClasses {
|
|||||||
private string getARelevantChar() {
|
private string getARelevantChar() {
|
||||||
exists(ascii(result))
|
exists(ascii(result))
|
||||||
or
|
or
|
||||||
exists(RegExpConstant c | result = c.getValue().charAt(_))
|
exists(RegexpCharacterConstant c | result = c.getValue().charAt(_))
|
||||||
or
|
or
|
||||||
classEscapeMatches(_, result)
|
classEscapeMatches(_, result)
|
||||||
}
|
}
|
||||||
@@ -310,7 +319,7 @@ private module CharacterClasses {
|
|||||||
*/
|
*/
|
||||||
private string getAMentionedChar(RegExpCharacterClass c) {
|
private string getAMentionedChar(RegExpCharacterClass c) {
|
||||||
exists(RegExpTerm child | child = c.getAChild() |
|
exists(RegExpTerm child | child = c.getAChild() |
|
||||||
result = child.(RegExpConstant).getValue()
|
result = child.(RegexpCharacterConstant).getValue()
|
||||||
or
|
or
|
||||||
child.(RegExpCharacterRange).isRange(result, _)
|
child.(RegExpCharacterRange).isRange(result, _)
|
||||||
or
|
or
|
||||||
@@ -439,7 +448,7 @@ newtype TState =
|
|||||||
(
|
(
|
||||||
i = 0
|
i = 0
|
||||||
or
|
or
|
||||||
exists(t.(RegExpConstant).getValue().charAt(i))
|
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
|
||||||
)
|
)
|
||||||
} or
|
} or
|
||||||
Accept(RegExpRoot l) { l.isRelevant() }
|
Accept(RegExpRoot l) { l.isRelevant() }
|
||||||
@@ -511,7 +520,7 @@ State after(RegExpTerm t) {
|
|||||||
* Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
|
* Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
|
||||||
*/
|
*/
|
||||||
predicate delta(State q1, EdgeLabel lbl, State q2) {
|
predicate delta(State q1, EdgeLabel lbl, State q2) {
|
||||||
exists(RegExpConstant s, int i |
|
exists(RegexpCharacterConstant s, int i |
|
||||||
q1 = Match(s, i) and
|
q1 = Match(s, i) and
|
||||||
lbl = Char(s.getValue().charAt(i)) and
|
lbl = Char(s.getValue().charAt(i)) and
|
||||||
(
|
(
|
||||||
|
|||||||
@@ -272,4 +272,10 @@ var bad60 = /(.thisisagoddamnlongstringforstresstestingthequery|\sthisisagoddamn
|
|||||||
var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
|
var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
|
||||||
|
|
||||||
// GOOD
|
// GOOD
|
||||||
var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
|
var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
|
||||||
|
|
||||||
|
// GOOD
|
||||||
|
var good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
|
||||||
|
|
||||||
|
// GOOD
|
||||||
|
var good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
|
||||||
Reference in New Issue
Block a user