mirror of
https://github.com/github/codeql.git
synced 2026-04-29 10:45:15 +02:00
remove false positives where the analysis would wrongly conclude that the accept state could not be reached
This commit is contained in:
@@ -711,16 +711,11 @@ private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterCla
|
||||
*/
|
||||
string intersect(InputSymbol c, InputSymbol d) {
|
||||
c = Char(result) and
|
||||
d = getAnInputSymbolMatching(result) and
|
||||
(
|
||||
sharesRoot(c, d) and
|
||||
(
|
||||
d = Char(result)
|
||||
or
|
||||
d.(CharacterClass).matches(result)
|
||||
)
|
||||
sharesRoot(c, d)
|
||||
or
|
||||
d = Dot() and
|
||||
not (result = "\n" or result = "\r")
|
||||
d = Dot()
|
||||
or
|
||||
d = Any()
|
||||
)
|
||||
@@ -749,6 +744,21 @@ string intersect(InputSymbol c, InputSymbol d) {
|
||||
result = intersect(d, c)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a symbol that matches `char`.
*
* The result is one of: the `Char` symbol for `char` itself, a `CharacterClass`
* whose `matches` predicate holds for `char`, `Dot()` (unless `char` is "\n" or
* "\r"), or `Any()`.
*
* `char` must already be bound by the caller, as required by the `bindingset`
* annotation on this predicate.
|
||||
*/
|
||||
bindingset[char]
|
||||
InputSymbol getAnInputSymbolMatching(string char) {
|
||||
result = Char(char)
|
||||
or
|
||||
result.(CharacterClass).matches(char)
|
||||
or
|
||||
// `Dot()` is excluded for the characters "\n" and "\r"
result = Dot() and
|
||||
not (char = "\n" or char = "\r")
|
||||
or
|
||||
result = Any()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the char after `c` (from a simplified ASCII table).
|
||||
*/
|
||||
@@ -825,14 +835,8 @@ predicate isPumpable(State fork, string w) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state that can be reached from pumpable `fork` consuming
|
||||
* the first `i+1` characters of `w`.
|
||||
*
|
||||
* Character classes are overapproximated as intervals; for example,
|
||||
* `[a-ln-z]` is treated the same as `[a-z]`, and hence considered
|
||||
* to match `m`, even though in fact it does not. This is fine for
|
||||
* our purposes, since we only use this predicate to avoid false
|
||||
* positives.
|
||||
* Gets a state that can be reached from pumpable `fork` by consuming the whole
|
||||
* of `w` zero or more times, followed by the first `i+1` characters of `w`.
|
||||
*/
|
||||
State process(State fork, string w, int i) {
|
||||
isPumpable(fork, w) and
|
||||
@@ -841,11 +845,12 @@ State process(State fork, string w, int i) {
|
||||
i = 0 and prev = fork
|
||||
or
|
||||
prev = process(fork, w, i - 1)
|
||||
or
|
||||
// repeat until fixpoint
|
||||
i = 0 and
|
||||
prev = process(fork, w, w.length() - 1)
|
||||
|
|
||||
exists(InputSymbol s |
|
||||
deltaClosed(prev, s, result) and
|
||||
exists(intersect(Char(w.charAt(i)), s))
|
||||
)
|
||||
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -873,7 +878,7 @@ from RegExpTerm t, string c, int i
|
||||
where
|
||||
c = min(string w | isPumpable(Match(t, i), w)) and
|
||||
not isPumpable(epsilonSucc+(Match(t, i)), _) and
|
||||
not epsilonSucc*(process(Match(t, i), c, c.length() - 1)) = Accept(_)
|
||||
not epsilonSucc*(process(Match(t, i), c, [0 .. c.length() - 1])) = Accept(_)
|
||||
select t,
|
||||
"This part of the regular expression may cause exponential backtracking on strings " +
|
||||
"containing many repetitions of '" + escape(rotate(c, i)) + "'."
|
||||
|
||||
@@ -7,24 +7,10 @@
|
||||
| regexplib/address.js:75:220:75:222 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/address.js:75:616:75:618 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/address.js:75:803:75:811 | [A-Za-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. |
|
||||
| regexplib/dates.js:66:133:66:139 | JANUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JANUARY'. |
|
||||
| regexplib/dates.js:66:141:66:148 | FEBRUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'FEBRUARY'. |
|
||||
| regexplib/dates.js:66:150:66:154 | MARCH | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'MARCH'. |
|
||||
| regexplib/dates.js:66:156:66:160 | APRIL | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'APRIL'. |
|
||||
| regexplib/dates.js:66:162:66:164 | MAY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'MAY'. |
|
||||
| regexplib/dates.js:66:166:66:169 | JUNE | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JUNE'. |
|
||||
| regexplib/dates.js:66:171:66:174 | JULY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JULY'. |
|
||||
| regexplib/dates.js:66:176:66:181 | AUGUST | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'AUGUST'. |
|
||||
| regexplib/dates.js:66:183:66:191 | SEPTEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'SEPTEMBER'. |
|
||||
| regexplib/dates.js:66:193:66:199 | OCTOBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'OCTOBER'. |
|
||||
| regexplib/dates.js:66:201:66:208 | NOVEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'NOVEMBER'. |
|
||||
| regexplib/dates.js:66:210:66:217 | DECEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'DECEMBER'. |
|
||||
| regexplib/dates.js:66:234:66:240 | PRESENT | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'PRESENT'. |
|
||||
| regexplib/email.js:1:16:1:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:5:24:5:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:5:63:5:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:6:10:6:35 | (?:[a-zA-Z0-9][\\.\\-\\+_]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:12:71:12:80 | ([-.]\\w+)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '.0.0,0@0'. |
|
||||
| regexplib/email.js:25:67:25:78 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:25:106:25:117 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:25:212:25:223 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
@@ -50,7 +36,6 @@
|
||||
| regexplib/misc.js:123:17:123:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/misc.js:142:3:142:25 | (\\/w\|\\/W\|[^<>+?$%{}&])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/W'. |
|
||||
| regexplib/misc.js:148:20:148:22 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| regexplib/misc.js:148:23:148:29 | [^"'=]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '> '. |
|
||||
| regexplib/strings.js:19:31:19:57 | [a-zæøå0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
|
||||
| regexplib/strings.js:57:17:57:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/strings.js:81:17:81:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
@@ -65,7 +50,6 @@
|
||||
| regexplib/uri.js:63:393:63:429 | [a-zA-Z0-9\\.\\,\\?\\'\\\\/\\+&%\\$#\\=~_\\-@]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/#'. |
|
||||
| tst.js:4:18:4:32 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
|
||||
| tst.js:4:42:4:58 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
|
||||
| tst.js:14:14:14:15 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
|
||||
| tst.js:19:24:19:43 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
|
||||
| tst.js:19:47:19:66 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
|
||||
| tst.js:19:71:19:90 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
|
||||
@@ -86,7 +70,6 @@
|
||||
| tst.js:83:14:83:20 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:89:25:89:32 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:95:15:95:25 | ([^]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:98:15:98:20 | [^"']+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '('. |
|
||||
| tst.js:101:15:101:23 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:107:15:107:23 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:110:15:110:23 | (G\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
|
||||
@@ -109,3 +92,19 @@
|
||||
| tst.js:167:15:167:27 | (1s\|[\\da-z])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '1s'. |
|
||||
| tst.js:170:15:170:23 | (0\|[\\d])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:173:16:173:20 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:188:17:188:21 | [^>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '?'. |
|
||||
| tst.js:191:16:191:21 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:194:17:194:19 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:197:18:197:20 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| tst.js:200:68:200:79 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' A:'. |
|
||||
| tst.js:200:81:200:82 | ,? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ',A: '. |
|
||||
| tst.js:203:15:203:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:203:18:203:19 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:206:17:206:18 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:209:15:209:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:215:15:215:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:221:15:221:17 | \\n+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:224:15:224:19 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Y'. |
|
||||
| tst.js:227:20:227:20 | b | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bY'. |
|
||||
| tst.js:233:20:233:20 | b | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bY'. |
|
||||
| tst.js:248:16:248:17 | ab | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |
|
||||
|
||||
@@ -8,7 +8,7 @@ var bad1 = /^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)/;
|
||||
// under the MIT license; see file marked-LICENSE.
|
||||
var good1 = /^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)/;
|
||||
|
||||
// NOT GOOD
|
||||
// GOOD - there is no suffix that could cause the regexp to fail to match
|
||||
// Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
|
||||
// which is licensed under the MIT license; see file brace-expansion-LICENSE.
|
||||
var bad2 = /(.*,)+.+/;
|
||||
@@ -94,7 +94,7 @@ var good9 = '(a|aa?)*b';
|
||||
// NOT GOOD
|
||||
var bad18 = /(([^]|[^a])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
// GOOD - there is no suffix that could cause the regexp to fail to match
|
||||
var bad19 = /([^"']+)*/g;
|
||||
|
||||
// NOT GOOD
|
||||
@@ -170,4 +170,82 @@ var bad39 = /((1s|[\da-z])*)"/;
|
||||
var bad40 = /((0|[\d])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad41 = /(([\d]+)*)"/;
|
||||
var bad41 = /(([\d]+)*)"/;
|
||||
|
||||
// GOOD - there is no suffix that could cause the regexp to fail to match
|
||||
var good12 = /(\d+(X\d+)?)+/;
|
||||
|
||||
// GOOD - there is no suffix that could cause the regexp to fail to match
|
||||
var good13 = /([0-9]+(X[0-9]*)?)*/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad42 = /([\n\s]+)*(.)/;
|
||||
|
||||
// GOOD - any witness passes through the accept state.
|
||||
var good14 = /(A*A*X)*/;
|
||||
|
||||
// GOOD - but still flagged (always matches something)
|
||||
var good15 = /^([^>]+)*(>|$)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad43 = /^([^>a]+)*(>|$)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad44 = /(\n\s*)+$/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad45 = /^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad46 = /\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad47 = /(a+|b+|c+)*c/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad48 = /(((a+a?)*)+b+)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad49 = /(a+)+bbbb/;
|
||||
|
||||
// GOOD
|
||||
var good16 = /(a+)+aaaaa*a+/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad50 = /(a+)+aaaaa$/;
|
||||
|
||||
// GOOD
|
||||
var good17 = /(\n+)+\n\n/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad51 = /(\n+)+\n\n$/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad52 = /([^X]+)*$/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad53 = /(([^X]b)+)*$/;
|
||||
|
||||
// GOOD
|
||||
var good18 = /(([^X]b)+)*($|[^X]b)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad54 = /(([^X]b)+)*($|[^X]c)/;
|
||||
|
||||
// GOOD
|
||||
var good19 = /(.*,)+.+/;
|
||||
|
||||
// GOOD
|
||||
var good20 = /((ab)+)*ababab/;
|
||||
|
||||
// GOOD
|
||||
var good21 = /((ab)+)*abab(ab)*(ab)+/;
|
||||
|
||||
// GOOD
|
||||
var good22 = /((ab)+)*/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad55 = /((ab)+)*$/;
|
||||
|
||||
// GOOD
|
||||
var good23 = /((ab)+)*[a1][b1][a2][b2][a3][b3]/;
|
||||
|
||||
Reference in New Issue
Block a user