mirror of
https://github.com/github/codeql.git
synced 2026-04-29 18:55:14 +02:00
extract regexp literals from string concatenations
This commit is contained in:
@@ -1559,6 +1559,14 @@ class URShiftExpr extends @urshift_expr, BinaryExpr {
|
||||
*/
|
||||
class AddExpr extends @add_expr, BinaryExpr {
|
||||
/** Gets the operator symbol of this expression, which is always `+`. */
override string getOperator() { result = "+" }
|
||||
|
||||
/**
|
||||
* Gets the value of this string concatenation parsed as a regular expression, if possible.
|
||||
*
|
||||
* The result is the regular expression term whose parent is this concatenation; there is
|
||||
* no result when no such term exists (presumably because the concatenation could not be
* parsed as a regular expression without syntax errors - confirm against the extractor).
|
||||
*/
|
||||
RegExpTerm asRegExp() { this = result.getParent() }
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -155,7 +155,7 @@ class RegExpTerm extends Locatable, @regexpterm {
|
||||
exists(RegExpParent parent | parent = getRootTerm().getParent() |
|
||||
parent instanceof RegExpLiteral
|
||||
or
|
||||
parent.(StringLiteral).flow() instanceof RegExpPatternSource
|
||||
parent.(Expr).flow() instanceof RegExpPatternSource
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1104,6 +1104,30 @@ private class StringRegExpPatternSource extends RegExpPatternSource {
|
||||
override RegExpTerm getRegExpTerm() { result = asExpr().(StringLiteral).asRegExp() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node whose string value may flow to a position where it is interpreted
|
||||
* as a part of a regular expression, and whose regular expression term is
* taken from a string concatenation (`AddExpr`).
|
||||
*/
|
||||
private class StringConcatRegExpPatternSource extends RegExpPatternSource {
|
||||
/** The node bound by `regExpSource(parse)`; it is what `getAParse()` reports. */
DataFlow::Node parse;
|
||||
|
||||
StringConcatRegExpPatternSource() { this = regExpSource(parse) }
|
||||
|
||||
/** Gets the `parse` node associated with this source. */
override DataFlow::Node getAParse() { result = parse }
|
||||
|
||||
/** Gets an invocation of the global `RegExp` whose first argument is `parse`. */
override DataFlow::SourceNode getARegExpObject() {
|
||||
exists(DataFlow::InvokeNode constructor |
|
||||
constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and
|
||||
parse = constructor.getArgument(0) and
|
||||
result = constructor
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the pattern, which is the string value of this node. */
override string getPattern() { result = getStringValue() }
|
||||
|
||||
/**
* Gets the regular expression term obtained by viewing the underlying expression
* as an `AddExpr`; there is no result if the expression is not an `AddExpr`.
*/
override RegExpTerm getRegExpTerm() { result = asExpr().(AddExpr).asRegExp() }
|
||||
}
|
||||
|
||||
module RegExp {
|
||||
/** Gets the string `"?"` used to represent a regular expression whose flags are unknown. */
|
||||
string unknownFlag() { result = "?" }
|
||||
|
||||
@@ -855,7 +855,7 @@ regexpterm (unique int id: @regexpterm,
|
||||
int idx: int ref,
|
||||
varchar(900) tostring: string ref);
|
||||
|
||||
@regexpparent = @regexpterm | @regexp_literal | @string_literal;
|
||||
@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr;
|
||||
|
||||
case @regexpterm.kind of
|
||||
0 = @regexp_alt
|
||||
|
||||
@@ -512,3 +512,8 @@
|
||||
| tst.js:384:15:384:26 | ([AB]\|[ab])* | Strings with many repetitions of 'A' can start matching anywhere after the start of the preceeding ([AB]\|[ab])*C |
|
||||
| tst.js:385:14:385:25 | ([DE]\|[de])* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ([DE]\|[de])*F |
|
||||
| tst.js:388:14:388:20 | (a\|aa)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a\|aa)*$ |
|
||||
| tst.js:391:6:394:6 | (a\|aa)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a\|aa)*b$ |
|
||||
| tst.js:398:7:399:5 | (c\|cc)* | Strings with many repetitions of 'c' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
|
||||
| tst.js:399:7:400:5 | (d\|dd)* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
|
||||
| tst.js:400:7:401:2 | (e\|ee)* | Strings with many repetitions of 'e' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
|
||||
| tst.js:404:6:405:8 | (g\|gg)* | Strings with many repetitions of 'g' can start matching anywhere after the start of the preceeding (g\|gg)*h$ |
|
||||
|
||||
@@ -183,3 +183,8 @@
|
||||
| tst.js:385:14:385:25 | ([DE]\|[de])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
|
||||
| tst.js:387:27:387:33 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
|
||||
| tst.js:388:14:388:20 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
|
||||
| tst.js:391:6:394:6 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
|
||||
| tst.js:398:7:399:5 | (c\|cc)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'cc'. |
|
||||
| tst.js:399:7:400:5 | (d\|dd)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'dd'. |
|
||||
| tst.js:400:7:401:2 | (e\|ee)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ee'. |
|
||||
| tst.js:404:6:405:8 | (g\|gg)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'gg'. |
|
||||
|
||||
@@ -385,4 +385,21 @@ var good47 = /([AB]|[ab])*C/;
|
||||
var bad92 = /([DE]|[de])*F/i;
|
||||
|
||||
var bad93 = /(?<=^v?|\sv?)(a|aa)*$/;
|
||||
var bad94 = /(a|aa)*$/;
|
||||
var bad94 = /(a|aa)*$/;
|
||||
|
||||
var bad95 = new RegExp(
|
||||
"(a" +
|
||||
"|" +
|
||||
"aa)*" +
|
||||
"b$"
|
||||
);
|
||||
|
||||
var bad96 = new RegExp("(" +
|
||||
"(c|cc)*|" +
|
||||
"(d|dd)*|" +
|
||||
"(e|ee)*" +
|
||||
")f$");
|
||||
|
||||
var bad97 = new RegExp(
|
||||
"(g|gg" +
|
||||
")*h$");
|
||||
|
||||
@@ -15,11 +15,15 @@
|
||||
| tst-IncompleteHostnameRegExp.js:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
|
||||
| tst-IncompleteHostnameRegExp.js:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
|
||||
| tst-IncompleteHostnameRegExp.js:40:3:40:29 | ^https?:\\/\\/api.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:41:42:41:48 | ^https?://.+\\.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:41:42:41:70 | ^https?://.+\\.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:43:3:43:32 | ^https:\\/\\/[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:44:32:44:45 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:44:47:44:62 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:44:64:44:79 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:53:14:53:35 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:59:2:59:32 | /^(foo. ... ever)$/ | here |
|
||||
|
||||
Reference in New Issue
Block a user