mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Merge pull request #4648 from erik-krogh/regexpParse
Approved by asgerf
This commit is contained in:
@@ -43,7 +43,7 @@ public class Main {
|
||||
* A version identifier that should be updated every time the extractor changes in such a way that
|
||||
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
|
||||
*/
|
||||
public static final String EXTRACTOR_VERSION = "2020-09-17";
|
||||
public static final String EXTRACTOR_VERSION = "2020-11-11";
|
||||
|
||||
public static final Pattern NEWLINE = Pattern.compile("\n");
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ import com.semmle.js.ast.regexp.ZeroWidthNegativeLookbehind;
|
||||
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
|
||||
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/** A parser for ECMAScript 2018 regular expressions. */
|
||||
@@ -496,10 +497,18 @@ public class RegExpParser {
|
||||
return this.finishTerm(new CharacterClass(loc, elements, inverted));
|
||||
}
|
||||
|
||||
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
|
||||
|
||||
private RegExpTerm parseCharacterClassElement() {
|
||||
SourceLocation loc = new SourceLocation(pos());
|
||||
RegExpTerm atom = this.parseCharacterClassAtom();
|
||||
if (!this.lookahead("-]") && this.match("-"))
|
||||
if (this.lookahead("-\\")) {
|
||||
for (String c : escapeClasses) {
|
||||
if (this.lookahead("-\\" + c))
|
||||
return atom;
|
||||
}
|
||||
}
|
||||
if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape))
|
||||
return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
|
||||
return atom;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
range
|
||||
| tst.js:1:13:1:17 | [w-z] | tst.js:1:14:1:16 | w-z |
|
||||
| tst.js:6:13:6:19 | [\\n-\\r] | tst.js:6:14:6:18 | \\n-\\r |
|
||||
| tst.js:7:13:7:18 | [\\n-z] | tst.js:7:14:7:17 | \\n-z |
|
||||
escapeClass
|
||||
| tst.js:2:13:2:16 | [\\w] | tst.js:2:14:2:15 | \\w |
|
||||
| tst.js:3:13:3:18 | [\\w-z] | tst.js:3:14:3:15 | \\w |
|
||||
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:14:4:15 | \\w |
|
||||
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:17:4:18 | \\w |
|
||||
| tst.js:5:13:5:18 | [z-\\w] | tst.js:5:16:5:17 | \\w |
|
||||
@@ -0,0 +1,9 @@
|
||||
import javascript
|
||||
|
||||
query predicate range(RegExpCharacterClass cla, RegExpCharacterRange range) {
|
||||
cla.getAChild() = range
|
||||
}
|
||||
|
||||
query predicate escapeClass(RegExpCharacterClass cla, RegExpCharacterClassEscape escape) {
|
||||
cla.getAChild() = escape
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
var reg1 = /[w-z]/; // normal range w-z, matches: wxyz
|
||||
var reg2 = /[\w]/; // escape class, same as \w.
|
||||
var reg3 = /[\w-z]/; // escape class \w and "-" and "z", same as [a-zA-Z0-9_\-z]
|
||||
var reg4 = /[\w-\w]/; // escape class \w (twice) and the char "-".
|
||||
var reg5 = /[z-\w]/; // same as reg3
|
||||
var reg6 = /[\n-\r]/; // from \n (code 10) to \r (code 13).
|
||||
var reg7 = /[\n-z]/; // from \n (code 10) to z (code 122).
|
||||
@@ -81,9 +81,11 @@
|
||||
| regexplib/address.js:95:379:95:755 | [a-zA-Z0-9ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿ\\.\\,\\-\\/\\' ]+ | it can start matching anywhere after the start of the preceeding '[a-zA-Z0-9ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿ\\.\\,\\-\\/\\']+' |
|
||||
| regexplib/email.js:8:16:8:49 | [^ \\t\\(\\)\\<\\>@,;\\:\\\\\\"\\.\\[\\]\\r\\n]+ | it can start matching anywhere |
|
||||
| regexplib/email.js:12:2:12:4 | \\w+ | it can start matching anywhere |
|
||||
| regexplib/email.js:15:6:15:13 | [\\w-\\.]* | it can start matching anywhere after the start of the preceeding '\\w+' |
|
||||
| regexplib/email.js:15:28:15:30 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' |
|
||||
| regexplib/email.js:20:3:20:6 | \\w+? | it can start matching anywhere |
|
||||
| regexplib/email.js:28:2:28:4 | \\w+ | it can start matching anywhere |
|
||||
| regexplib/email.js:28:5:28:12 | [\\w-\\.]* | it can start matching anywhere after the start of the preceeding '\\w+' |
|
||||
| regexplib/email.js:28:27:28:29 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' |
|
||||
| regexplib/email.js:28:73:28:87 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere |
|
||||
| regexplib/email.js:28:125:28:139 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere |
|
||||
@@ -173,9 +175,11 @@
|
||||
| regexplib/uri.js:34:3:34:9 | [^\\=&]+ | it can start matching anywhere |
|
||||
| regexplib/uri.js:39:7:39:9 | .*? | it can start matching anywhere after the start of the preceeding '<a' |
|
||||
| regexplib/uri.js:44:2:44:4 | .*? | it can start matching anywhere |
|
||||
| regexplib/uri.js:47:31:47:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
|
||||
| regexplib/uri.js:53:3:53:9 | [^\\=&]+ | it can start matching anywhere |
|
||||
| regexplib/uri.js:58:2:58:45 | ((http\\:\\/\\/\|https\\:\\/\\/\|ftp\\:\\/\\/)\|(www.))+ | it can start matching anywhere |
|
||||
| regexplib/uri.js:59:2:59:13 | [a-zA-Z]{3,} | it can start matching anywhere |
|
||||
| regexplib/uri.js:64:31:64:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
|
||||
| regexplib/uri.js:73:2:73:4 | .*? | it can start matching anywhere |
|
||||
| tst.js:14:13:14:18 | (.*,)+ | it can start matching anywhere |
|
||||
| tst.js:14:14:14:15 | .* | it can start matching anywhere |
|
||||
|
||||
Reference in New Issue
Block a user