Merge pull request #4648 from erik-krogh/regexpParse

Approved by asgerf
This commit is contained in:
CodeQL CI
2020-11-16 08:20:40 +00:00
committed by GitHub
6 changed files with 41 additions and 2 deletions

View File

@@ -43,7 +43,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2020-11-11";

/** Pattern matching a single line-feed character. */
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -35,6 +35,7 @@ import com.semmle.js.ast.regexp.ZeroWidthNegativeLookbehind;
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/** A parser for ECMAScript 2018 regular expressions. */
@@ -496,10 +497,18 @@ public class RegExpParser {
return this.finishTerm(new CharacterClass(loc, elements, inverted));
}
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
/**
 * Parses one element of a character class: either a single atom or a range such as {@code a-z}.
 *
 * <p>A range is built only when the atom is followed by a {@code -} that is not immediately
 * before the closing {@code ]}, and neither side of the {@code -} is one of the escape classes
 * listed in {@code escapeClasses}. In every other case the atom is returned on its own and any
 * following {@code -} is left in the input, so that the next call parses it as an ordinary atom.
 *
 * @return a {@link CharacterClassRange} when a range was recognised, otherwise the atom itself
 */
private RegExpTerm parseCharacterClassElement() {
  SourceLocation loc = new SourceLocation(pos());
  RegExpTerm atom = this.parseCharacterClassAtom();

  // `x-\w` is not a range: stop here without touching the `-`.
  if (this.lookahead("-\\")) {
    for (String c : escapeClasses) {
      if (this.lookahead("-\\" + c)) {
        return atom;
      }
    }
  }

  // The atom type is tested before match("-") on purpose: match() consumes the `-`, so checking
  // afterwards would silently drop the literal `-` from inputs such as `[\w-z]`.
  if (!(atom instanceof CharacterClassEscape) && !this.lookahead("-]") && this.match("-")) {
    return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
  }
  return atom;
}

View File

@@ -0,0 +1,10 @@
range
| tst.js:1:13:1:17 | [w-z] | tst.js:1:14:1:16 | w-z |
| tst.js:6:13:6:19 | [\\n-\\r] | tst.js:6:14:6:18 | \\n-\\r |
| tst.js:7:13:7:18 | [\\n-z] | tst.js:7:14:7:17 | \\n-z |
escapeClass
| tst.js:2:13:2:16 | [\\w] | tst.js:2:14:2:15 | \\w |
| tst.js:3:13:3:18 | [\\w-z] | tst.js:3:14:3:15 | \\w |
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:14:4:15 | \\w |
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:17:4:18 | \\w |
| tst.js:5:13:5:18 | [z-\\w] | tst.js:5:16:5:17 | \\w |

View File

@@ -0,0 +1,9 @@
import javascript
/** Holds if `range` is a character range that is a direct child of the character class `cla`. */
query predicate range(RegExpCharacterClass cla, RegExpCharacterRange range) {
  range = cla.getAChild()
}
/** Holds if `escape` is a character class escape that is a direct child of the character class `cla`. */
query predicate escapeClass(RegExpCharacterClass cla, RegExpCharacterClassEscape escape) {
  escape = cla.getAChild()
}

View File

@@ -0,0 +1,7 @@
var reg1 = /[w-z]/; // normal range w-z, matches: wxyz
var reg2 = /[\w]/; // escape class, same as \w.
var reg3 = /[\w-z]/; // escape class \w and "-" and "z", same as [a-zA-Z0-9_\-] ("z" is already covered by \w)
var reg4 = /[\w-\w]/; // escape class \w (twice) and the char "-".
var reg5 = /[z-\w]/; // "z" and "-" and escape class \w: matches the same characters as reg3
var reg6 = /[\n-\r]/; // from \n (code 10) to \r (code 13).
var reg7 = /[\n-z]/; // from \n (code 10) to z (code 122).

View File

@@ -81,9 +81,11 @@
| regexplib/address.js:95:379:95:755 | [a-zA-Z0-9&#192;&#193;&#194;&#195;&#196;&#197;&#198;&#199;&#200;&#201;&#202;&#203;&#204;&#205;&#206;&#207;&#208;&#209;&#210;&#211;&#212;&#213;&#214;&#216;&#217;&#218;&#219;&#220;&#221;&#223;&#224;&#225;&#226;&#227;&#228;&#229;&#230;&#231;&#232;&#233;&#234;&#235;&#236;&#237;&#238;&#239;&#241;&#242;&#243;&#244;&#245;&#246;&#248;&#249;&#250;&#251;&#252;&#253;&#255;\\.\\,\\-\\/\\' ]+ | it can start matching anywhere after the start of the preceeding '[a-zA-Z0-9&#192;&#193;&#194;&#195;&#196;&#197;&#198;&#199;&#200;&#201;&#202;&#203;&#204;&#205;&#206;&#207;&#208;&#209;&#210;&#211;&#212;&#213;&#214;&#216;&#217;&#218;&#219;&#220;&#221;&#223;&#224;&#225;&#226;&#227;&#228;&#229;&#230;&#231;&#232;&#233;&#234;&#235;&#236;&#237;&#238;&#239;&#241;&#242;&#243;&#244;&#245;&#246;&#248;&#249;&#250;&#251;&#252;&#253;&#255;\\.\\,\\-\\/\\']+' |
| regexplib/email.js:8:16:8:49 | [^ \\t\\(\\)\\<\\>@,;\\:\\\\\\"\\.\\[\\]\\r\\n]+ | it can start matching anywhere |
| regexplib/email.js:12:2:12:4 | \\w+ | it can start matching anywhere |
| regexplib/email.js:15:6:15:13 | [\\w-\\.]* | it can start matching anywhere after the start of the preceeding '\\w+' |
| regexplib/email.js:15:28:15:30 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' |
| regexplib/email.js:20:3:20:6 | \\w+? | it can start matching anywhere |
| regexplib/email.js:28:2:28:4 | \\w+ | it can start matching anywhere |
| regexplib/email.js:28:5:28:12 | [\\w-\\.]* | it can start matching anywhere after the start of the preceeding '\\w+' |
| regexplib/email.js:28:27:28:29 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' |
| regexplib/email.js:28:73:28:87 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere |
| regexplib/email.js:28:125:28:139 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere |
@@ -173,9 +175,11 @@
| regexplib/uri.js:34:3:34:9 | [^\\=&]+ | it can start matching anywhere |
| regexplib/uri.js:39:7:39:9 | .*? | it can start matching anywhere after the start of the preceeding '<a' |
| regexplib/uri.js:44:2:44:4 | .*? | it can start matching anywhere |
| regexplib/uri.js:47:31:47:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
| regexplib/uri.js:53:3:53:9 | [^\\=&]+ | it can start matching anywhere |
| regexplib/uri.js:58:2:58:45 | ((http\\:\\/\\/\|https\\:\\/\\/\|ftp\\:\\/\\/)\|(www.))+ | it can start matching anywhere |
| regexplib/uri.js:59:2:59:13 | [a-zA-Z]{3,} | it can start matching anywhere |
| regexplib/uri.js:64:31:64:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
| regexplib/uri.js:73:2:73:4 | .*? | it can start matching anywhere |
| tst.js:14:13:14:18 | (.*,)+ | it can start matching anywhere |
| tst.js:14:14:14:15 | .* | it can start matching anywhere |