fix parse error in regular expressions

This commit is contained in:
Erik Krogh Kristensen
2021-03-08 11:57:28 +01:00
parent 84554af7f5
commit bff59a1aaa
5 changed files with 33 additions and 4 deletions

View File

@@ -43,7 +43,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2021-02-24";
public static final String EXTRACTOR_VERSION = "2021-03-08";
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -282,11 +282,18 @@ public class RegExpParser {
if (this.match("+")) return this.finishTerm(new Plus(loc, atom, !this.match("?")));
if (this.match("?")) return this.finishTerm(new Opt(loc, atom, !this.match("?")));
if (this.match("{")) {
Double lo = toNumber(this.readDigits(false)), hi;
String matched = "{"; // keeping track of the string matched so far, in case this turns out not to be a quantifier.
String digits = this.readDigits(false);
matched += digits;
Double lo = toNumber(digits), hi;
int prevPos = this.pos;
if (this.match(",")) {
matched += ",";
if (!this.lookahead("}")) {
// atom{lo, hi}
hi = toNumber(this.readDigits(false));
digits = this.readDigits(false);
matched += digits;
hi = toNumber(digits);
} else {
// atom{lo,}
hi = null;
@@ -295,7 +302,11 @@ public class RegExpParser {
// atom{lo}
hi = lo;
}
this.expectRBrace();
if (!this.match("}")) {
// Not a quantifier after all: treat the text consumed so far as a literal constant.
// E.g. a regexp such as `/a{|X/`, where the `{` has no matching `}`.
return this.finishTerm(new Sequence(loc, Arrays.asList(atom, new Constant(loc, matched))));
}
return this.finishTerm(new Range(loc, atom, !this.match("?"), lo, hi));
}
return atom;