JavaScript: Distinguish {lo} and {lo,} in the regular expression parser.

This commit is contained in:
Max Schaefer
2020-02-14 14:08:31 +00:00
parent 9e3ed214d0
commit 4346691cdc
7 changed files with 45 additions and 6 deletions

View File

@@ -37,7 +37,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2020-02-05";
public static final String EXTRACTOR_VERSION = "2020-02-14";
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -281,8 +281,19 @@ public class RegExpParser {
if (this.match("+")) return this.finishTerm(new Plus(loc, atom, !this.match("?")));
if (this.match("?")) return this.finishTerm(new Opt(loc, atom, !this.match("?")));
if (this.match("{")) {
Double lo = toNumber(this.readDigits(false)), hi = null;
if (this.match(",") && !this.lookahead("}")) hi = toNumber(this.readDigits(false));
Double lo = toNumber(this.readDigits(false)), hi;
if (this.match(",")) {
if (!this.lookahead("}")) {
// atom{lo, hi}
hi = toNumber(this.readDigits(false));
} else {
// atom{lo,}
hi = null;
}
} else {
// atom{lo}
hi = lo;
}
this.expectRBrace();
return this.finishTerm(new Range(loc, atom, !this.match("?"), lo, hi));
}

View File

@@ -1088,6 +1088,7 @@ locations_default(#20376,#10000,15,2,15,5)
hasLocation(#20375,#20376)
isGreedy(#20375)
rangeQuantifierLowerBound(#20375,1)
rangeQuantifierUpperBound(#20375,1)
#20377=*
regexpterm(#20377,14,#20375,0,"a")
#20378=@"loc,{#10000},15,2,15,2"
@@ -1157,6 +1158,7 @@ regexpterm(#20393,11,#20392,0,"a{1}?")
locations_default(#20394,#10000,18,2,18,6)
hasLocation(#20393,#20394)
rangeQuantifierLowerBound(#20393,1)
rangeQuantifierUpperBound(#20393,1)
#20395=*
regexpterm(#20395,14,#20393,0,"a")
#20396=@"loc,{#10000},18,2,18,2"
@@ -1670,6 +1672,7 @@ locations_default(#20543,#10000,37,2,37,3)
locations_default(#20543,#10000,37,2,37,3)
hasLocation(#20542,#20543)
isGreedy(#20542)
rangeQuantifierLowerBound(#20542,0)
rangeQuantifierUpperBound(#20542,0)
#20544=*
regexpterm(#20544,14,#20542,0,"a")
@@ -1708,6 +1711,7 @@ locations_default(#20555,#10000,38,2,38,3)
locations_default(#20555,#10000,38,2,38,3)
hasLocation(#20554,#20555)
isGreedy(#20554)
rangeQuantifierLowerBound(#20554,0)
rangeQuantifierUpperBound(#20554,0)
#20556=*
regexpterm(#20556,14,#20554,0,"a")
@@ -1754,6 +1758,7 @@ locations_default(#20569,#10000,39,2,39,4)
locations_default(#20569,#10000,39,2,39,4)
hasLocation(#20568,#20569)
isGreedy(#20568)
rangeQuantifierLowerBound(#20568,2)
rangeQuantifierUpperBound(#20568,2)
#20570=*
regexpterm(#20570,14,#20568,0,"a")

View File

@@ -506,17 +506,25 @@ class RegExpOpt extends RegExpQuantifier, @regexp_opt {
/**
* A range-quantified term
*
* Example:
* Examples:
*
* ```
* \w{2,4}
* \w{2,}
* \w{2}
* ```
*/
class RegExpRange extends RegExpQuantifier, @regexp_range {
/** Gets the lower bound of the range, if any. */
/** Gets the lower bound of the range. */
int getLowerBound() { rangeQuantifierLowerBound(this, result) }
/** Gets the upper bound of the range, if any. */
/**
* Gets the upper bound of the range, if any.
*
* If there is no upper bound, any number of repetitions at or above the lower bound is allowed.
* For a term of the form `r{lo}`, both the lower and the upper bound
* are `lo`.
*/
int getUpperBound() { rangeQuantifierUpperBound(this, result) }
override predicate isNullable() {

View File

@@ -0,0 +1,4 @@
| tst.js:1:2:1:5 | a{1} | 1 | 1 |
| tst.js:2:2:2:6 | a{1,} | 1 | <none> |
| tst.js:3:2:3:7 | a{1,5} | 1 | 5 |
| tst.js:4:2:4:6 | a{,5} | 0 | 5 |

View File

@@ -0,0 +1,7 @@
import javascript
// For every range quantifier, report its lower bound and its upper bound as
// strings; a quantifier that has no upper bound is reported as "<none>".
from RegExpRange quantifier, string lowerText, string upperText
where
  lowerText = quantifier.getLowerBound().toString() and
  (
    // Bounded case: holds only when an upper bound exists.
    upperText = quantifier.getUpperBound().toString()
    or
    // Unbounded case: fall back to the placeholder.
    not exists(quantifier.getUpperBound()) and
    upperText = "<none>"
  )
select quantifier, lowerText, upperText

View File

@@ -0,0 +1,4 @@
/a{1}/; // {lo}: single number, no comma
/a{1,}/; // {lo,}: comma present, no upper bound written
/a{1,5}/; // {lo,hi}: both bounds written
/a{,5}/; // {,hi}: lower bound omitted