Python: Fix parsing of octal escapes

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-08-04 20:41:56 +02:00
parent 34b054ff53
commit c08f94ec04
9 changed files with 42 additions and 17 deletions

View File

@@ -75,7 +75,11 @@ class RegExpTerm extends RegExpParent {
int end;
RegExpTerm() {
this = TRegExpAlt(re, start, end)
this = TRegExpAlt(re, start, end) and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
part_end < end
) // if an alternation does not have more than one element, it should be treated as that element instead.
or
this = TRegExpBackRef(re, start, end)
or

View File

@@ -371,10 +371,13 @@ abstract class RegexString extends Expr {
or
// octal value \ooo
end in [start + 2 .. start + 4] and
this.getText().substring(start + 1, end).toInt() >= 0 and
// this.isOctal([start + 1 .. end]) and
forall(int i | i in [start + 1 .. end - 1] | this.isOctal(i)) and
// this.getText().substring(start + 1, end).toInt() >= 0 and
not (
end < start + 4 and
exists(this.getText().substring(start + 1, end + 1).toInt())
this.isOctal(end) //and
// exists(this.getText().substring(start + 1, end + 1).toInt())
)
or
// 16-bit hex value \uhhhh
@@ -392,6 +395,11 @@ abstract class RegexString extends Expr {
)
}
/**
 * Holds if the character that `this.getChar(index)` yields is one of the
 * octal digits `0` through `7`.
 */
pragma[inline]
private predicate isOctal(int index) {
  exists(string digit | digit = this.getChar(index) |
    digit = ["0", "1", "2", "3", "4", "5", "6", "7"]
  )
}
/** Holds if `index` is inside a character set. */
predicate inCharSet(int index) {
exists(int x, int y | this.charSet(x, y) and index in [x + 1 .. y - 2])
@@ -690,6 +698,7 @@ abstract class RegexString extends Expr {
private predicate numbered_backreference(int start, int end, int value) {
this.escapingChar(start) and
// starting with 0 makes it an octal escape
not this.getChar(start + 1) = "0" and
exists(string text, string svalue, int len |
end = start + len and
@@ -698,8 +707,18 @@ abstract class RegexString extends Expr {
|
svalue = text.substring(start + 1, start + len) and
value = svalue.toInt() and
not exists(text.substring(start + 1, start + len + 1).toInt()) and
value > 0
// value is composed of digits
forall(int i | i in [start + 1 .. start + len - 1] |
this.getChar(i) in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
) and
// a longer reference is not possible
not (
len = 2 and
exists(text.substring(start + 1, start + len + 1).toInt())
) and
// 3 octal digits makes it an octal escape
not forall(int i | i in [start + 1 .. start + 4] | this.isOctal(i))
// TODO: Inside a character set, all numeric escapes are treated as characters.
)
}