Python: Move constraints into pranch charpreds

For sequences and alternations, we require at least one child.
Otherwise, we wish to represent the term differently.
This avoids multiple representations.
This commit is contained in:
Rasmus Lerchedahl Petersen
2021-08-23 11:44:00 +02:00
parent c4554836ca
commit 34d7772a0d

View File

@@ -7,6 +7,10 @@ private import semmle.python.regex
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*
* For sequences and alternations, we require at least one child.
* Otherwise, we wish to represent the term differently.
* This avoids multiple representations of the same term.
*/
newtype TRegExpParent =
/** A string literal used as a regular expression */
@@ -14,9 +18,18 @@ newtype TRegExpParent =
/** A quantified term */
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
/** A sequence term */
TRegExpSequence(Regex re, int start, int end) { re.sequence(start, end) } or
TRegExpSequence(Regex re, int start, int end) {
re.sequence(start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
} or
/** An alternatio term */
TRegExpAlt(Regex re, int start, int end) { re.alternation(start, end) } or
TRegExpAlt(Regex re, int start, int end) {
re.alternation(start, end) and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
part_end < end
) // if an alternation does not have more than one element, it should be treated as that element instead.
} or
/** A character class term */
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
/** A character range term */
@@ -75,11 +88,7 @@ class RegExpTerm extends RegExpParent {
int end;
RegExpTerm() {
this = TRegExpAlt(re, start, end) and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
part_end < end
) // if an alternation does not have more than one element, it should be treated as that element instead.
this = TRegExpAlt(re, start, end)
or
this = TRegExpBackRef(re, start, end)
or
@@ -93,8 +102,7 @@ class RegExpTerm extends RegExpParent {
or
this = TRegExpQuantifier(re, start, end)
or
this = TRegExpSequence(re, start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
this = TRegExpSequence(re, start, end)
or
this = TRegExpSpecialChar(re, start, end)
}
@@ -341,10 +349,7 @@ class RegExpRange extends RegExpQuantifier {
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
*/
class RegExpSequence extends RegExpTerm, TRegExpSequence {
RegExpSequence() {
this = TRegExpSequence(re, start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
}
RegExpSequence() { this = TRegExpSequence(re, start, end) }
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }