mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
add a Python implementation of RegexTreeViewSig
This commit is contained in:
@@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
import python
|
import python
|
||||||
private import semmle.python.regex
|
private import semmle.python.regex
|
||||||
|
private import codeql.regex.nfa.NfaUtils as NfaUtils
|
||||||
|
private import codeql.regex.RegexTreeView
|
||||||
|
// exporting as RegexTreeView, and in the top-level scope.
|
||||||
|
import Impl as RegexTreeView
|
||||||
import Impl
|
import Impl
|
||||||
|
|
||||||
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
|
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
|
||||||
@@ -52,8 +56,34 @@ private newtype TRegExpParent =
|
|||||||
/** A back reference */
|
/** A back reference */
|
||||||
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
|
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
|
||||||
|
|
||||||
|
pragma[nomagic]
|
||||||
|
private int seqChildEnd(Regex re, int start, int end, int i) {
|
||||||
|
result = seqChild(re, start, end, i).getEnd()
|
||||||
|
}
|
||||||
|
|
||||||
|
// moved out so we can use it in the charpred
|
||||||
|
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
|
||||||
|
re.sequence(start, end) and
|
||||||
|
(
|
||||||
|
i = 0 and
|
||||||
|
result.getRegex() = re and
|
||||||
|
result.getStart() = start and
|
||||||
|
exists(int itemEnd |
|
||||||
|
re.item(start, itemEnd) and
|
||||||
|
result.getEnd() = itemEnd
|
||||||
|
)
|
||||||
|
or
|
||||||
|
i > 0 and
|
||||||
|
result.getRegex() = re and
|
||||||
|
exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
|
||||||
|
result.getStart() = itemStart and
|
||||||
|
re.item(itemStart, result.getEnd())
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/** An implementation that satisfies the RegexTreeView signature. */
|
/** An implementation that satisfies the RegexTreeView signature. */
|
||||||
module Impl {
|
module Impl implements RegexTreeViewSig {
|
||||||
/**
|
/**
|
||||||
* An element containing a regular expression term, that is, either
|
* An element containing a regular expression term, that is, either
|
||||||
* a string literal (parsed as a regular expression)
|
* a string literal (parsed as a regular expression)
|
||||||
@@ -391,32 +421,6 @@ module Impl {
|
|||||||
override string getPrimaryQLClass() { result = "RegExpSequence" }
|
override string getPrimaryQLClass() { result = "RegExpSequence" }
|
||||||
}
|
}
|
||||||
|
|
||||||
pragma[nomagic]
|
|
||||||
private int seqChildEnd(Regex re, int start, int end, int i) {
|
|
||||||
result = seqChild(re, start, end, i).getEnd()
|
|
||||||
}
|
|
||||||
|
|
||||||
// moved out so we can use it in the charpred
|
|
||||||
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
|
|
||||||
re.sequence(start, end) and
|
|
||||||
(
|
|
||||||
i = 0 and
|
|
||||||
result.getRegex() = re and
|
|
||||||
result.getStart() = start and
|
|
||||||
exists(int itemEnd |
|
|
||||||
re.item(start, itemEnd) and
|
|
||||||
result.getEnd() = itemEnd
|
|
||||||
)
|
|
||||||
or
|
|
||||||
i > 0 and
|
|
||||||
result.getRegex() = re and
|
|
||||||
exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
|
|
||||||
result.getStart() = itemStart and
|
|
||||||
re.item(itemStart, result.getEnd())
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An alternative term, that is, a term of the form `a|b`.
|
* An alternative term, that is, a term of the form `a|b`.
|
||||||
*
|
*
|
||||||
@@ -1030,4 +1034,62 @@ module Impl {
|
|||||||
|
|
||||||
override string getPrimaryQLClass() { result = "RegExpBackRef" }
|
override string getPrimaryQLClass() { result = "RegExpBackRef" }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class Top = RegExpParent;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `term` is an escape class representing e.g. `\d`.
|
||||||
|
* `clazz` is which character class it represents, e.g. "d" for `\d`.
|
||||||
|
*/
|
||||||
|
predicate isEscapeClass(RegExpTerm term, string clazz) {
|
||||||
|
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `term` is a possessive quantifier.
|
||||||
|
* Python's `re` module only gained possessive quantifiers in Python 3.11 and they are not modelled here, so this never holds, but it is used by the shared library.
|
||||||
|
*/
|
||||||
|
predicate isPossessive(RegExpQuantifier term) { none() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the regex that `term` is part of is used in a way that ignores any leading prefix of the input it's matched against.
|
||||||
|
* Not yet implemented for Python, so this currently holds for every term.
|
||||||
|
*/
|
||||||
|
predicate matchesAnyPrefix(RegExpTerm term) { any() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the regex that `term` is part of is used in a way that ignores any trailing suffix of the input it's matched against.
|
||||||
|
* Not yet implemented for Python, so this currently holds for every term.
|
||||||
|
*/
|
||||||
|
predicate matchesAnySuffix(RegExpTerm term) { any() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the regular expression should not be considered.
|
||||||
|
*
|
||||||
|
* We make the pragmatic performance optimization to ignore regular expressions in files
|
||||||
|
* that do not belong to the project code (such as installed dependencies).
|
||||||
|
*/
|
||||||
|
predicate isExcluded(RegExpParent parent) {
|
||||||
|
not exists(parent.getRegex().getLocation().getFile().getRelativePath())
|
||||||
|
or
|
||||||
|
// Regexes with many occurrences of ".*" may cause the polynomial ReDoS computation to explode, so
|
||||||
|
// we explicitly exclude these.
|
||||||
|
count(int i | exists(parent.getRegex().getText().regexpFind("\\.\\*", i, _)) | i) > 10
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `root` has the `i` flag for case-insensitive matching.
|
||||||
|
*/
|
||||||
|
predicate isIgnoreCase(RegExpTerm root) {
|
||||||
|
root.isRootTerm() and
|
||||||
|
root.getLiteral().isIgnoreCase()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `root` has the `s` flag, which makes `.` also match newlines (dot-all mode).
|
||||||
|
*/
|
||||||
|
predicate isDotAll(RegExpTerm root) {
|
||||||
|
root.isRootTerm() and
|
||||||
|
root.getLiteral().isDotAll()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user