Revert "Revert "Python: switch to shared implementation of IncompleteHostnameRegExp.ql""

This reverts commit 6d24591416.
This commit is contained in:
Arthur Baars
2022-03-18 16:31:22 +01:00
parent 117fb5be7d
commit 9412b331db
6 changed files with 249 additions and 30 deletions

View File

@@ -2,6 +2,7 @@
import python
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
/**
* An element containing a regular expression term, that is, either
@@ -48,6 +49,19 @@ newtype TRegExpParent =
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*
* The result is a regular expression fragment: a non-capturing alternation
* of the TLDs `com`, `org`, `edu`, `gov`, `uk`, `net` and `io`, followed by
* the negative lookahead `(?![a-z0-9])`, so that a TLD is only matched when
* it is not immediately followed by another lower-case letter or digit.
*/
string getACommonTld() {
// the TLDs were chosen according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
@@ -445,6 +459,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
override string getPrimaryQLClass() { result = "RegExpAlt" }
}
/**
* An alias for `RegExpEscape`.
*
* NOTE(review): presumably provided so that code shared with other languages
* can refer to escapes under this name; confirm against the shared library.
*/
class RegExpCharEscape = RegExpEscape;
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
@@ -751,6 +767,9 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
*/
int getNumber() { result = re.getGroupNumber(start, end) }
/** Holds if this is a capture group, that is, if this group has a group number. */
predicate isCapture() { exists(int groupNumber | groupNumber = this.getNumber()) }
/** Holds if this is a named capture group, that is, if `getName()` has a result. */
predicate isNamed() { exists(this.getName()) }
@@ -1009,3 +1028,24 @@ class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
/**
 * Gets the root term of the parse tree for the regular expression `re`,
 * if such a parse tree has been constructed.
 */
RegExpTerm getParsedRegExp(StrConst re) {
  result.isRootTerm() and
  re = result.getRegex()
}
/**
 * A node whose value may flow to a position where it is interpreted
 * as a part of a regular expression.
 *
 * Only data-flow nodes whose underlying expression is a `Regex` are included.
 */
class RegExpPatternSource extends DataFlow::CfgNode {
  private Regex astNode;

  RegExpPatternSource() { astNode = this.asExpr() }

  /**
   * Gets a node where the pattern of this node is parsed as a part of
   * a regular expression. This is the node itself.
   */
  DataFlow::Node getAParse() { result = this }

  /**
   * Gets the root term of the regular expression parsed from this pattern.
   */
  RegExpTerm getRegExpTerm() {
    result.getRegex() = astNode and
    // Without this restriction any term whose `getRegex()` is `astNode` would
    // qualify, not just the root term that the documentation promises.
    result.isRootTerm()
  }
}