Merge pull request #8486 from aibaars/incomplete-hostname-python

Python: switch to shared implementation of IncompleteHostnameRegExp.ql
This commit is contained in:
yoff
2022-03-22 15:06:14 +01:00
committed by GitHub
7 changed files with 256 additions and 30 deletions

View File

@@ -445,6 +445,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
override string getPrimaryQLClass() { result = "RegExpAlt" }
}
/**
 * An alias for `RegExpEscape`.
 *
 * NOTE(review): presumably introduced so that code written against the name
 * `RegExpCharEscape` can be used with this library - confirm against callers.
 */
class RegExpCharEscape = RegExpEscape;
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
@@ -751,6 +753,9 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
*/
int getNumber() {
  // Delegates to the underlying regex, passing this term's `start` and `end`.
  re.getGroupNumber(start, end) = result
}
/**
 * Holds if this is a capture group, that is, if `getNumber()` has a result
 * for this group.
 */
predicate isCapture() { exists(int groupNumber | groupNumber = this.getNumber()) }
/**
 * Holds if this is a named capture group, that is, if `getName()` has a
 * result for this group.
 */
predicate isNamed() { exists(this.getName()) }

View File

@@ -0,0 +1,41 @@
/**
* Provides classes for working with regular expressions.
*/
private import semmle.python.RegexTreeView
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
/**
 * Utility predicates for building and recognizing regular-expression patterns.
 */
module RegExpPatterns {
  /**
   * Gets a regular-expression fragment that matches a common top-level
   * domain name written in lower case.
   *
   * The fragment is a non-capturing alternation of `com`, `org`, `edu`,
   * `gov`, `uk`, `net` and `io`, followed by a negative lookahead that
   * rejects a directly following lower-case letter or digit.
   */
  string getACommonTld() {
    // The list of TLDs follows the ranking given by
    // http://google.com/search?q=site:.<<TLD>>
    "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])" = result
  }
}
/**
 * A data-flow node whose underlying expression is a `Regex`, so that its
 * value may flow to a position where it is interpreted as a part of a
 * regular expression.
 */
class RegExpPatternSource extends DataFlow::CfgNode {
  /** The `Regex` expression underlying this node. */
  private Regex pattern;

  RegExpPatternSource() { this.asExpr() = pattern }

  /**
   * Gets a node where the pattern of this node is parsed as a part of
   * a regular expression. In this implementation that is the node itself.
   */
  DataFlow::Node getAParse() { this = result }

  /**
   * Gets a term of the regular expression parsed from this pattern.
   *
   * NOTE(review): the only constraint imposed here is that the term's
   * `getRegex()` is this pattern, so this may yield more than just the
   * root term - confirm whether restricting to the root is intended.
   */
  RegExpTerm getRegExpTerm() { pattern = result.getRegex() }
}