Python: create semmle/python/dataflow/new/Regexp.qll

This commit is contained in:
Arthur Baars
2022-03-21 13:16:22 +01:00
parent 9412b331db
commit 79cd7bf8ed
3 changed files with 42 additions and 35 deletions

View File

@@ -2,7 +2,6 @@
import python
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
/**
* An element containing a regular expression term, that is, either
@@ -49,19 +48,6 @@ newtype TRegExpParent =
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
/**
 * Utility predicates for working with regular-expression patterns.
 */
module RegExpPatterns {
  /**
   * Gets a regular-expression fragment that matches a common top-level
   * domain name written in lower case, provided it is not immediately
   * followed by another lower-case letter or digit.
   */
  string getACommonTld() {
    // The alternatives were picked according to the ranking given by
    // http://google.com/search?q=site:.<<TLD>>
    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
  }
}
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
@@ -1028,24 +1014,3 @@ class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
/**
 * Gets the root term of the parse tree obtained by parsing `re` as a
 * regular expression, if such a parse tree has been constructed.
 */
RegExpTerm getParsedRegExp(StrConst re) {
  result.isRootTerm() and
  re = result.getRegex()
}
/**
 * A data-flow node whose value may flow to a position where it is
 * interpreted as a part of a regular expression.
 */
class RegExpPatternSource extends DataFlow::CfgNode {
  /** The `Regex` that this node is, when viewed as an expression. */
  private Regex astNode;

  RegExpPatternSource() { astNode = this.asExpr() }

  /**
   * Gets a node where the pattern of this node is parsed as a part of
   * a regular expression.
   *
   * In this implementation that is always the node itself.
   */
  DataFlow::Node getAParse() { result = this }

  /**
   * Gets the root term of the regular expression parsed from this pattern.
   */
  RegExpTerm getRegExpTerm() {
    result.getRegex() = astNode and
    // `getRegex()` relates a term to its regex but does not by itself single
    // out the root, so restrict explicitly to match the documented contract
    // (the same shape as `getParsedRegExp`).
    result.isRootTerm()
  }
}

View File

@@ -0,0 +1,41 @@
/**
* Provides classes for working with regular expressions.
*/
private import semmle.python.RegexTreeView
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
/**
 * Utility predicates for working with regular-expression patterns.
 */
module RegExpPatterns {
  /**
   * Gets a regular-expression fragment that matches a common top-level
   * domain name written in lower case, provided it is not immediately
   * followed by another lower-case letter or digit.
   */
  string getACommonTld() {
    // The alternatives were picked according to the ranking given by
    // http://google.com/search?q=site:.<<TLD>>
    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
  }
}
/**
 * A data-flow node whose value may flow to a position where it is
 * interpreted as a part of a regular expression.
 */
class RegExpPatternSource extends DataFlow::CfgNode {
  /** The `Regex` that this node is, when viewed as an expression. */
  private Regex astNode;

  RegExpPatternSource() { astNode = this.asExpr() }

  /**
   * Gets a node where the pattern of this node is parsed as a part of
   * a regular expression.
   *
   * In this implementation that is always the node itself.
   */
  DataFlow::Node getAParse() { result = this }

  /**
   * Gets the root term of the regular expression parsed from this pattern.
   */
  RegExpTerm getRegExpTerm() {
    result.getRegex() = astNode and
    // `getRegex()` relates a term to its regex but does not by itself single
    // out the root, so restrict explicitly to match the documented contract
    // (the same shape as `getParsedRegExp` in `RegexTreeView`).
    result.isRootTerm()
  }
}

View File

@@ -1,2 +1,3 @@
import semmle.python.security.performance.RegExpTreeView
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.Regexp