introduce RegExpInterpretation instead of RegexString, and move RegexTreeView.qll into a regexp folder

This commit is contained in:
erik-krogh
2023-03-17 17:38:13 +01:00
parent e677b62241
commit f0254fc089
15 changed files with 1174 additions and 1157 deletions

View File

@@ -7,7 +7,7 @@
*/
import python
import semmle.python.RegexTreeView
import semmle.python.regexp.RegexTreeView
import semmle.python.Yaml
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
* Provides classes for working with regular expressions.
*/
private import semmle.python.RegexTreeView
private import semmle.python.regexp.RegexTreeView
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.regexp.internal.RegExpTracking

View File

@@ -2512,9 +2512,10 @@ module PrivateDjango {
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, Regex regex |
regex.getAUse() = regexUse and
routeHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
regexUse.getRouteSetup() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
@@ -2533,14 +2534,12 @@ module PrivateDjango {
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
* Needs this subclass to be considered a RegExpInterpretation.
*/
private class DjangoRouteRegex extends RegexString instanceof StrConst {
private class DjangoRouteRegex extends RegExpInterpretation::Range {
DjangoRegexRouteSetup rePathCall;
DjangoRouteRegex() {
rePathCall.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
}
DjangoRouteRegex() { this = rePathCall.getUrlPatternArg() }
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
}

View File

@@ -384,12 +384,12 @@ module Tornado {
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
* Needs this subclass to be considered a RegExpInterpretation.
*/
private class TornadoRouteRegex extends RegexString instanceof StrConst {
private class TornadoRouteRegex extends RegExpInterpretation::Range {
TornadoRouteSetup setup;
TornadoRouteRegex() { setup.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this) }
TornadoRouteRegex() { this = setup.getUrlPatternArg() }
TornadoRouteSetup getRouteSetup() { result = setup }
}
@@ -423,9 +423,10 @@ module Tornado {
not result = requestHandler.getArg(0)
)
or
exists(Function requestHandler, TornadoRouteRegex regex |
exists(Function requestHandler, TornadoRouteRegex regexUse, Regex regex |
regex.getAUse() = regexUse and
requestHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
regexUse.getRouteSetup() = this
|
// first group will have group number 1
result = requestHandler.getArg(regex.getGroupNumber(_, _))

View File

@@ -1,6 +1,6 @@
import python
private import semmle.python.ApiGraphs
// Need to import since frameworks can extend the abstract `RegexString`
// Need to import since frameworks can extend the abstract `RegExpInterpretation::Range`
private import semmle.python.Frameworks
private import semmle.python.Concepts as Concepts
@@ -45,7 +45,7 @@ private API::Node relevant_re_member(string name) {
*
* This predicate has not done any data-flow tracking.
*/
// TODO: This thing should be refactored, along with removing RegexString.
// TODO: This should only be used to get the `mode`, and nowhere else.
predicate used_as_regex_internal(Expr e, string mode) {
/* Call to re.xxx(regex, ... [mode]) */
exists(DataFlow::CallCfgNode call |
@@ -70,24 +70,8 @@ predicate used_as_regex_internal(Expr e, string mode) {
}
private import regexp.internal.RegExpTracking as RegExpTracking
/**
* Holds if the string-constant `s` ends up being used as a regex with the `re` module, with the regex-mode `mode` (if known).
* If regex mode is not known, `mode` will be `"None"`.
*
* This predicate has done data-flow tracking to find the string-constant that is used as a regex.
*/
predicate used_as_regex(Expr s, string mode) {
(s instanceof Bytes or s instanceof Unicode) and
exists(DataFlow::Node source, DataFlow::Node sink |
source = RegExpTracking::regExpSource(sink) and
used_as_regex_internal(sink.asExpr(), mode) and
s = source.asExpr()
)
}
private import semmle.python.Concepts
private import semmle.python.RegexTreeView
private import semmle.python.regexp.RegexTreeView
/** Gets a parsed regular expression term that is executed at `exec`. */
RegExpTerm getTermForExecution(RegexExecution exec) {
@@ -137,16 +121,70 @@ private DataFlow::Node re_flag_tracker(string flag_name) {
}
/** Gets a regular expression mode flag associated with the given data flow node. */
// TODO: Move this into a RegexFlag module, along with related code?
string mode_from_node(DataFlow::Node node) { node = re_flag_tracker(result) }
/**
* Provides a class for modeling regular expression interpretations.
*
* The contained `Range` class is abstract, so library and framework models
* can subclass it to contribute additional places where a value is
* interpreted as a regular expression.
*/
module RegExpInterpretation {
/**
* A data-flow node that is not itself a regular expression literal, but sits
* in a position where its value may be interpreted as one.
*
* Members are the nodes at the point of use, for example the pattern argument
* of a call such as `re.compile`, rather than the string constants that flow
* there.
*/
abstract class Range extends DataFlow::Node { }
}
/**
 * A data-flow node whose value the standard-library `re` module interprets
 * as a regular expression.
 *
 * Specifically, this is the argument at index 0 of a call to any member of
 * the `re` module other than `re.escape`.
 */
class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
  StdLibRegExpInterpretation() {
    exists(string memberName | memberName != "escape" |
      this = API::moduleImport("re").getMember(memberName).getACall().getArg(0)
    )
  }
}
/** A StrConst used as a regular expression */
abstract class RegexString extends Expr {
RegexString() {
deprecated class RegexString extends Regex {
RegexString() { this = RegExpTracking::regExpSource(_).asExpr() }
}
/** A StrConst used as a regular expression */
class Regex extends Expr {
DataFlow::Node sink;
Regex() {
(this instanceof Bytes or this instanceof Unicode) and
this = RegExpTracking::regExpSource(sink).asExpr() and
// is part of the user code
exists(this.getLocation().getFile().getRelativePath())
}
/** Gets a data-flow node where this string value is used as a regular expression. */
DataFlow::Node getAUse() { result = sink }
/**
* Gets a mode (if any) of this regular expression. Can be any of:
* DEBUG
* IGNORECASE
* LOCALE
* MULTILINE
* DOTALL
* UNICODE
* VERBOSE
*/
string getAMode() {
exists(string mode |
used_as_regex_internal(sink.asExpr(), mode) and
result != "None" and
result = mode
)
or
result = this.getModeFromPrefix()
}
// TODO: Refactor all of the below into a regex parsing file, similar to Ruby.
/**
* Helper predicate for `char_set_start(int start, int end)`.
*
@@ -1082,25 +1120,3 @@ abstract class RegexString extends Expr {
this.lastPart(start, end)
}
}
/** A StrConst used as a regular expression */
class Regex extends RegexString {
Regex() { used_as_regex(this, _) }
/**
* Gets a mode (if any) of this regular expression. Can be any of:
* DEBUG
* IGNORECASE
* LOCALE
* MULTILINE
* DOTALL
* UNICODE
* VERBOSE
*/
string getAMode() {
result != "None" and
used_as_regex(this, result)
or
result = this.getModeFromPrefix()
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -26,8 +26,7 @@ private import semmle.python.regex as Regex
DataFlow::Node regSink() {
result = any(Concepts::RegexExecution exec).getRegex()
or
// TODO: Refactor into something nicer, and remove the above import of `semmle.python.regex`
Regex::used_as_regex_internal(result.asExpr(), _)
result instanceof Regex::RegExpInterpretation::Range
}
/**

View File

@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
private import semmle.python.regex

View File

@@ -5,7 +5,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.RegexTreeView::RegexTreeView as TreeImpl
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeImpl
private import semmle.python.dataflow.new.Regexp as Regexp
private import codeql.regex.HostnameRegexp as Shared

View File

@@ -12,7 +12,7 @@
* external/cwe/cwe-020
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
from TreeView::RegExpCharacterRange range, string reason

View File

@@ -14,7 +14,7 @@
* external/cwe/cwe-186
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
from HtmlMatchingRegExp regexp, string msg

View File

@@ -14,7 +14,7 @@
* external/cwe/cwe-400
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg

View File

@@ -3,7 +3,7 @@
*/
import python
import semmle.python.RegexTreeView
import semmle.python.regexp.RegexTreeView
from string str, int counter, Location loc
where

View File

@@ -1,5 +1,5 @@
import python
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
from PolynomialBackTrackingTerm t