mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
introduce RegExpInterpretation instead of RegexString, and move RegexTreeView.qll into a regexp folder
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
import semmle.python.regexp.RegexTreeView
|
||||
import semmle.python.Yaml
|
||||
|
||||
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
||||
* Provides classes for working with regular expressions.
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView
|
||||
private import semmle.python.regexp.RegexTreeView
|
||||
private import semmle.python.regex
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.regexp.internal.RegExpTracking
|
||||
|
||||
@@ -2512,9 +2512,10 @@ module PrivateDjango {
|
||||
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
|
||||
)
|
||||
or
|
||||
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
|
||||
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, Regex regex |
|
||||
regex.getAUse() = regexUse and
|
||||
routeHandler = this.getARequestHandler() and
|
||||
regex.getRouteSetup() = this
|
||||
regexUse.getRouteSetup() = this
|
||||
|
|
||||
// either using named capture groups (passed as keyword arguments) or using
|
||||
// unnamed capture groups (passed as positional arguments)
|
||||
@@ -2533,14 +2534,12 @@ module PrivateDjango {
|
||||
/**
|
||||
* A regex that is used to set up a route.
|
||||
*
|
||||
* Needs this subclass to be considered a RegexString.
|
||||
* Needs this subclass to be considered a RegExpInterpretation.
|
||||
*/
|
||||
private class DjangoRouteRegex extends RegexString instanceof StrConst {
|
||||
private class DjangoRouteRegex extends RegExpInterpretation::Range {
|
||||
DjangoRegexRouteSetup rePathCall;
|
||||
|
||||
DjangoRouteRegex() {
|
||||
rePathCall.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
|
||||
}
|
||||
DjangoRouteRegex() { this = rePathCall.getUrlPatternArg() }
|
||||
|
||||
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
|
||||
}
|
||||
|
||||
@@ -384,12 +384,12 @@ module Tornado {
|
||||
/**
|
||||
* A regex that is used to set up a route.
|
||||
*
|
||||
* Needs this subclass to be considered a RegexString.
|
||||
* Needs this subclass to be considered a RegExpInterpretation.
|
||||
*/
|
||||
private class TornadoRouteRegex extends RegexString instanceof StrConst {
|
||||
private class TornadoRouteRegex extends RegExpInterpretation::Range {
|
||||
TornadoRouteSetup setup;
|
||||
|
||||
TornadoRouteRegex() { setup.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this) }
|
||||
TornadoRouteRegex() { this = setup.getUrlPatternArg() }
|
||||
|
||||
TornadoRouteSetup getRouteSetup() { result = setup }
|
||||
}
|
||||
@@ -423,9 +423,10 @@ module Tornado {
|
||||
not result = requestHandler.getArg(0)
|
||||
)
|
||||
or
|
||||
exists(Function requestHandler, TornadoRouteRegex regex |
|
||||
exists(Function requestHandler, TornadoRouteRegex regexUse, Regex regex |
|
||||
regex.getAUse() = regexUse and
|
||||
requestHandler = this.getARequestHandler() and
|
||||
regex.getRouteSetup() = this
|
||||
regexUse.getRouteSetup() = this
|
||||
|
|
||||
// first group will have group number 1
|
||||
result = requestHandler.getArg(regex.getGroupNumber(_, _))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import python
|
||||
private import semmle.python.ApiGraphs
|
||||
// Need to import since frameworks can extend the abstract `RegexString`
|
||||
// Need to import since frameworks can extend the abstract `RegExpInterpretation::Range`
|
||||
private import semmle.python.Frameworks
|
||||
private import semmle.python.Concepts as Concepts
|
||||
|
||||
@@ -45,7 +45,7 @@ private API::Node relevant_re_member(string name) {
|
||||
*
|
||||
* This predicate has not done any data-flow tracking.
|
||||
*/
|
||||
// TODO: This thing should be refactored, along with removing RegexString.
|
||||
// TODO: This should only be used to get the `mode`, and nowhere else.
|
||||
predicate used_as_regex_internal(Expr e, string mode) {
|
||||
/* Call to re.xxx(regex, ... [mode]) */
|
||||
exists(DataFlow::CallCfgNode call |
|
||||
@@ -70,24 +70,8 @@ predicate used_as_regex_internal(Expr e, string mode) {
|
||||
}
|
||||
|
||||
private import regexp.internal.RegExpTracking as RegExpTracking
|
||||
|
||||
/**
|
||||
* Holds if the string-constant `s` ends up being used as a regex with the `re` module, with the regex-mode `mode` (if known).
|
||||
* If regex mode is not known, `mode` will be `"None"`.
|
||||
*
|
||||
* This predicate has done data-flow tracking to find the string-constant that is used as a regex.
|
||||
*/
|
||||
predicate used_as_regex(Expr s, string mode) {
|
||||
(s instanceof Bytes or s instanceof Unicode) and
|
||||
exists(DataFlow::Node source, DataFlow::Node sink |
|
||||
source = RegExpTracking::regExpSource(sink) and
|
||||
used_as_regex_internal(sink.asExpr(), mode) and
|
||||
s = source.asExpr()
|
||||
)
|
||||
}
|
||||
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.RegexTreeView
|
||||
private import semmle.python.regexp.RegexTreeView
|
||||
|
||||
/** Gets a parsed regular expression term that is executed at `exec`. */
|
||||
RegExpTerm getTermForExecution(RegexExecution exec) {
|
||||
@@ -137,16 +121,70 @@ private DataFlow::Node re_flag_tracker(string flag_name) {
|
||||
}
|
||||
|
||||
/** Gets a regular expression mode flag associated with the given data flow node. */
|
||||
// TODO: Move this into a RegexFlag module, along with related code?
|
||||
string mode_from_node(DataFlow::Node node) { node = re_flag_tracker(result) }
|
||||
|
||||
/** Provides a class for modeling regular expression interpretations. */
|
||||
module RegExpInterpretation {
|
||||
/**
|
||||
* A node that is not a regular expression literal, but is used in places that
|
||||
* may interpret it as one. Instances of this class are typically strings that
|
||||
* flow to method calls like `re.compile`.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node { }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node interpreted as a regular expression.
|
||||
* Specifically, nodes where string values are interpreted as regular expressions.
|
||||
*/
|
||||
class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
|
||||
StdLibRegExpInterpretation() {
|
||||
this =
|
||||
API::moduleImport("re").getMember(any(string name | name != "escape")).getACall().getArg(0)
|
||||
}
|
||||
}
|
||||
|
||||
/** A StrConst used as a regular expression */
|
||||
abstract class RegexString extends Expr {
|
||||
RegexString() {
|
||||
deprecated class RegexString extends Regex {
|
||||
RegexString() { this = RegExpTracking::regExpSource(_).asExpr() }
|
||||
}
|
||||
|
||||
/** A StrConst used as a regular expression */
|
||||
class Regex extends Expr {
|
||||
DataFlow::Node sink;
|
||||
|
||||
Regex() {
|
||||
(this instanceof Bytes or this instanceof Unicode) and
|
||||
this = RegExpTracking::regExpSource(sink).asExpr() and
|
||||
// is part of the user code
|
||||
exists(this.getLocation().getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/** Gets a data-flow node where this string value is used as a regular expression. */
|
||||
DataFlow::Node getAUse() { result = sink }
|
||||
|
||||
/**
|
||||
* Gets a mode (if any) of this regular expression. Can be any of:
|
||||
* DEBUG
|
||||
* IGNORECASE
|
||||
* LOCALE
|
||||
* MULTILINE
|
||||
* DOTALL
|
||||
* UNICODE
|
||||
* VERBOSE
|
||||
*/
|
||||
string getAMode() {
|
||||
exists(string mode |
|
||||
used_as_regex_internal(sink.asExpr(), mode) and
|
||||
result != "None" and
|
||||
result = mode
|
||||
)
|
||||
or
|
||||
result = this.getModeFromPrefix()
|
||||
}
|
||||
|
||||
// TODO: Refactor all of the below into a regex parsing file, similar to Ruby.
|
||||
/**
|
||||
* Helper predicate for `char_set_start(int start, int end)`.
|
||||
*
|
||||
@@ -1082,25 +1120,3 @@ abstract class RegexString extends Expr {
|
||||
this.lastPart(start, end)
|
||||
}
|
||||
}
|
||||
|
||||
/** A StrConst used as a regular expression */
|
||||
class Regex extends RegexString {
|
||||
Regex() { used_as_regex(this, _) }
|
||||
|
||||
/**
|
||||
* Gets a mode (if any) of this regular expression. Can be any of:
|
||||
* DEBUG
|
||||
* IGNORECASE
|
||||
* LOCALE
|
||||
* MULTILINE
|
||||
* DOTALL
|
||||
* UNICODE
|
||||
* VERBOSE
|
||||
*/
|
||||
string getAMode() {
|
||||
result != "None" and
|
||||
used_as_regex(this, result)
|
||||
or
|
||||
result = this.getModeFromPrefix()
|
||||
}
|
||||
}
|
||||
|
||||
1090
python/ql/lib/semmle/python/regexp/RegexTreeView.qll
Normal file
1090
python/ql/lib/semmle/python/regexp/RegexTreeView.qll
Normal file
File diff suppressed because it is too large
Load Diff
@@ -26,8 +26,7 @@ private import semmle.python.regex as Regex
|
||||
DataFlow::Node regSink() {
|
||||
result = any(Concepts::RegexExecution exec).getRegex()
|
||||
or
|
||||
// TODO: Refactor into something nicer, and remove the above import of `semmle.python.regex`
|
||||
Regex::used_as_regex_internal(result.asExpr(), _)
|
||||
result instanceof Regex::RegExpInterpretation::Range
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.BarrierGuards
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.regex
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeImpl
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeImpl
|
||||
private import semmle.python.dataflow.new.Regexp as Regexp
|
||||
private import codeql.regex.HostnameRegexp as Shared
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
|
||||
|
||||
from TreeView::RegExpCharacterRange range, string reason
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* external/cwe/cwe-186
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
|
||||
|
||||
from HtmlMatchingRegExp regexp, string msg
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
|
||||
|
||||
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
import semmle.python.regexp.RegexTreeView
|
||||
|
||||
from string str, int counter, Location loc
|
||||
where
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import python
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
|
||||
|
||||
from PolynomialBackTrackingTerm t
|
||||
|
||||
Reference in New Issue
Block a user