Merge pull request #12552 from erik-krogh/py-type-trackers

Py: refactor regex tracking to type-trackers
Rasmus Wriedt Larsen
2023-05-11 16:18:34 +02:00
committed by GitHub
34 changed files with 2335 additions and 2632 deletions

View File

@@ -47,7 +47,6 @@
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplForRegExp.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl1.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForHttpClientLibraries.qll",

View File

@@ -421,6 +421,24 @@ module RegexExecution {
}
}
/**
* A node where a string is interpreted as a regular expression,
* for instance an argument to `re.compile`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegExpInterpretation::Range` instead.
*/
class RegExpInterpretation extends DataFlow::Node instanceof RegExpInterpretation::Range { }
/** Provides a class for modeling regular expression interpretations. */
module RegExpInterpretation {
/**
* A node where a string is interpreted as a regular expression,
* for instance an argument to `re.compile`.
*/
abstract class Range extends DataFlow::Node { }
}
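The distinction above is worth making concrete: refining the existing model means extending `RegExpInterpretation`, while supporting a new API means providing a `RegExpInterpretation::Range` instance. A minimal sketch of the latter, for a hypothetical `mylib.match(pattern, string)` function (the module name and argument position are invented for illustration; the shape mirrors the `re.compile` model added to the standard-library models later in this pull request):

```ql
import python
import semmle.python.ApiGraphs
import semmle.python.Concepts

/** The pattern argument of a hypothetical `mylib.match(pattern, string)` call. */
class MyLibRegExpInterpretation extends RegExpInterpretation::Range {
  MyLibRegExpInterpretation() {
    // `mylib` is a made-up library; a real model would target an actual API.
    this = API::moduleImport("mylib").getMember("match").getACall().getArg(0)
  }
}
```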
/** Provides classes for modeling XML-related APIs. */
module XML {
/**

View File

@@ -7,7 +7,7 @@
*/
import python
import semmle.python.RegexTreeView
import semmle.python.regexp.RegexTreeView
import semmle.python.Yaml
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()

File diff suppressed because it is too large.

View File

@@ -2,9 +2,10 @@
* Provides classes for working with regular expressions.
*/
private import semmle.python.RegexTreeView
private import semmle.python.regexp.RegexTreeView
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.regexp.internal.RegExpTracking
/**
* Provides utility predicates related to regular expressions.
@@ -25,18 +26,18 @@ deprecated module RegExpPatterns {
* as a part of a regular expression.
*/
class RegExpPatternSource extends DataFlow::CfgNode {
private Regex astNode;
private RegExpSink sink;
RegExpPatternSource() { astNode = this.asExpr() }
RegExpPatternSource() { this = regExpSource(sink) }
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
DataFlow::Node getAParse() { result = this }
RegExpSink getAParse() { result = sink }
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
RegExpTerm getRegExpTerm() { result.getRegex() = astNode }
RegExpTerm getRegExpTerm() { result.getRegex() = this.asExpr() }
}
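The class now resolves pattern sources via the type-tracking predicate `regExpSource` rather than by matching `Regex` AST nodes directly. A small illustrative query over the updated API (not part of this PR) that lists each pattern source together with its parse site and root term:

```ql
import python
import semmle.python.dataflow.new.Regexp

// List every string that is parsed as (part of) a regular expression,
// together with the sink where it is parsed and the root of the parse tree.
from RegExpPatternSource src
select src, src.getAParse(), src.getRegExpTerm()
```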

View File

@@ -1,398 +0,0 @@
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
*/
private import DataFlowImplCommon
private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
* of this abstract class. To create a configuration, extend this class with
* a subclass whose characteristic predicate is a unique singleton string.
* For example, write
*
* ```ql
* class MyAnalysisConfiguration extends DataFlow::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isBarrier`.
* // Optionally override `isAdditionalFlowStep`.
* }
* ```
* Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
* the edges are those data-flow steps that preserve the value of the node
* along with any additional edges defined by `isAdditionalFlowStep`.
* Specifying nodes in `isBarrier` will remove those nodes from the graph, and
* specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
* and/or out-going edges from those nodes, respectively.
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but two classes extending
* `DataFlow::Configuration` should never depend on each other. One of them
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant data flow source.
*/
predicate isSource(Node source) { none() }
/**
* Holds if `source` is a relevant data flow source with the given initial
* `state`.
*/
predicate isSource(Node source, FlowState state) { none() }
/**
* Holds if `sink` is a relevant data flow sink.
*/
predicate isSink(Node sink) { none() }
/**
* Holds if `sink` is a relevant data flow sink accepting `state`.
*/
predicate isSink(Node sink, FlowState state) { none() }
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
predicate isBarrier(Node node) { none() }
/**
* Holds if data flow through `node` is prohibited when the flow state is
* `state`.
*/
predicate isBarrier(Node node, FlowState state) { none() }
/** Holds if data flow into `node` is prohibited. */
predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
predicate isBarrierOut(Node node) { none() }
/**
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
*
* Holds if data flow through nodes guarded by `guard` is prohibited.
*/
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
*
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`.
*/
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
}
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries; they are only needed when constructing paths that must be
* embedded in another path context.
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
predicate hasFlow(Node source, Node sink) { hasFlow(source, sink, this) }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
*/
predicate hasFlowTo(Node sink) { hasFlowTo(sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
*/
predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) }
/**
* DEPRECATED: Use `FlowExploration<explorationLimit>` instead.
*
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
* measured in approximate number of interprocedural steps.
*/
deprecated int explorationLimit() { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (for example in a `path-problem` query).
*/
predicate includeHiddenNodes() { none() }
}
/**
* This class exists to prevent mutual recursion between the user-overridden
* member predicates of `Configuration` and the rest of the data-flow library.
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
override predicate hasFlow(Node source, Node sink) {
strictcount(Node n | this.isSource(n)) < 0
or
strictcount(Node n | this.isSource(n, _)) < 0
or
strictcount(Node n | this.isSink(n)) < 0
or
strictcount(Node n | this.isSink(n, _)) < 0
or
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
or
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0
or
super.hasFlow(source, sink)
}
}
/** A bridge class to access the deprecated `isBarrierGuard`. */
private class BarrierGuardGuardedNodeBridge extends Unit {
abstract predicate guardedNode(Node n, Configuration config);
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
}
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
deprecated override predicate guardedNode(Node n, Configuration config) {
exists(BarrierGuard g |
config.isBarrierGuard(g) and
n = g.getAGuardedNode()
)
}
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
}
}
private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
config.isAdditionalFlowStep(_, result, _, _) or
config.isAdditionalFlowStep(_, _, _, result)
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
getConfig(state).isSource(source, getState(state))
or
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
getConfig(state).isSink(sink) and getState(state) instanceof FlowStateEmpty
}
predicate isBarrier(Node node) { none() }
predicate isBarrier(Node node, FlowState state) {
getConfig(state).isBarrier(node, getState(state)) or
getConfig(state).isBarrier(node) or
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
}
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)
}
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
getConfig(state1).isAdditionalFlowStep(node1, getState(state1), node2, getState(state2)) and
getConfig(state2) = getConfig(state1)
or
not singleConfiguration() and
getConfig(state1).isAdditionalFlowStep(node1, node2) and
state2 = state1
}
predicate allowImplicitRead(Node node, ContentSet c) {
any(Configuration config).allowImplicitRead(node, c)
}
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
predicate sourceGrouping(Node source, string sourceGroup) {
any(Configuration config).sourceGrouping(source, sourceGroup)
}
predicate sinkGrouping(Node sink, string sinkGroup) {
any(Configuration config).sinkGrouping(sink, sinkGroup)
}
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { super.isSourceGroup(group) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
sink0.getNode() = sink
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;

View File

@@ -2512,9 +2512,10 @@ module PrivateDjango {
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, RegExp regex |
regex.getAUse() = regexUse and
routeHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
regexUse.getRouteSetup() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
@@ -2533,14 +2534,12 @@ module PrivateDjango {
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
* Needs this subclass to be considered a RegExpInterpretation.
*/
private class DjangoRouteRegex extends RegexString instanceof StrConst {
private class DjangoRouteRegex extends RegExpInterpretation::Range {
DjangoRegexRouteSetup rePathCall;
DjangoRouteRegex() {
rePathCall.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
}
DjangoRouteRegex() { this = rePathCall.getUrlPatternArg() }
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
}

View File

@@ -3015,6 +3015,17 @@ private module StdlibPrivate {
override string getKind() { result = Escaping::getRegexKind() }
}
/**
* A node interpreted as a regular expression.
* Specifically, a node where a string value is interpreted as a regular expression.
*/
private class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
StdLibRegExpInterpretation() {
this =
API::moduleImport("re").getMember("compile").getACall().getParameter(0, "pattern").asSink()
}
}
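Downstream code does not need to reference this modeling class directly; it can go through the `RegExpInterpretation` concept instead, which is exactly what the new `RegExpSink` class in the tracking module below does. A tiny illustrative query, not part of this PR:

```ql
import python
import semmle.python.Concepts

// Every node that some model marks as being interpreted as a regular expression.
from RegExpInterpretation interp
select interp, "This string is interpreted as a regular expression."
```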
// ---------------------------------------------------------------------------
// urllib
// ---------------------------------------------------------------------------

View File

@@ -384,12 +384,12 @@ module Tornado {
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
* Needs this subclass to be considered a RegExpInterpretation.
*/
private class TornadoRouteRegex extends RegexString instanceof StrConst {
private class TornadoRouteRegex extends RegExpInterpretation::Range {
TornadoRouteSetup setup;
TornadoRouteRegex() { setup.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this) }
TornadoRouteRegex() { this = setup.getUrlPatternArg() }
TornadoRouteSetup getRouteSetup() { result = setup }
}
@@ -423,9 +423,10 @@ module Tornado {
not result = requestHandler.getArg(0)
)
or
exists(Function requestHandler, TornadoRouteRegex regex |
exists(Function requestHandler, TornadoRouteRegex regexUse, RegExp regex |
regex.getAUse() = regexUse and
requestHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
regexUse.getRouteSetup() = this
|
// first group will have group number 1
result = requestHandler.getArg(regex.getGroupNumber(_, _))

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@@ -0,0 +1,76 @@
/**
* Provides predicates that track strings to where they are used as regular expressions.
* This is implemented using TypeTracking in two phases:
*
* 1: An exploratory backwards analysis that imprecisely tracks all nodes that may be used as regular expressions.
* The exploratory phase ends with a forwards analysis from string constants that were reached by the backwards analysis.
* This is similar to the exploratory phase of the JavaScript global DataFlow library.
*
* 2: A precise type tracking analysis that tracks constant strings to where they are used as regular expressions.
* This phase keeps track of which strings and regular expressions end up in which places.
*/
import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts as Concepts
/** Gets a constant string value that may be used as a regular expression. */
DataFlow::LocalSourceNode strStart() { result.asExpr() instanceof StrConst }
private import semmle.python.regex as Regex
/** A node where regular expressions that flow to the node are used. */
class RegExpSink extends DataFlow::Node {
RegExpSink() {
this = any(Concepts::RegexExecution exec).getRegex()
or
this instanceof Concepts::RegExpInterpretation
}
}
/**
* Gets a dataflow node that may end up being in any regular expression execution.
* This is the backwards exploratory phase of the analysis.
*/
private DataFlow::TypeTrackingNode backwards(DataFlow::TypeBackTracker t) {
t.start() and
result = any(RegExpSink sink).getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
}
/**
* Gets a reference to a string that reaches any regular expression execution.
* This is the forwards exploratory phase of the analysis.
*/
private DataFlow::TypeTrackingNode forwards(DataFlow::TypeTracker t) {
t.start() and
result = backwards(DataFlow::TypeBackTracker::end()) and
result = strStart()
or
exists(DataFlow::TypeTracker t2 | result = forwards(t2).track(t2, t)) and
result = backwards(_)
}
/**
* Gets a node that has been tracked from the string constant `start` to some node.
* This is used to figure out where `start` is evaluated as a regular expression.
*
* The result of the exploratory phase is used to limit the size of the search space in this precise analysis.
*/
private DataFlow::TypeTrackingNode regexTracking(DataFlow::Node start, DataFlow::TypeTracker t) {
result = forwards(t) and
(
t.start() and
start = strStart() and
result = start
or
exists(DataFlow::TypeTracker t2 | result = regexTracking(start, t2).track(t2, t))
)
}
/** Gets a node holding a value for the regular expression that is evaluated at `re`. */
cached
DataFlow::Node regExpSource(RegExpSink re) {
regexTracking(result, DataFlow::TypeTracker::end()).flowsTo(re)
}
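This module is internal, and the updated `RegExpPatternSource` class earlier in this diff is its intended consumer. Still, the exported `regExpSource`/`RegExpSink` interface can be exercised directly; a hedged sketch of such a query, for illustration only:

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.regexp.internal.RegExpTracking

// For each place a regular expression value is consumed (a `RegExpSink`),
// list the string constants that the type trackers resolve as its sources.
from RegExpSink sink, DataFlow::Node source
where source = regExpSource(sink)
select sink, source
```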

View File

@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
private import semmle.python.regex

View File

@@ -5,14 +5,25 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.RegexTreeView::RegexTreeView as TreeImpl
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeImpl
private import semmle.python.dataflow.new.Regexp as Regexp
private import codeql.regex.HostnameRegexp as Shared
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
class DataFlowNode = DataFlow::Node;
class RegExpPatternSource = Regexp::RegExpPatternSource;
class RegExpPatternSource extends DataFlow::Node instanceof Regexp::RegExpPatternSource {
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
DataFlow::Node getAParse() { result = super.getAParse() }
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
TreeImpl::RegExpTerm getRegExpTerm() { result = super.getRegExpTerm() }
}
}
import Shared::Make<TreeImpl, Impl>

View File

@@ -13,7 +13,7 @@
import python
import semmle.python.regex
from Regex r, int offset
from RegExp r, int offset
where
r.escapingChar(offset) and
r.getChar(offset + 1) = "b" and

View File

@@ -13,7 +13,7 @@
import python
import semmle.python.regex
predicate duplicate_char_in_class(Regex r, string char) {
predicate duplicate_char_in_class(RegExp r, string char) {
exists(int i, int j, int x, int y, int start, int end |
i != x and
j != y and
@@ -36,7 +36,7 @@ predicate duplicate_char_in_class(Regex r, string char) {
)
}
from Regex r, string char
from RegExp r, string char
where duplicate_char_in_class(r, char)
select r,
"This regular expression includes duplicate character '" + char + "' in a set of characters."

View File

@@ -13,6 +13,6 @@
import python
import semmle.python.regex
from Regex r, string missing, string part
from RegExp r, string missing, string part
where r.getText().regexpMatch(".*\\(P<\\w+>.*") and missing = "?" and part = "named group"
select r, "Regular expression is missing '" + missing + "' in " + part + "."

View File

@@ -13,14 +13,14 @@
import python
import semmle.python.regex
predicate unmatchable_caret(Regex r, int start) {
predicate unmatchable_caret(RegExp r, int start) {
not r.getAMode() = "MULTILINE" and
not r.getAMode() = "VERBOSE" and
r.specialCharacter(start, start + 1, "^") and
not r.firstItem(start, start + 1)
}
from Regex r, int offset
from RegExp r, int offset
where unmatchable_caret(r, offset)
select r,
"This regular expression includes an unmatchable caret at offset " + offset.toString() + "."

View File

@@ -13,14 +13,14 @@
import python
import semmle.python.regex
predicate unmatchable_dollar(Regex r, int start) {
predicate unmatchable_dollar(RegExp r, int start) {
not r.getAMode() = "MULTILINE" and
not r.getAMode() = "VERBOSE" and
r.specialCharacter(start, start + 1, "$") and
not r.lastItem(start, start + 1)
}
from Regex r, int offset
from RegExp r, int offset
where unmatchable_dollar(r, offset)
select r,
"This regular expression includes an unmatchable dollar at offset " + offset.toString() + "."

View File

@@ -12,7 +12,7 @@
* external/cwe/cwe-020
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
from TreeView::RegExpCharacterRange range, string reason

View File

@@ -14,7 +14,7 @@
* external/cwe/cwe-186
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
from HtmlMatchingRegExp regexp, string msg

View File

@@ -14,7 +14,7 @@
* external/cwe/cwe-400
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.regex
from Regex r, int start, int end, int part_start, int part_end
from RegExp r, int start, int end, int part_start, int part_end
where
r.getLocation().getFile().getBaseName() = "test.py" and
r.alternationOption(start, end, part_start, part_end)

View File

@@ -6,6 +6,6 @@
import python
import semmle.python.regex
from Regex r, int start, int end
from RegExp r, int start, int end
where r.character(start, end) and r.getLocation().getFile().getBaseName() = "test.py"
select r.getText(), start, end

View File

@@ -7,6 +7,6 @@ import semmle.python.regex
from string str, Location loc, int counter
where
counter = strictcount(Regex term | term.getLocation() = loc and term.getText() = str) and
counter = strictcount(RegExp term | term.getLocation() = loc and term.getText() = str) and
counter > 1
select str, counter, loc

View File

@@ -1,12 +1,12 @@
import python
import semmle.python.regex
predicate part(Regex r, int start, int end, string kind) {
predicate part(RegExp r, int start, int end, string kind) {
r.lastItem(start, end) and kind = "last"
or
r.firstItem(start, end) and kind = "first"
}
from Regex r, int start, int end, string kind
from RegExp r, int start, int end, string kind
where part(r, start, end, kind) and r.getLocation().getFile().getBaseName() = "test.py"
select r.getText(), kind, start, end

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.regex
from Regex r, int start, int end, int part_start, int part_end
from RegExp r, int start, int end, int part_start, int part_end
where
r.getLocation().getFile().getBaseName() = "test.py" and
r.groupContents(start, end, part_start, part_end)

View File

@@ -1,6 +1,6 @@
import python
import semmle.python.regex
from Regex r
from RegExp r
where r.getLocation().getFile().getBaseName() = "test.py"
select r.getLocation().getStartLine(), r.getAMode()

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.regex
from Regex r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
from RegExp r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
where
r.getLocation().getFile().getBaseName() = "test.py" and
r.qualifiedItem(start, end, maybe_empty, may_repeat_forever)

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.regex
predicate part(Regex r, int start, int end, string kind) {
predicate part(RegExp r, int start, int end, string kind) {
r.alternation(start, end) and kind = "choice"
or
r.normalCharacter(start, end) and kind = "char"
@@ -23,6 +23,6 @@ predicate part(Regex r, int start, int end, string kind) {
r.qualifiedItem(start, end, _, _) and kind = "qualified"
}
from Regex r, int start, int end, string kind
from RegExp r, int start, int end, string kind
where part(r, start, end, kind) and r.getLocation().getFile().getBaseName() = "test.py"
select r.getText(), kind, start, end

View File

@@ -10,7 +10,7 @@ class CharacterSetTest extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
location.getFile().getBaseName() = "charSetTest.py" and
exists(Regex re, int start, int end |
exists(RegExp re, int start, int end |
re.charSet(start, end) and
location = re.getLocation() and
element = re.getText().substring(start, end) and
@@ -28,7 +28,7 @@ class CharacterRangeTest extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
location.getFile().getBaseName() = "charRangeTest.py" and
exists(Regex re, int start, int lower_end, int upper_start, int end |
exists(RegExp re, int start, int lower_end, int upper_start, int end |
re.charRange(_, start, lower_end, upper_start, end) and
location = re.getLocation() and
element = re.getText().substring(start, end) and
@@ -46,7 +46,7 @@ class EscapeTest extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
location.getFile().getBaseName() = "escapedCharacterTest.py" and
exists(Regex re, int start, int end |
exists(RegExp re, int start, int end |
re.escapedCharacter(start, end) and
location = re.getLocation() and
element = re.getText().substring(start, end) and
@@ -64,7 +64,7 @@ class GroupTest extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
location.getFile().getBaseName() = "groupTest.py" and
exists(Regex re, int start, int end |
exists(RegExp re, int start, int end |
re.group(start, end) and
location = re.getLocation() and
element = re.getText().substring(start, end) and

View File

@@ -3,7 +3,7 @@
*/
import python
import semmle.python.RegexTreeView
import semmle.python.regexp.RegexTreeView
from string str, int counter, Location loc
where

View File

@@ -1,5 +1,5 @@
import python
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
from PolynomialBackTrackingTerm t