mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Merge pull request #12552 from erik-krogh/py-type-trackers
Py: refactor regex tracking to type-trackers
This commit is contained in:
@@ -47,7 +47,6 @@
|
||||
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll",
|
||||
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll",
|
||||
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll",
|
||||
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplForRegExp.qll",
|
||||
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl1.qll",
|
||||
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll",
|
||||
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForHttpClientLibraries.qll",
|
||||
|
||||
@@ -421,6 +421,24 @@ module RegexExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A node where a string is interpreted as a regular expression,
|
||||
* for instance an argument to `re.compile`.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegExpInterpretation::Range` instead.
|
||||
*/
|
||||
class RegExpInterpretation extends DataFlow::Node instanceof RegExpInterpretation::Range { }
|
||||
|
||||
/** Provides a class for modeling regular expression interpretations. */
|
||||
module RegExpInterpretation {
|
||||
/**
|
||||
* A node where a string is interpreted as a regular expression,
|
||||
* for instance an argument to `re.compile`.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node { }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling XML-related APIs. */
|
||||
module XML {
|
||||
/**
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
import semmle.python.regexp.RegexTreeView
|
||||
import semmle.python.Yaml
|
||||
|
||||
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,9 +2,10 @@
|
||||
* Provides classes for working with regular expressions.
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView
|
||||
private import semmle.python.regexp.RegexTreeView
|
||||
private import semmle.python.regex
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.regexp.internal.RegExpTracking
|
||||
|
||||
/**
|
||||
* Provides utility predicates related to regular expressions.
|
||||
@@ -25,18 +26,18 @@ deprecated module RegExpPatterns {
|
||||
* as a part of a regular expression.
|
||||
*/
|
||||
class RegExpPatternSource extends DataFlow::CfgNode {
|
||||
private Regex astNode;
|
||||
private RegExpSink sink;
|
||||
|
||||
RegExpPatternSource() { astNode = this.asExpr() }
|
||||
RegExpPatternSource() { this = regExpSource(sink) }
|
||||
|
||||
/**
|
||||
* Gets a node where the pattern of this node is parsed as a part of
|
||||
* a regular expression.
|
||||
*/
|
||||
DataFlow::Node getAParse() { result = this }
|
||||
RegExpSink getAParse() { result = sink }
|
||||
|
||||
/**
|
||||
* Gets the root term of the regular expression parsed from this pattern.
|
||||
*/
|
||||
RegExpTerm getRegExpTerm() { result.getRegex() = astNode }
|
||||
RegExpTerm getRegExpTerm() { result.getRegex() = this.asExpr() }
|
||||
}
|
||||
|
||||
@@ -1,398 +0,0 @@
|
||||
/**
|
||||
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
|
||||
*
|
||||
* Provides a `Configuration` class backwards-compatible interface to the data
|
||||
* flow library.
|
||||
*/
|
||||
|
||||
private import DataFlowImplCommon
|
||||
private import DataFlowImplSpecific::Private
|
||||
import DataFlowImplSpecific::Public
|
||||
private import DataFlowImpl
|
||||
import DataFlowImplCommonPublic
|
||||
import FlowStateString
|
||||
private import codeql.util.Unit
|
||||
|
||||
/**
|
||||
* A configuration of interprocedural data flow analysis. This defines
|
||||
* sources, sinks, and any other configurable aspect of the analysis. Each
|
||||
* use of the global data flow library must define its own unique extension
|
||||
* of this abstract class. To create a configuration, extend this class with
|
||||
* a subclass whose characteristic predicate is a unique singleton string.
|
||||
* For example, write
|
||||
*
|
||||
* ```ql
|
||||
* class MyAnalysisConfiguration extends DataFlow::Configuration {
|
||||
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
|
||||
* // Override `isSource` and `isSink`.
|
||||
* // Optionally override `isBarrier`.
|
||||
* // Optionally override `isAdditionalFlowStep`.
|
||||
* }
|
||||
* ```
|
||||
* Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
|
||||
* the edges are those data-flow steps that preserve the value of the node
|
||||
* along with any additional edges defined by `isAdditionalFlowStep`.
|
||||
* Specifying nodes in `isBarrier` will remove those nodes from the graph, and
|
||||
* specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
|
||||
* and/or out-going edges from those nodes, respectively.
|
||||
*
|
||||
* Then, to query whether there is flow between some `source` and `sink`,
|
||||
* write
|
||||
*
|
||||
* ```ql
|
||||
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
|
||||
* ```
|
||||
*
|
||||
* Multiple configurations can coexist, but two classes extending
|
||||
* `DataFlow::Configuration` should never depend on each other. One of them
|
||||
* should instead depend on a `DataFlow2::Configuration`, a
|
||||
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
|
||||
*/
|
||||
abstract class Configuration extends string {
|
||||
bindingset[this]
|
||||
Configuration() { any() }
|
||||
|
||||
/**
|
||||
* Holds if `source` is a relevant data flow source.
|
||||
*/
|
||||
predicate isSource(Node source) { none() }
|
||||
|
||||
/**
|
||||
* Holds if `source` is a relevant data flow source with the given initial
|
||||
* `state`.
|
||||
*/
|
||||
predicate isSource(Node source, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a relevant data flow sink.
|
||||
*/
|
||||
predicate isSink(Node sink) { none() }
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a relevant data flow sink accepting `state`.
|
||||
*/
|
||||
predicate isSink(Node sink, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data flow through `node` is prohibited. This completely removes
|
||||
* `node` from the data flow graph.
|
||||
*/
|
||||
predicate isBarrier(Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data flow through `node` is prohibited when the flow state is
|
||||
* `state`.
|
||||
*/
|
||||
predicate isBarrier(Node node, FlowState state) { none() }
|
||||
|
||||
/** Holds if data flow into `node` is prohibited. */
|
||||
predicate isBarrierIn(Node node) { none() }
|
||||
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited when
|
||||
* the flow state is `state`.
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
* This step is only applicable in `state1` and updates the flow state to `state2`.
|
||||
*/
|
||||
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
|
||||
none()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if an arbitrary number of implicit read steps of content `c` may be
|
||||
* taken at `node`.
|
||||
*/
|
||||
predicate allowImplicitRead(Node node, ContentSet c) { none() }
|
||||
|
||||
/**
|
||||
* Gets the virtual dispatch branching limit when calculating field flow.
|
||||
* This can be overridden to a smaller value to improve performance (a
|
||||
* value of 0 disables field flow), or a larger value to get more results.
|
||||
*/
|
||||
int fieldFlowBranchLimit() { result = 2 }
|
||||
|
||||
/**
|
||||
* Gets a data flow configuration feature to add restrictions to the set of
|
||||
* valid flow paths.
|
||||
*
|
||||
* - `FeatureHasSourceCallContext`:
|
||||
* Assume that sources have some existing call context to disallow
|
||||
* conflicting return-flow directly following the source.
|
||||
* - `FeatureHasSinkCallContext`:
|
||||
* Assume that sinks have some existing call context to disallow
|
||||
* conflicting argument-to-parameter flow directly preceding the sink.
|
||||
* - `FeatureEqualSourceSinkCallContext`:
|
||||
* Implies both of the above and additionally ensures that the entire flow
|
||||
* path preserves the call context.
|
||||
*
|
||||
* These features are generally not relevant for typical end-to-end data flow
|
||||
* queries, but should only be used for constructing paths that need to
|
||||
* somehow be pluggable in another path context.
|
||||
*/
|
||||
FlowFeature getAFeature() { none() }
|
||||
|
||||
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
|
||||
predicate sourceGrouping(Node source, string sourceGroup) { none() }
|
||||
|
||||
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
|
||||
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `source` to `sink` for this configuration.
|
||||
*/
|
||||
predicate hasFlow(Node source, Node sink) { hasFlow(source, sink, this) }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `source` to `sink` for this configuration.
|
||||
*
|
||||
* The corresponding paths are generated from the end-points and the graph
|
||||
* included in the module `PathGraph`.
|
||||
*/
|
||||
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from some source to `sink` for this configuration.
|
||||
*/
|
||||
predicate hasFlowTo(Node sink) { hasFlowTo(sink, this) }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from some source to `sink` for this configuration.
|
||||
*/
|
||||
predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `FlowExploration<explorationLimit>` instead.
|
||||
*
|
||||
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
|
||||
* measured in approximate number of interprocedural steps.
|
||||
*/
|
||||
deprecated int explorationLimit() { none() }
|
||||
|
||||
/**
|
||||
* Holds if hidden nodes should be included in the data flow graph.
|
||||
*
|
||||
* This feature should only be used for debugging or when the data flow graph
|
||||
* is not visualized (for example in a `path-problem` query).
|
||||
*/
|
||||
predicate includeHiddenNodes() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* This class exists to prevent mutual recursion between the user-overridden
|
||||
* member predicates of `Configuration` and the rest of the data-flow library.
|
||||
* Good performance cannot be guaranteed in the presence of such recursion, so
|
||||
* it should be replaced by using more than one copy of the data flow library.
|
||||
*/
|
||||
abstract private class ConfigurationRecursionPrevention extends Configuration {
|
||||
bindingset[this]
|
||||
ConfigurationRecursionPrevention() { any() }
|
||||
|
||||
override predicate hasFlow(Node source, Node sink) {
|
||||
strictcount(Node n | this.isSource(n)) < 0
|
||||
or
|
||||
strictcount(Node n | this.isSource(n, _)) < 0
|
||||
or
|
||||
strictcount(Node n | this.isSink(n)) < 0
|
||||
or
|
||||
strictcount(Node n | this.isSink(n, _)) < 0
|
||||
or
|
||||
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
|
||||
or
|
||||
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0
|
||||
or
|
||||
super.hasFlow(source, sink)
|
||||
}
|
||||
}
|
||||
|
||||
/** A bridge class to access the deprecated `isBarrierGuard`. */
|
||||
private class BarrierGuardGuardedNodeBridge extends Unit {
|
||||
abstract predicate guardedNode(Node n, Configuration config);
|
||||
|
||||
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
|
||||
}
|
||||
|
||||
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
|
||||
deprecated override predicate guardedNode(Node n, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
|
||||
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g, state) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private FlowState relevantState(Configuration config) {
|
||||
config.isSource(_, result) or
|
||||
config.isSink(_, result) or
|
||||
config.isBarrier(_, result) or
|
||||
config.isAdditionalFlowStep(_, result, _, _) or
|
||||
config.isAdditionalFlowStep(_, _, _, result)
|
||||
}
|
||||
|
||||
private newtype TConfigState =
|
||||
TMkConfigState(Configuration config, FlowState state) {
|
||||
state = relevantState(config) or state instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
|
||||
|
||||
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
|
||||
|
||||
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
|
||||
|
||||
private module Config implements FullStateConfigSig {
|
||||
class FlowState = TConfigState;
|
||||
|
||||
predicate isSource(Node source, FlowState state) {
|
||||
getConfig(state).isSource(source, getState(state))
|
||||
or
|
||||
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
predicate isSink(Node sink, FlowState state) {
|
||||
getConfig(state).isSink(sink, getState(state))
|
||||
or
|
||||
getConfig(state).isSink(sink) and getState(state) instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
predicate isBarrier(Node node) { none() }
|
||||
|
||||
predicate isBarrier(Node node, FlowState state) {
|
||||
getConfig(state).isBarrier(node, getState(state)) or
|
||||
getConfig(state).isBarrier(node) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
|
||||
}
|
||||
|
||||
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
}
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
|
||||
getConfig(state1).isAdditionalFlowStep(node1, getState(state1), node2, getState(state2)) and
|
||||
getConfig(state2) = getConfig(state1)
|
||||
or
|
||||
not singleConfiguration() and
|
||||
getConfig(state1).isAdditionalFlowStep(node1, node2) and
|
||||
state2 = state1
|
||||
}
|
||||
|
||||
predicate allowImplicitRead(Node node, ContentSet c) {
|
||||
any(Configuration config).allowImplicitRead(node, c)
|
||||
}
|
||||
|
||||
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
|
||||
|
||||
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
|
||||
|
||||
predicate sourceGrouping(Node source, string sourceGroup) {
|
||||
any(Configuration config).sourceGrouping(source, sourceGroup)
|
||||
}
|
||||
|
||||
predicate sinkGrouping(Node sink, string sinkGroup) {
|
||||
any(Configuration config).sinkGrouping(sink, sinkGroup)
|
||||
}
|
||||
|
||||
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
|
||||
}
|
||||
|
||||
private import Impl<Config> as I
|
||||
|
||||
/**
|
||||
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
|
||||
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
|
||||
*/
|
||||
class PathNode instanceof I::PathNode {
|
||||
/** Gets a textual representation of this element. */
|
||||
final string toString() { result = super.toString() }
|
||||
|
||||
/**
|
||||
* Gets a textual representation of this element, including a textual
|
||||
* representation of the call context.
|
||||
*/
|
||||
final string toStringWithContext() { result = super.toStringWithContext() }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
final predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
|
||||
/** Gets the underlying `Node`. */
|
||||
final Node getNode() { result = super.getNode() }
|
||||
|
||||
/** Gets the `FlowState` of this node. */
|
||||
final FlowState getState() { result = getState(super.getState()) }
|
||||
|
||||
/** Gets the associated configuration. */
|
||||
final Configuration getConfiguration() { result = getConfig(super.getState()) }
|
||||
|
||||
/** Gets a successor of this node, if any. */
|
||||
final PathNode getASuccessor() { result = super.getASuccessor() }
|
||||
|
||||
/** Holds if this node is a source. */
|
||||
final predicate isSource() { super.isSource() }
|
||||
|
||||
/** Holds if this node is a grouping of source nodes. */
|
||||
final predicate isSourceGroup(string group) { super.isSourceGroup(group) }
|
||||
|
||||
/** Holds if this node is a grouping of sink nodes. */
|
||||
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
|
||||
}
|
||||
|
||||
module PathGraph = I::PathGraph;
|
||||
|
||||
private predicate hasFlow(Node source, Node sink, Configuration config) {
|
||||
exists(PathNode source0, PathNode sink0 |
|
||||
hasFlowPath(source0, sink0, config) and
|
||||
source0.getNode() = source and
|
||||
sink0.getNode() = sink
|
||||
)
|
||||
}
|
||||
|
||||
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
|
||||
I::flowPath(source, sink) and source.getConfiguration() = config
|
||||
}
|
||||
|
||||
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
|
||||
|
||||
predicate flowsTo = hasFlow/3;
|
||||
@@ -2512,9 +2512,10 @@ module PrivateDjango {
|
||||
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
|
||||
)
|
||||
or
|
||||
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
|
||||
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, RegExp regex |
|
||||
regex.getAUse() = regexUse and
|
||||
routeHandler = this.getARequestHandler() and
|
||||
regex.getRouteSetup() = this
|
||||
regexUse.getRouteSetup() = this
|
||||
|
|
||||
// either using named capture groups (passed as keyword arguments) or using
|
||||
// unnamed capture groups (passed as positional arguments)
|
||||
@@ -2533,14 +2534,12 @@ module PrivateDjango {
|
||||
/**
|
||||
* A regex that is used to set up a route.
|
||||
*
|
||||
* Needs this subclass to be considered a RegexString.
|
||||
* Needs this subclass to be considered a RegExpInterpretation.
|
||||
*/
|
||||
private class DjangoRouteRegex extends RegexString instanceof StrConst {
|
||||
private class DjangoRouteRegex extends RegExpInterpretation::Range {
|
||||
DjangoRegexRouteSetup rePathCall;
|
||||
|
||||
DjangoRouteRegex() {
|
||||
rePathCall.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
|
||||
}
|
||||
DjangoRouteRegex() { this = rePathCall.getUrlPatternArg() }
|
||||
|
||||
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
|
||||
}
|
||||
|
||||
@@ -3015,6 +3015,17 @@ private module StdlibPrivate {
|
||||
override string getKind() { result = Escaping::getRegexKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node interpreted as a regular expression.
|
||||
* Specifically, nodes where string values are interpreted as regular expressions.
|
||||
*/
|
||||
private class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
|
||||
StdLibRegExpInterpretation() {
|
||||
this =
|
||||
API::moduleImport("re").getMember("compile").getACall().getParameter(0, "pattern").asSink()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// urllib
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -384,12 +384,12 @@ module Tornado {
|
||||
/**
|
||||
* A regex that is used to set up a route.
|
||||
*
|
||||
* Needs this subclass to be considered a RegexString.
|
||||
* Needs this subclass to be considered a RegExpInterpretation.
|
||||
*/
|
||||
private class TornadoRouteRegex extends RegexString instanceof StrConst {
|
||||
private class TornadoRouteRegex extends RegExpInterpretation::Range {
|
||||
TornadoRouteSetup setup;
|
||||
|
||||
TornadoRouteRegex() { setup.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this) }
|
||||
TornadoRouteRegex() { this = setup.getUrlPatternArg() }
|
||||
|
||||
TornadoRouteSetup getRouteSetup() { result = setup }
|
||||
}
|
||||
@@ -423,9 +423,10 @@ module Tornado {
|
||||
not result = requestHandler.getArg(0)
|
||||
)
|
||||
or
|
||||
exists(Function requestHandler, TornadoRouteRegex regex |
|
||||
exists(Function requestHandler, TornadoRouteRegex regexUse, RegExp regex |
|
||||
regex.getAUse() = regexUse and
|
||||
requestHandler = this.getARequestHandler() and
|
||||
regex.getRouteSetup() = this
|
||||
regexUse.getRouteSetup() = this
|
||||
|
|
||||
// first group will have group number 1
|
||||
result = requestHandler.getArg(regex.getGroupNumber(_, _))
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
1094
python/ql/lib/semmle/python/regexp/RegexTreeView.qll
Normal file
1094
python/ql/lib/semmle/python/regexp/RegexTreeView.qll
Normal file
File diff suppressed because it is too large
Load Diff
1063
python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll
Normal file
1063
python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Provides predicates that track strings to where they are used as regular expressions.
|
||||
* This is implemented using TypeTracking in two phases:
|
||||
*
|
||||
* 1: An exploratory backwards analysis that imprecisely tracks all nodes that may be used as regular expressions.
|
||||
* The exploratory phase ends with a forwards analysis from string constants that were reached by the backwards analysis.
|
||||
* This is similar to the exploratory phase of the JavaScript global DataFlow library.
|
||||
*
|
||||
* 2: A precise type tracking analysis that tracks constant strings to where they are used as regular expressions.
|
||||
* This phase keeps track of which strings and regular expressions end up in which places.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts as Concepts
|
||||
|
||||
/** Gets a constant string value that may be used as a regular expression. */
|
||||
DataFlow::LocalSourceNode strStart() { result.asExpr() instanceof StrConst }
|
||||
|
||||
private import semmle.python.regex as Regex
|
||||
|
||||
/** A node where regular expressions that flow to the node are used. */
|
||||
class RegExpSink extends DataFlow::Node {
|
||||
RegExpSink() {
|
||||
this = any(Concepts::RegexExecution exec).getRegex()
|
||||
or
|
||||
this instanceof Concepts::RegExpInterpretation
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node that may end up being used in any regular expression execution.
|
||||
* This is the backwards exploratory phase of the analysis.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode backwards(DataFlow::TypeBackTracker t) {
|
||||
t.start() and
|
||||
result = any(RegExpSink sink).getALocalSource()
|
||||
or
|
||||
exists(DataFlow::TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a string that reaches any regular expression execution.
|
||||
* This is the forwards exploratory phase of the analysis.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode forwards(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = backwards(DataFlow::TypeBackTracker::end()) and
|
||||
result = strStart()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = forwards(t2).track(t2, t)) and
|
||||
result = backwards(_)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that has been tracked from the string constant `start` to some node.
|
||||
* This is used to figure out where `start` is evaluated as a regular expression.
|
||||
*
|
||||
* The result of the exploratory phase is used to limit the size of the search space in this precise analysis.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode regexTracking(DataFlow::Node start, DataFlow::TypeTracker t) {
|
||||
result = forwards(t) and
|
||||
(
|
||||
t.start() and
|
||||
start = strStart() and
|
||||
result = start
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = regexTracking(start, t2).track(t2, t))
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a node holding a value for the regular expression that is evaluated at `re`. */
|
||||
cached
|
||||
DataFlow::Node regExpSource(RegExpSink re) {
|
||||
regexTracking(result, DataFlow::TypeTracker::end()).flowsTo(re)
|
||||
}
|
||||
@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.BarrierGuards
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.regex
|
||||
|
||||
|
||||
@@ -5,14 +5,25 @@
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeImpl
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeImpl
|
||||
private import semmle.python.dataflow.new.Regexp as Regexp
|
||||
private import codeql.regex.HostnameRegexp as Shared
|
||||
|
||||
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
|
||||
class DataFlowNode = DataFlow::Node;
|
||||
|
||||
class RegExpPatternSource = Regexp::RegExpPatternSource;
|
||||
class RegExpPatternSource extends DataFlow::Node instanceof Regexp::RegExpPatternSource {
|
||||
/**
|
||||
* Gets a node where the pattern of this node is parsed as a part of
|
||||
* a regular expression.
|
||||
*/
|
||||
DataFlow::Node getAParse() { result = super.getAParse() }
|
||||
|
||||
/**
|
||||
* Gets the root term of the regular expression parsed from this pattern.
|
||||
*/
|
||||
TreeImpl::RegExpTerm getRegExpTerm() { result = super.getRegExpTerm() }
|
||||
}
|
||||
}
|
||||
|
||||
import Shared::Make<TreeImpl, Impl>
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r, int offset
|
||||
from RegExp r, int offset
|
||||
where
|
||||
r.escapingChar(offset) and
|
||||
r.getChar(offset + 1) = "b" and
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
predicate duplicate_char_in_class(Regex r, string char) {
|
||||
predicate duplicate_char_in_class(RegExp r, string char) {
|
||||
exists(int i, int j, int x, int y, int start, int end |
|
||||
i != x and
|
||||
j != y and
|
||||
@@ -36,7 +36,7 @@ predicate duplicate_char_in_class(Regex r, string char) {
|
||||
)
|
||||
}
|
||||
|
||||
from Regex r, string char
|
||||
from RegExp r, string char
|
||||
where duplicate_char_in_class(r, char)
|
||||
select r,
|
||||
"This regular expression includes duplicate character '" + char + "' in a set of characters."
|
||||
|
||||
@@ -13,6 +13,6 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r, string missing, string part
|
||||
from RegExp r, string missing, string part
|
||||
where r.getText().regexpMatch(".*\\(P<\\w+>.*") and missing = "?" and part = "named group"
|
||||
select r, "Regular expression is missing '" + missing + "' in " + part + "."
|
||||
|
||||
@@ -13,14 +13,14 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
predicate unmatchable_caret(Regex r, int start) {
|
||||
predicate unmatchable_caret(RegExp r, int start) {
|
||||
not r.getAMode() = "MULTILINE" and
|
||||
not r.getAMode() = "VERBOSE" and
|
||||
r.specialCharacter(start, start + 1, "^") and
|
||||
not r.firstItem(start, start + 1)
|
||||
}
|
||||
|
||||
from Regex r, int offset
|
||||
from RegExp r, int offset
|
||||
where unmatchable_caret(r, offset)
|
||||
select r,
|
||||
"This regular expression includes an unmatchable caret at offset " + offset.toString() + "."
|
||||
|
||||
@@ -13,14 +13,14 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
predicate unmatchable_dollar(Regex r, int start) {
|
||||
predicate unmatchable_dollar(RegExp r, int start) {
|
||||
not r.getAMode() = "MULTILINE" and
|
||||
not r.getAMode() = "VERBOSE" and
|
||||
r.specialCharacter(start, start + 1, "$") and
|
||||
not r.lastItem(start, start + 1)
|
||||
}
|
||||
|
||||
from Regex r, int offset
|
||||
from RegExp r, int offset
|
||||
where unmatchable_dollar(r, offset)
|
||||
select r,
|
||||
"This regular expression includes an unmatchable dollar at offset " + offset.toString() + "."
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
|
||||
|
||||
from TreeView::RegExpCharacterRange range, string reason
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* external/cwe/cwe-186
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
|
||||
|
||||
from HtmlMatchingRegExp regexp, string msg
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
|
||||
|
||||
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r, int start, int end, int part_start, int part_end
|
||||
from RegExp r, int start, int end, int part_start, int part_end
|
||||
where
|
||||
r.getLocation().getFile().getBaseName() = "test.py" and
|
||||
r.alternationOption(start, end, part_start, part_end)
|
||||
|
||||
@@ -6,6 +6,6 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r, int start, int end
|
||||
from RegExp r, int start, int end
|
||||
where r.character(start, end) and r.getLocation().getFile().getBaseName() = "test.py"
|
||||
select r.getText(), start, end
|
||||
|
||||
@@ -7,6 +7,6 @@ import semmle.python.regex
|
||||
|
||||
from string str, Location loc, int counter
|
||||
where
|
||||
counter = strictcount(Regex term | term.getLocation() = loc and term.getText() = str) and
|
||||
counter = strictcount(RegExp term | term.getLocation() = loc and term.getText() = str) and
|
||||
counter > 1
|
||||
select str, counter, loc
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
predicate part(Regex r, int start, int end, string kind) {
|
||||
predicate part(RegExp r, int start, int end, string kind) {
|
||||
r.lastItem(start, end) and kind = "last"
|
||||
or
|
||||
r.firstItem(start, end) and kind = "first"
|
||||
}
|
||||
|
||||
from Regex r, int start, int end, string kind
|
||||
from RegExp r, int start, int end, string kind
|
||||
where part(r, start, end, kind) and r.getLocation().getFile().getBaseName() = "test.py"
|
||||
select r.getText(), kind, start, end
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r, int start, int end, int part_start, int part_end
|
||||
from RegExp r, int start, int end, int part_start, int part_end
|
||||
where
|
||||
r.getLocation().getFile().getBaseName() = "test.py" and
|
||||
r.groupContents(start, end, part_start, part_end)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r
|
||||
from RegExp r
|
||||
where r.getLocation().getFile().getBaseName() = "test.py"
|
||||
select r.getLocation().getStartLine(), r.getAMode()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
from Regex r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
|
||||
from RegExp r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
|
||||
where
|
||||
r.getLocation().getFile().getBaseName() = "test.py" and
|
||||
r.qualifiedItem(start, end, maybe_empty, may_repeat_forever)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import python
|
||||
import semmle.python.regex
|
||||
|
||||
predicate part(Regex r, int start, int end, string kind) {
|
||||
predicate part(RegExp r, int start, int end, string kind) {
|
||||
r.alternation(start, end) and kind = "choice"
|
||||
or
|
||||
r.normalCharacter(start, end) and kind = "char"
|
||||
@@ -23,6 +23,6 @@ predicate part(Regex r, int start, int end, string kind) {
|
||||
r.qualifiedItem(start, end, _, _) and kind = "qualified"
|
||||
}
|
||||
|
||||
from Regex r, int start, int end, string kind
|
||||
from RegExp r, int start, int end, string kind
|
||||
where part(r, start, end, kind) and r.getLocation().getFile().getBaseName() = "test.py"
|
||||
select r.getText(), kind, start, end
|
||||
|
||||
@@ -10,7 +10,7 @@ class CharacterSetTest extends InlineExpectationsTest {
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
location.getFile().getBaseName() = "charSetTest.py" and
|
||||
exists(Regex re, int start, int end |
|
||||
exists(RegExp re, int start, int end |
|
||||
re.charSet(start, end) and
|
||||
location = re.getLocation() and
|
||||
element = re.getText().substring(start, end) and
|
||||
@@ -28,7 +28,7 @@ class CharacterRangeTest extends InlineExpectationsTest {
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
location.getFile().getBaseName() = "charRangeTest.py" and
|
||||
exists(Regex re, int start, int lower_end, int upper_start, int end |
|
||||
exists(RegExp re, int start, int lower_end, int upper_start, int end |
|
||||
re.charRange(_, start, lower_end, upper_start, end) and
|
||||
location = re.getLocation() and
|
||||
element = re.getText().substring(start, end) and
|
||||
@@ -46,7 +46,7 @@ class EscapeTest extends InlineExpectationsTest {
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
location.getFile().getBaseName() = "escapedCharacterTest.py" and
|
||||
exists(Regex re, int start, int end |
|
||||
exists(RegExp re, int start, int end |
|
||||
re.escapedCharacter(start, end) and
|
||||
location = re.getLocation() and
|
||||
element = re.getText().substring(start, end) and
|
||||
@@ -64,7 +64,7 @@ class GroupTest extends InlineExpectationsTest {
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
location.getFile().getBaseName() = "groupTest.py" and
|
||||
exists(Regex re, int start, int end |
|
||||
exists(RegExp re, int start, int end |
|
||||
re.group(start, end) and
|
||||
location = re.getLocation() and
|
||||
element = re.getText().substring(start, end) and
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
import semmle.python.regexp.RegexTreeView
|
||||
|
||||
from string str, int counter, Location loc
|
||||
where
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import python
|
||||
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
|
||||
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
|
||||
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
|
||||
|
||||
from PolynomialBackTrackingTerm t
|
||||
|
||||
Reference in New Issue
Block a user