Merge branch 'main' into call-graph-tests

This commit is contained in:
Rasmus Wriedt Larsen
2022-11-22 13:39:27 +01:00
committed by GitHub
1962 changed files with 141281 additions and 89363 deletions

View File

@@ -1,5 +1,5 @@
name: codeql/python-consistency-queries
groups: [python, test, consistency-queries]
dependencies:
codeql/python-all: "*"
codeql/python-all: ${workspace}
extractor: python

View File

@@ -3,4 +3,4 @@ groups:
- python
- examples
dependencies:
codeql/python-all: "*"
codeql/python-all: ${workspace}

View File

@@ -1,3 +1,13 @@
## 0.6.4
### Minor Analysis Improvements
* The ReDoS libraries in `semmle.code.python.security.regexp` have been moved to a shared pack inside the `shared/` folder, and the previous location has been deprecated.
## 0.6.3
No user-facing changes.
## 0.6.2
### Minor Analysis Improvements

View File

@@ -0,0 +1,7 @@
---
category: minorAnalysis
---
* Deleted the deprecated `importNode` predicate from the `DataFlowUtil.qll` file.
* Deleted the deprecated features from `PEP249.qll` that were not inside the `PEP249` module.
* Deleted the deprecated `werkzeug` from the `Werkzeug` module in `Werkzeug.qll`.
* Deleted the deprecated `methodResult` predicate from `PEP249::Cursor`.

View File

@@ -0,0 +1,3 @@
## 0.6.3
No user-facing changes.

View File

@@ -0,0 +1,5 @@
## 0.6.4
### Minor Analysis Improvements
* The ReDoS libraries in `semmle.code.python.security.regexp` have been moved to a shared pack inside the `shared/` folder, and the previous location has been deprecated.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.6.2
lastReleaseVersion: 0.6.4

View File

@@ -1,7 +1,9 @@
name: codeql/python-all
version: 0.6.3-dev
version: 0.6.5-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
upgrades: upgrades
dependencies:
codeql/regex: ${workspace}

View File

@@ -311,7 +311,7 @@ module CodeExecution {
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
* If it is important that the SQL statement is indeed executed, then use `SqlExecution`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlConstruction::Range` instead.
@@ -329,7 +329,7 @@ module SqlConstruction {
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
* If it is important that the SQL statement is indeed executed, then use `SqlExecution`.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlConstruction` instead.

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/**
* Module for parsing access paths from CSV models, both the identifying access path used
* Module for parsing access paths from MaD models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -136,6 +136,18 @@ module Consistency {
msg = "Local flow step does not preserve enclosing callable."
}
query predicate readStepIsLocal(Node n1, Node n2, string msg) {
readStep(n1, _, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Read step does not preserve enclosing callable."
}
query predicate storeStepIsLocal(Node n1, Node n2, string msg) {
storeStep(n1, _, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Store step does not preserve enclosing callable."
}
private DataFlowType typeRepr() { result = getNodeType(_) }
query predicate compatibleTypesReflexive(DataFlowType t, string msg) {

View File

@@ -26,67 +26,3 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
*/
pragma[inline]
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
* DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
*
* For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
*
* Gets a `Node` that refers to the module referenced by `name`.
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
* reference to the module `pkg`.
*
* This predicate handles (with optional `... as <new-name>`):
* 1. `import <name>`
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
*
* Finally, in `from <module> import <member>` we consider the `ImportExpr` corresponding to
* `<module>` to be a reference to that module.
*
* Note:
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
* it's highly unlikely that this will be a problem in production level code.
* Example: If `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` will not import the module
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
* to refer to the module.
*/
deprecated Node importNode(string name) {
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and
(
name = alias.getValue().(ImportMember).getImportedModuleName()
or
name = alias.getValue().(ImportExpr).getImportedModuleName()
) and
result.asExpr() = alias.getValue()
)
or
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to
// be a reference to a module (since that reference only makes sense locally within the `import`
// statement), it's important for our use of type trackers to consider this local reference to
// also refer to the `foo` module. That way, if one wants to track references to the `bar`
// attribute using a type tracker, one can simply write
//
// ```ql
// DataFlow::Node bar_attr_tracker(TypeTracker t) {
// t.startInAttr("bar") and
// result = foo_module_tracker()
// or
// exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t))
// }
// ```
//
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
// Because named imports are modeled as `AttrRead`s, the statement `from foo import bar as baz`
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
// reference to `foo.bar`, as desired.
exists(ImportExpr imp_expr |
imp_expr.getName() = name and
result.asCfgNode().getNode() = imp_expr and
// in `import foo.bar` we DON'T want to give a result for `importNode("foo.bar")`,
// only for `importNode("foo")`. We exclude those cases with the following clause.
not exists(Import imp | imp.getAName().getValue() = imp_expr)
)
}

View File

@@ -892,7 +892,7 @@ module Private {
}
/**
* Provides a means of translating externally (e.g., CSV) defined flow
* Provides a means of translating externally (e.g., MaD) defined flow
* summaries into a `SummarizedCallable`s.
*/
module External {
@@ -1121,7 +1121,7 @@ module Private {
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
predicate isSourceNode(InterpretNode node, string kind) {
@@ -1132,7 +1132,7 @@ module Private {
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
predicate isSinkNode(InterpretNode node, string kind) {

View File

@@ -1,14 +1,75 @@
/**
* INTERNAL. DO NOT USE.
*
* Provides predicates for resolving imports.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
private import semmle.python.dataflow.new.TypeTracker
private import semmle.python.dataflow.new.internal.DataFlowPrivate
/**
* Python modules and the way imports are resolved are... complicated. Here's a crash course in how
* it works, as well as some caveats to bear in mind when looking at the implementation in this
* module.
*
* First, let's consider the humble `import` statement:
* ```python
* import foo
* import bar.baz
* import ham.eggs as spam
* ```
*
* In the AST, all imports are aliased, as in the last import above. That is, `import foo` becomes
* `import foo as foo`, and `import bar.baz` becomes `import bar as bar`. Note that `import` is
* exclusively used to import modules -- if `eggs` is an attribute of the `ham` module (and not a
* submodule of the `ham` package), then the third line above is an error.
*
* Next, we have the `from` statement. This one is a bit more complicated, but still has the same
* aliasing desugaring as above applied to it. Thus, `from foo import bar` becomes
* `from foo import bar as bar`.
*
* In general, `from foo import bar` can mean two different things:
*
* 1. If `foo` is a module, and `bar` is an attribute of `foo`, then `from foo import bar` imports
* the attribute `bar` into the current module (binding it to the name `bar`).
* 2. If `foo` is a package, and `bar` is already defined in `foo/__init__.py`,
* that value will be imported. If it is not defined, and `bar` is a submodule of `foo`, then
* `bar` is imported to `foo`, and the `bar` submodule imported.
* Note: We don't currently model if the attribute is already defined in `__init__.py`
* and always assume that the submodule will be used.
*
* Now, when it comes to how these imports are represented in the AST, things get a bit complicated.
* First of all, both of the above forms of imports get mapped to the same kind of AST node:
* `Import`. An `Import` node has a sequence of names, each of which is an `Alias` node. This `Alias`
* node represents the `x as y` bit of each imported module.
*
* The same is true for `from` imports. So, how then do we distinguish between the two forms of
* imports? The distinguishing feature is the left hand side of the `as` node. If the left hand side
* is an `ImportExpr`, then it is a plain import. If it is an `ImportMember`, then it is a `from`
* import. (And to confuse matters even more, this `ImportMember` contains another `ImportExpr` for
* the bit between the `from` and `import` keywords.)
*
* Caveats:
*
* - A relative import of the form `from .foo import bar as baz` not only imports `bar` and binds it
* to the name `baz`, but also imports `foo` and binds it to the name `foo`. This only happens with
* relative imports. `from foo import bar as baz` only binds `bar` to `baz`.
* - Modules may also be packages, so e.g. `import foo.bar` may import the `bar` submodule in the `foo`
* package, or the `bar` subpackage of the `foo` package. The practical difference here is the name of
* the module that is imported, as the package `foo.bar` will have the "name" `foo.bar.__init__`,
* corresponding to the fact that the code that is executed is in the `__init__.py` file of the
* `bar` subpackage.
*/
module ImportResolution {
/**
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
* not include `name`).
*/
pragma[nomagic]
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
exists(EssaVariable v |
v.getName() = name and
@@ -18,12 +79,223 @@ module ImportResolution {
or
defn.getNode() = v.getDefinition().(ArgumentRefinement).getArgument()
)
}
Module getModule(DataFlow::CfgNode node) {
exists(ModuleValue mv |
node.getNode().pointsTo(mv) and
result = mv.getScope()
or
exists(Alias a |
defn.asExpr() = [a.getValue(), a.getValue().(ImportMember).getModule()] and
a.getAsname().(Name).getId() = name and
defn.getScope() = m
)
}
/**
* Holds if the module `m` explicitly exports the name `name` by listing it in `__all__`. Only
* handles simple cases where we can statically tell that this is the case.
*/
private predicate all_mentions_name(Module m, string name) {
exists(DefinitionNode def, SequenceNode n |
def.getValue() = n and
def.(NameNode).getId() = "__all__" and
def.getScope() = m and
any(StrConst s | s.getText() = name) = n.getAnElement().getNode()
)
}
/**
* Holds if the module `m` either does not set `__all__` (and so implicitly exports anything that
* doesn't start with an underscore), or sets `__all__` in a way that's too complicated for us to
* handle (in which case we _also_ pretend that it just exports all such names).
*/
private predicate no_or_complicated_all(Module m) {
// No mention of `__all__` in the module
not exists(DefinitionNode def | def.getScope() = m and def.(NameNode).getId() = "__all__")
or
// `__all__` is set to a non-sequence value
exists(DefinitionNode def |
def.(NameNode).getId() = "__all__" and
def.getScope() = m and
not def.getValue() instanceof SequenceNode
)
or
// `__all__` is used in some way that doesn't involve storing a value in it. This usually means
// it is being mutated through `append` or `extend`, which we don't handle.
exists(NameNode n | n.getId() = "__all__" and n.getScope() = m and n.isLoad())
}
private predicate potential_module_export(Module m, string name) {
all_mentions_name(m, name)
or
no_or_complicated_all(m) and
(
exists(NameNode n | n.getId() = name and n.getScope() = m and name.charAt(0) != "_")
or
exists(Alias a | a.getAsname().(Name).getId() = name and a.getValue().getScope() = m)
)
}
/**
* Holds if the module `reexporter` exports the module `reexported` under the name
* `reexported_name`.
*/
private predicate module_reexport(Module reexporter, string reexported_name, Module reexported) {
exists(DataFlow::Node ref |
ref = getImmediateModuleReference(reexported) and
module_export(reexporter, reexported_name, ref) and
potential_module_export(reexporter, reexported_name)
)
}
/**
* Gets a reference to `sys.modules`.
*/
private DataFlow::Node sys_modules_reference() {
result =
any(DataFlow::AttrRef a |
a.getAttributeName() = "modules" and a.getObject().asExpr().(Name).getId() = "sys"
)
}
/** Gets a module that may have been added to `sys.modules`. */
private Module sys_modules_module_with_name(string name) {
exists(ControlFlowNode n, DataFlow::Node mod |
exists(SubscriptNode sub |
sub.getObject() = sys_modules_reference().asCfgNode() and
sub.getIndex() = n and
n.getNode().(StrConst).getText() = name and
sub.(DefinitionNode).getValue() = mod.asCfgNode() and
mod = getModuleReference(result)
)
)
}
Module getModuleImportedByImportStar(ImportStar i) {
isPreferredModuleForName(result.getFile(), i.getImportedModuleName())
}
/**
* Gets a data-flow node that may be a reference to a module with the name `module_name`.
*
* This is a helper predicate for `getImmediateModuleReference`. It captures the fact that in an
* import such as `import foo`,
* - `foo` may simply be the name of a module, or
* - `foo` may be the name of a package (in which case its name is actually `foo.__init__`), or
* - `foo` may be a module name that has been added to `sys.modules` (in which case its actual name can
* be anything, for instance `os.path` is either `posixpath` or `ntpath`).
*/
private DataFlow::Node getReferenceToModuleName(string module_name) {
// Regular import statements, e.g.
// import foo # implicitly `import foo as foo`
// import foo as foo_alias
exists(Import i, Alias a | a = i.getAName() |
result.asExpr() = a.getAsname() and
module_name = a.getValue().(ImportExpr).getImportedModuleName()
)
or
// The module part of a `from ... import ...` statement, e.g. the `..foo.bar` in
// from ..foo.bar import baz # ..foo.bar might point to, say, package.subpackage.foo.bar
exists(ImportMember i | result.asExpr() = i.getModule() |
module_name = i.getModule().(ImportExpr).getImportedModuleName()
)
or
// Modules (not attributes) imported via `from ... import ... statements`, e.g.
// from foo.bar import baz # imports foo.bar.baz as baz
// from foo.bar import baz as baz_alias # imports foo.bar.baz as baz_alias
exists(Import i, Alias a, ImportMember im | a = i.getAName() and im = a.getValue() |
result.asExpr() = a.getAsname() and
module_name = im.getModule().(ImportExpr).getImportedModuleName() + "." + im.getName()
)
or
// For parity with the points-to based solution, the `ImportExpr` and `ImportMember` bits of the
// above cases should _also_ point to the right modules.
result.asExpr() = any(ImportExpr i | i.getImportedModuleName() = module_name)
or
result.asExpr() =
any(ImportMember i |
i.getModule().(ImportExpr).getImportedModuleName() + "." + i.getName() = module_name
)
}
/**
* Gets a dataflow node that is an immediate reference to the module `m`.
*
* Because of attribute lookups, this is mutually recursive with `getModuleReference`.
*/
DataFlow::Node getImmediateModuleReference(Module m) {
exists(string module_name | result = getReferenceToModuleName(module_name) |
// Depending on whether the referenced module is a package or not, we may need to add a
// trailing `.__init__` to the module name.
isPreferredModuleForName(m.getFile(), module_name + ["", ".__init__"])
or
// Module defined via `sys.modules`
m = sys_modules_module_with_name(module_name)
)
or
// Reading an attribute on a module may return a submodule (or subpackage).
exists(DataFlow::AttrRead ar, Module p, string attr_name |
ar.accesses(getModuleReference(p), attr_name) and
result = ar
|
isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
)
or
// This is also true for attributes that come from reexports.
exists(Module reexporter, string attr_name |
result.(DataFlow::AttrRead).accesses(getModuleReference(reexporter), attr_name) and
module_reexport(reexporter, attr_name, m)
)
or
// Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
// In practice, we create a definition for each module in a package, even if it is not imported.
exists(string submodule, Module package |
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
isPreferredModuleForName(m.getFile(),
package.getPackageName() + "." + submodule + ["", ".__init__"])
)
}
/** Join-order helper for `getModuleReference`. */
pragma[nomagic]
private predicate module_reference_in_scope(DataFlow::Node node, Scope s, string name, Module m) {
node.getScope() = s and
node.asExpr().(Name).getId() = name and
pragma[only_bind_into](node) = getImmediateModuleReference(pragma[only_bind_into](m))
}
/** Join-order helper for `getModuleReference`. */
pragma[nomagic]
private predicate module_name_in_scope(DataFlow::Node node, Scope s, string name) {
node.getScope() = s and
exists(Name n | n = node.asExpr() |
n.getId() = name and
pragma[only_bind_into](n).isUse()
)
}
/**
* Gets a reference to the module `m` (including through certain kinds of local and global flow).
*/
DataFlow::Node getModuleReference(Module m) {
// Immedate references to the module
result = getImmediateModuleReference(m)
or
// Flow (local or global) forward to a later reference to the module.
exists(DataFlow::Node ref | ref = getModuleReference(m) |
simpleLocalFlowStepForTypetracking(ref, result)
or
exists(DataFlow::ModuleVariableNode mv |
mv.getAWrite() = ref and
result = mv.getARead()
)
)
or
// A reference to a name that is bound to a module in an enclosing scope.
exists(DataFlow::Node def, Scope def_scope, Scope use_scope, string name |
module_reference_in_scope(pragma[only_bind_into](def), pragma[only_bind_into](def_scope),
pragma[only_bind_into](name), pragma[only_bind_into](m)) and
module_name_in_scope(result, use_scope, name) and
use_scope.getEnclosingScope*() = def_scope
)
}
Module getModule(DataFlow::CfgNode node) { node = getModuleReference(result) }
}

View File

@@ -76,7 +76,7 @@ module ImportStar {
exists(ImportStar i, DataFlow::CfgNode imported_module |
imported_module.getNode().getNode() = i.getModule() and
i.getScope() = m and
result = ImportResolution::getModule(imported_module)
result = ImportResolution::getModuleImportedByImportStar(i)
)
}

View File

@@ -8,29 +8,170 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
import semmle.python.frameworks.internal.PEP249Impl
/**
* DEPRECATED: Use `PEP249::PEP249ModuleApiNode` instead.
* Provides classes modeling database interfaces following PEP 249.
* See https://www.python.org/dev/peps/pep-0249/.
*/
deprecated class PEP249ModuleApiNode = PEP249::PEP249ModuleApiNode;
module PEP249 {
/**
* An API graph node representing a module that implements PEP 249.
*/
abstract class PEP249ModuleApiNode extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
/**
* DEPRECATED: Use `PEP249::Connection` instead.
*/
deprecated module Connection = PEP249::Connection;
/** Gets a reference to the `connect` function of a module that implements PEP 249. */
DataFlow::Node connect() {
result = any(PEP249ModuleApiNode a).getMember("connect").getAValueReachableFromSource()
}
/**
* DEPRECATED: Use `PEP249::Cursor` instead.
*/
deprecated module cursor = PEP249::Cursor;
/**
* Provides models for database connections (following PEP 249).
*
* See https://www.python.org/dev/peps/pep-0249/#connection-objects.
*/
module Connection {
/**
* A source of database connections (following PEP 249), extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by external
* libraries.
*
* Use the predicate `Connection::instance()` to get references to database connections (following PEP 249).
*
* Extend this class if the module implementing PEP 249 offers more direct ways to obtain
* a connection than going through `connect`.
*/
abstract class InstanceSource extends DataFlow::Node { }
/**
* DEPRECATED: Use `PEP249::execute` instead.
*/
deprecated predicate execute = PEP249::execute/0;
/** A call to the `connect` function of a module that implements PEP 249. */
private class ConnectCall extends InstanceSource, DataFlow::CallCfgNode {
ConnectCall() { this.getFunction() = connect() }
}
/**
* DEPRECATED: Use `PEP249::connect` instead.
*/
deprecated predicate connect = PEP249::connect/0;
/** Gets a reference to a database connection (following PEP 249). */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to a database connection (following PEP 249). */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for database cursors (following PEP 249).
*
* These are returned by the `cursor` method on a database connection.
* See https://www.python.org/dev/peps/pep-0249/#cursor.
*/
module Cursor {
/**
* A source of database cursors (following PEP 249), extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by external
* libraries.
*
* Use the predicate `Cursor::instance()` to get references to database cursors (following PEP 249).
*
* Extend this class if the module implementing PEP 249 offers more direct ways to obtain
* a connection than going through `connect`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to a database cursor. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to a database cursor. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `cursor` method on a database connection. */
private DataFlow::TypeTrackingNode methodRef(DataFlow::TypeTracker t) {
t.startInAttr("cursor") and
result = Connection::instance()
or
exists(DataFlow::TypeTracker t2 | result = methodRef(t2).track(t2, t))
}
/** Gets a reference to the `cursor` method on a database connection. */
DataFlow::Node methodRef() { methodRef(DataFlow::TypeTracker::end()).flowsTo(result) }
/** A call to the `cursor` method on a database connection */
private class CursorCall extends InstanceSource, DataFlow::CallCfgNode {
CursorCall() { this.getFunction() = methodRef() }
}
}
/**
* Gets a reference to the `execute` method on a cursor (or on a connection).
*
* Note: while `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*
* See https://peps.python.org/pep-0249/#execute.
*/
private DataFlow::TypeTrackingNode execute(DataFlow::TypeTracker t) {
t.startInAttr("execute") and
result in [Cursor::instance(), Connection::instance()]
or
exists(DataFlow::TypeTracker t2 | result = execute(t2).track(t2, t))
}
/**
* Gets a reference to the `execute` method on a cursor (or on a connection).
*
* Note: while `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*
* See https://peps.python.org/pep-0249/#execute.
*/
DataFlow::Node execute() { execute(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `execute` method on a cursor or a connection.
*
* See https://peps.python.org/pep-0249/#execute
*
* Note: While `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*/
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecuteCall() { this.getFunction() = execute() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
}
private DataFlow::TypeTrackingNode executemany(DataFlow::TypeTracker t) {
t.startInAttr("executemany") and
result in [Cursor::instance(), Connection::instance()]
or
exists(DataFlow::TypeTracker t2 | result = executemany(t2).track(t2, t))
}
private DataFlow::Node executemany() { executemany(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `executemany` method on a cursor or a connection.
*
* See https://peps.python.org/pep-0249/#executemany
*
* Note: While `executemany` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `executemany` on it.
*/
private class ExecutemanyCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecutemanyCall() { this.getFunction() = executemany() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
}
}

View File

@@ -231,119 +231,4 @@ module Werkzeug {
override string getAsyncMethodName() { none() }
}
}
import WerkzeugOld
}
/**
* Old version that contains the deprecated modules.
*/
private module WerkzeugOld {
/**
* DEPRECATED: Use the modeling available directly in the `Werkzeug` module instead.
*
* Provides models for the `werkzeug` module.
*/
deprecated module werkzeug {
/**
* DEPRECATED: Use the modeling available directly in the `Werkzeug` module instead.
*
* Provides models for the `werkzeug.datastructures` module.
*/
deprecated module datastructures {
/**
* DEPRECATED: Use `Werkzeug::MultiDict` instead.
*
* Provides models for the `werkzeug.datastructures.MultiDict` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
deprecated module MultiDict {
/**
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
*
* A source of instances of `werkzeug.datastructures.MultiDict`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `MultiDict::instance()` to get references to instances of `werkzeug.datastructures.MultiDict`.
*/
abstract deprecated class InstanceSourceApiNode extends API::Node { }
/**
* DEPRECATED
*
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers.getlist
*/
deprecated DataFlow::Node getlist() {
result = any(InstanceSourceApiNode a).getMember("getlist").getAValueReachableFromSource()
}
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
exists(DataFlow::AttrRead read |
read.getObject() = nodeFrom and
nodeTo = read and
nodeTo = getlist()
)
or
// getlist -> getlist()
nodeFrom = getlist() and
nodeTo.(DataFlow::CallCfgNode).getFunction() = nodeFrom
}
}
}
/**
* DEPRECATED: Use `Werkzeug::FileStorage` instead.
*
* Provides models for the `werkzeug.datastructures.FileStorage` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
deprecated module FileStorage {
/**
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
*
* A source of instances of `werkzeug.datastructures.FileStorage`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `FileStorage::instance()` to get references to instances of `werkzeug.datastructures.FileStorage`.
*/
abstract deprecated class InstanceSourceApiNode extends API::Node { }
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
deprecated DataFlow::Node instance() {
result = any(InstanceSourceApiNode a).getAValueReachableFromSource()
}
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeFrom = instance() and
exists(DataFlow::AttrRead read | nodeTo = read |
read.getAttributeName() in [
// str
"filename", "name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"
] and
read.getObject() = nodeFrom
)
}
}
}
}
}
}

View File

@@ -1,204 +0,0 @@
/**
* INTERNAL: Do not use.
*
* Provides internal implementation of PEP249. This currently resides in a different
* file than `python/ql/src/semmle/python/frameworks/PEP249.qll`, since we used to
* export everything without being encapsulated in a module, and shadowing rules means
* that we can't just add the module directly to that file :(
*
* So once we can remove those deprecated things (Start of July 2022), we can also move
* the core implementation into its' proper place.
*
* Provides classes modeling PEP 249.
* See https://www.python.org/dev/peps/pep-0249/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides classes modeling database interfaces following PEP 249.
* See https://www.python.org/dev/peps/pep-0249/.
*/
module PEP249 {
/**
* An API graph node representing a module that implements PEP 249.
*/
abstract class PEP249ModuleApiNode extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
/** Gets a reference to the `connect` function of a module that implements PEP 249. */
DataFlow::Node connect() {
result = any(PEP249ModuleApiNode a).getMember("connect").getAValueReachableFromSource()
}
/**
* Provides models for database connections (following PEP 249).
*
* See https://www.python.org/dev/peps/pep-0249/#connection-objects.
*/
module Connection {
/**
* A source of database connections (following PEP 249), extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by external
* libraries.
*
* Use the predicate `Connection::instance()` to get references to database connections (following PEP 249).
*
* Extend this class if the module implementing PEP 249 offers more direct ways to obtain
* a connection than going through `connect`.
*/
abstract class InstanceSource extends DataFlow::Node { }
/** A call to the `connect` function of a module that implements PEP 249. */
private class ConnectCall extends InstanceSource, DataFlow::CallCfgNode {
ConnectCall() { this.getFunction() = connect() }
}
/** Gets a reference to a database connection (following PEP 249). */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to a database connection (following PEP 249). */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for database cursors (following PEP 249).
*
* These are returned by the `cursor` method on a database connection.
* See https://www.python.org/dev/peps/pep-0249/#cursor.
*/
module Cursor {
/**
* A source of database cursors (following PEP 249), extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by external
* libraries.
*
* Use the predicate `Cursor::instance()` to get references to database cursors (following PEP 249).
*
* Extend this class if the module implementing PEP 249 offers more direct ways to obtain
* a connection than going through `connect`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to a database cursor. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to a database cursor. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `cursor` method on a database connection. */
private DataFlow::TypeTrackingNode methodRef(DataFlow::TypeTracker t) {
t.startInAttr("cursor") and
result = Connection::instance()
or
exists(DataFlow::TypeTracker t2 | result = methodRef(t2).track(t2, t))
}
/** Gets a reference to the `cursor` method on a database connection. */
DataFlow::Node methodRef() { methodRef(DataFlow::TypeTracker::end()).flowsTo(result) }
/** A call to the `cursor` method on a database connection */
private class CursorCall extends InstanceSource, DataFlow::CallCfgNode {
CursorCall() { this.getFunction() = methodRef() }
}
/** Gets a reference to a result of calling the `cursor` method on a database connection. */
private DataFlow::TypeTrackingNode methodResult(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = methodRef().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = methodResult(t2).track(t2, t))
}
/**
* DEPRECATED: Use `Cursor::instance()` to get references to database cursors instead.
*
* Gets a reference to a result of calling the `cursor` method on a database connection.
*/
deprecated DataFlow::Node methodResult() {
methodResult(DataFlow::TypeTracker::end()).flowsTo(result)
}
}
/**
* Gets a reference to the `execute` method on a cursor (or on a connection).
*
* Note: while `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*
* See https://peps.python.org/pep-0249/#execute.
*/
private DataFlow::TypeTrackingNode execute(DataFlow::TypeTracker t) {
t.startInAttr("execute") and
result in [Cursor::instance(), Connection::instance()]
or
exists(DataFlow::TypeTracker t2 | result = execute(t2).track(t2, t))
}
/**
* Gets a reference to the `execute` method on a cursor (or on a connection).
*
* Note: while `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*
* See https://peps.python.org/pep-0249/#execute.
*/
DataFlow::Node execute() { execute(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `execute` method on a cursor or a connection.
*
* See https://peps.python.org/pep-0249/#execute
*
* Note: While `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*/
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecuteCall() { this.getFunction() = execute() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
}
private DataFlow::TypeTrackingNode executemany(DataFlow::TypeTracker t) {
t.startInAttr("executemany") and
result in [Cursor::instance(), Connection::instance()]
or
exists(DataFlow::TypeTracker t2 | result = executemany(t2).track(t2, t))
}
private DataFlow::Node executemany() { executemany(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `executemany` method on a cursor or a connection.
*
* See https://peps.python.org/pep-0249/#executemany
*
* Note: While `executemany` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `executemany` on it.
*/
private class ExecutemanyCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecutemanyCall() { this.getFunction() = executemany() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
}
}

View File

@@ -2,155 +2,7 @@
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import regexp.RegexpMatching
/**
* Holds if the regexp `root` should be tested against `str`.
* Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`.
* `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored.
* `testWithGroups` toggles whether it's tested which groups are filled by a given input string.
*/
private predicate isBadTagFilterCandidate(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
) {
// the regexp must mention "<" and ">" explicitly.
forall(string angleBracket | angleBracket = ["<", ">"] |
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
root
) and
ignorePrefix = true and
(
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
testWithGroups = true
or
str =
[
"<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
"<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>",
"<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>",
"<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
"<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
"<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
] and
testWithGroups = false
)
}
/**
* A regexp that matches some string from the `isBadTagFilterCandidate` predicate.
*/
class HtmlMatchingRegExp extends RootTerm {
HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) }
/** Holds if this regexp matched `str`, where `str` is one of the string from `isBadTagFilterCandidate`. */
predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) }
/** Holds if this regexp fills capture group `g' when matching `str', where `str` is one of the string from `isBadTagFilterCandidate`. */
predicate fillsCaptureGroup(string str, int g) {
RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g)
}
}
/** DEPRECATED: Alias for HtmlMatchingRegExp */
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
*
* When adding a new case to this predicate, make sure the test string used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HtmlMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and
exists(int a, int b | a != b |
regexp.fillsCaptureGroup("<!-- foo -->", a) and
// <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
regexp.fillsCaptureGroup("<!-- foo --!>", b) and
not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
msg =
"Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+ a + " and comments ending with --!> are matched with capture group " +
strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
"."
)
or
// CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
exists(int group, int other |
group != other and
regexp.fillsCaptureGroup("<!-- foo -->", group) and
regexp.fillsCaptureGroup("<foo>", other) and
not regexp.matches("<!-- foo --!>") and
not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
not regexp.fillsCaptureGroup("<!- foo ->", group) and
not regexp.fillsCaptureGroup("<foo>", group) and
not regexp.fillsCaptureGroup("<script>", group) and
msg =
"This regular expression only parses --> (capture group " + group +
") and not --!> as an HTML comment end tag."
)
or
regexp.matches("<!-- foo -->") and
not regexp.matches("<!-- foo\n -->") and
not regexp.matches("<!- foo ->") and
not regexp.matches("<foo>") and
not regexp.matches("<script>") and
msg = "This regular expression does not match comments containing newlines."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script \n>foo</script>") and
msg = "This regular expression matches <script></script>, but not <script \\n></script>"
or
not regexp.matches("<script >foo\n</script>") and
msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
)
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<script src='foo'></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses single-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses double-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script\tsrc='foo'></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo src=\"foo\"></foo>") and
msg = "This regular expression does not match script tags where tabs are used between attributes."
or
regexp.matches("<script>foo</script>") and
not RegExpFlags::isIgnoreCase(regexp) and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match upper case <SCRIPT> tags."
or
not regexp.matches("<sCrIpT>foo</ScRiPt>") and
regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match mixed case <sCrIpT> tags."
)
or
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script src=\"foo\">foo</script >") and
msg = "This regular expression does not match script end tags like </script >."
or
not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
or
not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
)
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// BadTagFilterQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView> as Dep
import Dep

View File

@@ -2,288 +2,7 @@
* Classes and predicates for working with suspicious character ranges.
*/
// We don't need the NFA utils, just the regexp tree.
// but the below is a nice shared library that exposes the API we need.
import regexp.NfaUtils
/**
* Gets a rank for `range` that is unique for ranges in the same file.
* Prioritizes ranges that match more characters.
*/
int rankRange(RegExpCharacterRange range) {
range =
rank[result](RegExpCharacterRange r, Location l, int low, int high |
r.getLocation() = l and
isRange(r, low, high)
|
r order by (high - low) desc, l.getStartLine(), l.getStartColumn()
)
}
/** Holds if `range` spans from the unicode code points `low` to `high` (both inclusive). */
predicate isRange(RegExpCharacterRange range, int low, int high) {
exists(string lowc, string highc |
range.isRange(lowc, highc) and
low.toUnicode() = lowc and
high.toUnicode() = highc
)
}
/** Holds if `char` is an alpha-numeric character. */
predicate isAlphanumeric(string char) {
// written like this to avoid having a bindingset for the predicate
char = [[48 .. 57], [65 .. 90], [97 .. 122]].toUnicode() // 0-9, A-Z, a-z
}
/**
* Holds if the given ranges are from the same character class
* and there exists at least one character matched by both ranges.
*/
predicate overlap(RegExpCharacterRange a, RegExpCharacterRange b) {
exists(RegExpCharacterClass clz |
a = clz.getAChild() and
b = clz.getAChild() and
a != b
|
exists(int alow, int ahigh, int blow, int bhigh |
isRange(a, alow, ahigh) and
isRange(b, blow, bhigh) and
alow <= bhigh and
blow <= ahigh
)
)
}
/**
* Holds if `range` overlaps with the char class `escape` from the same character class.
*/
predicate overlapsWithCharEscape(RegExpCharacterRange range, RegExpCharacterClassEscape escape) {
exists(RegExpCharacterClass clz, string low, string high |
range = clz.getAChild() and
escape = clz.getAChild() and
range.isRange(low, high)
|
escape.getValue() = "w" and
getInRange(low, high).regexpMatch("\\w")
or
escape.getValue() = "d" and
getInRange(low, high).regexpMatch("\\d")
or
escape.getValue() = "s" and
getInRange(low, high).regexpMatch("\\s")
)
}
/** Gets the unicode code point for a `char`. */
bindingset[char]
int toCodePoint(string char) { result.toUnicode() = char }
/** A character range that appears to be overly wide. */
class OverlyWideRange extends RegExpCharacterRange {
OverlyWideRange() {
exists(int low, int high, int numChars |
isRange(this, low, high) and
numChars = (1 + high - low) and
this.getRootTerm().isUsedAsRegExp() and
numChars >= 10
|
// across the Z-a range (which includes backticks)
toCodePoint("Z") >= low and
toCodePoint("a") <= high
or
// across the 9-A range (which includes e.g. ; and ?)
toCodePoint("9") >= low and
toCodePoint("A") <= high
or
// a non-alphanumeric char as part of the range boundaries
exists(int bound | bound = [low, high] | not isAlphanumeric(bound.toUnicode())) and
// while still being ascii
low < 128 and
high < 128
) and
// allowlist for known ranges
not this = allowedWideRanges()
}
/** Gets a string representation of a character class that matches the same chars as this range. */
string printEquivalent() { result = RangePrinter::printEquivalentCharClass(this) }
}
/** Gets a range that should not be reported as an overly wide range. */
RegExpCharacterRange allowedWideRanges() {
// ~ is the last printable ASCII character, it's used right in various wide ranges.
result.isRange(_, "~")
or
// the same with " " and "!". " " is the first printable character, and "!" is the first non-white-space printable character.
result.isRange([" ", "!"], _)
or
// the `[@-_]` range is intentional
result.isRange("@", "_")
or
// starting from the zero byte is a good indication that it's purposely matching a large range.
result.isRange(0.toUnicode(), _)
}
/** Gets a char between (and including) `low` and `high`. */
bindingset[low, high]
private string getInRange(string low, string high) {
result = [toCodePoint(low) .. toCodePoint(high)].toUnicode()
}
/** A module computing an equivalent character class for an overly wide range. */
module RangePrinter {
bindingset[char]
bindingset[result]
private string next(string char) {
exists(int prev, int next |
prev.toUnicode() = char and
next.toUnicode() = result and
next = prev + 1
)
}
/** Gets the points where the parts of the pretty printed range should be cut off. */
private string cutoffs() { result = ["A", "Z", "a", "z", "0", "9"] }
/** Gets the char to use in the low end of a range for a given `cut` */
private string lowCut(string cut) {
cut = ["A", "a", "0"] and
result = cut
or
cut = ["Z", "z", "9"] and
result = next(cut)
}
/** Gets the char to use in the high end of a range for a given `cut` */
private string highCut(string cut) {
cut = ["Z", "z", "9"] and
result = cut
or
cut = ["A", "a", "0"] and
next(result) = cut
}
/** Gets the cutoff char used for a given `part` of a range when pretty-printing it. */
private string cutoff(OverlyWideRange range, int part) {
exists(int low, int high | isRange(range, low, high) |
result =
rank[part + 1](string cut |
cut = cutoffs() and low < toCodePoint(cut) and toCodePoint(cut) < high
|
cut order by toCodePoint(cut)
)
)
}
/** Gets the number of parts we should print for a given `range`. */
private int parts(OverlyWideRange range) { result = 1 + count(cutoff(range, _)) }
/** Holds if the given part of a range should span from `low` to `high`. */
private predicate part(OverlyWideRange range, int part, string low, string high) {
// first part.
part = 0 and
(
range.isRange(low, high) and
parts(range) = 1
or
parts(range) >= 2 and
range.isRange(low, _) and
high = highCut(cutoff(range, part))
)
or
// middle
part >= 1 and
part < parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
high = highCut(cutoff(range, part))
or
// last.
part = parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
range.isRange(_, high)
}
/** Gets an escaped `char` for use in a character class. */
bindingset[char]
private string escape(string char) {
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
if char.regexpMatch(reg) then result = "\\" + char else result = char
)
}
/** Gets a part of the equivalent range. */
private string printEquivalentCharClass(OverlyWideRange range, int part) {
exists(string low, string high | part(range, part, low, high) |
if
isAlphanumeric(low) and
isAlphanumeric(high)
then result = low + "-" + high
else
result =
strictconcat(string char | char = getInRange(low, high) | escape(char) order by char)
)
}
/** Gets the entire pretty printed equivalent range. */
string printEquivalentCharClass(OverlyWideRange range) {
result =
strictconcat(string r, int part |
r = "[" and part = -1 and exists(range)
or
r = printEquivalentCharClass(range, part)
or
r = "]" and part = parts(range)
|
r order by part
)
}
}
/** Gets a char range that is overly large because of `reason`. */
RegExpCharacterRange getABadRange(string reason, int priority) {
result instanceof OverlyWideRange and
priority = 0 and
exists(string equiv | equiv = result.(OverlyWideRange).printEquivalent() |
if equiv.length() <= 50
then reason = "is equivalent to " + equiv
else reason = "is equivalent to " + equiv.substring(0, 50) + "..."
)
or
priority = 1 and
exists(RegExpCharacterRange other |
reason = "overlaps with " + other + " in the same character class" and
rankRange(result) < rankRange(other) and
overlap(result, other)
)
or
priority = 2 and
exists(RegExpCharacterClassEscape escape |
reason = "overlaps with " + escape + " in the same character class" and
overlapsWithCharEscape(result, escape)
)
or
reason = "is empty" and
priority = 3 and
exists(int low, int high |
isRange(result, low, high) and
low > high
)
}
/** Holds if `range` matches suspiciously many characters. */
predicate problem(RegExpCharacterRange range, string reason) {
reason =
strictconcat(string m, int priority |
range = getABadRange(m, priority)
|
m, ", and " order by priority desc
) and
// specifying a range using an escape is usually OK.
not range.getAChild() instanceof RegExpEscape and
// Unicode escapes in strings are interpreted before it turns into a regexp,
// so e.g. [\u0001-\uFFFF] will just turn up as a range between two constants.
// We therefore exclude these ranges.
range.getRootTerm().getParent() instanceof RegExpLiteral and
// is used as regexp (mostly for JS where regular expressions are parsed eagerly)
range.getRootTerm().isUsedAsRegExp()
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// OverlyLargeRangeQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.OverlyLargeRangeQuery::Make<TreeView> as Dep
import Dep

View File

@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
/**
@@ -20,6 +20,9 @@ private import semmle.python.ApiGraphs
* vulnerabilities, as well as extension points for adding your own.
*/
module PolynomialReDoS {
private import TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
/**
* A data flow source for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
*/

View File

@@ -103,7 +103,7 @@ module HeuristicNames {
*/
string notSensitiveRegexp() {
result =
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}
/**

View File

@@ -62,284 +62,7 @@
* a suffix `x` (possible empty) that is most likely __not__ accepted.
*/
import NfaUtils
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
}
/**
* A infinitely repeating quantifier that might backtrack.
*/
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
MaybeBacktrackingRepetition() {
exists(RegExpTerm child |
child instanceof RegExpAlt or
child instanceof RegExpQuantifier
|
child.getParent+() = this
)
}
}
/**
* A state in the product automaton.
*/
private newtype TStatePair =
/**
* We lazily only construct those states that we are actually
* going to need: `(q, q)` for every fork state `q`, and any
* pair of states that can be reached from a pair that we have
* already constructed. To cut down on the number of states,
* we only represent states `(q1, q2)` where `q1` is lexicographically
* no bigger than `q2`.
*
* States are only constructed if both states in the pair are
* inside a repetition that might backtrack.
*/
MkStatePair(State q1, State q2) {
isFork(q1, _, _, _, _) and q2 = q1
or
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
rankState(q1) <= rankState(q2)
}
/**
* Gets a unique number for a `state`.
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
*/
private int rankState(State state) {
state =
rank[result](State s, Location l |
stateInsideBacktracking(s) and
l = s.getRepr().getLocation()
|
s order by l.getStartLine(), l.getStartColumn(), s.toString()
)
}
/**
* A state in the product automaton.
*/
private class StatePair extends TStatePair {
State q1;
State q2;
StatePair() { this = MkStatePair(q1, q2) }
/** Gets a textual representation of this element. */
string toString() { result = "(" + q1 + ", " + q2 + ")" }
/** Gets the first component of the state pair. */
State getLeft() { result = q1 }
/** Gets the second component of the state pair. */
State getRight() { result = q2 }
}
/**
* Holds for `(fork, fork)` state pairs when `isFork(fork, _, _, _, _)` holds.
*
* Used in `statePairDistToFork`
*/
private predicate isStatePairFork(StatePair p) {
exists(State fork | p = MkStatePair(fork, fork) and isFork(fork, _, _, _, _))
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r`.
*
* Used in `statePairDistToFork`
*/
private predicate reverseStep(StatePair r, StatePair q) { step(q, _, _, r) }
/**
* Gets the minimum length of a path from `q` to `r` in the
* product automaton.
*/
private int statePairDistToFork(StatePair q, StatePair r) =
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
/**
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
* trivially have an empty intersection.
*
* This predicate only holds for states associated with regular expressions
* that have at least one repetition quantifier in them (otherwise the
* expression cannot be vulnerable to ReDoS attacks anyway).
*/
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
s1 != s2
or
r1 != r2
or
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that the there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
// The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
* one or the other is defined.
*/
private StatePair mkStatePair(State q1, State q2) {
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1` and `s2`, respectively.
*/
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
}
/**
* Holds if there are transitions from the components of `q` to `r1` and `r2`
* labelled with `s1` and `s2`, respectively.
*
* We only consider transitions where the resulting states `(r1, r2)` are both
* inside a repetition that might backtrack.
*/
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, TTrace t) { isReachableFromFork(_, _, s1, s2, t, _) }
/**
* A list of pairs of input symbols that describe a path in the product automaton
* starting from some fork state.
*/
private class Trace extends TTrace {
/** Gets a textual representation of this element. */
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
)
}
}
/**
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
* a path from `r` back to `(fork, fork)` with `rem` steps.
*/
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
exists(InputSymbol s1, InputSymbol s2, Trace v |
isReachableFromFork(fork, r, s1, s2, v, rem) and
w = Step(s1, s2, v)
)
}
private predicate isReachableFromFork(
State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
) {
// base case
exists(State q1, State q2 |
isFork(fork, s1, s2, q1, q2) and
r = MkStatePair(q1, q2) and
v = Nil() and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
or
// recursive case
exists(StatePair p |
isReachableFromFork(fork, p, v, rem + 1) and
step(p, s1, s2, r) and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
}
/**
* Gets a state in the product automaton from which `(fork, fork)` is
* reachable in zero or more epsilon transitions.
*/
private StatePair getAForkPair(State fork) {
isFork(fork, _, _, _, _) and
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, result) }
/** Holds if `n` is a trace that is used by `concretize` in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State f | isReachableFromFork(f, getAForkPair(f), n, _))
}
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2 | t = Step(s1, s2, _) | result = intersect(s1, s2))
}
}
/**
* Holds if `fork` is a pumpable fork with word `w`.
*/
private predicate isPumpable(State fork, string w) {
exists(StatePair q, Trace t |
isReachableFromFork(fork, q, t, _) and
q = getAForkPair(fork) and
w = Concretizer<CharTreeImpl>::concretize(t)
)
}
/** Holds if `state` has exponential ReDoS */
predicate hasReDoSResult = ReDoSPruning<isPumpable/2>::hasReDoSResult/4;
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// ExponentialBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView> as Dep
import Dep

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +0,0 @@
/**
* Provides Python-specific definitions for use in the NfaUtils module.
*/
import python
import semmle.python.RegexTreeView
/**
* Holds if `term` is an escape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if `term` is a possessive quantifier.
* As python's regexes do not support possessive quantifiers, this never holds, but is used by the shared library.
*/
predicate isPossessive(RegExpQuantifier term) { none() }
/**
* Holds if the regex that `term` is part of is used in a way that ignores any leading prefix of the input it's matched against.
* Not yet implemented for Python.
*/
predicate matchesAnyPrefix(RegExpTerm term) { any() }
/**
* Holds if the regex that `term` is part of is used in a way that ignores any trailing suffix of the input it's matched against.
* Not yet implemented for Python.
*/
predicate matchesAnySuffix(RegExpTerm term) { any() }
/**
* Holds if the regular expression should not be considered.
*
* We make the pragmatic performance optimization to ignore regular expressions in files
* that does not belong to the project code (such as installed dependencies).
*/
predicate isExcluded(RegExpParent parent) {
not exists(parent.getRegex().getLocation().getFile().getRelativePath())
or
// Regexes with many occurrences of ".*" may cause the polynomial ReDoS computation to explode, so
// we explicitly exclude these.
count(int i | exists(parent.getRegex().getText().regexpFind("\\.\\*", i, _)) | i) > 10
}
/**
* A module containing predicates for determining which flags a regular expression have.
*/
module RegExpFlags {
/**
* Holds if `root` has the `i` flag for case-insensitive matching.
*/
predicate isIgnoreCase(RegExpTerm root) {
root.isRootTerm() and
root.getLiteral().isIgnoreCase()
}
/**
* Gets the flags for `root`, or the empty string if `root` has no flags.
*/
string getFlags(RegExpTerm root) {
root.isRootTerm() and
result = root.getLiteral().getFlags()
}
/**
* Holds if `root` has the `s` flag for multi-line matching.
*/
predicate isDotAll(RegExpTerm root) {
root.isRootTerm() and
root.getLiteral().isDotAll()
}
}

View File

@@ -3,155 +3,7 @@
* and for testing which capture groups are filled when a particular regexp matches a string.
*/
import NfaUtils
/** A root term */
class RootTerm extends RegExpTerm {
RootTerm() { this.isRootTerm() }
}
/**
* Holds if it should be tested whether `root` matches `str`.
*
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
* but if `ignorePrefix` is true, it will only match "foo".
*
* If `testWithGroups` is true, then the `RegexpMatching::fillsCaptureGroup` predicate can be used to determine which capture
* groups are filled by a string.
*/
signature predicate isRegexpMatchingCandidateSig(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
);
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*
* The module parameter `isCandidate` determines which strings should be tested,
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
*/
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-determistic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
isCandidate(reg, str, ignorePrefix, _) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
pragma[noopt]
private State getAStateAfterMatchingAux(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
*/
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
// base case, reaches an accepting state from the last state in `getAState(..)`
isCandidate(reg, str, ignorePrefix, true) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/** Gets the capture group number that `term` belongs to. */
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
/**
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
*/
predicate matches(RootTerm reg, string str) {
exists(State state | state = getAState(reg, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
* Holds if matching `str` against `reg` may fill capture group number `g`.
* Only holds if `str` is in the `testWithGroups` predicate.
*/
predicate fillsCaptureGroup(RootTerm reg, string str, int g) {
exists(State s |
s = getAStateThatReachesAccept(reg, _, str, _) and
g = group(s.getRepr())
)
}
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// RegexpMatching should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.RegexpMatching::Make<TreeView> as Dep
import Dep

View File

@@ -1,11 +1,4 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
import NfaUtils
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
@@ -42,377 +35,7 @@ import NfaUtils
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
/**
* Gets any root (start) state of a regular expression.
*/
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
or
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
}
/**
* A state in the product automaton.
* The product automaton contains 3-tuples of states.
*
* We lazily only construct those states that we are actually
* going to need.
* Either a start state `(pivot, pivot, succ)`, or a state
* where there exists a transition from an already existing state.
*
* The exponential variant of this query (`js/redos`) uses an optimization
* trick where `q1 <= q2`. This trick cannot be used here as the order
* of the elements matter.
*/
class StateTuple extends TStateTuple {
State q1;
State q2;
State q3;
StateTuple() { this = MkStateTuple(q1, q2, q3) }
/**
* Gest a string representation of this tuple.
*/
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
/**
* Holds if this tuple is `(r1, r2, r3)`.
*/
pragma[noinline]
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
}
/**
* A module for determining feasible tuples for the product automaton.
*
* The implementation is split into many predicates for performance reasons.
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
}
/**
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
*/
pragma[noinline]
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
}
/**
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
*
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
*/
predicate isStartLoops(State pivot, State succ) {
pivot != succ and
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = succ and
(
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
or
pivot = mkMatch(any(RegExpRoot root))
)
}
/**
* Gets a state for which there exists a transition in the NFA from `s'.
*/
State delta(State s) { delta(s, _, result) }
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
exists(State r1, State r2, State r3 |
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
)
}
/**
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3
* labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noopt]
predicate step(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
deltaClosed(q3, s3, r3) and
// use noopt to force the join on `getAThreewayIntersect` to happen last.
exists(getAThreewayIntersect(s1, s2, s3))
)
}
/**
* Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
*
* The result is not complete, and might miss some combination of edges that share some character.
*/
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)]
or
result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)]
or
result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)]
}
/**
* Gets the minimum and maximum characters that intersect between `a` and `b`.
* This predicate is used to limit the size of `getAThreewayIntersect`.
*/
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
result = [min(intersect(a, b)), max(intersect(a, b))]
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
isReachableFromStartTuple(_, _, t, s1, s2, s3, _, _)
}
/**
* A list of tuples of input symbols that describe a path in the product automaton
* starting from some start state.
*/
class Trace extends TTrace {
/**
* Gets a string representation of this Trace that can be used for debug purposes.
*/
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
)
}
}
/**
* Holds if there exists a transition from `r` to `q` in the product automaton.
* Notice that the arguments are flipped, and thus the direction is backwards.
*/
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
/**
* Holds if `tuple` is an end state in our search.
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
*/
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
/**
* Gets the minimum length of a path from `r` to some an end state `end`.
*
* The implementation searches backwards from the end-tuple.
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
* The `end` argument must always be an end state.
*/
int distBackFromEnd(StateTuple r, StateTuple end) =
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
* and a path from a start-state to `tuple` follows the transitions in `trace`.
*/
private predicate isReachableFromStartTuple(
State pivot, State succ, StateTuple tuple, Trace trace, int dist
) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace v |
isReachableFromStartTuple(pivot, succ, v, s1, s2, s3, tuple, dist) and
trace = Step(s1, s2, s3, v)
)
}
private predicate isReachableFromStartTuple(
State pivot, State succ, Trace trace, InputSymbol s1, InputSymbol s2, InputSymbol s3,
StateTuple tuple, int dist
) {
// base case.
exists(State q1, State q2, State q3 |
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(q1, q2, q3) and
trace = Nil() and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
)
or
// recursive case
exists(StateTuple p |
isReachableFromStartTuple(pivot, succ, p, trace, dist + 1) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) and
step(p, s1, s2, s3, tuple)
)
}
/**
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
*/
StateTuple getAnEndTuple(State pivot, State succ) {
isStartLoops(pivot, succ) and
result = MkStateTuple(pivot, succ, succ)
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, _, result) }
/** Holds if `n` is used in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State pivot, State succ |
isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), n, _)
)
}
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | t = Step(s1, s2, s3, _) |
result = getAThreewayIntersect(s1, s2, s3)
)
}
}
/**
* Holds if matching repetitions of `pump` can:
* 1) Transition from `pivot` back to `pivot`.
* 2) Transition from `pivot` to `succ`.
* 3) Transition from `succ` to `succ`.
*
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
*
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
* available in the `hasReDoSResult` predicate.
*/
predicate isPumpable(State pivot, State succ, string pump) {
exists(StateTuple q, Trace t |
isReachableFromStartTuple(pivot, succ, q, t, _) and
q = getAnEndTuple(pivot, succ) and
pump = Concretizer<CharTreeImpl>::concretize(t)
)
}
/**
* Holds if states starting in `state` can have polynomial backtracking with the string `pump`.
*/
predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
/**
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
*/
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot |
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and
prev = pivot.getRepr()
)
}
/**
* Gets a message for why `term` can cause polynomial backtracking.
*/
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
polynomialReDoS(term, pump, prefixMsg, prev) and
result =
"Strings " + prefixMsg + "with many repetitions of '" + pump +
"' can start matching anywhere after the start of the preceeding " + prev
}
/**
* A term that may cause a regular expression engine to perform a
* polynomial number of match attempts, relative to the input length.
*/
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
string reason;
string pump;
string prefixMsg;
RegExpTerm prev;
PolynomialBackTrackingTerm() {
reason = getReasonString(this, pump, prefixMsg, prev) and
// there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
}
/**
* Holds if all non-empty successors to the polynomial backtracking term matches the end of the line.
*/
predicate isAtEndLine() {
forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
succ instanceof RegExpDollar
)
}
/**
* Gets the string that should be repeated to cause this regular expression to perform polynomially.
*/
string getPumpString() { result = pump }
/**
* Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking.
*/
string getPrefixMessage() { result = prefixMsg }
/**
* Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
*/
RegExpTerm getPreviousLoop() { result = prev }
/**
* Gets the reason for the number of match attempts.
*/
string getReason() { result = reason }
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// SuperlinearBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView> as Dep
import Dep

View File

@@ -1,3 +1,11 @@
## 0.5.4
No user-facing changes.
## 0.5.3
No user-facing changes.
## 0.5.2
### Minor Analysis Improvements

View File

@@ -12,8 +12,9 @@
* external/cwe/cwe-020
*/
import semmle.python.security.OverlyLargeRangeQuery
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
from RegExpCharacterRange range, string reason
from TreeView::RegExpCharacterRange range, string reason
where problem(range, reason)
select range, "Suspicious character range that " + reason + "."

View File

@@ -14,7 +14,8 @@
* external/cwe/cwe-186
*/
import semmle.python.security.BadTagFilterQuery
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
from HtmlMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one

View File

@@ -14,7 +14,6 @@
*/
import python
import semmle.python.security.regexp.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoSQuery
import DataFlow::PathGraph

View File

@@ -14,10 +14,10 @@
* external/cwe/cwe-400
*/
import python
import semmle.python.security.regexp.ExponentialBackTracking
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
from RegExpTerm t, string pump, State s, string prefixMsg
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg
where
hasReDoSResult(t, pump, s, prefixMsg) and
// exclude verbose mode regexes for now

View File

@@ -0,0 +1,3 @@
## 0.5.3
No user-facing changes.

View File

@@ -0,0 +1,3 @@
## 0.5.4
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.5.2
lastReleaseVersion: 0.5.4

View File

@@ -1,11 +1,11 @@
name: codeql/python-queries
version: 0.5.3-dev
version: 0.5.5-dev
groups:
- python
- queries
dependencies:
codeql/python-all: "*"
codeql/suite-helpers: "*"
codeql/python-all: ${workspace}
codeql/suite-helpers: ${workspace}
suites: codeql-suites
extractor: python
defaultSuiteFile: codeql-suites/python-code-scanning.qls

View File

@@ -137,6 +137,7 @@ abstract class InlineExpectationsTest extends string {
final predicate hasFailureMessage(FailureLocatable element, string message) {
exists(ActualResult actualResult |
actualResult.getTest() = this and
actualResult.getTag() = this.getARelevantTag() and
element = actualResult and
(
exists(FalseNegativeExpectation falseNegative |
@@ -150,9 +151,18 @@ abstract class InlineExpectationsTest extends string {
)
)
or
exists(ActualResult actualResult |
actualResult.getTest() = this and
not actualResult.getTag() = this.getARelevantTag() and
element = actualResult and
message =
"Tag mismatch: Actual result with tag '" + actualResult.getTag() +
"' that is not part of getARelevantTag()"
)
or
exists(ValidExpectation expectation |
not exists(ActualResult actualResult | expectation.matchesActualResult(actualResult)) and
expectation.getTag() = getARelevantTag() and
expectation.getTag() = this.getARelevantTag() and
element = expectation and
(
expectation instanceof GoodExpectation and

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,8 +6,8 @@ test_direct_import()
def test_alias_problem():
from .alias_problem import foo # $ MISSING: tracked
print(foo) # $ MISSING: tracked
from .alias_problem import foo # $ tracked
print(foo) # $ tracked
test_alias_problem()
@@ -34,8 +34,8 @@ test_alias_only_direct()
def test_problem_absolute_import():
from pkg.problem_absolute_import import foo # $ MISSING: tracked
print(foo) # $ MISSING: tracked
from pkg.problem_absolute_import import foo # $ tracked
print(foo) # $ tracked
test_problem_absolute_import()

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -0,0 +1,6 @@
from trace import *
enter(__file__)
clashing_attr = "clashing_attr"
exit(__file__)

View File

@@ -0,0 +1,4 @@
from trace import *
enter(__file__)
exit(__file__)

View File

@@ -0,0 +1,4 @@
from trace import *
enter(__file__)
exit(__file__)

View File

@@ -0,0 +1,6 @@
from trace import *
enter(__file__)
bar_attr = "bar_attr"
exit(__file__)

View File

@@ -0,0 +1,14 @@
from trace import *
enter(__file__)
# A simple attribute. Used in main.py
foo_attr = "foo_attr"
# A private attribute. Accessible from main.py despite this.
__private_foo_attr = "__private_foo_attr"
# A reexport of bar under a new name. Used in main.py
import bar as bar_reexported #$ imports=bar as=bar_reexported
check("bar_reexported.bar_attr", bar_reexported.bar_attr, "bar_attr", globals()) #$ prints=bar_attr
exit(__file__)

View File

@@ -0,0 +1,122 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
import TestUtilities.InlineExpectationsTest
import semmle.python.dataflow.new.internal.ImportResolution
/** A string that appears on the right hand side of an assignment. */
private class SourceString extends DataFlow::Node {
string contents;
SourceString() {
this.asExpr().(StrConst).getText() = contents and
this.asExpr().getParent() instanceof Assign
}
string getContents() { result = contents }
}
/** An argument that is checked using the `check` function. */
private class CheckArgument extends DataFlow::Node {
CheckArgument() { this = API::moduleImport("trace").getMember("check").getACall().getArg(1) }
}
/** A data-flow node that is a reference to a module. */
private class ModuleRef extends DataFlow::Node {
Module mod;
ModuleRef() {
this = ImportResolution::getModuleReference(mod) and
not mod.getName() in ["__future__", "trace"]
}
string getName() { result = mod.getName() }
}
/**
* A data-flow node that is guarded by a version check. Only supports checks of the form `if
*sys.version_info[0] == ...` where the right hand side is either `2` or `3`.
*/
private class VersionGuardedNode extends DataFlow::Node {
int version;
VersionGuardedNode() {
version in [2, 3] and
exists(If parent, CompareNode c | parent.getBody().contains(this.asExpr()) |
c.operands(API::moduleImport("sys")
.getMember("version_info")
.getASubscript()
.asSource()
.asCfgNode(), any(Eq eq),
any(IntegerLiteral lit | lit.getValue() = version).getAFlowNode())
)
}
int getVersion() { result = version }
}
private class ImportConfiguration extends DataFlow::Configuration {
ImportConfiguration() { this = "ImportConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof SourceString }
override predicate isSink(DataFlow::Node sink) {
sink = API::moduleImport("trace").getMember("check").getACall().getArg(1)
}
}
class ResolutionTest extends InlineExpectationsTest {
ResolutionTest() { this = "ResolutionTest" }
override string getARelevantTag() { result = "prints" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
(
exists(DataFlow::PathNode source, DataFlow::PathNode sink, ImportConfiguration config |
config.hasFlowPath(source, sink) and
not sink.getNode() instanceof VersionGuardedNode and
tag = "prints" and
location = sink.getNode().getLocation() and
value = source.getNode().(SourceString).getContents() and
element = sink.getNode().toString()
)
or
exists(ModuleRef ref |
not ref instanceof VersionGuardedNode and
ref instanceof CheckArgument and
tag = "prints" and
location = ref.getLocation() and
value = "\"<module " + ref.getName() + ">\"" and
element = ref.toString()
)
)
}
}
class ResolutionTest3 extends InlineExpectationsTest {
ResolutionTest3() { this = "ResolutionTest3" }
override string getARelevantTag() { result = "prints3" and major_version() = 3 }
override predicate hasActualResult(Location location, string element, string tag, string value) {
(
exists(DataFlow::PathNode source, DataFlow::PathNode sink, ImportConfiguration config |
config.hasFlowPath(source, sink) and
sink.getNode().(VersionGuardedNode).getVersion() = 3 and
tag = "prints3" and
location = sink.getNode().getLocation() and
value = source.getNode().(SourceString).getContents() and
element = sink.getNode().toString()
)
or
exists(ModuleRef ref |
ref.(VersionGuardedNode).getVersion() = 3 and
ref instanceof CheckArgument and
tag = "prints3" and
location = ref.getLocation() and
value = "\"<module " + ref.getName() + ">\"" and
element = ref.toString()
)
)
}
}

View File

@@ -0,0 +1,49 @@
import python
import TestUtilities.InlineExpectationsTest
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.internal.ImportResolution
private class ImmediateModuleRef extends DataFlow::Node {
Module mod;
string alias;
ImmediateModuleRef() {
this = ImportResolution::getImmediateModuleReference(mod) and
not mod.getName() in ["__future__", "trace"] and
this.asExpr() = any(Alias a | alias = a.getAsname().(Name).getId()).getAsname()
}
Module getModule() { result = mod }
string getAsname() { result = alias }
}
class ImportTest extends InlineExpectationsTest {
ImportTest() { this = "ImportTest" }
override string getARelevantTag() { result = "imports" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(ImmediateModuleRef ref |
tag = "imports" and
location = ref.getLocation() and
value = ref.getModule().getName() and
element = ref.toString()
)
}
}
class AliasTest extends InlineExpectationsTest {
AliasTest() { this = "AliasTest" }
override string getARelevantTag() { result = "as" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(ImmediateModuleRef ref |
tag = "as" and
location = ref.getLocation() and
value = ref.getAsname() and
element = ref.toString()
)
}
}

View File

@@ -0,0 +1,94 @@
#! /usr/bin/env python3
"""
A slightly complicated test setup. I wanted to both make sure I captured
the semantics of Python and also the fact that the kinds of global flow
we expect to see are indeed present.
The code is executable, and prints out both when the execution reaches
certain files, and also what values are assigned to the various
attributes that are referenced throughout the program. These values are
validated in the test as well.
My original version used introspection to avoid referencing attributes
directly (thus enabling better error diagnostics), but unfortunately
that made it so that the model couldn't follow what was going on.
The current setup is a bit clunky (and Python's scoping rules makes it
especially so -- cf. the explicit calls to `globals` and `locals`), but
I think it does the job okay.
"""
from __future__ import print_function
import sys
from trace import *
enter(__file__)
# A simple import. Binds foo to the foo module
import foo #$ imports=foo as=foo
check("foo.foo_attr", foo.foo_attr, "foo_attr", globals()) #$ prints=foo_attr
# Private attributes are still accessible.
check("foo.__private_foo_attr", foo.__private_foo_attr, "__private_foo_attr", globals()) #$ prints=__private_foo_attr
# An aliased import, binding foo to foo_alias
import foo as foo_alias #$ imports=foo as=foo_alias
check("foo_alias.foo_attr", foo_alias.foo_attr, "foo_attr", globals()) #$ prints=foo_attr
# A reference to a reexported module
check("foo.bar_reexported", foo.bar_reexported, "<module bar>", globals()) #$ prints="<module bar>"
check("foo.bar_reexported.bar_attr", foo.bar_reexported.bar_attr, "bar_attr", globals()) #$ prints=bar_attr
# A simple "import from" statement.
from bar import bar_attr
check("bar_attr", bar_attr, "bar_attr", globals()) #$ prints=bar_attr
# Importing an attribute from a subpackage of a package.
from package.subpackage import subpackage_attr
check("subpackage_attr", subpackage_attr, "subpackage_attr", globals()) #$ prints=subpackage_attr
# Importing a package attribute under an alias.
from package import package_attr as package_attr_alias
check("package_attr_alias", package_attr_alias, "package_attr", globals()) #$ prints=package_attr
# Importing a subpackage under an alias.
from package import subpackage as aliased_subpackage #$ imports=package.subpackage.__init__ as=aliased_subpackage
check("aliased_subpackage.subpackage_attr", aliased_subpackage.subpackage_attr, "subpackage_attr", globals()) #$ prints=subpackage_attr
def local_import():
# Same as above, but in a local scope.
import package.subpackage as local_subpackage #$ imports=package.subpackage.__init__ as=local_subpackage
check("local_subpackage.subpackage_attr", local_subpackage.subpackage_attr, "subpackage_attr", locals()) #$ prints=subpackage_attr
local_import()
# Importing a subpacking using `import` and binding it to a name.
import package.subpackage as aliased_subpackage #$ imports=package.subpackage.__init__ as=aliased_subpackage
check("aliased_subpackage.subpackage_attr", aliased_subpackage.subpackage_attr, "subpackage_attr", globals()) #$ prints=subpackage_attr
# Importing without binding instead binds the top level name.
import package.subpackage #$ imports=package.__init__ as=package
check("package.package_attr", package.package_attr, "package_attr", globals()) #$ prints=package_attr
# Deep imports
import package.subpackage.submodule #$ imports=package.__init__ as=package
check("package.subpackage.submodule.submodule_attr", package.subpackage.submodule.submodule_attr, "submodule_attr", globals()) #$ prints=submodule_attr
if sys.version_info[0] == 3:
# Importing from a namespace module.
from namespace_package.namespace_module import namespace_module_attr
check("namespace_module_attr", namespace_module_attr, "namespace_module_attr", globals()) #$ prints3=namespace_module_attr
from attr_clash import clashing_attr, non_clashing_submodule #$ imports=attr_clash.clashing_attr as=clashing_attr imports=attr_clash.non_clashing_submodule as=non_clashing_submodule
check("clashing_attr", clashing_attr, "clashing_attr", globals()) #$ prints=clashing_attr SPURIOUS: prints="<module attr_clash.clashing_attr>"
check("non_clashing_submodule", non_clashing_submodule, "<module attr_clash.non_clashing_submodule>", globals()) #$ prints="<module attr_clash.non_clashing_submodule>"
exit(__file__)
print()
if status() == 0:
print("PASS")
else:
print("FAIL")

View File

@@ -0,0 +1,6 @@
from trace import *
enter(__file__)
namespace_module_attr = "namespace_module_attr"
exit(__file__)

View File

@@ -0,0 +1,7 @@
from trace import *
enter(__file__)
attr_used_in_subpackage = "attr_used_in_subpackage"
package_attr = "package_attr"
exit(__file__)

View File

@@ -0,0 +1,14 @@
from trace import *
enter(__file__)
subpackage_attr = "subpackage_attr"
# Importing an attribute from the parent package.
from .. import attr_used_in_subpackage as imported_attr
check("imported_attr", imported_attr, "attr_used_in_subpackage", globals()) #$ prints=attr_used_in_subpackage
# Importing an irrelevant attribute from a sibling module binds the name to the module.
from .submodule import irrelevant_attr
check("submodule.submodule_attr", submodule.submodule_attr, "submodule_attr", globals()) #$ prints=submodule_attr
exit(__file__)

View File

@@ -0,0 +1,7 @@
from trace import *
enter(__file__)
submodule_attr = "submodule_attr"
irrelevant_attr = "irrelevant_attr"
exit(__file__)

View File

@@ -0,0 +1,52 @@
from __future__ import print_function
_indent_level = 0
_print = print
def print(*args, **kwargs):
_print(" " * _indent_level, end="")
_print(*args, **kwargs)
def enter(file_name):
global _indent_level
print("Entering {}".format(file_name))
_indent_level += 1
def exit(file_name):
global _indent_level
_indent_level -= 1
print("Leaving {}".format(file_name))
_status = 0
def status():
return _status
def check(attr_path, actual_value, expected_value, bindings):
global _status
parts = attr_path.split(".")
base, parts = parts[0], parts[1:]
if base not in bindings:
print("Error: {} not in bindings".format(base))
_status = 1
return
val = bindings[base]
for part in parts:
if not hasattr(val, part):
print("Error: Unknown attribute {}".format(part))
_status = 1
return
val = getattr(val, part)
if val != actual_value:
print("Error: Value at path {} and actual value are out of sync! {} != {}".format(attr_path, val, actual_value))
_status = 1
if str(val).startswith("<module"):
val = "<module " + val.__name__ + ">"
if val != expected_value:
print("Error: Expected {} to be {}, got {}".format(attr_path, expected_value, val))
_status = 1
return
print("OK: {} = {}".format(attr_path, val))
__all__ = ["enter", "exit", "check", "status"]

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -0,0 +1,2 @@
| test.py:1:1:1:3 | foo | Tag mismatch: Actual result with tag 'foo' that is not part of getARelevantTag() |
| test.py:4:1:4:3 | foo | Tag mismatch: Actual result with tag 'foo' that is not part of getARelevantTag() |

View File

@@ -0,0 +1,20 @@
// test to illustrate what happens if you forget to put in the
// right values for `getARelevantTag`. We want to alert on this,
// so it gets fixed!
import python
import TestUtilities.InlineExpectationsTest
class MissingRelevantTag extends InlineExpectationsTest {
MissingRelevantTag() { this = "MissingRelevantTag" }
override string getARelevantTag() { none() }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Name name | name.getId() = "foo" |
location = name.getLocation() and
element = name.toString() and
value = "val" and
tag = "foo"
)
}
}

View File

@@ -0,0 +1,7 @@
foo # $ foo=val
# with wrong value
foo # $ foo=bad-value
# there is a typo here, so this result is actually missing!
fooo # $ foo=val

View File

@@ -0,0 +1,41 @@
import python
import semmle.python.essa.SsaCompute
import TestUtilities.InlineExpectationsTest
class UseTest extends InlineExpectationsTest {
UseTest() { this = "UseTest" }
override string getARelevantTag() { result in ["use-use", "def-use", "def"] }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(string name | name in ["x", "y"] |
exists(NameNode nodeTo, Location prevLoc |
(
exists(NameNode nodeFrom | AdjacentUses::adjacentUseUse(nodeFrom, nodeTo) |
prevLoc = nodeFrom.getLocation() and
name = nodeFrom.getId() and
tag = "use-use"
)
or
exists(EssaVariable var | AdjacentUses::firstUse(var, nodeTo) |
prevLoc = var.getLocation() and
name = var.getName() and
tag = "def-use"
)
) and
value = name + ":" + prevLoc.getStartLine() and
location = nodeTo.getLocation() and
element = nodeTo.toString()
)
or
exists(EssaVariable var | AdjacentUses::firstUse(var, _) |
value = var.getName() and
location = var.getLocation() and
element = var.getName() and
name = var.getName() and
tag = "def"
)
)
}
}

View File

@@ -0,0 +1,20 @@
class X(object):
def foo(self, *args):
print("X.foo", args)
def func(cond=True):
x = X() # $ def=x
print(x) # $ def-use=x:7
y = x # $ def=y use-use=x:9
print(y) # $ def-use=y:11
x.foo() # $ use-use=x:11
x.foo(1 if cond else 0) # $ use-use=x:14
x.foo() # $ MISSING: use-use=x:16
print(x) # $ use-use=x:17
func()

View File

@@ -6,6 +6,56 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:83:16:83:36 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:84:16:84:43 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:85:16:85:36 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:45:15:45:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:29:1:29:25 | [orm-model] Class Book | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:76:15:76:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:29:1:29:25 | [orm-model] Class Book | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:76:15:76:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:33:1:33:25 | [orm-model] Class PhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:77:27:77:32 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:33:1:33:25 | [orm-model] Class PhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:78:35:78:40 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:33:1:33:25 | [orm-model] Class PhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:93:15:93:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:29:1:29:25 | [orm-model] Class Book | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:93:15:93:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:38:1:38:18 | [orm-model] Class EBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:94:23:94:28 | ControlFlowNode for Str | testapp/orm_inheritance.py:38:1:38:18 | [orm-model] Class EBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:95:35:95:40 | ControlFlowNode for Str | testapp/orm_inheritance.py:38:1:38:18 | [orm-model] Class EBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:133:15:133:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:117:1:117:33 | [orm-model] Class PolyBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:167:15:167:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:117:1:117:33 | [orm-model] Class PolyBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:167:15:167:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:121:1:121:33 | [orm-model] Class PolyPhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:168:27:168:32 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:121:1:121:33 | [orm-model] Class PolyPhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:169:35:169:40 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:121:1:121:33 | [orm-model] Class PolyPhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:183:15:183:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:117:1:117:33 | [orm-model] Class PolyBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:183:15:183:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:126:1:126:26 | [orm-model] Class PolyEBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:184:23:184:28 | ControlFlowNode for Str | testapp/orm_inheritance.py:126:1:126:26 | [orm-model] Class PolyEBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:185:35:185:40 | ControlFlowNode for Str | testapp/orm_inheritance.py:126:1:126:26 | [orm-model] Class PolyEBook | Store step does not preserve enclosing callable. |
| testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person | testapp/orm_security_tests.py:42:23:42:42 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:115:41:115:46 | ControlFlowNode for SOURCE | testapp/orm_tests.py:110:1:110:30 | [orm-model] Class TestSave5 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:131:86:131:91 | ControlFlowNode for SOURCE | testapp/orm_tests.py:126:1:126:30 | [orm-model] Class TestSave6 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:149:89:149:94 | ControlFlowNode for SOURCE | testapp/orm_tests.py:144:1:144:30 | [orm-model] Class TestSave7 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:161:1:161:30 | [orm-model] Class TestSave8 | testapp/orm_tests.py:168:22:168:44 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:165:35:165:39 | ControlFlowNode for Str | testapp/orm_tests.py:161:1:161:30 | [orm-model] Class TestSave8 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:168:58:168:63 | ControlFlowNode for SOURCE | testapp/orm_tests.py:161:1:161:30 | [orm-model] Class TestSave8 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:184:41:184:45 | ControlFlowNode for Str | testapp/orm_tests.py:177:1:177:30 | [orm-model] Class TestSave9 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:185:49:185:51 | ControlFlowNode for obj | testapp/orm_tests.py:180:1:180:44 | [orm-model] Class TestSave9WithForeignKey | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:212:55:212:59 | ControlFlowNode for Str | testapp/orm_tests.py:206:1:206:35 | [orm-model] Class save10_Comment | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:239:55:239:59 | ControlFlowNode for Str | testapp/orm_tests.py:233:1:233:35 | [orm-model] Class save11_Comment | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:273:1:273:31 | [orm-model] Class TestSave13 | testapp/orm_tests.py:281:12:281:35 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:308:12:308:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:314:12:314:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:320:11:320:32 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:320:11:320:59 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:320:11:320:78 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:324:12:324:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:324:12:324:60 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:324:12:324:79 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:331:12:331:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:337:12:337:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:344:12:344:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:350:12:350:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:356:12:356:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:363:9:363:37 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/tests.py:81:33:81:37 | ControlFlowNode for Str | testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | Store step does not preserve enclosing callable. |
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -1,7 +1,7 @@
name: codeql/python-tests
groups: [python, test]
dependencies:
codeql/python-all: "*"
codeql/python-queries: "*"
codeql/python-all: ${workspace}
codeql/python-queries: ${workspace}
extractor: python
tests: .

View File

@@ -1,2 +1,2 @@
| test.py:8:12:8:23 | Str | test.py:8:21:8:23 | \\s+ | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s+$ |
| test.py:9:14:9:29 | Str | test.py:9:27:9:29 | \\d+ | Strings with many repetitions of '99' can start matching anywhere after the start of the preceeding \\d+ |
| test.py:9:14:9:29 | Str | test.py:9:27:9:29 | \\d+ | Strings starting with '0.9' and with many repetitions of '99' can start matching anywhere after the start of the preceeding \\d+ |

View File

@@ -1,5 +1,6 @@
import python
import semmle.python.security.regexp.SuperlinearBackTracking
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
from PolynomialBackTrackingTerm t
select t.getRegex(), t, t.getReason()
select t.(TreeView::RegExpTerm).getRegex(), t, t.getReason()

View File

@@ -16,4 +16,4 @@ nodes
subpaths
#select
| test.py:8:30:8:33 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings starting with '0.9' and with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -2,20 +2,20 @@
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|\|\\n'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t"' and containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t'' and containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t(' and containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '!\|\\n-\|\\n' and containing many repetitions of '\|\|\\n'. |
| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '#' and containing many repetitions of '#'. |
| redos.py:49:41:49:43 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '""'. |
| redos.py:49:47:49:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with ''' and containing many repetitions of ''''. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '$[' and containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '$.$[' and containing many repetitions of ']['. |
| redos.py:60:25:60:30 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:61:25:61:30 | [a-z]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '0'. |
| redos.py:63:26:63:33 | ([a-z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| redos.py:68:26:68:41 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:68:48:68:50 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '[' and containing many repetitions of ']['. |
@@ -51,7 +51,6 @@
| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:199:28:199:29 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:202:26:202:32 | (a+a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:202:27:202:28 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:205:25:205:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:211:25:211:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
@@ -71,16 +70,16 @@
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""\\t0='. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<0\\t0=' and containing many repetitions of '""\\t0='. |
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with '00000000000000' and containing many repetitions of 'e'. |
| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings starting with 'x' and containing many repetitions of 'Xx'. |
| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:319:28:319:33 | [\\w-]* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '-'. |
| redos.py:322:25:322:29 | (ab)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |