Merge branch 'main' into python-clean-up-import-resolution

This commit is contained in:
Taus
2022-11-17 22:47:50 +00:00
2847 changed files with 159486 additions and 141642 deletions

View File

@@ -1,5 +1,5 @@
name: codeql/python-consistency-queries
groups: [python, test, consistency-queries]
dependencies:
codeql/python-all: "*"
codeql/python-all: ${workspace}
extractor: python

View File

@@ -3,4 +3,4 @@ groups:
- python
- examples
dependencies:
codeql/python-all: "*"
codeql/python-all: ${workspace}

View File

@@ -1,3 +1,16 @@
## 0.6.3
No user-facing changes.
## 0.6.2
### Minor Analysis Improvements
* Fixed labels in the API graph pertaining to definitions of subscripts. Previously, these were found by `getMember` rather than `getASubscript`.
* Added edges for indices of subscripts to the API graph. Now a subscripted API node will have an edge to the API node for the index expression. So if `foo` is matched by API node `A`, then `"key"` in `foo["key"]` will be matched by the API node `A.getIndex()`. This can be used to track the origin of the index.
* Added member predicate `getSubscriptAt(API::Node index)` to `API::Node`. Like `getASubscript()`, this will return an API node that matches a subscript of the node, but here it will be restricted to subscripts where the index matches the `index` parameter.
* Added convenience predicate `getSubscript("key")` to obtain a subscript at a specific index, when the index happens to be a statically known string.
## 0.6.1
### Minor Analysis Improvements

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The ReDoS libraries in `semmle.code.python.security.regexp` has been moved to a shared pack inside the `shared/` folder, and the previous location has been deprecated.

View File

@@ -1,6 +1,7 @@
---
category: minorAnalysis
---
## 0.6.2
### Minor Analysis Improvements
* Fixed labels in the API graph pertaining to definitions of subscripts. Previously, these were found by `getMember` rather than `getASubscript`.
* Added edges for indices of subscripts to the API graph. Now a subscripted API node will have an edge to the API node for the index expression. So if `foo` is matched by API node `A`, then `"key"` in `foo["key"]` will be matched by the API node `A.getIndex()`. This can be used to track the origin of the index.
* Added member predicate `getSubscriptAt(API::Node index)` to `API::Node`. Like `getASubscript()`, this will return an API node that matches a subscript of the node, but here it will be restricted to subscripts where the index matches the `index` parameter.

View File

@@ -0,0 +1,3 @@
## 0.6.3
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.6.1
lastReleaseVersion: 0.6.3

View File

@@ -1,7 +1,9 @@
name: codeql/python-all
version: 0.6.2-dev
version: 0.6.4-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
upgrades: upgrades
dependencies:
codeql/regex: ${workspace}

View File

@@ -311,7 +311,7 @@ module CodeExecution {
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
* If it is important that the SQL statement is indeed executed, then use `SqlExecution`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlConstruction::Range` instead.
@@ -329,7 +329,7 @@ module SqlConstruction {
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
* If it is important that the SQL statement is indeed executed, then use `SqlExecution`.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlConstruction` instead.

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/**
* Module for parsing access paths from CSV models, both the identifying access path used
* Module for parsing access paths from MaD models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -147,6 +147,12 @@ abstract class Configuration extends string {
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
@@ -158,7 +164,7 @@ abstract class Configuration extends string {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
@@ -2629,6 +2635,7 @@ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration
/**
* Gets the number of `AccessPath`s that correspond to `apa`.
*/
pragma[assume_small_delta]
private int countAps(AccessPathApprox apa, Configuration config) {
evalUnfold(apa, false, config) and
result = 1 and
@@ -2647,6 +2654,7 @@ private int countAps(AccessPathApprox apa, Configuration config) {
* that it is expanded to a precise head-tail representation.
*/
language[monotonicAggregates]
pragma[assume_small_delta]
private int countPotentialAps(AccessPathApprox apa, Configuration config) {
apa instanceof AccessPathApproxNil and result = 1
or
@@ -2681,6 +2689,7 @@ private newtype TAccessPath =
}
private newtype TPathNode =
pragma[assume_small_delta]
TPathNodeMid(
NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config
) {
@@ -2709,6 +2718,18 @@ private newtype TPathNode =
state = sink.getState() and
config = sink.getConfiguration()
)
} or
TPathNodeSourceGroup(string sourceGroup, Configuration config) {
exists(PathNodeImpl source |
sourceGroup = source.getSourceGroup() and
config = source.getConfiguration()
)
} or
TPathNodeSinkGroup(string sinkGroup, Configuration config) {
exists(PathNodeSink sink |
sinkGroup = sink.getSinkGroup() and
config = sink.getConfiguration()
)
}
/**
@@ -2778,6 +2799,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
override AccessPathFrontHead getFront() { result = TFrontHead(head) }
pragma[assume_small_delta]
override AccessPathApproxCons getApprox() {
result = TConsNil(head, tail.(AccessPathNil).getType())
or
@@ -2786,6 +2808,7 @@ private class AccessPathCons extends AccessPath, TAccessPathCons {
result = TCons1(head, this.length())
}
pragma[assume_small_delta]
override int length() { result = 1 + tail.length() }
private string toStringImpl(boolean needsSuffix) {
@@ -2874,54 +2897,16 @@ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
}
}
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source are generated.
*/
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
/** Gets the underlying `Node`. */
final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
abstract private class PathNodeImpl extends TPathNode {
/** Gets the `FlowState` of this node. */
FlowState getState() { none() }
abstract FlowState getState();
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getANonHiddenSuccessor() and
reach(this) and
reach(result)
}
abstract Configuration getConfiguration();
/** Holds if this node is a source. */
predicate isSource() { none() }
}
abstract predicate isSource();
abstract private class PathNodeImpl extends PathNode {
abstract PathNodeImpl getASuccessorImpl();
private PathNodeImpl getASuccessorIfHidden() {
@@ -2953,6 +2938,22 @@ abstract private class PathNodeImpl extends PathNode {
)
}
string getSourceGroup() {
this.isSource() and
this.getConfiguration().sourceGrouping(this.getNodeEx().asNode(), result)
}
predicate isFlowSource() {
this.isSource() and not exists(this.getSourceGroup())
or
this instanceof PathNodeSourceGroup
}
predicate isFlowSink() {
this = any(PathNodeSink sink | not exists(sink.getSinkGroup())) or
this instanceof PathNodeSinkGroup
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -2967,13 +2968,23 @@ abstract private class PathNodeImpl extends PathNode {
result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
}
override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
/** Gets a textual representation of this element. */
string toString() { result = this.getNodeEx().toString() + this.ppAp() }
override string toStringWithContext() {
result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
}
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
override predicate hasLocationInfo(
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
@@ -2982,18 +2993,71 @@ abstract private class PathNodeImpl extends PathNode {
/** Holds if `n` can reach a sink. */
private predicate directReach(PathNodeImpl n) {
n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor())
n instanceof PathNodeSink or
n instanceof PathNodeSinkGroup or
directReach(n.getANonHiddenSuccessor())
}
/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
private predicate reach(PathNodeImpl n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNodeImpl n1, PathNode n2) {
private predicate pathSucc(PathNodeImpl n1, PathNodeImpl n2) {
n1.getANonHiddenSuccessor() = n2 and directReach(n2)
}
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
private predicate pathSuccPlus(PathNodeImpl n1, PathNodeImpl n2) = fastTC(pathSucc/2)(n1, n2)
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof PathNodeImpl {
PathNode() { reach(this) }
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { super.getNodeEx().projectToNode() = result }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = super.getState() }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = super.getConfiguration() }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getANonHiddenSuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { this = TPathNodeSourceGroup(group, _) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { this = TPathNodeSinkGroup(group, _) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
@@ -3004,7 +3068,7 @@ module PathGraph {
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
key = "semmle.label" and val = n.toString()
}
/**
@@ -3013,11 +3077,7 @@ module PathGraph {
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Subpaths::subpaths(arg, par, ret, out) and
reach(arg) and
reach(par) and
reach(ret) and
reach(out)
Subpaths::subpaths(arg, par, ret, out)
}
}
@@ -3118,9 +3178,66 @@ private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override PathNodeImpl getASuccessorImpl() {
result = TPathNodeSinkGroup(this.getSinkGroup(), config)
}
override predicate isSource() { sourceNode(node, state, config) }
string getSinkGroup() { config.sinkGrouping(node.asNode(), result) }
}
private class PathNodeSourceGroup extends PathNodeImpl, TPathNodeSourceGroup {
string sourceGroup;
Configuration config;
PathNodeSourceGroup() { this = TPathNodeSourceGroup(sourceGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() {
result.getSourceGroup() = sourceGroup and
result.getConfiguration() = config
}
override predicate isSource() { none() }
override string toString() { result = sourceGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private class PathNodeSinkGroup extends PathNodeImpl, TPathNodeSinkGroup {
string sinkGroup;
Configuration config;
PathNodeSinkGroup() { this = TPathNodeSinkGroup(sinkGroup, config) }
override NodeEx getNodeEx() { none() }
override FlowState getState() { none() }
override Configuration getConfiguration() { result = config }
override PathNodeImpl getASuccessorImpl() { none() }
override predicate isSource() { none() }
override string toString() { result = sinkGroup }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}
}
private predicate pathNode(
@@ -3142,6 +3259,7 @@ private predicate pathNode(
* Holds if data may flow from `mid` to `node`. The last step in or out of
* a callable is recorded by `cc`.
*/
pragma[assume_small_delta]
pragma[nomagic]
private predicate pathStep(
PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap
@@ -3399,7 +3517,7 @@ private module Subpaths {
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, FlowState sout, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3407,14 +3525,14 @@ private module Subpaths {
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
private Configuration getPathNodeConf(PathNodeImpl n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
PathNodeImpl arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and
@@ -3444,7 +3562,7 @@ private module Subpaths {
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNodeImpl out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
@@ -3460,7 +3578,7 @@ private module Subpaths {
* Holds if `n` can reach a return node in a summarized subpath that can reach a sink.
*/
predicate retReach(PathNodeImpl n) {
exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
exists(PathNodeImpl out | subpaths(_, _, n, out) | directReach(out) or retReach(out))
or
exists(PathNodeImpl mid |
retReach(mid) and
@@ -3476,12 +3594,22 @@ private module Subpaths {
* Will only have results if `configuration` has non-empty sources and
* sinks.
*/
private predicate hasFlowPath(
PathNodeImpl flowsource, PathNodeImpl flowsink, Configuration configuration
) {
flowsource.isFlowSource() and
flowsource.getConfiguration() = configuration and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.isFlowSink()
}
private predicate flowsTo(
PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink,
Configuration configuration
) {
flowsource.isSource() and
flowsource.getConfiguration() = configuration and
flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
flowsource.getNodeEx().asNode() = source and
(flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and
flowsink.getNodeEx().asNode() = sink
}
@@ -3504,14 +3632,14 @@ private predicate finalStats(
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and
tuples = count(PathNode pn)
tuples = count(PathNodeImpl pn)
or
fwd = false and
nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and
tuples = count(PathNode pn | reach(pn))
tuples = count(PathNode pn)
}
/**

View File

@@ -136,6 +136,18 @@ module Consistency {
msg = "Local flow step does not preserve enclosing callable."
}
query predicate readStepIsLocal(Node n1, Node n2, string msg) {
readStep(n1, _, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Read step does not preserve enclosing callable."
}
query predicate storeStepIsLocal(Node n1, Node n2, string msg) {
storeStep(n1, _, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Store step does not preserve enclosing callable."
}
private DataFlowType typeRepr() { result = getNodeType(_) }
query predicate compatibleTypesReflexive(DataFlowType t, string msg) {

View File

@@ -892,7 +892,7 @@ module Private {
}
/**
* Provides a means of translating externally (e.g., CSV) defined flow
* Provides a means of translating externally (e.g., MaD) defined flow
* summaries into a `SummarizedCallable`s.
*/
module External {
@@ -1121,7 +1121,7 @@ module Private {
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
predicate isSourceNode(InterpretNode node, string kind) {
@@ -1132,7 +1132,7 @@ module Private {
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
predicate isSinkNode(InterpretNode node, string kind) {

View File

@@ -2,155 +2,7 @@
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import regexp.RegexpMatching
/**
* Holds if the regexp `root` should be tested against `str`.
* Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`.
* `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored.
* `testWithGroups` toggles whether it's tested which groups are filled by a given input string.
*/
private predicate isBadTagFilterCandidate(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
) {
// the regexp must mention "<" and ">" explicitly.
forall(string angleBracket | angleBracket = ["<", ">"] |
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
root
) and
ignorePrefix = true and
(
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
testWithGroups = true
or
str =
[
"<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
"<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>",
"<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>",
"<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
"<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
"<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
] and
testWithGroups = false
)
}
/**
* A regexp that matches some string from the `isBadTagFilterCandidate` predicate.
*/
class HtmlMatchingRegExp extends RootTerm {
HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) }
/** Holds if this regexp matched `str`, where `str` is one of the string from `isBadTagFilterCandidate`. */
predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) }
/** Holds if this regexp fills capture group `g' when matching `str', where `str` is one of the string from `isBadTagFilterCandidate`. */
predicate fillsCaptureGroup(string str, int g) {
RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g)
}
}
/** DEPRECATED: Alias for HtmlMatchingRegExp */
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
*
* When adding a new case to this predicate, make sure the test string used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HtmlMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and
exists(int a, int b | a != b |
regexp.fillsCaptureGroup("<!-- foo -->", a) and
// <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
regexp.fillsCaptureGroup("<!-- foo --!>", b) and
not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
msg =
"Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+ a + " and comments ending with --!> are matched with capture group " +
strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
"."
)
or
// CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
exists(int group, int other |
group != other and
regexp.fillsCaptureGroup("<!-- foo -->", group) and
regexp.fillsCaptureGroup("<foo>", other) and
not regexp.matches("<!-- foo --!>") and
not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
not regexp.fillsCaptureGroup("<!- foo ->", group) and
not regexp.fillsCaptureGroup("<foo>", group) and
not regexp.fillsCaptureGroup("<script>", group) and
msg =
"This regular expression only parses --> (capture group " + group +
") and not --!> as an HTML comment end tag."
)
or
regexp.matches("<!-- foo -->") and
not regexp.matches("<!-- foo\n -->") and
not regexp.matches("<!- foo ->") and
not regexp.matches("<foo>") and
not regexp.matches("<script>") and
msg = "This regular expression does not match comments containing newlines."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script \n>foo</script>") and
msg = "This regular expression matches <script></script>, but not <script \\n></script>"
or
not regexp.matches("<script >foo\n</script>") and
msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
)
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<script src='foo'></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses single-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses double-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script\tsrc='foo'></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo src=\"foo\"></foo>") and
msg = "This regular expression does not match script tags where tabs are used between attributes."
or
regexp.matches("<script>foo</script>") and
not RegExpFlags::isIgnoreCase(regexp) and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match upper case <SCRIPT> tags."
or
not regexp.matches("<sCrIpT>foo</ScRiPt>") and
regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match mixed case <sCrIpT> tags."
)
or
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script src=\"foo\">foo</script >") and
msg = "This regular expression does not match script end tags like </script >."
or
not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
or
not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
)
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// BadTagFilterQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView> as Dep
import Dep

View File

@@ -2,288 +2,7 @@
* Classes and predicates for working with suspicious character ranges.
*/
// We don't need the NFA utils, just the regexp tree.
// but the below is a nice shared library that exposes the API we need.
import regexp.NfaUtils
/**
* Gets a rank for `range` that is unique for ranges in the same file.
* Prioritizes ranges that match more characters.
*/
int rankRange(RegExpCharacterRange range) {
range =
rank[result](RegExpCharacterRange r, Location l, int low, int high |
r.getLocation() = l and
isRange(r, low, high)
|
r order by (high - low) desc, l.getStartLine(), l.getStartColumn()
)
}
/** Holds if `range` spans from the unicode code points `low` to `high` (both inclusive). */
predicate isRange(RegExpCharacterRange range, int low, int high) {
exists(string lowc, string highc |
range.isRange(lowc, highc) and
low.toUnicode() = lowc and
high.toUnicode() = highc
)
}
/** Holds if `char` is an alpha-numeric character. */
predicate isAlphanumeric(string char) {
// written like this to avoid having a bindingset for the predicate
char = [[48 .. 57], [65 .. 90], [97 .. 122]].toUnicode() // 0-9, A-Z, a-z
}
/**
* Holds if the given ranges are from the same character class
* and there exists at least one character matched by both ranges.
*/
predicate overlap(RegExpCharacterRange a, RegExpCharacterRange b) {
exists(RegExpCharacterClass clz |
a = clz.getAChild() and
b = clz.getAChild() and
a != b
|
exists(int alow, int ahigh, int blow, int bhigh |
isRange(a, alow, ahigh) and
isRange(b, blow, bhigh) and
alow <= bhigh and
blow <= ahigh
)
)
}
/**
* Holds if `range` overlaps with the char class `escape` from the same character class.
*/
predicate overlapsWithCharEscape(RegExpCharacterRange range, RegExpCharacterClassEscape escape) {
exists(RegExpCharacterClass clz, string low, string high |
range = clz.getAChild() and
escape = clz.getAChild() and
range.isRange(low, high)
|
escape.getValue() = "w" and
getInRange(low, high).regexpMatch("\\w")
or
escape.getValue() = "d" and
getInRange(low, high).regexpMatch("\\d")
or
escape.getValue() = "s" and
getInRange(low, high).regexpMatch("\\s")
)
}
/** Gets the unicode code point for a `char`. */
bindingset[char]
int toCodePoint(string char) { result.toUnicode() = char }
/** A character range that appears to be overly wide. */
class OverlyWideRange extends RegExpCharacterRange {
OverlyWideRange() {
exists(int low, int high, int numChars |
isRange(this, low, high) and
numChars = (1 + high - low) and
this.getRootTerm().isUsedAsRegExp() and
numChars >= 10
|
// across the Z-a range (which includes backticks)
toCodePoint("Z") >= low and
toCodePoint("a") <= high
or
// across the 9-A range (which includes e.g. ; and ?)
toCodePoint("9") >= low and
toCodePoint("A") <= high
or
// a non-alphanumeric char as part of the range boundaries
exists(int bound | bound = [low, high] | not isAlphanumeric(bound.toUnicode())) and
// while still being ascii
low < 128 and
high < 128
) and
// allowlist for known ranges
not this = allowedWideRanges()
}
/** Gets a string representation of a character class that matches the same chars as this range. */
string printEquivalent() { result = RangePrinter::printEquivalentCharClass(this) }
}
/** Gets a range that should not be reported as an overly wide range. */
RegExpCharacterRange allowedWideRanges() {
// ~ is the last printable ASCII character, it's used right in various wide ranges.
result.isRange(_, "~")
or
// the same with " " and "!". " " is the first printable character, and "!" is the first non-white-space printable character.
result.isRange([" ", "!"], _)
or
// the `[@-_]` range is intentional
result.isRange("@", "_")
or
// starting from the zero byte is a good indication that it's purposely matching a large range.
result.isRange(0.toUnicode(), _)
}
/** Gets a char between (and including) `low` and `high`. */
bindingset[low, high]
private string getInRange(string low, string high) {
result = [toCodePoint(low) .. toCodePoint(high)].toUnicode()
}
/** A module computing an equivalent character class for an overly wide range. */
module RangePrinter {
bindingset[char]
bindingset[result]
private string next(string char) {
exists(int prev, int next |
prev.toUnicode() = char and
next.toUnicode() = result and
next = prev + 1
)
}
/** Gets the points where the parts of the pretty printed range should be cut off. */
private string cutoffs() { result = ["A", "Z", "a", "z", "0", "9"] }
/** Gets the char to use in the low end of a range for a given `cut` */
private string lowCut(string cut) {
cut = ["A", "a", "0"] and
result = cut
or
cut = ["Z", "z", "9"] and
result = next(cut)
}
/** Gets the char to use in the high end of a range for a given `cut` */
private string highCut(string cut) {
cut = ["Z", "z", "9"] and
result = cut
or
cut = ["A", "a", "0"] and
next(result) = cut
}
/** Gets the cutoff char used for a given `part` of a range when pretty-printing it. */
private string cutoff(OverlyWideRange range, int part) {
exists(int low, int high | isRange(range, low, high) |
result =
rank[part + 1](string cut |
cut = cutoffs() and low < toCodePoint(cut) and toCodePoint(cut) < high
|
cut order by toCodePoint(cut)
)
)
}
/** Gets the number of parts we should print for a given `range`. */
private int parts(OverlyWideRange range) { result = 1 + count(cutoff(range, _)) }
/** Holds if the given part of a range should span from `low` to `high`. */
private predicate part(OverlyWideRange range, int part, string low, string high) {
// first part.
part = 0 and
(
range.isRange(low, high) and
parts(range) = 1
or
parts(range) >= 2 and
range.isRange(low, _) and
high = highCut(cutoff(range, part))
)
or
// middle
part >= 1 and
part < parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
high = highCut(cutoff(range, part))
or
// last.
part = parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
range.isRange(_, high)
}
/** Gets an escaped `char` for use in a character class. */
bindingset[char]
private string escape(string char) {
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
if char.regexpMatch(reg) then result = "\\" + char else result = char
)
}
/** Gets a part of the equivalent range. */
private string printEquivalentCharClass(OverlyWideRange range, int part) {
exists(string low, string high | part(range, part, low, high) |
if
isAlphanumeric(low) and
isAlphanumeric(high)
then result = low + "-" + high
else
result =
strictconcat(string char | char = getInRange(low, high) | escape(char) order by char)
)
}
/** Gets the entire pretty printed equivalent range. */
string printEquivalentCharClass(OverlyWideRange range) {
result =
strictconcat(string r, int part |
r = "[" and part = -1 and exists(range)
or
r = printEquivalentCharClass(range, part)
or
r = "]" and part = parts(range)
|
r order by part
)
}
}
/** Gets a char range that is overly large because of `reason`. */
RegExpCharacterRange getABadRange(string reason, int priority) {
result instanceof OverlyWideRange and
priority = 0 and
exists(string equiv | equiv = result.(OverlyWideRange).printEquivalent() |
if equiv.length() <= 50
then reason = "is equivalent to " + equiv
else reason = "is equivalent to " + equiv.substring(0, 50) + "..."
)
or
priority = 1 and
exists(RegExpCharacterRange other |
reason = "overlaps with " + other + " in the same character class" and
rankRange(result) < rankRange(other) and
overlap(result, other)
)
or
priority = 2 and
exists(RegExpCharacterClassEscape escape |
reason = "overlaps with " + escape + " in the same character class" and
overlapsWithCharEscape(result, escape)
)
or
reason = "is empty" and
priority = 3 and
exists(int low, int high |
isRange(result, low, high) and
low > high
)
}
/** Holds if `range` matches suspiciously many characters. */
predicate problem(RegExpCharacterRange range, string reason) {
reason =
strictconcat(string m, int priority |
range = getABadRange(m, priority)
|
m, ", and " order by priority desc
) and
// specifying a range using an escape is usually OK.
not range.getAChild() instanceof RegExpEscape and
// Unicode escapes in strings are interpreted before it turns into a regexp,
// so e.g. [\u0001-\uFFFF] will just turn up as a range between two constants.
// We therefore exclude these ranges.
range.getRootTerm().getParent() instanceof RegExpLiteral and
// is used as regexp (mostly for JS where regular expressions are parsed eagerly)
range.getRootTerm().isUsedAsRegExp()
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// OverlyLargeRangeQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.OverlyLargeRangeQuery::Make<TreeView> as Dep
import Dep

View File

@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
/**
@@ -20,6 +20,9 @@ private import semmle.python.ApiGraphs
* vulnerabilities, as well as extension points for adding your own.
*/
module PolynomialReDoS {
private import TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
/**
* A data flow source for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
*/

View File

@@ -103,7 +103,7 @@ module HeuristicNames {
*/
string notSensitiveRegexp() {
result =
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}
/**

View File

@@ -62,284 +62,7 @@
* a suffix `x` (possible empty) that is most likely __not__ accepted.
*/
import NfaUtils
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
}
/**
* A infinitely repeating quantifier that might backtrack.
*/
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
MaybeBacktrackingRepetition() {
exists(RegExpTerm child |
child instanceof RegExpAlt or
child instanceof RegExpQuantifier
|
child.getParent+() = this
)
}
}
/**
* A state in the product automaton.
*/
private newtype TStatePair =
/**
* We lazily only construct those states that we are actually
* going to need: `(q, q)` for every fork state `q`, and any
* pair of states that can be reached from a pair that we have
* already constructed. To cut down on the number of states,
* we only represent states `(q1, q2)` where `q1` is lexicographically
* no bigger than `q2`.
*
* States are only constructed if both states in the pair are
* inside a repetition that might backtrack.
*/
MkStatePair(State q1, State q2) {
isFork(q1, _, _, _, _) and q2 = q1
or
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
rankState(q1) <= rankState(q2)
}
/**
* Gets a unique number for a `state`.
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
*/
private int rankState(State state) {
state =
rank[result](State s, Location l |
stateInsideBacktracking(s) and
l = s.getRepr().getLocation()
|
s order by l.getStartLine(), l.getStartColumn(), s.toString()
)
}
/**
* A state in the product automaton.
*/
private class StatePair extends TStatePair {
State q1;
State q2;
StatePair() { this = MkStatePair(q1, q2) }
/** Gets a textual representation of this element. */
string toString() { result = "(" + q1 + ", " + q2 + ")" }
/** Gets the first component of the state pair. */
State getLeft() { result = q1 }
/** Gets the second component of the state pair. */
State getRight() { result = q2 }
}
/**
* Holds for `(fork, fork)` state pairs when `isFork(fork, _, _, _, _)` holds.
*
* Used in `statePairDistToFork`
*/
private predicate isStatePairFork(StatePair p) {
exists(State fork | p = MkStatePair(fork, fork) and isFork(fork, _, _, _, _))
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r`.
*
* Used in `statePairDistToFork`
*/
private predicate reverseStep(StatePair r, StatePair q) { step(q, _, _, r) }
/**
* Gets the minimum length of a path from `q` to `r` in the
* product automaton.
*/
private int statePairDistToFork(StatePair q, StatePair r) =
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
/**
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
* trivially have an empty intersection.
*
* This predicate only holds for states associated with regular expressions
* that have at least one repetition quantifier in them (otherwise the
* expression cannot be vulnerable to ReDoS attacks anyway).
*/
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
s1 != s2
or
r1 != r2
or
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that the there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
// The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
* one or the other is defined.
*/
private StatePair mkStatePair(State q1, State q2) {
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1` and `s2`, respectively.
*/
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
}
/**
* Holds if there are transitions from the components of `q` to `r1` and `r2`
* labelled with `s1` and `s2`, respectively.
*
* We only consider transitions where the resulting states `(r1, r2)` are both
* inside a repetition that might backtrack.
*/
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, TTrace t) { isReachableFromFork(_, _, s1, s2, t, _) }
/**
* A list of pairs of input symbols that describe a path in the product automaton
* starting from some fork state.
*/
private class Trace extends TTrace {
/** Gets a textual representation of this element. */
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
)
}
}
/**
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
* a path from `r` back to `(fork, fork)` with `rem` steps.
*/
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
exists(InputSymbol s1, InputSymbol s2, Trace v |
isReachableFromFork(fork, r, s1, s2, v, rem) and
w = Step(s1, s2, v)
)
}
private predicate isReachableFromFork(
State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
) {
// base case
exists(State q1, State q2 |
isFork(fork, s1, s2, q1, q2) and
r = MkStatePair(q1, q2) and
v = Nil() and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
or
// recursive case
exists(StatePair p |
isReachableFromFork(fork, p, v, rem + 1) and
step(p, s1, s2, r) and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
}
/**
* Gets a state in the product automaton from which `(fork, fork)` is
* reachable in zero or more epsilon transitions.
*/
private StatePair getAForkPair(State fork) {
isFork(fork, _, _, _, _) and
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, result) }
/** Holds if `n` is a trace that is used by `concretize` in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State f | isReachableFromFork(f, getAForkPair(f), n, _))
}
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2 | t = Step(s1, s2, _) | result = intersect(s1, s2))
}
}
/**
* Holds if `fork` is a pumpable fork with word `w`.
*/
private predicate isPumpable(State fork, string w) {
exists(StatePair q, Trace t |
isReachableFromFork(fork, q, t, _) and
q = getAForkPair(fork) and
w = Concretizer<CharTreeImpl>::concretize(t)
)
}
/** Holds if `state` has exponential ReDoS */
predicate hasReDoSResult = ReDoSPruning<isPumpable/2>::hasReDoSResult/4;
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// ExponentialBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView> as Dep
import Dep

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +0,0 @@
/**
* Provides Python-specific definitions for use in the NfaUtils module.
*/
import python
import semmle.python.RegexTreeView
/**
* Holds if `term` is an escape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if `term` is a possessive quantifier.
* As python's regexes do not support possessive quantifiers, this never holds, but is used by the shared library.
*/
predicate isPossessive(RegExpQuantifier term) { none() }
/**
* Holds if the regex that `term` is part of is used in a way that ignores any leading prefix of the input it's matched against.
* Not yet implemented for Python.
*/
predicate matchesAnyPrefix(RegExpTerm term) { any() }
/**
* Holds if the regex that `term` is part of is used in a way that ignores any trailing suffix of the input it's matched against.
* Not yet implemented for Python.
*/
predicate matchesAnySuffix(RegExpTerm term) { any() }
/**
* Holds if the regular expression should not be considered.
*
* We make the pragmatic performance optimization to ignore regular expressions in files
* that does not belong to the project code (such as installed dependencies).
*/
predicate isExcluded(RegExpParent parent) {
not exists(parent.getRegex().getLocation().getFile().getRelativePath())
or
// Regexes with many occurrences of ".*" may cause the polynomial ReDoS computation to explode, so
// we explicitly exclude these.
count(int i | exists(parent.getRegex().getText().regexpFind("\\.\\*", i, _)) | i) > 10
}
/**
* A module containing predicates for determining which flags a regular expression have.
*/
module RegExpFlags {
/**
* Holds if `root` has the `i` flag for case-insensitive matching.
*/
predicate isIgnoreCase(RegExpTerm root) {
root.isRootTerm() and
root.getLiteral().isIgnoreCase()
}
/**
* Gets the flags for `root`, or the empty string if `root` has no flags.
*/
string getFlags(RegExpTerm root) {
root.isRootTerm() and
result = root.getLiteral().getFlags()
}
/**
* Holds if `root` has the `s` flag for multi-line matching.
*/
predicate isDotAll(RegExpTerm root) {
root.isRootTerm() and
root.getLiteral().isDotAll()
}
}

View File

@@ -3,155 +3,7 @@
* and for testing which capture groups are filled when a particular regexp matches a string.
*/
import NfaUtils
/** A root term */
class RootTerm extends RegExpTerm {
RootTerm() { this.isRootTerm() }
}
/**
* Holds if it should be tested whether `root` matches `str`.
*
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
* but if `ignorePrefix` is true, it will only match "foo".
*
* If `testWithGroups` is true, then the `RegexpMatching::fillsCaptureGroup` predicate can be used to determine which capture
* groups are filled by a string.
*/
signature predicate isRegexpMatchingCandidateSig(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
);
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*
* The module parameter `isCandidate` determines which strings should be tested,
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
*/
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-determistic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
isCandidate(reg, str, ignorePrefix, _) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
pragma[noopt]
private State getAStateAfterMatchingAux(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
*/
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
// base case, reaches an accepting state from the last state in `getAState(..)`
isCandidate(reg, str, ignorePrefix, true) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/** Gets the capture group number that `term` belongs to. */
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
/**
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
*/
predicate matches(RootTerm reg, string str) {
exists(State state | state = getAState(reg, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
* Holds if matching `str` against `reg` may fill capture group number `g`.
* Only holds if `str` is in the `testWithGroups` predicate.
*/
predicate fillsCaptureGroup(RootTerm reg, string str, int g) {
exists(State s |
s = getAStateThatReachesAccept(reg, _, str, _) and
g = group(s.getRepr())
)
}
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// RegexpMatching should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.RegexpMatching::Make<TreeView> as Dep
import Dep

View File

@@ -1,11 +1,4 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
import NfaUtils
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
@@ -42,377 +35,7 @@ import NfaUtils
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
/**
* Gets any root (start) state of a regular expression.
*/
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
or
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
}
/**
* A state in the product automaton.
* The product automaton contains 3-tuples of states.
*
* We lazily only construct those states that we are actually
* going to need.
* Either a start state `(pivot, pivot, succ)`, or a state
* where there exists a transition from an already existing state.
*
* The exponential variant of this query (`js/redos`) uses an optimization
* trick where `q1 <= q2`. This trick cannot be used here as the order
* of the elements matter.
*/
class StateTuple extends TStateTuple {
State q1;
State q2;
State q3;
StateTuple() { this = MkStateTuple(q1, q2, q3) }
/**
* Gest a string representation of this tuple.
*/
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
/**
* Holds if this tuple is `(r1, r2, r3)`.
*/
pragma[noinline]
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
}
/**
* A module for determining feasible tuples for the product automaton.
*
* The implementation is split into many predicates for performance reasons.
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
}
/**
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
*/
pragma[noinline]
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
}
/**
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
*
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
*/
predicate isStartLoops(State pivot, State succ) {
pivot != succ and
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = succ and
(
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
or
pivot = mkMatch(any(RegExpRoot root))
)
}
/**
* Gets a state for which there exists a transition in the NFA from `s'.
*/
State delta(State s) { delta(s, _, result) }
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
exists(State r1, State r2, State r3 |
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
)
}
/**
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3
* labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noopt]
predicate step(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
deltaClosed(q3, s3, r3) and
// use noopt to force the join on `getAThreewayIntersect` to happen last.
exists(getAThreewayIntersect(s1, s2, s3))
)
}
/**
* Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
*
* The result is not complete, and might miss some combination of edges that share some character.
*/
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)]
or
result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)]
or
result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)]
}
/**
* Gets the minimum and maximum characters that intersect between `a` and `b`.
* This predicate is used to limit the size of `getAThreewayIntersect`.
*/
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
result = [min(intersect(a, b)), max(intersect(a, b))]
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
isReachableFromStartTuple(_, _, t, s1, s2, s3, _, _)
}
/**
* A list of tuples of input symbols that describe a path in the product automaton
* starting from some start state.
*/
class Trace extends TTrace {
/**
* Gets a string representation of this Trace that can be used for debug purposes.
*/
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
)
}
}
/**
* Holds if there exists a transition from `r` to `q` in the product automaton.
* Notice that the arguments are flipped, and thus the direction is backwards.
*/
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
/**
* Holds if `tuple` is an end state in our search.
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
*/
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
/**
* Gets the minimum length of a path from `r` to some an end state `end`.
*
* The implementation searches backwards from the end-tuple.
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
* The `end` argument must always be an end state.
*/
int distBackFromEnd(StateTuple r, StateTuple end) =
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
* and a path from a start-state to `tuple` follows the transitions in `trace`.
*/
private predicate isReachableFromStartTuple(
State pivot, State succ, StateTuple tuple, Trace trace, int dist
) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace v |
isReachableFromStartTuple(pivot, succ, v, s1, s2, s3, tuple, dist) and
trace = Step(s1, s2, s3, v)
)
}
private predicate isReachableFromStartTuple(
State pivot, State succ, Trace trace, InputSymbol s1, InputSymbol s2, InputSymbol s3,
StateTuple tuple, int dist
) {
// base case.
exists(State q1, State q2, State q3 |
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(q1, q2, q3) and
trace = Nil() and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
)
or
// recursive case
exists(StateTuple p |
isReachableFromStartTuple(pivot, succ, p, trace, dist + 1) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) and
step(p, s1, s2, s3, tuple)
)
}
/**
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
*/
StateTuple getAnEndTuple(State pivot, State succ) {
isStartLoops(pivot, succ) and
result = MkStateTuple(pivot, succ, succ)
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, _, result) }
/** Holds if `n` is used in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State pivot, State succ |
isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), n, _)
)
}
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | t = Step(s1, s2, s3, _) |
result = getAThreewayIntersect(s1, s2, s3)
)
}
}
/**
* Holds if matching repetitions of `pump` can:
* 1) Transition from `pivot` back to `pivot`.
* 2) Transition from `pivot` to `succ`.
* 3) Transition from `succ` to `succ`.
*
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
*
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
* available in the `hasReDoSResult` predicate.
*/
predicate isPumpable(State pivot, State succ, string pump) {
exists(StateTuple q, Trace t |
isReachableFromStartTuple(pivot, succ, q, t, _) and
q = getAnEndTuple(pivot, succ) and
pump = Concretizer<CharTreeImpl>::concretize(t)
)
}
/**
* Holds if states starting in `state` can have polynomial backtracking with the string `pump`.
*/
predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
/**
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
*/
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot |
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and
prev = pivot.getRepr()
)
}
/**
* Gets a message for why `term` can cause polynomial backtracking.
*/
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
polynomialReDoS(term, pump, prefixMsg, prev) and
result =
"Strings " + prefixMsg + "with many repetitions of '" + pump +
"' can start matching anywhere after the start of the preceeding " + prev
}
/**
* A term that may cause a regular expression engine to perform a
* polynomial number of match attempts, relative to the input length.
*/
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
string reason;
string pump;
string prefixMsg;
RegExpTerm prev;
PolynomialBackTrackingTerm() {
reason = getReasonString(this, pump, prefixMsg, prev) and
// there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
}
/**
* Holds if all non-empty successors to the polynomial backtracking term matches the end of the line.
*/
predicate isAtEndLine() {
forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
succ instanceof RegExpDollar
)
}
/**
* Gets the string that should be repeated to cause this regular expression to perform polynomially.
*/
string getPumpString() { result = pump }
/**
* Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking.
*/
string getPrefixMessage() { result = prefixMsg }
/**
* Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
*/
RegExpTerm getPreviousLoop() { result = prev }
/**
* Gets the reason for the number of match attempts.
*/
string getReason() { result = reason }
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// SuperlinearBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView> as Dep
import Dep

View File

@@ -1,3 +1,20 @@
## 0.5.3
No user-facing changes.
## 0.5.2
### Minor Analysis Improvements
* Added model of `cx_Oracle`, `oracledb`, `phonenixdb` and `pyodbc` PyPI packages as a SQL interface following PEP249, resulting in additional sinks for `py/sql-injection`.
* Added model of `executemany` calls on PEP-249 compliant database APIs, resulting in additional sinks for `py/sql-injection`.
* Added model of `pymssql` PyPI package as a SQL interface following PEP249, resulting in additional sinks for `py/sql-injection`.
* The alert messages of many queries were changed to better follow the style guide and make the messages consistent with other languages.
### Bug Fixes
* Fixed how `flask.request` is modeled as a RemoteFlowSource, such that we show fewer duplicated alert messages for Code Scanning alerts. The import, such as `from flask import request`, will now be shown as the first step in a path explanation.
## 0.5.1
No user-facing changes.

View File

@@ -12,8 +12,9 @@
* external/cwe/cwe-020
*/
import semmle.python.security.OverlyLargeRangeQuery
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
from RegExpCharacterRange range, string reason
from TreeView::RegExpCharacterRange range, string reason
where problem(range, reason)
select range, "Suspicious character range that " + reason + "."

View File

@@ -14,7 +14,8 @@
* external/cwe/cwe-186
*/
import semmle.python.security.BadTagFilterQuery
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
from HtmlMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one

View File

@@ -8,7 +8,7 @@ This should be kept up to date; the world is moving fast and protocols are being
- TLS 1.0 and TLS 1.1 are insecure
- TLS 1.2 have some issues. but TLS 1.3 is not widely supported
## Conection methods
## Connection methods
- `ssl.wrap_socket` is creating insecure connections, use `SSLContext.wrap_socket` instead. [link](https://docs.python.org/3/library/ssl.html#ssl.wrap_socket)
> Deprecated since version 3.7: Since Python 3.2 and 2.7.9, it is recommended to use the `SSLContext.wrap_socket()` instead of `wrap_socket()`. The top-level function is limited and creates an insecure client socket without server name indication or hostname matching.

View File

@@ -14,7 +14,6 @@
*/
import python
import semmle.python.security.regexp.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoSQuery
import DataFlow::PathGraph

View File

@@ -14,10 +14,10 @@
* external/cwe/cwe-400
*/
import python
import semmle.python.security.regexp.ExponentialBackTracking
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
from RegExpTerm t, string pump, State s, string prefixMsg
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg
where
hasReDoSResult(t, pump, s, prefixMsg) and
// exclude verbose mode regexes for now

View File

@@ -1,4 +0,0 @@
---
category: fix
---
* Fixed how `flask.request` is modeled as a RemoteFlowSource, such that we show fewer duplicated alert messages for Code Scanning alerts. The import, such as `from flask import request`, will now be shown as the first step in a path explanation.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* The alert message of many queries have been changed to better follow the style guide and make the message consistent with other languages.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Added model of `executemany` calls on PEP-249 compliant database APIs, resulting in additional sinks for `py/sql-injection`.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Added model of `pymssql` PyPI package as a SQL interface following PEP249, resulting in additional sinks for `py/sql-injection`.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Added model of `cx_Oracle`, `oracledb`, `phonenixdb` and `pyodbc` PyPI packages as a SQL interface following PEP249, resulting in additional sinks for `py/sql-injection`.

View File

@@ -0,0 +1,12 @@
## 0.5.2
### Minor Analysis Improvements
* Added model of `cx_Oracle`, `oracledb`, `phonenixdb` and `pyodbc` PyPI packages as a SQL interface following PEP249, resulting in additional sinks for `py/sql-injection`.
* Added model of `executemany` calls on PEP-249 compliant database APIs, resulting in additional sinks for `py/sql-injection`.
* Added model of `pymssql` PyPI package as a SQL interface following PEP249, resulting in additional sinks for `py/sql-injection`.
* The alert messages of many queries were changed to better follow the style guide and make the messages consistent with other languages.
### Bug Fixes
* Fixed how `flask.request` is modeled as a RemoteFlowSource, such that we show fewer duplicated alert messages for Code Scanning alerts. The import, such as `from flask import request`, will now be shown as the first step in a path explanation.

View File

@@ -0,0 +1,3 @@
## 0.5.3
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.5.1
lastReleaseVersion: 0.5.3

View File

@@ -0,0 +1,60 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious tarball without validating that the destination file path
is within the destination directory can cause files outside the destination directory to be
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
archive path names.</p>
<p>Tarball contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a tarball contains a file entry <code>../sneaky-file</code>, and the tarball
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
in an output file path of <code>/tmp/tmp123/../sneaky-file</code>, which would cause the file to be
written to <code>/tmp/</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from tarball entries are validated
to prevent writing files to unexpected locations.</p>
<p>The recommended way of writing an output file from a tarball entry is to call <code>extract()</code> or <code>extractall()</code>.
</p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
</p>
<sample src="examples/TarSlip_1.py" />
<p>To fix this vulnerability, we need to call the function <code>extractall()</code>.
</p>
<sample src="examples/NoHIT_TarSlip_1.py" />
</example>
<references>
<li>
Snyk:
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
</li>
<li>
Tarfile documentation
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">extractall() warning</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,122 @@
/**
* @name Arbitrary file write during tarfile extraction
* @description Extracting files from a malicious tar archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten.
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-022
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.internal.Attributes
import semmle.python.dataflow.new.BarrierGuards
import semmle.python.dataflow.new.RemoteFlowSources
/**
* Handle those three cases of Tarfile opens:
* - `tarfile.open()`
* - `tarfile.TarFile()`
* - `MKtarfile.Tarfile.open()`
*/
API::Node tarfileOpen() {
result in [
API::moduleImport("tarfile").getMember(["open", "TarFile"]),
API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
]
}
/**
* Handle the previous three cases, plus the use of `closing` in the previous cases
*/
class AllTarfileOpens extends API::CallNode {
AllTarfileOpens() {
this = tarfileOpen().getACall()
or
exists(API::Node closing, Node arg |
closing = API::moduleImport("contextlib").getMember("closing") and
this = closing.getACall() and
arg = this.getArg(0) and
arg = tarfileOpen().getACall()
)
}
}
/**
* A taint-tracking configuration for detecting more "TarSlip" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "TarSlip" }
override predicate isSource(DataFlow::Node source) { source = tarfileOpen().getACall() }
override predicate isSink(DataFlow::Node sink) {
(
// A sink capturing method calls to `extractall` without `members` argument.
// For a call to `file.extractall` without `members` argument, `file` is considered a sink.
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("extractall").getACall() and
not exists(Node arg | arg = call.getArgByName("members")) and
sink = call.getObject()
)
or
// A sink capturing method calls to `extractall` with `members` argument.
// For a call to `file.extractall` with `members` argument, `file` is considered a sink if not
// a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`.
// Otherwise, the argument of `members` is considered a sink.
exists(MethodCallNode call, Node arg, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("extractall").getACall() and
arg = call.getArgByName("members") and
if
arg.asCfgNode() instanceof NameConstantNode or
arg.asCfgNode() instanceof ListNode
then sink = call.getObject()
else
if arg.(MethodCallNode).getMethodName() = "getmembers"
then sink = arg.(MethodCallNode).getObject()
else sink = call.getArgByName("members")
)
or
// An argument to `extract` is considered a sink.
exists(AllTarfileOpens atfo |
sink = atfo.getReturn().getMember("extract").getACall().getArg(0)
)
or
//An argument to `_extract_member` is considered a sink.
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("_extract_member").getACall() and
call.getArg(1).(AttrRead).accesses(sink, "name")
)
) and
not sink.getScope().getLocation().getFile().inStdlib()
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(AttrRead attr, MethodCallNode call |
attr.accesses(nodeFrom, "getmembers") and
nodeFrom = call.getObject() and
nodeFrom instanceof AllTarfileOpens and
nodeTo = call
)
or
exists(API::CallNode closing |
closing = API::moduleImport("contextlib").getMember("closing").getACall() and
nodeFrom = closing.getArg(0) and
nodeFrom = tarfileOpen().getReturn().getAValueReachingSink() and
nodeTo = closing
)
}
}
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "Extraction of tarfile from $@ to a potentially untrusted source $@.",
source.getNode(), source.getNode().toString(), sink.getNode(), sink.getNode().toString()

View File

@@ -0,0 +1,13 @@
import uuid
class User:
def __init__(self):
self.token = None
def resetPassword(self):
self.token = uuid.uuid1().hex
user = User()
user.resetPassword()

View File

@@ -0,0 +1,40 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
GUIDs (often called UUIDs) are widely used in modern web applications.
One common use for UUIDs is the generation of one-time-use tokens.
These can used for password reset, and e-mail confirmation routines, for example.
</p>
<p>
There are five versions of UUIDs defined in RFC 4122.
Out of the five, four are generated in a predictable manner.
This means it is possible for someone to predict future UUIDs based on a sample
generated by the target application.
</p>
<p>
Version four is the only UUID version expected to be randomly generated.
Therefore, for situations where predictable tokens are not desired (e.g. password reset tokens),
all other versions should be avoided.
</p>
</overview>
<recommendation>
<p>When using GUIDs/UUIDs for generating tokens that should not be predictable, use version four.</p>
</recommendation>
<example>
<p>This example shows a UUID v1 being used for a password reset routine.
</p>
<sample src="TokenBuiltFromUUID.py" />
</example>
<references>
<li>UUID <a href="https://datatracker.ietf.org/doc/html/rfc4122">RFC</a>.</li>
<li>Daniel Thatcher <i>In GUID We Trust</i> <a href="https://www.intruder.io/research/in-guid-we-trust">article</a>.</li>
<li>UUID exploitation <a href="https://github.com/intruder-io/guidtool">tool</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,60 @@
/**
* @name Predictable token
* @description Tokens used for sensitive tasks, such as, password recovery,
* and email confirmation, should not use predictable values.
* @kind path-problem
* @precision medium
* @problem.severity error
* @security-severity 5
* @id py/predictable-token
* @tags security
* external/cwe/cwe-340
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph
class PredictableResultSource extends DataFlow::Node {
PredictableResultSource() {
exists(API::Node uuidCallRet |
uuidCallRet = API::moduleImport("uuid").getMember(["uuid1", "uuid3", "uuid5"]).getReturn()
|
this = uuidCallRet.asSource()
or
this = uuidCallRet.getMember(["hex", "bytes", "bytes_le"]).asSource()
)
}
}
class TokenAssignmentValueSink extends DataFlow::Node {
TokenAssignmentValueSink() {
exists(string name | name.toLowerCase().matches(["%token", "%code"]) |
exists(DefinitionNode n | n.getValue() = this.asCfgNode() | name = n.(NameNode).getId())
or
exists(DataFlow::AttrWrite aw | aw.getValue() = this | name = aw.getAttributeName())
)
}
}
class TokenBuiltFromUuidConfig extends TaintTracking::Configuration {
TokenBuiltFromUuidConfig() { this = "TokenBuiltFromUuidConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof PredictableResultSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof TokenAssignmentValueSink }
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode call |
call = API::builtin("str").getACall() and
nodeFrom = call.getArg(0) and
nodeTo = call
)
}
}
from DataFlow::PathNode source, DataFlow::PathNode sink, TokenBuiltFromUuidConfig config
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Token built from $@.", source.getNode(), "predictable value"

View File

@@ -1,11 +1,11 @@
name: codeql/python-queries
version: 0.5.2-dev
version: 0.5.4-dev
groups:
- python
- queries
dependencies:
codeql/python-all: "*"
codeql/suite-helpers: "*"
codeql/python-all: ${workspace}
codeql/suite-helpers: ${workspace}
suites: codeql-suites
extractor: python
defaultSuiteFile: codeql-suites/python-code-scanning.qls

View File

@@ -137,6 +137,7 @@ abstract class InlineExpectationsTest extends string {
final predicate hasFailureMessage(FailureLocatable element, string message) {
exists(ActualResult actualResult |
actualResult.getTest() = this and
actualResult.getTag() = this.getARelevantTag() and
element = actualResult and
(
exists(FalseNegativeExpectation falseNegative |
@@ -150,9 +151,18 @@ abstract class InlineExpectationsTest extends string {
)
)
or
exists(ActualResult actualResult |
actualResult.getTest() = this and
not actualResult.getTag() = this.getARelevantTag() and
element = actualResult and
message =
"Tag mismatch: Actual result with tag '" + actualResult.getTag() +
"' that is not part of getARelevantTag()"
)
or
exists(ValidExpectation expectation |
not exists(ActualResult actualResult | expectation.matchesActualResult(actualResult)) and
expectation.getTag() = getARelevantTag() and
expectation.getTag() = this.getARelevantTag() and
element = expectation and
(
expectation instanceof GoodExpectation and

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -0,0 +1,143 @@
edges
| TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:17:5:17:10 | GSSA Variable member |
| TarSlipImprov.py:17:5:17:10 | GSSA Variable member | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result |
| TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | TarSlipImprov.py:28:9:28:14 | SSA variable member |
| TarSlipImprov.py:28:9:28:14 | SSA variable member | TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result |
| TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar |
| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile |
| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() |
| TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry |
| TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry |
| TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:56:9:56:13 | GSSA Variable entry |
| TarSlipImprov.py:56:9:56:13 | GSSA Variable entry | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry |
| TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar |
| TarSlipImprov.py:111:7:111:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:115:9:115:11 | ControlFlowNode for tar |
| TarSlipImprov.py:123:6:123:29 | ControlFlowNode for Attribute() | TarSlipImprov.py:124:9:124:13 | GSSA Variable entry |
| TarSlipImprov.py:124:9:124:13 | GSSA Variable entry | TarSlipImprov.py:125:36:125:40 | ControlFlowNode for entry |
| TarSlipImprov.py:129:6:129:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:130:5:130:7 | ControlFlowNode for tar |
| TarSlipImprov.py:133:7:133:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:134:1:134:3 | ControlFlowNode for tar |
| TarSlipImprov.py:141:6:141:29 | ControlFlowNode for Attribute() | TarSlipImprov.py:142:9:142:13 | GSSA Variable entry |
| TarSlipImprov.py:142:9:142:13 | GSSA Variable entry | TarSlipImprov.py:143:36:143:40 | ControlFlowNode for entry |
| TarSlipImprov.py:176:6:176:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:177:9:177:13 | GSSA Variable entry |
| TarSlipImprov.py:177:9:177:13 | GSSA Variable entry | TarSlipImprov.py:178:36:178:40 | ControlFlowNode for entry |
| TarSlipImprov.py:182:6:182:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:183:9:183:13 | GSSA Variable entry |
| TarSlipImprov.py:183:9:183:13 | GSSA Variable entry | TarSlipImprov.py:184:21:184:25 | ControlFlowNode for entry |
| TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar |
| TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar |
| TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar |
| TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:233:9:233:9 | GSSA Variable f |
| TarSlipImprov.py:233:9:233:9 | GSSA Variable f | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members |
| TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry |
| TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry |
| TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:265:9:265:13 | GSSA Variable entry |
| TarSlipImprov.py:265:9:265:13 | GSSA Variable entry | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry |
| TarSlipImprov.py:271:6:271:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:272:9:272:13 | GSSA Variable entry |
| TarSlipImprov.py:272:9:272:13 | GSSA Variable entry | TarSlipImprov.py:274:25:274:29 | ControlFlowNode for entry |
| TarSlipImprov.py:276:6:276:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:277:9:277:13 | GSSA Variable entry |
| TarSlipImprov.py:277:9:277:13 | GSSA Variable entry | TarSlipImprov.py:280:21:280:25 | ControlFlowNode for entry |
| TarSlipImprov.py:283:6:283:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:284:5:284:7 | ControlFlowNode for tar |
| TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar |
| TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar |
| TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar |
| TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:306:5:306:10 | GSSA Variable member |
| TarSlipImprov.py:306:5:306:10 | GSSA Variable member | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result |
nodes
| TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:17:5:17:10 | GSSA Variable member | semmle.label | GSSA Variable member |
| TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | semmle.label | ControlFlowNode for result |
| TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | semmle.label | ControlFlowNode for tarfile |
| TarSlipImprov.py:28:9:28:14 | SSA variable member | semmle.label | SSA variable member |
| TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | semmle.label | ControlFlowNode for result |
| TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | semmle.label | ControlFlowNode for members_filter1() |
| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:56:9:56:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:111:7:111:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:115:9:115:11 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:123:6:123:29 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:124:9:124:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:125:36:125:40 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:129:6:129:26 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:130:5:130:7 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:133:7:133:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:134:1:134:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:141:6:141:29 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:142:9:142:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:143:36:143:40 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:176:6:176:31 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:177:9:177:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:178:36:178:40 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:182:6:182:31 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:183:9:183:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:184:21:184:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:233:9:233:9 | GSSA Variable f | semmle.label | GSSA Variable f |
| TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | semmle.label | ControlFlowNode for members |
| TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:265:9:265:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:271:6:271:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:272:9:272:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:274:25:274:29 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:276:6:276:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:277:9:277:13 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| TarSlipImprov.py:280:21:280:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| TarSlipImprov.py:283:6:283:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:284:5:284:7 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| TarSlipImprov.py:306:5:306:10 | GSSA Variable member | semmle.label | GSSA Variable member |
| TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | semmle.label | ControlFlowNode for result |
| TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
subpaths
| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() |
#select
| TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | ControlFlowNode for result |
| TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | ControlFlowNode for members_filter1() |
| TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:115:9:115:11 | ControlFlowNode for tar | TarSlipImprov.py:111:7:111:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:115:9:115:11 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:111:7:111:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:115:9:115:11 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:125:36:125:40 | ControlFlowNode for entry | TarSlipImprov.py:123:6:123:29 | ControlFlowNode for Attribute() | TarSlipImprov.py:125:36:125:40 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:123:6:123:29 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:125:36:125:40 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:130:5:130:7 | ControlFlowNode for tar | TarSlipImprov.py:129:6:129:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:130:5:130:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:129:6:129:26 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:130:5:130:7 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:134:1:134:3 | ControlFlowNode for tar | TarSlipImprov.py:133:7:133:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:134:1:134:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:133:7:133:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:134:1:134:3 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:143:36:143:40 | ControlFlowNode for entry | TarSlipImprov.py:141:6:141:29 | ControlFlowNode for Attribute() | TarSlipImprov.py:143:36:143:40 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:141:6:141:29 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:143:36:143:40 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:178:36:178:40 | ControlFlowNode for entry | TarSlipImprov.py:176:6:176:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:178:36:178:40 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:176:6:176:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:178:36:178:40 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:184:21:184:25 | ControlFlowNode for entry | TarSlipImprov.py:182:6:182:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:184:21:184:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:182:6:182:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:184:21:184:25 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | ControlFlowNode for members |
| TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() |
| TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:274:25:274:29 | ControlFlowNode for entry | TarSlipImprov.py:271:6:271:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:274:25:274:29 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:271:6:271:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:274:25:274:29 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:280:21:280:25 | ControlFlowNode for entry | TarSlipImprov.py:276:6:276:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:280:21:280:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:276:6:276:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:280:21:280:25 | ControlFlowNode for entry | ControlFlowNode for entry |
| TarSlipImprov.py:284:5:284:7 | ControlFlowNode for tar | TarSlipImprov.py:283:6:283:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:284:5:284:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:283:6:283:51 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:284:5:284:7 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | ControlFlowNode for tar |
| TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | ControlFlowNode for result |
| TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-022bis/TarSlipImprov.ql

View File

@@ -0,0 +1,316 @@
#!/usr/bin/python
import sys
import tarfile
import tempfile
import os
from tarfile import TarFile
from contextlib import closing, contextmanager
import subprocess
import os.path
unsafe_filename_tar = sys.argv[2]
safe_filename_tar = "safe_path.tar"
tar = tarfile.open(unsafe_filename_tar)
result = []
for member in tar:
if ".." in member.name:
raise ValueError("Path in member name !!!")
result.append(member)
path = unsafe_filename_tar
tar.extractall(path=path, members=result)
tar.close()
def members_filter1(tarfile):
result = []
for member in tarfile:
if '../' in member.name:
print('Member name container directory traversal sequence')
continue
elif member.issym() or member.islnk():
print('Symlink to external resource')
continue
result.append(member)
return result
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter1(tar))
tar.close()
with tarfile.open(unsafe_filename_tar) as tar:
for entry in tar:
if ".." in entry.name:
raise ValueError("Illegal tar archive entry")
tar.extract(entry, "/tmp/unpack/")
def _validate_archive_name(name, target):
if not os.path.abspath(os.path.join(target, name)).startswith(target + os.path.sep):
raise ValueError(f"Provided language pack contains invalid name {name}")
with tarfile.open(unsafe_filename_tar) as tar:
target = "/tmp/unpack"
for entry in tar:
_validate_archive_name(entry.name, target)
tar.extract(entry, target)
def members_filter2(tarfile):
result = []
for member in tarfile.getmembers():
if '../' in member.name:
print('Member name container directory traversal sequence')
continue
elif (member.issym() or member.islnk()) and ('../' in member.linkname):
print('Symlink to external resource')
continue
result.append(member)
return result
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter2(tar))
tar.close()
def _validate_tar_info(info, target):
_validate_archive_name(info.name, target)
if not (info.isfile() or info.isdir()):
raise ValueError("Provided language pack contains invalid file type")
def _validate_archive_name(name, target):
if not os.path.abspath(os.path.join(target, name)).startswith(target + os.path.sep):
raise ValueError(f"Provided language pack contains invalid name {name}")
target = "/tmp/unpack"
with tarfile.open(unsafe_filename_tar, "r") as tar:
for info in tar.getmembers():
_validate_tar_info(info, target)
tar.extractall(target)
def members_filter3(tarfile):
result = []
for member in tarfile.getmembers():
if '../' in member.name:
print('Member name container directory traversal sequence')
continue
elif member.issym() or member.islnk():
print('Symlink to external resource')
continue
result.append(member)
return result
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter3(tar))
tar.close()
tar = tarfile.open(unsafe_filename_tar)
tarf = tar.getmembers()
for f in tarf:
if not f.issym():
tar.extractall(path=tempfile.mkdtemp(), members=[f])
tar.close()
class MKTar(TarFile):
pass
tarball = unsafe_filename_tar
with MKTar.open(name=tarball) as tar:
for entry in tar:
tar._extract_member(entry, entry.name)
tarball = unsafe_filename_tar
with tarfile.open(tarball) as tar:
tar.extractall()
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=None)
class MKTar(tarfile.TarFile):
pass
tarball = unsafe_filename_tar
with MKTar.open(name=tarball) as tar:
for entry in tar:
tar._extract_member(entry, entry.name)
@contextmanager
def py2_tarxz(filename):
with tempfile.TemporaryFile() as tmp:
subprocess.check_call(["xz", "-dc", filename], stdout=tmp.fileno())
tmp.seek(0)
with closing(tarfile.TarFile(fileobj=tmp)) as tf:
yield tf
def unpack_tarball(tar_filename, dest):
if sys.version_info[0] < 3 and tar_filename.endswith('.xz'):
# Py 2.7 lacks lzma support
tar_cm = py2_tarxz(tar_filename)
else:
tar_cm = closing(tarfile.open(tar_filename))
base_dir = None
with tar_cm as tarc:
for member in tarc:
base_name = member.name.split('/')[0]
if base_dir is None:
base_dir = base_name
elif base_dir != base_name:
print('Unexpected path in %s: %s' % (tar_filename, base_name))
tarc.extractall(dest)
return os.path.join(dest, base_dir)
unpack_tarball(unsafe_filename_tar, "/tmp/unpack")
tarball = unsafe_filename_tar
with tarfile.open(name=tarball) as tar:
for entry in tar:
tar._extract_member(entry, entry.name)
tarball = unsafe_filename_tar
with tarfile.open(name=tarball) as tar:
for entry in tar:
tar.extract(entry, "/tmp/unpack/")
tarball = unsafe_filename_tar
tar = tarfile.open(tarball)
tar.extractall("/tmp/unpack/")
tarball = unsafe_filename_tar
with tarfile.open(tarball, "r") as tar:
tar.extractall(path="/tmp/unpack/", members=tar)
def members_filter4(tarfile):
result = []
for member in tarfile.getmembers():
if member.issym() or member.islnk():
print('Symlink to external resource')
continue
result.append(member)
return result
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter4(tar))
tar.close()
with tarfile.open(unsafe_filename_tar, "r") as tar:
tar.extractall(path="/tmp/unpack")
def members_filter5(tarfile):
result = []
for member in tarfile.getmembers():
if member.issym():
print('Symlink to external resource')
continue
result.append(member)
return result
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter5(tar))
tar.close()
filename = unsafe_filename_tar
tmp_dir = "/tmp/"
read_type = "r:gz" if filename.endswith("tgz") else "r"
with tarfile.open(filename, read_type) as corpus_tar:
members = []
for f in corpus_tar:
if not os.path.isfile(os.path.join(tmp_dir, f.name)):
members.append(f)
corpus_tar.extractall(tmp_dir, members=members)
def members_filter6(tarfile):
result = []
for member in tarfile.getmembers():
if not member.isreg():
print('Symlink to external resource')
continue
result.append(member)
return result
tar = tarfile.open(filename)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter6(tar))
tar.close()
archive_path = unsafe_filename_tar
target_dir = "/tmp/unpack"
tarfile.open(archive_path, "r").extractall(path=target_dir)
tarball = unsafe_filename_tar
with tarfile.open(tarball) as tar:
for entry in tar:
if entry.isfile():
tar.extract(entry, "/tmp/unpack/")
with tarfile.open(unsafe_filename_tar) as tar:
for entry in tar:
if entry.name.startswith("/"):
raise ValueError("Illegal tar archive entry")
tar.extract(entry, "/tmp/unpack/")
tarball = unsafe_filename_tar
with tarfile.TarFile(tarball, mode="r") as tar:
for entry in tar:
if entry.isfile():
tar.extract(entry, "/tmp/unpack/")
with tarfile.open(unsafe_filename_tar) as tar:
for entry in tar:
if os.path.isabs(entry.name):
raise ValueError("Illegal tar archive entry")
tar.extract(entry, "/tmp/unpack/")
with tarfile.TarFile(unsafe_filename_tar, mode="r") as tar:
tar.extractall(path="/tmp/unpack")
tar = tarfile.open(filename)
tar.extractall(path=tempfile.mkdtemp(), members=tar.getmembers())
tar.close()
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(path=tempfile.mkdtemp(), members=None)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter4(tar))
tar.close()
with tarfile.TarFile(unsafe_filename_tar, mode="r") as tar:
tar.extractall(path="/tmp/unpack/", members=tar)
tar = tarfile.open(unsafe_filename_tar)
result = []
for member in tar:
if member.issym():
raise ValueError("But it is a symlink")
result.append(member)
tar.extractall(path=tempfile.mkdtemp(), members=result)
tar.close()
archive_path = unsafe_filename_tar
target_dir = "/tmp/unpack"
tarfile.TarFile(unsafe_filename_tar, mode="r").extractall(path=target_dir)

View File

@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -0,0 +1,2 @@
| test.py:1:1:1:3 | foo | Tag mismatch: Actual result with tag 'foo' that is not part of getARelevantTag() |
| test.py:4:1:4:3 | foo | Tag mismatch: Actual result with tag 'foo' that is not part of getARelevantTag() |

View File

@@ -0,0 +1,20 @@
// test to illustrate what happens if you forget to put in the
// right values for `getARelevantTag`. We want to alert on this,
// so it gets fixed!
import python
import TestUtilities.InlineExpectationsTest
class MissingRelevantTag extends InlineExpectationsTest {
MissingRelevantTag() { this = "MissingRelevantTag" }
override string getARelevantTag() { none() }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Name name | name.getId() = "foo" |
location = name.getLocation() and
element = name.toString() and
value = "val" and
tag = "foo"
)
}
}

View File

@@ -0,0 +1,7 @@
foo # $ foo=val
# with wrong value
foo # $ foo=bad-value
# there is a typo here, so this result is actually missing!
fooo # $ foo=val

View File

@@ -0,0 +1,41 @@
import python
import semmle.python.essa.SsaCompute
import TestUtilities.InlineExpectationsTest
class UseTest extends InlineExpectationsTest {
UseTest() { this = "UseTest" }
override string getARelevantTag() { result in ["use-use", "def-use", "def"] }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(string name | name in ["x", "y"] |
exists(NameNode nodeTo, Location prevLoc |
(
exists(NameNode nodeFrom | AdjacentUses::adjacentUseUse(nodeFrom, nodeTo) |
prevLoc = nodeFrom.getLocation() and
name = nodeFrom.getId() and
tag = "use-use"
)
or
exists(EssaVariable var | AdjacentUses::firstUse(var, nodeTo) |
prevLoc = var.getLocation() and
name = var.getName() and
tag = "def-use"
)
) and
value = name + ":" + prevLoc.getStartLine() and
location = nodeTo.getLocation() and
element = nodeTo.toString()
)
or
exists(EssaVariable var | AdjacentUses::firstUse(var, _) |
value = var.getName() and
location = var.getLocation() and
element = var.getName() and
name = var.getName() and
tag = "def"
)
)
}
}

View File

@@ -0,0 +1,20 @@
class X(object):
def foo(self, *args):
print("X.foo", args)
def func(cond=True):
x = X() # $ def=x
print(x) # $ def-use=x:7
y = x # $ def=y use-use=x:9
print(y) # $ def-use=y:11
x.foo() # $ use-use=x:11
x.foo(1 if cond else 0) # $ use-use=x:14
x.foo() # $ MISSING: use-use=x:16
print(x) # $ use-use=x:17
func()

View File

@@ -6,6 +6,56 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:83:16:83:36 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:84:16:84:43 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:85:16:85:36 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:45:15:45:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:29:1:29:25 | [orm-model] Class Book | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:76:15:76:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:29:1:29:25 | [orm-model] Class Book | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:76:15:76:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:33:1:33:25 | [orm-model] Class PhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:77:27:77:32 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:33:1:33:25 | [orm-model] Class PhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:78:35:78:40 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:33:1:33:25 | [orm-model] Class PhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:93:15:93:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:29:1:29:25 | [orm-model] Class Book | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:93:15:93:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:38:1:38:18 | [orm-model] Class EBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:94:23:94:28 | ControlFlowNode for Str | testapp/orm_inheritance.py:38:1:38:18 | [orm-model] Class EBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:95:35:95:40 | ControlFlowNode for Str | testapp/orm_inheritance.py:38:1:38:18 | [orm-model] Class EBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:133:15:133:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:117:1:117:33 | [orm-model] Class PolyBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:167:15:167:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:117:1:117:33 | [orm-model] Class PolyBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:167:15:167:20 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:121:1:121:33 | [orm-model] Class PolyPhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:168:27:168:32 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:121:1:121:33 | [orm-model] Class PolyPhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:169:35:169:40 | ControlFlowNode for SOURCE | testapp/orm_inheritance.py:121:1:121:33 | [orm-model] Class PolyPhysicalBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:183:15:183:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:117:1:117:33 | [orm-model] Class PolyBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:183:15:183:26 | ControlFlowNode for Str | testapp/orm_inheritance.py:126:1:126:26 | [orm-model] Class PolyEBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:184:23:184:28 | ControlFlowNode for Str | testapp/orm_inheritance.py:126:1:126:26 | [orm-model] Class PolyEBook | Store step does not preserve enclosing callable. |
| testapp/orm_inheritance.py:185:35:185:40 | ControlFlowNode for Str | testapp/orm_inheritance.py:126:1:126:26 | [orm-model] Class PolyEBook | Store step does not preserve enclosing callable. |
| testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person | testapp/orm_security_tests.py:42:23:42:42 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:115:41:115:46 | ControlFlowNode for SOURCE | testapp/orm_tests.py:110:1:110:30 | [orm-model] Class TestSave5 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:131:86:131:91 | ControlFlowNode for SOURCE | testapp/orm_tests.py:126:1:126:30 | [orm-model] Class TestSave6 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:149:89:149:94 | ControlFlowNode for SOURCE | testapp/orm_tests.py:144:1:144:30 | [orm-model] Class TestSave7 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:161:1:161:30 | [orm-model] Class TestSave8 | testapp/orm_tests.py:168:22:168:44 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:165:35:165:39 | ControlFlowNode for Str | testapp/orm_tests.py:161:1:161:30 | [orm-model] Class TestSave8 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:168:58:168:63 | ControlFlowNode for SOURCE | testapp/orm_tests.py:161:1:161:30 | [orm-model] Class TestSave8 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:184:41:184:45 | ControlFlowNode for Str | testapp/orm_tests.py:177:1:177:30 | [orm-model] Class TestSave9 | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:185:49:185:51 | ControlFlowNode for obj | testapp/orm_tests.py:180:1:180:44 | [orm-model] Class TestSave9WithForeignKey | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:212:55:212:59 | ControlFlowNode for Str | testapp/orm_tests.py:206:1:206:35 | [orm-model] Class save10_Comment | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:239:55:239:59 | ControlFlowNode for Str | testapp/orm_tests.py:233:1:233:35 | [orm-model] Class save11_Comment | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:273:1:273:31 | [orm-model] Class TestSave13 | testapp/orm_tests.py:281:12:281:35 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:308:12:308:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:314:12:314:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:320:11:320:32 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:320:11:320:59 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:320:11:320:78 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:324:12:324:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:324:12:324:60 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:324:12:324:79 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:331:12:331:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:337:12:337:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:344:12:344:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:350:12:350:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:356:12:356:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:363:9:363:37 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/tests.py:81:33:81:37 | ControlFlowNode for Str | testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | Store step does not preserve enclosing callable. |
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes

View File

@@ -1,7 +1,7 @@
name: codeql/python-tests
groups: [python, test]
dependencies:
codeql/python-all: "*"
codeql/python-queries: "*"
codeql/python-all: ${workspace}
codeql/python-queries: ${workspace}
extractor: python
tests: .

View File

@@ -1,2 +1,2 @@
| test.py:8:12:8:23 | Str | test.py:8:21:8:23 | \\s+ | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s+$ |
| test.py:9:14:9:29 | Str | test.py:9:27:9:29 | \\d+ | Strings with many repetitions of '99' can start matching anywhere after the start of the preceeding \\d+ |
| test.py:9:14:9:29 | Str | test.py:9:27:9:29 | \\d+ | Strings starting with '0.9' and with many repetitions of '99' can start matching anywhere after the start of the preceeding \\d+ |

View File

@@ -1,5 +1,6 @@
import python
import semmle.python.security.regexp.SuperlinearBackTracking
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
from PolynomialBackTrackingTerm t
select t.getRegex(), t, t.getReason()
select t.(TreeView::RegExpTerm).getRegex(), t, t.getReason()

View File

@@ -16,4 +16,4 @@ nodes
subpaths
#select
| test.py:8:30:8:33 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings starting with '0.9' and with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -2,20 +2,20 @@
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|\|\\n'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t"' and containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t'' and containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t(' and containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '!\|\\n-\|\\n' and containing many repetitions of '\|\|\\n'. |
| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '#' and containing many repetitions of '#'. |
| redos.py:49:41:49:43 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '""'. |
| redos.py:49:47:49:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with ''' and containing many repetitions of ''''. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '$[' and containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '$.$[' and containing many repetitions of ']['. |
| redos.py:60:25:60:30 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:61:25:61:30 | [a-z]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '0'. |
| redos.py:63:26:63:33 | ([a-z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| redos.py:68:26:68:41 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:68:48:68:50 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '[' and containing many repetitions of ']['. |
@@ -51,7 +51,6 @@
| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:199:28:199:29 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:202:26:202:32 | (a+a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:202:27:202:28 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:205:25:205:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:211:25:211:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
@@ -71,16 +70,16 @@
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""\\t0='. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<0\\t0=' and containing many repetitions of '""\\t0='. |
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with '00000000000000' and containing many repetitions of 'e'. |
| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings starting with 'x' and containing many repetitions of 'Xx'. |
| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:319:28:319:33 | [\\w-]* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '-'. |
| redos.py:322:25:322:29 | (ab)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |