mirror of
https://github.com/github/codeql.git
synced 2025-12-21 03:06:31 +01:00
Merge branch 'main' of https://github.com/github/codeql into python-dataflow/flow-summaries-from-scratch
synced files have changed
This commit is contained in:
@@ -1,3 +1,9 @@
|
||||
## 0.5.3
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Change `.getASubclass()` on `API::Node` so it allows to follow subclasses even if the class has a class decorator.
|
||||
|
||||
## 0.5.2
|
||||
|
||||
## 0.5.1
|
||||
|
||||
5
python/ql/lib/change-notes/2022-05-25-redos-refac.md
Normal file
5
python/ql/lib/change-notes/2022-05-25-redos-refac.md
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: deprecated
|
||||
---
|
||||
* The utility files previously in the `semmle.python.security.performance` package have been moved to the `semmle.python.security.regexp` package.
|
||||
The previous files still exist as deprecated aliases.
|
||||
@@ -0,0 +1,6 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Most deprecated predicates/classes/modules that have been deprecated for over a year have been
|
||||
deleted.
|
||||
|
||||
5
python/ql/lib/change-notes/2022-08-22-xml-rename.md
Normal file
5
python/ql/lib/change-notes/2022-08-22-xml-rename.md
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: deprecated
|
||||
---
|
||||
* Many classes/predicates/modules with upper-case acronyms in their name have been renamed to follow our style-guide.
|
||||
The old name still exists as a deprecated alias.
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
## 0.5.3
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Change `.getASubclass()` on `API::Node` so it allows to follow subclasses even if the class has a class decorator.
|
||||
@@ -1,2 +1,2 @@
|
||||
---
|
||||
lastReleaseVersion: 0.5.2
|
||||
lastReleaseVersion: 0.5.3
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name: codeql/python-all
|
||||
version: 0.5.3-dev
|
||||
version: 0.5.4-dev
|
||||
groups: python
|
||||
dbscheme: semmlecode.python.dbscheme
|
||||
extractor: python
|
||||
|
||||
@@ -68,8 +68,6 @@ class ListComp extends ListComp_, Comp {
|
||||
|
||||
override Expr getIterable() { result = ListComp_.super.getIterable() }
|
||||
|
||||
override string toString() { result = ListComp_.super.toString() }
|
||||
|
||||
override Expr getElt() { result = Comp.super.getElt() }
|
||||
}
|
||||
|
||||
|
||||
@@ -616,6 +616,9 @@ private string non_byte_prefix() {
|
||||
not result.charAt(_) in ["b", "B"]
|
||||
}
|
||||
|
||||
/** A string constant. This is a placeholder class -- use `StrConst` instead. */
|
||||
class Str = StrConst;
|
||||
|
||||
/** A string constant. */
|
||||
class StrConst extends Str_, ImmutableLiteral {
|
||||
/* syntax: "hello" */
|
||||
|
||||
@@ -21,7 +21,7 @@ class File extends Container, @file {
|
||||
|
||||
/** Whether this file is a source code file. */
|
||||
predicate fromSource() {
|
||||
/* If we start to analyse .pyc files, then this will have to change. */
|
||||
/* If we start to analyze .pyc files, then this will have to change. */
|
||||
any()
|
||||
}
|
||||
|
||||
|
||||
0
python/ql/lib/semmle/python/Flow.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/Flow.qll
Executable file → Normal file
@@ -2,8 +2,6 @@ import python
|
||||
|
||||
class KeyValuePair extends KeyValuePair_, DictDisplayItem {
|
||||
/* syntax: Expr : Expr */
|
||||
override Location getLocation() { result = KeyValuePair_.super.getLocation() }
|
||||
|
||||
override string toString() { result = KeyValuePair_.super.toString() }
|
||||
|
||||
/** Gets the value of this dictionary unpacking. */
|
||||
@@ -20,8 +18,6 @@ class KeyValuePair extends KeyValuePair_, DictDisplayItem {
|
||||
|
||||
/** A double-starred expression in a call or dict literal. */
|
||||
class DictUnpacking extends DictUnpacking_, DictUnpackingOrKeyword, DictDisplayItem {
|
||||
override Location getLocation() { result = DictUnpacking_.super.getLocation() }
|
||||
|
||||
override string toString() { result = DictUnpacking_.super.toString() }
|
||||
|
||||
/** Gets the value of this dictionary unpacking. */
|
||||
@@ -47,8 +43,6 @@ abstract class DictDisplayItem extends DictItem {
|
||||
/** A keyword argument in a call. For example `arg=expr` in `foo(0, arg=expr)` */
|
||||
class Keyword extends Keyword_, DictUnpackingOrKeyword {
|
||||
/* syntax: name = Expr */
|
||||
override Location getLocation() { result = Keyword_.super.getLocation() }
|
||||
|
||||
override string toString() { result = Keyword_.super.toString() }
|
||||
|
||||
/** Gets the value of this keyword argument. */
|
||||
|
||||
0
python/ql/lib/semmle/python/Scope.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/Scope.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/dataflow/new/TaintTracking3.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/dataflow/new/TaintTracking3.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/dataflow/new/TaintTracking4.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/dataflow/new/TaintTracking4.qll
Executable file → Normal file
@@ -73,9 +73,7 @@ abstract class AttrWrite extends AttrRef {
|
||||
* ```
|
||||
* Also gives access to the `value` being written, by extending `DefinitionNode`.
|
||||
*/
|
||||
private class AttributeAssignmentNode extends DefinitionNode, AttrNode {
|
||||
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
|
||||
}
|
||||
private class AttributeAssignmentNode extends DefinitionNode, AttrNode { }
|
||||
|
||||
/** A simple attribute assignment: `object.attr = value`. */
|
||||
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
|
||||
|
||||
@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
|
||||
else cc instanceof CallContextAny
|
||||
) and
|
||||
sc instanceof SummaryCtxNone and
|
||||
ap instanceof AccessPathNil
|
||||
ap = TAccessPathNil(node.getDataFlowType())
|
||||
}
|
||||
|
||||
predicate isAtSink() {
|
||||
|
||||
@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
|
||||
else cc instanceof CallContextAny
|
||||
) and
|
||||
sc instanceof SummaryCtxNone and
|
||||
ap instanceof AccessPathNil
|
||||
ap = TAccessPathNil(node.getDataFlowType())
|
||||
}
|
||||
|
||||
predicate isAtSink() {
|
||||
|
||||
@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
|
||||
else cc instanceof CallContextAny
|
||||
) and
|
||||
sc instanceof SummaryCtxNone and
|
||||
ap instanceof AccessPathNil
|
||||
ap = TAccessPathNil(node.getDataFlowType())
|
||||
}
|
||||
|
||||
predicate isAtSink() {
|
||||
|
||||
@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
|
||||
else cc instanceof CallContextAny
|
||||
) and
|
||||
sc instanceof SummaryCtxNone and
|
||||
ap instanceof AccessPathNil
|
||||
ap = TAccessPathNil(node.getDataFlowType())
|
||||
}
|
||||
|
||||
predicate isAtSink() {
|
||||
|
||||
@@ -78,7 +78,7 @@ deprecated Node importNode(string name) {
|
||||
// ```
|
||||
//
|
||||
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
|
||||
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
|
||||
// Because named imports are modeled as `AttrRead`s, the statement `from foo import bar as baz`
|
||||
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
|
||||
// reference to `foo.bar`, as desired.
|
||||
exists(ImportExpr imp_expr |
|
||||
|
||||
0
python/ql/lib/semmle/python/dataflow/old/TaintTracking.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/dataflow/old/TaintTracking.qll
Executable file → Normal file
@@ -212,8 +212,8 @@ class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition {
|
||||
/** Gets the SSA variable to which this refinement applies. */
|
||||
EssaVariable getInput() {
|
||||
exists(SsaSourceVariable var, EssaDefinition def |
|
||||
var = this.getSourceVariable() and
|
||||
var = def.getSourceVariable() and
|
||||
pragma[only_bind_into](var) = this.getSourceVariable() and
|
||||
pragma[only_bind_into](var) = def.getSourceVariable() and
|
||||
def.reachesEndOfBlock(this.getPredecessor()) and
|
||||
result.getDefinition() = def
|
||||
)
|
||||
@@ -632,7 +632,8 @@ class DeletionDefinition extends EssaNodeDefinition {
|
||||
*/
|
||||
class ScopeEntryDefinition extends EssaNodeDefinition {
|
||||
ScopeEntryDefinition() {
|
||||
this.getDefiningNode() = this.getSourceVariable().getScopeEntryDefinition() and
|
||||
this.getDefiningNode() =
|
||||
pragma[only_bind_out](this.getSourceVariable()).getScopeEntryDefinition() and
|
||||
not this instanceof ImplicitSubModuleDefinition
|
||||
}
|
||||
|
||||
|
||||
@@ -10,13 +10,6 @@ private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
import semmle.python.frameworks.internal.PEP249Impl
|
||||
|
||||
/**
|
||||
* A module implementing PEP 249. Extend this class for implementations.
|
||||
*
|
||||
* DEPRECATED: Extend `PEP249::PEP249ModuleApiNode` instead.
|
||||
*/
|
||||
abstract deprecated class PEP249Module extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `PEP249::PEP249ModuleApiNode` instead.
|
||||
*/
|
||||
|
||||
@@ -1854,16 +1854,22 @@ private module StdlibPrivate {
|
||||
deprecated API::Node cgiHTTPServer() { result = cgiHttpServer() }
|
||||
|
||||
/** Provides models for the `CGIHTTPServer` module. */
|
||||
module CGIHTTPServer {
|
||||
module CgiHttpServer {
|
||||
/**
|
||||
* Provides models for the `CGIHTTPServer.CGIHTTPRequestHandler` class (Python 2 only).
|
||||
*/
|
||||
module CGIHTTPRequestHandler {
|
||||
/** Gets a reference to the `CGIHTTPServer.CGIHTTPRequestHandler` class. */
|
||||
module CgiHttpRequestHandler {
|
||||
/** Gets a reference to the `CGIHTTPServer.CgiHttpRequestHandler` class. */
|
||||
API::Node classRef() { result = cgiHttpServer().getMember("CGIHTTPRequestHandler") }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for CgiHttpRequestHandler */
|
||||
deprecated module CGIHTTPRequestHandler = CgiHttpRequestHandler;
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for CgiHttpServer */
|
||||
deprecated module CGIHTTPServer = CgiHttpServer;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// http (Python 3 only)
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -1911,10 +1917,13 @@ private module StdlibPrivate {
|
||||
*
|
||||
* See https://docs.python.org/3.9/library/http.server.html#http.server.CGIHTTPRequestHandler.
|
||||
*/
|
||||
module CGIHTTPRequestHandler {
|
||||
module CgiHttpRequestHandler {
|
||||
/** Gets a reference to the `http.server.CGIHTTPRequestHandler` class. */
|
||||
API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for CgiHttpRequestHandler */
|
||||
deprecated module CGIHTTPRequestHandler = CgiHttpRequestHandler;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1933,11 +1942,11 @@ private module StdlibPrivate {
|
||||
// Python 2
|
||||
BaseHttpServer::BaseHttpRequestHandler::classRef(),
|
||||
SimpleHttpServer::SimpleHttpRequestHandler::classRef(),
|
||||
CGIHTTPServer::CGIHTTPRequestHandler::classRef(),
|
||||
CgiHttpServer::CgiHttpRequestHandler::classRef(),
|
||||
// Python 3
|
||||
Http::Server::BaseHttpRequestHandler::classRef(),
|
||||
Http::Server::SimpleHttpRequestHandler::classRef(),
|
||||
Http::Server::CGIHTTPRequestHandler::classRef()
|
||||
Http::Server::CgiHttpRequestHandler::classRef()
|
||||
].getASubclass*()
|
||||
}
|
||||
|
||||
|
||||
@@ -259,11 +259,6 @@ private module WerkzeugOld {
|
||||
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
|
||||
*/
|
||||
deprecated module MultiDict {
|
||||
/**
|
||||
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
|
||||
*/
|
||||
abstract deprecated class InstanceSource extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
|
||||
*
|
||||
@@ -312,11 +307,6 @@ private module WerkzeugOld {
|
||||
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
|
||||
*/
|
||||
deprecated module FileStorage {
|
||||
/**
|
||||
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
|
||||
*/
|
||||
abstract deprecated class InstanceSource extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
|
||||
*
|
||||
|
||||
@@ -42,7 +42,7 @@ private module NotExposed {
|
||||
// Implementation below
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// We are looking to find all subclassed of the already modelled classes, and ideally
|
||||
// We are looking to find all subclassed of the already modeled classes, and ideally
|
||||
// we would identify an `API::Node` for each (then `toString` would give the API
|
||||
// path).
|
||||
//
|
||||
|
||||
@@ -273,6 +273,41 @@ predicate builtin_name_points_to(string name, Object value, ClassObject cls) {
|
||||
value = Object::builtin(name) and cls.asBuiltin() = value.asBuiltin().getClass()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate essa_var_scope(SsaSourceVariable var, Scope pred_scope, EssaVariable pred_var) {
|
||||
BaseFlow::reaches_exit(pred_var) and
|
||||
pred_var.getScope() = pred_scope and
|
||||
var = pred_var.getSourceVariable()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate scope_entry_def_scope(
|
||||
SsaSourceVariable var, Scope succ_scope, ScopeEntryDefinition succ_def
|
||||
) {
|
||||
var = succ_def.getSourceVariable() and
|
||||
succ_def.getScope() = succ_scope
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate step_through_init(Scope succ_scope, Scope pred_scope, Scope init) {
|
||||
init.getName() = "__init__" and
|
||||
init.precedes(succ_scope) and
|
||||
pred_scope.precedes(init)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate scope_entry_value_transfer_through_init(
|
||||
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
|
||||
) {
|
||||
exists(SsaSourceVariable var, Scope init |
|
||||
var instanceof GlobalVariable and
|
||||
essa_var_scope(var, pragma[only_bind_into](pred_scope), pred_var) and
|
||||
scope_entry_def_scope(var, succ_scope, succ_def) and
|
||||
step_through_init(succ_scope, pragma[only_bind_into](pred_scope), init) and
|
||||
not var.(Variable).getAStore().getScope() = init
|
||||
)
|
||||
}
|
||||
|
||||
module BaseFlow {
|
||||
predicate reaches_exit(EssaVariable var) { var.getAUse() = var.getScope().getANormalExit() }
|
||||
|
||||
@@ -283,27 +318,15 @@ module BaseFlow {
|
||||
) {
|
||||
Stages::DataFlow::ref() and
|
||||
exists(SsaSourceVariable var |
|
||||
reaches_exit(pred_var) and
|
||||
pred_var.getScope() = pred_scope and
|
||||
var = pred_var.getSourceVariable() and
|
||||
var = succ_def.getSourceVariable() and
|
||||
succ_def.getScope() = succ_scope
|
||||
essa_var_scope(var, pred_scope, pred_var) and
|
||||
scope_entry_def_scope(var, succ_scope, succ_def)
|
||||
|
|
||||
pred_scope.precedes(succ_scope)
|
||||
or
|
||||
/*
|
||||
* If an `__init__` method does not modify the global variable, then
|
||||
* we can skip it and take the value directly from the module.
|
||||
*/
|
||||
|
||||
exists(Scope init |
|
||||
init.getName() = "__init__" and
|
||||
init.precedes(succ_scope) and
|
||||
pred_scope.precedes(init) and
|
||||
not var.(Variable).getAStore().getScope() = init and
|
||||
var instanceof GlobalVariable
|
||||
)
|
||||
)
|
||||
or
|
||||
// If an `__init__` method does not modify the global variable, then
|
||||
// we can skip it and take the value directly from the module.
|
||||
scope_entry_value_transfer_through_init(pred_var, pred_scope, succ_def, succ_scope)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1271,12 +1271,21 @@ module InterProceduralPointsTo {
|
||||
)
|
||||
)
|
||||
or
|
||||
non_escaping_global_transfer(pred_var, pred_context, succ_def, succ_context)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate non_escaping_global_transfer(
|
||||
EssaVariable pred_var, PointsToContext pred_context, ScopeEntryDefinition succ_def,
|
||||
PointsToContext succ_context
|
||||
) {
|
||||
exists(NonEscapingGlobalVariable var |
|
||||
var = pred_var.getSourceVariable() and
|
||||
var = succ_def.getSourceVariable() and
|
||||
pred_var.getAUse() = succ_context.getRootCall() and
|
||||
pred_context.isImport() and
|
||||
succ_context.appliesToScope(succ_def.getScope())
|
||||
pragma[only_bind_into](succ_context)
|
||||
.appliesToScope(pragma[only_bind_into](succ_def).getScope())
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import python
|
||||
deprecated import semmle.python.objects.ObjectInternal as OI
|
||||
private import semmle.python.ApiGraphs
|
||||
// Need to import since frameworks can extend the abstract `RegexString`
|
||||
private import semmle.python.Frameworks
|
||||
@@ -98,19 +97,6 @@ private DataFlow::Node re_flag_tracker(string flag_name) {
|
||||
/** Gets a regular expression mode flag associated with the given data flow node. */
|
||||
string mode_from_node(DataFlow::Node node) { node = re_flag_tracker(result) }
|
||||
|
||||
/**
|
||||
* DEPRECATED 2021-02-24 -- use `mode_from_node` instead.
|
||||
*
|
||||
* Gets a regular expression mode flag associated with the given value.
|
||||
*/
|
||||
deprecated string mode_from_mode_object(Value obj) {
|
||||
result in ["DEBUG", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "UNICODE", "VERBOSE"] and
|
||||
exists(int flag |
|
||||
flag = Value::named("sre_constants.SRE_FLAG_" + result).(OI::ObjectInternal).intValue() and
|
||||
obj.(OI::ObjectInternal).intValue().bitAnd(flag) = flag
|
||||
)
|
||||
}
|
||||
|
||||
/** A StrConst used as a regular expression */
|
||||
abstract class RegexString extends Expr {
|
||||
RegexString() { (this instanceof Bytes or this instanceof Unicode) }
|
||||
|
||||
@@ -2,196 +2,27 @@
|
||||
* Provides precicates for reasoning about bad tag filter vulnerabilities.
|
||||
*/
|
||||
|
||||
import performance.ReDoSUtil
|
||||
import regexp.RegexpMatching
|
||||
|
||||
/**
|
||||
* A module for determining if a regexp matches a given string,
|
||||
* and reasoning about which capture groups are filled by a given string.
|
||||
* Holds if the regexp `root` should be tested against `str`.
|
||||
* Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`.
|
||||
* `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored.
|
||||
* `testWithGroups` toggles whether it's tested which groups are filled by a given input string.
|
||||
*/
|
||||
private module RegexpMatching {
|
||||
/**
|
||||
* A class to test whether a regular expression matches a string.
|
||||
* Override this class and extend `test`/`testWithGroups` to configure which strings should be tested for acceptance by this regular expression.
|
||||
* The result can afterwards be read from the `matches` predicate.
|
||||
*
|
||||
* Strings in the `testWithGroups` predicate are also tested for which capture groups are filled by the given string.
|
||||
* The result is available in the `fillCaptureGroup` predicate.
|
||||
*/
|
||||
abstract class MatchedRegExp extends RegExpTerm {
|
||||
MatchedRegExp() { this.isRootTerm() }
|
||||
|
||||
/**
|
||||
* Holds if it should be tested whether this regular expression matches `str`.
|
||||
*
|
||||
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
|
||||
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
|
||||
* but if `ignorePrefix` is true, it will only match "foo".
|
||||
*/
|
||||
predicate test(string str, boolean ignorePrefix) {
|
||||
none() // maybe overridden in subclasses
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as `test(..)`, but where the `fillsCaptureGroup` afterwards tells which capture groups were filled by the given string.
|
||||
*/
|
||||
predicate testWithGroups(string str, boolean ignorePrefix) {
|
||||
none() // maybe overridden in subclasses
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this RegExp matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
|
||||
*/
|
||||
final predicate matches(string str) {
|
||||
exists(State state | state = getAState(this, str.length() - 1, str, _) |
|
||||
epsilonSucc*(state) = Accept(_)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if matching `str` may fill capture group number `g`.
|
||||
* Only holds if `str` is in the `testWithGroups` predicate.
|
||||
*/
|
||||
final predicate fillsCaptureGroup(string str, int g) {
|
||||
exists(State s |
|
||||
s = getAStateThatReachesAccept(this, _, str, _) and
|
||||
g = group(s.getRepr())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
|
||||
* The regular expression is modeled as a non-determistic finite automaton,
|
||||
* the regular expression can therefore be in multiple states after matching a character.
|
||||
*
|
||||
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
|
||||
*/
|
||||
private State getAState(MatchedRegExp reg, int i, string str, boolean ignorePrefix) {
|
||||
// start state, the -1 position before any chars have been matched
|
||||
i = -1 and
|
||||
(
|
||||
reg.test(str, ignorePrefix)
|
||||
or
|
||||
reg.testWithGroups(str, ignorePrefix)
|
||||
) and
|
||||
result.getRepr().getRootTerm() = reg and
|
||||
isStartState(result)
|
||||
private predicate isBadTagFilterCandidate(
|
||||
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
|
||||
) {
|
||||
// the regexp must mention "<" and ">" explicitly.
|
||||
forall(string angleBracket | angleBracket = ["<", ">"] |
|
||||
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
|
||||
root
|
||||
) and
|
||||
ignorePrefix = true and
|
||||
(
|
||||
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
|
||||
testWithGroups = true
|
||||
or
|
||||
// recursive case
|
||||
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next state after the `prev` state from `reg`.
|
||||
* `prev` is the state after matching `fromIndex` chars in `str`,
|
||||
* and the result is the state after matching `toIndex` chars in `str`.
|
||||
*
|
||||
* This predicate is used as a step relation in the forwards search (`getAState`),
|
||||
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
|
||||
*/
|
||||
private State getAStateAfterMatching(
|
||||
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
|
||||
) {
|
||||
// the basic recursive case - outlined into a noopt helper to make performance work out.
|
||||
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
|
||||
or
|
||||
// we can skip past word boundaries if the next char is a non-word char.
|
||||
fromIndex = toIndex and
|
||||
prev.getRepr() instanceof RegExpWordBoundary and
|
||||
prev = getAState(reg, toIndex, str, ignorePrefix) and
|
||||
after(prev.getRepr()) = result and
|
||||
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
|
||||
}
|
||||
|
||||
pragma[noopt]
|
||||
private State getAStateAfterMatchingAux(
|
||||
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
|
||||
) {
|
||||
prev = getAState(reg, fromIndex, str, ignorePrefix) and
|
||||
fromIndex = toIndex - 1 and
|
||||
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
|
||||
not discardedPrefixStep(prev, result, ignorePrefix)
|
||||
}
|
||||
|
||||
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
|
||||
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
|
||||
prev = mkMatch(any(RegExpRoot r)) and
|
||||
ignorePrefix = true and
|
||||
next = prev
|
||||
}
|
||||
|
||||
// The `deltaClosed` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
|
||||
private predicate specializedDeltaClosed(State prev, string char, State next) {
|
||||
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
|
||||
}
|
||||
|
||||
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
|
||||
pragma[noinline]
|
||||
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
|
||||
exists(string s, MatchedRegExp r |
|
||||
r.test(s, _)
|
||||
or
|
||||
r.testWithGroups(s, _)
|
||||
|
|
||||
char = s.charAt(_)
|
||||
) and
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
|
||||
* Starts with an accepting state as found by `getAState` and searches backwards
|
||||
* to the start state through the reachable states (as found by `getAState`).
|
||||
*
|
||||
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
|
||||
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
|
||||
* The result state is therefore always on a valid path where `reg` accepts `str`.
|
||||
*
|
||||
* This predicate is only used to find which capture groups a regular expression has filled,
|
||||
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
|
||||
*/
|
||||
private State getAStateThatReachesAccept(
|
||||
MatchedRegExp reg, int i, string str, boolean ignorePrefix
|
||||
) {
|
||||
// base case, reaches an accepting state from the last state in `getAState(..)`
|
||||
reg.testWithGroups(str, ignorePrefix) and
|
||||
i = str.length() - 1 and
|
||||
result = getAState(reg, i, str, ignorePrefix) and
|
||||
epsilonSucc*(result) = Accept(_)
|
||||
or
|
||||
// recursive case. `next` is the next state to be matched after matching `prev`.
|
||||
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
|
||||
exists(State next, State prev, int fromIndex, int toIndex |
|
||||
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
|
||||
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
|
||||
i = fromIndex and
|
||||
result = prev
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the capture group number that `term` belongs to. */
|
||||
private int group(RegExpTerm term) {
|
||||
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
|
||||
}
|
||||
}
|
||||
|
||||
/** A class to test whether a regular expression matches certain HTML tags. */
|
||||
class HtmlMatchingRegExp extends RegexpMatching::MatchedRegExp {
|
||||
HtmlMatchingRegExp() {
|
||||
// the regexp must mention "<" and ">" explicitly.
|
||||
forall(string angleBracket | angleBracket = ["<", ">"] |
|
||||
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
|
||||
this
|
||||
)
|
||||
}
|
||||
|
||||
override predicate testWithGroups(string str, boolean ignorePrefix) {
|
||||
ignorePrefix = true and
|
||||
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"]
|
||||
}
|
||||
|
||||
override predicate test(string str, boolean ignorePrefix) {
|
||||
ignorePrefix = true and
|
||||
str =
|
||||
[
|
||||
"<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
|
||||
@@ -200,7 +31,23 @@ class HtmlMatchingRegExp extends RegexpMatching::MatchedRegExp {
|
||||
"<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
|
||||
"<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
|
||||
"<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
|
||||
]
|
||||
] and
|
||||
testWithGroups = false
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A regexp that matches some string from the `isBadTagFilterCandidate` predicate.
|
||||
*/
|
||||
class HtmlMatchingRegExp extends RootTerm {
|
||||
HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) }
|
||||
|
||||
/** Holds if this regexp matched `str`, where `str` is one of the string from `isBadTagFilterCandidate`. */
|
||||
predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) }
|
||||
|
||||
/** Holds if this regexp fills capture group `g' when matching `str', where `str` is one of the string from `isBadTagFilterCandidate`. */
|
||||
predicate fillsCaptureGroup(string str, int g) {
|
||||
RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
281
python/ql/lib/semmle/python/security/OverlyLargeRangeQuery.qll
Normal file
281
python/ql/lib/semmle/python/security/OverlyLargeRangeQuery.qll
Normal file
@@ -0,0 +1,281 @@
|
||||
/**
|
||||
* Classes and predicates for working with suspicious character ranges.
|
||||
*/
|
||||
|
||||
// We don't need the NFA utils, just the regexp tree.
|
||||
// but the below is a nice shared library that exposes the API we need.
|
||||
import regexp.NfaUtils
|
||||
|
||||
/**
|
||||
* Gets a rank for `range` that is unique for ranges in the same file.
|
||||
* Prioritizes ranges that match more characters.
|
||||
*/
|
||||
int rankRange(RegExpCharacterRange range) {
|
||||
range =
|
||||
rank[result](RegExpCharacterRange r, Location l, int low, int high |
|
||||
r.getLocation() = l and
|
||||
isRange(r, low, high)
|
||||
|
|
||||
r order by (high - low) desc, l.getStartLine(), l.getStartColumn()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `range` spans from the unicode code points `low` to `high` (both inclusive). */
|
||||
predicate isRange(RegExpCharacterRange range, int low, int high) {
|
||||
exists(string lowc, string highc |
|
||||
range.isRange(lowc, highc) and
|
||||
low.toUnicode() = lowc and
|
||||
high.toUnicode() = highc
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `char` is an alpha-numeric character. */
|
||||
predicate isAlphanumeric(string char) {
|
||||
// written like this to avoid having a bindingset for the predicate
|
||||
char = [[48 .. 57], [65 .. 90], [97 .. 122]].toUnicode() // 0-9, A-Z, a-z
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the given ranges are from the same character class
|
||||
* and there exists at least one character matched by both ranges.
|
||||
*/
|
||||
predicate overlap(RegExpCharacterRange a, RegExpCharacterRange b) {
|
||||
exists(RegExpCharacterClass clz |
|
||||
a = clz.getAChild() and
|
||||
b = clz.getAChild() and
|
||||
a != b
|
||||
|
|
||||
exists(int alow, int ahigh, int blow, int bhigh |
|
||||
isRange(a, alow, ahigh) and
|
||||
isRange(b, blow, bhigh) and
|
||||
alow <= bhigh and
|
||||
blow <= ahigh
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `range` overlaps with the char class `escape` from the same character class.
|
||||
*/
|
||||
predicate overlapsWithCharEscape(RegExpCharacterRange range, RegExpCharacterClassEscape escape) {
|
||||
exists(RegExpCharacterClass clz, string low, string high |
|
||||
range = clz.getAChild() and
|
||||
escape = clz.getAChild() and
|
||||
range.isRange(low, high)
|
||||
|
|
||||
escape.getValue() = "w" and
|
||||
getInRange(low, high).regexpMatch("\\w")
|
||||
or
|
||||
escape.getValue() = "d" and
|
||||
getInRange(low, high).regexpMatch("\\d")
|
||||
or
|
||||
escape.getValue() = "s" and
|
||||
getInRange(low, high).regexpMatch("\\s")
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the unicode code point for a `char`. */
|
||||
bindingset[char]
|
||||
int toCodePoint(string char) { result.toUnicode() = char }
|
||||
|
||||
/** A character range that appears to be overly wide. */
|
||||
class OverlyWideRange extends RegExpCharacterRange {
|
||||
OverlyWideRange() {
|
||||
exists(int low, int high, int numChars |
|
||||
isRange(this, low, high) and
|
||||
numChars = (1 + high - low) and
|
||||
this.getRootTerm().isUsedAsRegExp() and
|
||||
numChars >= 10
|
||||
|
|
||||
// across the Z-a range (which includes backticks)
|
||||
toCodePoint("Z") >= low and
|
||||
toCodePoint("a") <= high
|
||||
or
|
||||
// across the 9-A range (which includes e.g. ; and ?)
|
||||
toCodePoint("9") >= low and
|
||||
toCodePoint("A") <= high
|
||||
or
|
||||
// a non-alphanumeric char as part of the range boundaries
|
||||
exists(int bound | bound = [low, high] | not isAlphanumeric(bound.toUnicode()))
|
||||
) and
|
||||
// allowlist for known ranges
|
||||
not this = allowedWideRanges()
|
||||
}
|
||||
|
||||
/** Gets a string representation of a character class that matches the same chars as this range. */
|
||||
string printEquivalent() { result = RangePrinter::printEquivalentCharClass(this) }
|
||||
}
|
||||
|
||||
/** Gets a range that should not be reported as an overly wide range. */
|
||||
RegExpCharacterRange allowedWideRanges() {
|
||||
// ~ is the last printable ASCII character, it's used right in various wide ranges.
|
||||
result.isRange(_, "~")
|
||||
or
|
||||
// the same with " " and "!". " " is the first printable character, and "!" is the first non-white-space printable character.
|
||||
result.isRange([" ", "!"], _)
|
||||
or
|
||||
// the `[@-_]` range is intentional
|
||||
result.isRange("@", "_")
|
||||
or
|
||||
// starting from the zero byte is a good indication that it's purposely matching a large range.
|
||||
result.isRange(0.toUnicode(), _)
|
||||
}
|
||||
|
||||
/** Gets a char between (and including) `low` and `high`. */
|
||||
bindingset[low, high]
|
||||
private string getInRange(string low, string high) {
|
||||
result = [toCodePoint(low) .. toCodePoint(high)].toUnicode()
|
||||
}
|
||||
|
||||
/** A module computing an equivalent character class for an overly wide range. */
|
||||
module RangePrinter {
|
||||
bindingset[char]
|
||||
bindingset[result]
|
||||
private string next(string char) {
|
||||
exists(int prev, int next |
|
||||
prev.toUnicode() = char and
|
||||
next.toUnicode() = result and
|
||||
next = prev + 1
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the points where the parts of the pretty printed range should be cut off. */
|
||||
private string cutoffs() { result = ["A", "Z", "a", "z", "0", "9"] }
|
||||
|
||||
/** Gets the char to use in the low end of a range for a given `cut` */
|
||||
private string lowCut(string cut) {
|
||||
cut = ["A", "a", "0"] and
|
||||
result = cut
|
||||
or
|
||||
cut = ["Z", "z", "9"] and
|
||||
result = next(cut)
|
||||
}
|
||||
|
||||
/** Gets the char to use in the high end of a range for a given `cut` */
|
||||
private string highCut(string cut) {
|
||||
cut = ["Z", "z", "9"] and
|
||||
result = cut
|
||||
or
|
||||
cut = ["A", "a", "0"] and
|
||||
next(result) = cut
|
||||
}
|
||||
|
||||
/** Gets the cutoff char used for a given `part` of a range when pretty-printing it. */
|
||||
private string cutoff(OverlyWideRange range, int part) {
|
||||
exists(int low, int high | isRange(range, low, high) |
|
||||
result =
|
||||
rank[part + 1](string cut |
|
||||
cut = cutoffs() and low < toCodePoint(cut) and toCodePoint(cut) < high
|
||||
|
|
||||
cut order by toCodePoint(cut)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the number of parts we should print for a given `range`. */
|
||||
private int parts(OverlyWideRange range) { result = 1 + strictcount(cutoff(range, _)) }
|
||||
|
||||
/** Holds if the given part of a range should span from `low` to `high`. */
|
||||
private predicate part(OverlyWideRange range, int part, string low, string high) {
|
||||
// first part.
|
||||
part = 0 and
|
||||
(
|
||||
range.isRange(low, high) and
|
||||
parts(range) = 1
|
||||
or
|
||||
parts(range) >= 2 and
|
||||
range.isRange(low, _) and
|
||||
high = highCut(cutoff(range, part))
|
||||
)
|
||||
or
|
||||
// middle
|
||||
part >= 1 and
|
||||
part < parts(range) - 1 and
|
||||
low = lowCut(cutoff(range, part - 1)) and
|
||||
high = highCut(cutoff(range, part))
|
||||
or
|
||||
// last.
|
||||
part = parts(range) - 1 and
|
||||
low = lowCut(cutoff(range, part - 1)) and
|
||||
range.isRange(_, high)
|
||||
}
|
||||
|
||||
/** Gets an escaped `char` for use in a character class. */
|
||||
bindingset[char]
|
||||
private string escape(string char) {
|
||||
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
|
||||
if char.regexpMatch(reg) then result = "\\" + char else result = char
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a part of the equivalent range. */
|
||||
private string printEquivalentCharClass(OverlyWideRange range, int part) {
|
||||
exists(string low, string high | part(range, part, low, high) |
|
||||
if
|
||||
isAlphanumeric(low) and
|
||||
isAlphanumeric(high)
|
||||
then result = low + "-" + high
|
||||
else
|
||||
result =
|
||||
strictconcat(string char | char = getInRange(low, high) | escape(char) order by char)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the entire pretty printed equivalent range. */
|
||||
string printEquivalentCharClass(OverlyWideRange range) {
|
||||
result =
|
||||
strictconcat(string r, int part |
|
||||
r = "[" and part = -1 and exists(range)
|
||||
or
|
||||
r = printEquivalentCharClass(range, part)
|
||||
or
|
||||
r = "]" and part = parts(range)
|
||||
|
|
||||
r order by part
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets a char range that is overly large because of `reason`. */
|
||||
RegExpCharacterRange getABadRange(string reason, int priority) {
|
||||
priority = 0 and
|
||||
reason = "is equivalent to " + result.(OverlyWideRange).printEquivalent()
|
||||
or
|
||||
priority = 1 and
|
||||
exists(RegExpCharacterRange other |
|
||||
reason = "overlaps with " + other + " in the same character class" and
|
||||
rankRange(result) < rankRange(other) and
|
||||
overlap(result, other)
|
||||
)
|
||||
or
|
||||
priority = 2 and
|
||||
exists(RegExpCharacterClassEscape escape |
|
||||
reason = "overlaps with " + escape + " in the same character class" and
|
||||
overlapsWithCharEscape(result, escape)
|
||||
)
|
||||
or
|
||||
reason = "is empty" and
|
||||
priority = 3 and
|
||||
exists(int low, int high |
|
||||
isRange(result, low, high) and
|
||||
low > high
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `range` matches suspiciously many characters. */
|
||||
predicate problem(RegExpCharacterRange range, string reason) {
|
||||
reason =
|
||||
strictconcat(string m, int priority |
|
||||
range = getABadRange(m, priority)
|
||||
|
|
||||
m, ", and " order by priority desc
|
||||
) and
|
||||
// specifying a range using an escape is usually OK.
|
||||
not range.getAChild() instanceof RegExpEscape and
|
||||
// Unicode escapes in strings are interpreted before it turns into a regexp,
|
||||
// so e.g. [\u0001-\uFFFF] will just turn up as a range between two constants.
|
||||
// We therefore exclude these ranges.
|
||||
range.getRootTerm().getParent() instanceof RegExpLiteral and
|
||||
// is used as regexp (mostly for JS where regular expressions are parsed eagerly)
|
||||
range.getRootTerm().isUsedAsRegExp()
|
||||
}
|
||||
0
python/ql/lib/semmle/python/security/TaintTracking.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/security/TaintTracking.qll
Executable file → Normal file
@@ -106,16 +106,6 @@ module HeuristicNames {
|
||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `maybeSensitiveRegexp` instead.
|
||||
*/
|
||||
deprecated predicate maybeSensitive = maybeSensitiveRegexp/1;
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `notSensitiveRegexp` instead.
|
||||
*/
|
||||
deprecated predicate notSensitive = notSensitiveRegexp/0;
|
||||
|
||||
/**
|
||||
* Holds if `name` may indicate the presence of sensitive data, and
|
||||
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
||||
|
||||
@@ -1,374 +1,4 @@
|
||||
/**
|
||||
* This library implements the analysis described in the following two papers:
|
||||
*
|
||||
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
|
||||
* Regular Expression Denial-of-Service Attacks. NSS 2013.
|
||||
* (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
|
||||
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
|
||||
* Exponential Runtime via Substructural Logics. 2014.
|
||||
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
|
||||
*
|
||||
* The basic idea is to search for overlapping cycles in the NFA, that is,
|
||||
* states `q` such that there are two distinct paths from `q` to itself
|
||||
* that consume the same word `w`.
|
||||
*
|
||||
* For any such state `q`, an attack string can be constructed as follows:
|
||||
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
|
||||
* the word `w` that leads back to `q` along two different paths, followed
|
||||
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
|
||||
* implementation will need to explore at least 2^n different ways of going
|
||||
* from `q` back to itself while trying to match the `n` copies of `w`
|
||||
* before finally giving up.
|
||||
*
|
||||
* Now in order to identify overlapping cycles, all we have to do is find
|
||||
* pumpable forks, that is, states `q` that can transition to two different
|
||||
* states `r1` and `r2` on the same input symbol `c`, such that there are
|
||||
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
|
||||
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
|
||||
* in the product NFA.
|
||||
*
|
||||
* This is what the library does. It makes a simple attempt to construct a
|
||||
* prefix `v` leading into `q`, but only to improve the alert message.
|
||||
* And the library tries to prove the existence of a suffix that ensures
|
||||
* rejection. This check might fail, which can cause false positives.
|
||||
*
|
||||
* Finally, sometimes it depends on the translation whether the NFA generated
|
||||
* for a regular expression has a pumpable fork or not. We implement one
|
||||
* particular translation, which may result in false positives or negatives
|
||||
* relative to some particular JavaScript engine.
|
||||
*
|
||||
* More precisely, the library constructs an NFA from a regular expression `r`
|
||||
* as follows:
|
||||
*
|
||||
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
|
||||
* the state of the automaton before attempting to match the `i`th character in `t`.
|
||||
* * There is one accepting state `Accept(r)`.
|
||||
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
|
||||
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
|
||||
* * Transitions between states may be labelled with epsilon, or an abstract
|
||||
* input symbol.
|
||||
* * Each abstract input symbol represents a set of concrete input characters:
|
||||
* either a single character, a set of characters represented by a
|
||||
* character class, or the set of all characters.
|
||||
* * The product automaton is constructed lazily, starting with pair states
|
||||
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
|
||||
* step relation.
|
||||
* * The over-approximate step relation allows transitions along pairs of
|
||||
* abstract input symbols where the symbols have overlap in the characters they accept.
|
||||
* * Once a trace of pairs of abstract input symbols that leads from a fork
|
||||
* back to itself has been identified, we attempt to construct a concrete
|
||||
* string corresponding to it, which may fail.
|
||||
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
|
||||
* a suffix `x` (possible empty) that is most likely __not__ accepted.
|
||||
*/
|
||||
/** DEPRECATED. Import `semmle.python.security.regexp.ExponentialBackTracking` instead. */
|
||||
|
||||
import ReDoSUtil
|
||||
|
||||
/**
|
||||
* Holds if state `s` might be inside a backtracking repetition.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate stateInsideBacktracking(State s) {
|
||||
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
|
||||
}
|
||||
|
||||
/**
|
||||
* A infinitely repeating quantifier that might backtrack.
|
||||
*/
|
||||
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
|
||||
MaybeBacktrackingRepetition() {
|
||||
exists(RegExpTerm child |
|
||||
child instanceof RegExpAlt or
|
||||
child instanceof RegExpQuantifier
|
||||
|
|
||||
child.getParent+() = this
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A state in the product automaton.
|
||||
*/
|
||||
private newtype TStatePair =
|
||||
/**
|
||||
* We lazily only construct those states that we are actually
|
||||
* going to need: `(q, q)` for every fork state `q`, and any
|
||||
* pair of states that can be reached from a pair that we have
|
||||
* already constructed. To cut down on the number of states,
|
||||
* we only represent states `(q1, q2)` where `q1` is lexicographically
|
||||
* no bigger than `q2`.
|
||||
*
|
||||
* States are only constructed if both states in the pair are
|
||||
* inside a repetition that might backtrack.
|
||||
*/
|
||||
MkStatePair(State q1, State q2) {
|
||||
isFork(q1, _, _, _, _) and q2 = q1
|
||||
or
|
||||
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
|
||||
rankState(q1) <= rankState(q2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a unique number for a `state`.
|
||||
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
|
||||
*/
|
||||
private int rankState(State state) {
|
||||
state =
|
||||
rank[result](State s, Location l |
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s order by l.getStartLine(), l.getStartColumn(), s.toString()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A state in the product automaton.
|
||||
*/
|
||||
private class StatePair extends TStatePair {
|
||||
State q1;
|
||||
State q2;
|
||||
|
||||
StatePair() { this = MkStatePair(q1, q2) }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "(" + q1 + ", " + q2 + ")" }
|
||||
|
||||
/** Gets the first component of the state pair. */
|
||||
State getLeft() { result = q1 }
|
||||
|
||||
/** Gets the second component of the state pair. */
|
||||
State getRight() { result = q2 }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds for `(fork, fork)` state pairs when `isFork(fork, _, _, _, _)` holds.
|
||||
*
|
||||
* Used in `statePairDistToFork`
|
||||
*/
|
||||
private predicate isStatePairFork(StatePair p) {
|
||||
exists(State fork | p = MkStatePair(fork, fork) and isFork(fork, _, _, _, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to the corresponding
|
||||
* components of `r`.
|
||||
*
|
||||
* Used in `statePairDistToFork`
|
||||
*/
|
||||
private predicate reverseStep(StatePair r, StatePair q) { step(q, _, _, r) }
|
||||
|
||||
/**
|
||||
* Gets the minimum length of a path from `q` to `r` in the
|
||||
* product automaton.
|
||||
*/
|
||||
private int statePairDistToFork(StatePair q, StatePair r) =
|
||||
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
|
||||
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
|
||||
* trivially have an empty intersection.
|
||||
*
|
||||
* This predicate only holds for states associated with regular expressions
|
||||
* that have at least one repetition quantifier in them (otherwise the
|
||||
* expression cannot be vulnerable to ReDoS attacks anyway).
|
||||
*/
|
||||
pragma[noopt]
|
||||
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
|
||||
stateInsideBacktracking(q) and
|
||||
exists(State q1, State q2 |
|
||||
q1 = epsilonSucc*(q) and
|
||||
delta(q1, s1, r1) and
|
||||
q2 = epsilonSucc*(q) and
|
||||
delta(q2, s2, r2) and
|
||||
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
|
||||
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
|
||||
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
|
||||
exists(intersect(s1, s2))
|
||||
|
|
||||
s1 != s2
|
||||
or
|
||||
r1 != r2
|
||||
or
|
||||
r1 = r2 and q1 != q2
|
||||
or
|
||||
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
|
||||
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
|
||||
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
|
||||
// To avoid every state in the loop becoming a fork state,
|
||||
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
|
||||
// (every epsilon-loop must contain such a state).
|
||||
//
|
||||
// We additionally require that the there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
|
||||
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
|
||||
// The below code is therefore a heuritic, that only flags regular expressions such as `/(a*)*b/`,
|
||||
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
|
||||
r1 = r2 and
|
||||
q1 = q2 and
|
||||
epsilonSucc+(q) = q and
|
||||
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
|
||||
// One of the mid states is an infinite quantifier itself
|
||||
exists(State mid, RegExpTerm term |
|
||||
mid = epsilonSucc+(q) and
|
||||
term = mid.getRepr() and
|
||||
term instanceof InfiniteRepetitionQuantifier and
|
||||
q = epsilonSucc+(mid) and
|
||||
not mid = q
|
||||
)
|
||||
) and
|
||||
stateInsideBacktracking(r1) and
|
||||
stateInsideBacktracking(r2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
|
||||
* one or the other is defined.
|
||||
*/
|
||||
private StatePair mkStatePair(State q1, State q2) {
|
||||
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to the corresponding
|
||||
* components of `r` labelled with `s1` and `s2`, respectively.
|
||||
*/
|
||||
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
|
||||
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to `r1` and `r2`
|
||||
* labelled with `s1` and `s2`, respectively.
|
||||
*
|
||||
* We only consider transitions where the resulting states `(r1, r2)` are both
|
||||
* inside a repetition that might backtrack.
|
||||
*/
|
||||
pragma[noopt]
|
||||
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
|
||||
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
|
||||
deltaClosed(q1, s1, r1) and
|
||||
deltaClosed(q2, s2, r2) and
|
||||
// use noopt to force the join on `intersect` to happen last.
|
||||
exists(intersect(s1, s2))
|
||||
) and
|
||||
stateInsideBacktracking(r1) and
|
||||
stateInsideBacktracking(r2)
|
||||
}
|
||||
|
||||
private newtype TTrace =
|
||||
Nil() or
|
||||
Step(InputSymbol s1, InputSymbol s2, TTrace t) { isReachableFromFork(_, _, s1, s2, t, _) }
|
||||
|
||||
/**
|
||||
* A list of pairs of input symbols that describe a path in the product automaton
|
||||
* starting from some fork state.
|
||||
*/
|
||||
private class Trace extends TTrace {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() {
|
||||
this = Nil() and result = "Nil()"
|
||||
or
|
||||
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
|
||||
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
|
||||
* a path from `r` back to `(fork, fork)` with `rem` steps.
|
||||
*/
|
||||
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
|
||||
exists(InputSymbol s1, InputSymbol s2, Trace v |
|
||||
isReachableFromFork(fork, r, s1, s2, v, rem) and
|
||||
w = Step(s1, s2, v)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isReachableFromFork(
|
||||
State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
|
||||
) {
|
||||
// base case
|
||||
exists(State q1, State q2 |
|
||||
isFork(fork, s1, s2, q1, q2) and
|
||||
r = MkStatePair(q1, q2) and
|
||||
v = Nil() and
|
||||
rem = statePairDistToFork(r, MkStatePair(fork, fork))
|
||||
)
|
||||
or
|
||||
// recursive case
|
||||
exists(StatePair p |
|
||||
isReachableFromFork(fork, p, v, rem + 1) and
|
||||
step(p, s1, s2, r) and
|
||||
rem = statePairDistToFork(r, MkStatePair(fork, fork))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state in the product automaton from which `(fork, fork)` is
|
||||
* reachable in zero or more epsilon transitions.
|
||||
*/
|
||||
private StatePair getAForkPair(State fork) {
|
||||
isFork(fork, _, _, _, _) and
|
||||
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
|
||||
}
|
||||
|
||||
private predicate hasSuffix(Trace suffix, Trace t, int i) {
|
||||
// Declaring `t` to be a `RelevantTrace` currently causes a redundant check in the
|
||||
// recursive case, so instead we check it explicitly here.
|
||||
t instanceof RelevantTrace and
|
||||
i = 0 and
|
||||
suffix = t
|
||||
or
|
||||
hasSuffix(Step(_, _, suffix), t, i - 1)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate hasTuple(InputSymbol s1, InputSymbol s2, Trace t, int i) {
|
||||
hasSuffix(Step(s1, s2, _), t, i)
|
||||
}
|
||||
|
||||
private class RelevantTrace extends Trace, Step {
|
||||
RelevantTrace() {
|
||||
exists(State fork, StatePair q |
|
||||
isReachableFromFork(fork, q, this, _) and
|
||||
q = getAForkPair(fork)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private string intersect(int i) {
|
||||
exists(InputSymbol s1, InputSymbol s2 |
|
||||
hasTuple(s1, s2, this, i) and
|
||||
result = intersect(s1, s2)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a string corresponding to this trace. */
|
||||
// the pragma is needed for the case where `intersect(s1, s2)` has multiple values,
|
||||
// not for recursion
|
||||
language[monotonicAggregates]
|
||||
string concretise() {
|
||||
result = strictconcat(int i | hasTuple(_, _, this, i) | this.intersect(i) order by i desc)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `fork` is a pumpable fork with word `w`.
|
||||
*/
|
||||
private predicate isPumpable(State fork, string w) {
|
||||
exists(StatePair q, RelevantTrace t |
|
||||
isReachableFromFork(fork, q, t, _) and
|
||||
q = getAForkPair(fork) and
|
||||
w = t.concretise()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* An instantiation of `ReDoSConfiguration` for exponential backtracking.
|
||||
*/
|
||||
class ExponentialReDoSConfiguration extends ReDoSConfiguration {
|
||||
ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" }
|
||||
|
||||
override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
|
||||
}
|
||||
deprecated import semmle.python.security.regexp.ExponentialBackTracking as Dep
|
||||
import Dep
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,454 +1,4 @@
|
||||
/**
|
||||
* Provides classes for working with regular expressions that can
|
||||
* perform backtracking in superlinear time.
|
||||
*/
|
||||
/** DEPRECATED. Import `semmle.python.security.regexp.SuperlinearBackTracking` instead. */
|
||||
|
||||
import ReDoSUtil
|
||||
|
||||
/*
|
||||
* This module implements the analysis described in the paper:
|
||||
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
|
||||
* Static Detection of DoS Vulnerabilities in
|
||||
* Programs that use Regular Expressions
|
||||
* (Extended Version).
|
||||
* (https://arxiv.org/pdf/1701.04045.pdf)
|
||||
*
|
||||
* Theorem 3 from the paper describes the basic idea.
|
||||
*
|
||||
* The following explains the idea using variables and predicate names that are used in the implementation:
|
||||
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
|
||||
*
|
||||
* We create a product automaton of 3-tuples of states (see `StateTuple`).
|
||||
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
|
||||
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
|
||||
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
|
||||
*
|
||||
* We start a search in the product automaton at `(pivot, pivot, succ)`,
|
||||
* and search for a series of transitions (a `Trace`), such that we end
|
||||
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
|
||||
*
|
||||
* For example, consider the regular expression `/^\d*5\w*$/`.
|
||||
* The search will start at the tuple `(\d*, \d*, \w*)` and search
|
||||
* for a path to `(\d*, \w*, \w*)`.
|
||||
* This path exists, and consists of a single transition in the product automaton,
|
||||
* where the three corresponding NFA edges all match the character `"5"`.
|
||||
*
|
||||
* The start-state in the NFA has an any-transition to itself, this allows us to
|
||||
* flag regular expressions such as `/a*$/` - which does not have a start anchor -
|
||||
* and can thus start matching anywhere.
|
||||
*
|
||||
* The implementation is not perfect.
|
||||
* It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
|
||||
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An instantiaion of `ReDoSConfiguration` for superlinear ReDoS.
|
||||
*/
|
||||
class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
|
||||
SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" }
|
||||
|
||||
override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets any root (start) state of a regular expression.
|
||||
*/
|
||||
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
|
||||
|
||||
private newtype TStateTuple =
|
||||
MkStateTuple(State q1, State q2, State q3) {
|
||||
// starts at (pivot, pivot, succ)
|
||||
isStartLoops(q1, q3) and q1 = q2
|
||||
or
|
||||
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
|
||||
}
|
||||
|
||||
/**
|
||||
* A state in the product automaton.
|
||||
* The product automaton contains 3-tuples of states.
|
||||
*
|
||||
* We lazily only construct those states that we are actually
|
||||
* going to need.
|
||||
* Either a start state `(pivot, pivot, succ)`, or a state
|
||||
* where there exists a transition from an already existing state.
|
||||
*
|
||||
* The exponential variant of this query (`js/redos`) uses an optimization
|
||||
* trick where `q1 <= q2`. This trick cannot be used here as the order
|
||||
* of the elements matter.
|
||||
*/
|
||||
class StateTuple extends TStateTuple {
|
||||
State q1;
|
||||
State q2;
|
||||
State q3;
|
||||
|
||||
StateTuple() { this = MkStateTuple(q1, q2, q3) }
|
||||
|
||||
/**
|
||||
* Gest a string repesentation of this tuple.
|
||||
*/
|
||||
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
|
||||
|
||||
/**
|
||||
* Holds if this tuple is `(r1, r2, r3)`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
|
||||
}
|
||||
|
||||
/**
|
||||
* A module for determining feasible tuples for the product automaton.
|
||||
*
|
||||
* The implementation is split into many predicates for performance reasons.
|
||||
*/
|
||||
private module FeasibleTuple {
|
||||
/**
|
||||
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isFeasibleTuple(State r1, State r2, State r3) {
|
||||
// The first element is either inside a repetition (or the start state itself)
|
||||
isRepetitionOrStart(r1) and
|
||||
// The last element is inside a repetition
|
||||
stateInsideRepetition(r3) and
|
||||
// The states are reachable in the NFA in the order r1 -> r2 -> r3
|
||||
delta+(r1) = r2 and
|
||||
delta+(r2) = r3 and
|
||||
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
|
||||
canReachABeginning(r1) and
|
||||
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
|
||||
canReachATarget(r3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
|
||||
|
||||
/**
|
||||
* Holds if state `s` might be inside a backtracking repetition.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate stateInsideRepetition(State s) {
|
||||
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists a path in the NFA from `s` to a "pivot" state
|
||||
* (from a `(pivot, succ)` pair that starts the search).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate canReachABeginning(State s) {
|
||||
delta+(s) = any(State pivot | isStartLoops(pivot, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists a path in the NFA from `s` to a "succ" state
|
||||
* (from a `(pivot, succ)` pair that starts the search).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
|
||||
*
|
||||
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
|
||||
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
|
||||
*/
|
||||
predicate isStartLoops(State pivot, State succ) {
|
||||
pivot != succ and
|
||||
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
|
||||
delta+(pivot) = succ and
|
||||
(
|
||||
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
|
||||
or
|
||||
pivot = mkMatch(any(RegExpRoot root))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state for which there exists a transition in the NFA from `s'.
|
||||
*/
|
||||
State delta(State s) { delta(s, _, result) }
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to the corresponding
|
||||
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
|
||||
exists(State r1, State r2, State r3 |
|
||||
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3
|
||||
* labelled with `s1`, `s2`, and `s3`, respectively.
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate step(
|
||||
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
|
||||
) {
|
||||
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
|
||||
deltaClosed(q1, s1, r1) and
|
||||
deltaClosed(q2, s2, r2) and
|
||||
deltaClosed(q3, s3, r3) and
|
||||
// use noopt to force the join on `getAThreewayIntersect` to happen last.
|
||||
exists(getAThreewayIntersect(s1, s2, s3))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
|
||||
*
|
||||
* The result is not complete, and might miss some combination of edges that share some character.
|
||||
*/
|
||||
pragma[noinline]
|
||||
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
|
||||
result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)]
|
||||
or
|
||||
result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)]
|
||||
or
|
||||
result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the minimum and maximum characters that intersect between `a` and `b`.
|
||||
* This predicate is used to limit the size of `getAThreewayIntersect`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
|
||||
result = [min(intersect(a, b)), max(intersect(a, b))]
|
||||
}
|
||||
|
||||
private newtype TTrace =
|
||||
Nil() or
|
||||
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
|
||||
exists(StateTuple p |
|
||||
isReachableFromStartTuple(_, _, p, t, _) and
|
||||
step(p, s1, s2, s3, _)
|
||||
)
|
||||
or
|
||||
exists(State pivot, State succ | isStartLoops(pivot, succ) |
|
||||
t = Nil() and step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, _)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A list of tuples of input symbols that describe a path in the product automaton
|
||||
* starting from some start state.
|
||||
*/
|
||||
class Trace extends TTrace {
|
||||
/**
|
||||
* Gets a string representation of this Trace that can be used for debug purposes.
|
||||
*/
|
||||
string toString() {
|
||||
this = Nil() and result = "Nil()"
|
||||
or
|
||||
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
|
||||
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists a transition from `r` to `q` in the product automaton.
|
||||
* Notice that the arguments are flipped, and thus the direction is backwards.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
|
||||
|
||||
/**
|
||||
* Holds if `tuple` is an end state in our search.
|
||||
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
|
||||
*/
|
||||
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
|
||||
|
||||
/**
|
||||
* Gets the minimum length of a path from `r` to some an end state `end`.
|
||||
*
|
||||
* The implementation searches backwards from the end-tuple.
|
||||
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
|
||||
* The `end` argument must always be an end state.
|
||||
*/
|
||||
int distBackFromEnd(StateTuple r, StateTuple end) =
|
||||
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
|
||||
|
||||
/**
|
||||
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
|
||||
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
|
||||
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
|
||||
* and a path from a start-state to `tuple` follows the transitions in `trace`.
|
||||
*/
|
||||
predicate isReachableFromStartTuple(State pivot, State succ, StateTuple tuple, Trace trace, int dist) {
|
||||
// base case. The first step is inlined to start the search after all possible 1-steps, and not just the ones with the shortest path.
|
||||
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, State q1, State q2, State q3 |
|
||||
isStartLoops(pivot, succ) and
|
||||
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
|
||||
tuple = MkStateTuple(q1, q2, q3) and
|
||||
trace = Step(s1, s2, s3, Nil()) and
|
||||
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
|
||||
)
|
||||
or
|
||||
// recursive case
|
||||
exists(StateTuple p, Trace v, InputSymbol s1, InputSymbol s2, InputSymbol s3 |
|
||||
isReachableFromStartTuple(pivot, succ, p, v, dist + 1) and
|
||||
dist = isReachableFromStartTupleHelper(pivot, succ, tuple, p, s1, s2, s3) and
|
||||
trace = Step(s1, s2, s3, v)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper predicate for the recursive case in `isReachableFromStartTuple`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private int isReachableFromStartTupleHelper(
|
||||
State pivot, State succ, StateTuple r, StateTuple p, InputSymbol s1, InputSymbol s2,
|
||||
InputSymbol s3
|
||||
) {
|
||||
result = distBackFromEnd(r, MkStateTuple(pivot, succ, succ)) and
|
||||
step(p, s1, s2, s3, r)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
|
||||
*/
|
||||
StateTuple getAnEndTuple(State pivot, State succ) {
|
||||
isStartLoops(pivot, succ) and
|
||||
result = MkStateTuple(pivot, succ, succ)
|
||||
}
|
||||
|
||||
private predicate hasSuffix(Trace suffix, Trace t, int i) {
|
||||
// Declaring `t` to be a `RelevantTrace` currently causes a redundant check in the
|
||||
// recursive case, so instead we check it explicitly here.
|
||||
t instanceof RelevantTrace and
|
||||
i = 0 and
|
||||
suffix = t
|
||||
or
|
||||
hasSuffix(Step(_, _, _, suffix), t, i - 1)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate hasTuple(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t, int i) {
|
||||
hasSuffix(Step(s1, s2, s3, _), t, i)
|
||||
}
|
||||
|
||||
private class RelevantTrace extends Trace, Step {
|
||||
RelevantTrace() {
|
||||
exists(State pivot, State succ, StateTuple q |
|
||||
isReachableFromStartTuple(pivot, succ, q, this, _) and
|
||||
q = getAnEndTuple(pivot, succ)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private string getAThreewayIntersect(int i) {
|
||||
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 |
|
||||
hasTuple(s1, s2, s3, this, i) and
|
||||
result = getAThreewayIntersect(s1, s2, s3)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a string corresponding to this trace. */
|
||||
// the pragma is needed for the case where `getAThreewayIntersect(s1, s2, s3)` has multiple values,
|
||||
// not for recursion
|
||||
language[monotonicAggregates]
|
||||
string concretise() {
|
||||
result =
|
||||
strictconcat(int i |
|
||||
hasTuple(_, _, _, this, i)
|
||||
|
|
||||
this.getAThreewayIntersect(i) order by i desc
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if matching repetitions of `pump` can:
|
||||
* 1) Transition from `pivot` back to `pivot`.
|
||||
* 2) Transition from `pivot` to `succ`.
|
||||
* 3) Transition from `succ` to `succ`.
|
||||
*
|
||||
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
|
||||
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
|
||||
*
|
||||
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
|
||||
* available in the `hasReDoSResult` predicate.
|
||||
*/
|
||||
predicate isPumpable(State pivot, State succ, string pump) {
|
||||
exists(StateTuple q, RelevantTrace t |
|
||||
isReachableFromStartTuple(pivot, succ, q, t, _) and
|
||||
q = getAnEndTuple(pivot, succ) and
|
||||
pump = t.concretise()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
|
||||
*/
|
||||
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
exists(State s, State pivot |
|
||||
hasReDoSResult(t, pump, s, prefixMsg) and
|
||||
isPumpable(pivot, s, _) and
|
||||
prev = pivot.getRepr()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a message for why `term` can cause polynomial backtracking.
|
||||
*/
|
||||
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
polynimalReDoS(term, pump, prefixMsg, prev) and
|
||||
result =
|
||||
"Strings " + prefixMsg + "with many repetitions of '" + pump +
|
||||
"' can start matching anywhere after the start of the preceeding " + prev
|
||||
}
|
||||
|
||||
/**
|
||||
* A term that may cause a regular expression engine to perform a
|
||||
* polynomial number of match attempts, relative to the input length.
|
||||
*/
|
||||
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
|
||||
string reason;
|
||||
string pump;
|
||||
string prefixMsg;
|
||||
RegExpTerm prev;
|
||||
|
||||
PolynomialBackTrackingTerm() {
|
||||
reason = getReasonString(this, pump, prefixMsg, prev) and
|
||||
// there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
|
||||
reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if all non-empty successors to the polynomial backtracking term matches the end of the line.
|
||||
*/
|
||||
predicate isAtEndLine() {
|
||||
forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
|
||||
succ instanceof RegExpDollar
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the string that should be repeated to cause this regular expression to perform polynomially.
|
||||
*/
|
||||
string getPumpString() { result = pump }
|
||||
|
||||
/**
|
||||
* Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking.
|
||||
*/
|
||||
string getPrefixMessage() { result = prefixMsg }
|
||||
|
||||
/**
|
||||
* Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
|
||||
*/
|
||||
RegExpTerm getPreviousLoop() { result = prev }
|
||||
|
||||
/**
|
||||
* Gets the reason for the number of match attempts.
|
||||
*/
|
||||
string getReason() { result = reason }
|
||||
}
|
||||
deprecated import semmle.python.security.regexp.SuperlinearBackTracking as Dep
|
||||
import Dep
|
||||
|
||||
@@ -0,0 +1,344 @@
|
||||
/**
|
||||
* This library implements the analysis described in the following two papers:
|
||||
*
|
||||
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
|
||||
* Regular Expression Denial-of-Service Attacks. NSS 2013.
|
||||
* (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
|
||||
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
|
||||
* Exponential Runtime via Substructural Logics. 2014.
|
||||
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
|
||||
*
|
||||
* The basic idea is to search for overlapping cycles in the NFA, that is,
|
||||
* states `q` such that there are two distinct paths from `q` to itself
|
||||
* that consume the same word `w`.
|
||||
*
|
||||
* For any such state `q`, an attack string can be constructed as follows:
|
||||
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
|
||||
* the word `w` that leads back to `q` along two different paths, followed
|
||||
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
|
||||
* implementation will need to explore at least 2^n different ways of going
|
||||
* from `q` back to itself while trying to match the `n` copies of `w`
|
||||
* before finally giving up.
|
||||
*
|
||||
* Now in order to identify overlapping cycles, all we have to do is find
|
||||
* pumpable forks, that is, states `q` that can transition to two different
|
||||
* states `r1` and `r2` on the same input symbol `c`, such that there are
|
||||
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
|
||||
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
|
||||
* in the product NFA.
|
||||
*
|
||||
* This is what the library does. It makes a simple attempt to construct a
|
||||
* prefix `v` leading into `q`, but only to improve the alert message.
|
||||
* And the library tries to prove the existence of a suffix that ensures
|
||||
* rejection. This check might fail, which can cause false positives.
|
||||
*
|
||||
* Finally, sometimes it depends on the translation whether the NFA generated
|
||||
* for a regular expression has a pumpable fork or not. We implement one
|
||||
* particular translation, which may result in false positives or negatives
|
||||
* relative to some particular JavaScript engine.
|
||||
*
|
||||
* More precisely, the library constructs an NFA from a regular expression `r`
|
||||
* as follows:
|
||||
*
|
||||
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
|
||||
* the state of the automaton before attempting to match the `i`th character in `t`.
|
||||
* * There is one accepting state `Accept(r)`.
|
||||
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
|
||||
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
|
||||
* * Transitions between states may be labelled with epsilon, or an abstract
|
||||
* input symbol.
|
||||
* * Each abstract input symbol represents a set of concrete input characters:
|
||||
* either a single character, a set of characters represented by a
|
||||
* character class, or the set of all characters.
|
||||
* * The product automaton is constructed lazily, starting with pair states
|
||||
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
|
||||
* step relation.
|
||||
* * The over-approximate step relation allows transitions along pairs of
|
||||
* abstract input symbols where the symbols have overlap in the characters they accept.
|
||||
* * Once a trace of pairs of abstract input symbols that leads from a fork
|
||||
* back to itself has been identified, we attempt to construct a concrete
|
||||
* string corresponding to it, which may fail.
|
||||
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
|
||||
* a suffix `x` (possible empty) that is most likely __not__ accepted.
|
||||
*/
|
||||
|
||||
import NfaUtils
|
||||
|
||||
/**
|
||||
* Holds if state `s` might be inside a backtracking repetition.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate stateInsideBacktracking(State s) {
|
||||
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
|
||||
}
|
||||
|
||||
/**
|
||||
* A infinitely repeating quantifier that might backtrack.
|
||||
*/
|
||||
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
|
||||
MaybeBacktrackingRepetition() {
|
||||
exists(RegExpTerm child |
|
||||
child instanceof RegExpAlt or
|
||||
child instanceof RegExpQuantifier
|
||||
|
|
||||
child.getParent+() = this
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A state in the product automaton.
|
||||
*/
|
||||
private newtype TStatePair =
|
||||
/**
|
||||
* We lazily only construct those states that we are actually
|
||||
* going to need: `(q, q)` for every fork state `q`, and any
|
||||
* pair of states that can be reached from a pair that we have
|
||||
* already constructed. To cut down on the number of states,
|
||||
* we only represent states `(q1, q2)` where `q1` is lexicographically
|
||||
* no bigger than `q2`.
|
||||
*
|
||||
* States are only constructed if both states in the pair are
|
||||
* inside a repetition that might backtrack.
|
||||
*/
|
||||
MkStatePair(State q1, State q2) {
|
||||
isFork(q1, _, _, _, _) and q2 = q1
|
||||
or
|
||||
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
|
||||
rankState(q1) <= rankState(q2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a unique number for a `state`.
|
||||
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
|
||||
*/
|
||||
private int rankState(State state) {
|
||||
state =
|
||||
rank[result](State s, Location l |
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s order by l.getStartLine(), l.getStartColumn(), s.toString()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A state in the product automaton.
|
||||
*/
|
||||
private class StatePair extends TStatePair {
|
||||
State q1;
|
||||
State q2;
|
||||
|
||||
StatePair() { this = MkStatePair(q1, q2) }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "(" + q1 + ", " + q2 + ")" }
|
||||
|
||||
/** Gets the first component of the state pair. */
|
||||
State getLeft() { result = q1 }
|
||||
|
||||
/** Gets the second component of the state pair. */
|
||||
State getRight() { result = q2 }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds for `(fork, fork)` state pairs when `isFork(fork, _, _, _, _)` holds.
|
||||
*
|
||||
* Used in `statePairDistToFork`
|
||||
*/
|
||||
private predicate isStatePairFork(StatePair p) {
|
||||
exists(State fork | p = MkStatePair(fork, fork) and isFork(fork, _, _, _, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to the corresponding
|
||||
* components of `r`.
|
||||
*
|
||||
* Used in `statePairDistToFork`
|
||||
*/
|
||||
private predicate reverseStep(StatePair r, StatePair q) { step(q, _, _, r) }
|
||||
|
||||
/**
|
||||
* Gets the minimum length of a path from `q` to `r` in the
|
||||
* product automaton.
|
||||
*/
|
||||
private int statePairDistToFork(StatePair q, StatePair r) =
|
||||
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
|
||||
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
|
||||
* trivially have an empty intersection.
|
||||
*
|
||||
* This predicate only holds for states associated with regular expressions
|
||||
* that have at least one repetition quantifier in them (otherwise the
|
||||
* expression cannot be vulnerable to ReDoS attacks anyway).
|
||||
*/
|
||||
pragma[noopt]
|
||||
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
|
||||
stateInsideBacktracking(q) and
|
||||
exists(State q1, State q2 |
|
||||
q1 = epsilonSucc*(q) and
|
||||
delta(q1, s1, r1) and
|
||||
q2 = epsilonSucc*(q) and
|
||||
delta(q2, s2, r2) and
|
||||
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
|
||||
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
|
||||
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
|
||||
exists(intersect(s1, s2))
|
||||
|
|
||||
s1 != s2
|
||||
or
|
||||
r1 != r2
|
||||
or
|
||||
r1 = r2 and q1 != q2
|
||||
or
|
||||
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
|
||||
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
|
||||
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
|
||||
// To avoid every state in the loop becoming a fork state,
|
||||
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
|
||||
// (every epsilon-loop must contain such a state).
|
||||
//
|
||||
// We additionally require that the there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
|
||||
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
|
||||
// The below code is therefore a heuritic, that only flags regular expressions such as `/(a*)*b/`,
|
||||
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
|
||||
r1 = r2 and
|
||||
q1 = q2 and
|
||||
epsilonSucc+(q) = q and
|
||||
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
|
||||
// One of the mid states is an infinite quantifier itself
|
||||
exists(State mid, RegExpTerm term |
|
||||
mid = epsilonSucc+(q) and
|
||||
term = mid.getRepr() and
|
||||
term instanceof InfiniteRepetitionQuantifier and
|
||||
q = epsilonSucc+(mid) and
|
||||
not mid = q
|
||||
)
|
||||
) and
|
||||
stateInsideBacktracking(r1) and
|
||||
stateInsideBacktracking(r2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
|
||||
* one or the other is defined.
|
||||
*/
|
||||
private StatePair mkStatePair(State q1, State q2) {
|
||||
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to the corresponding
|
||||
* components of `r` labelled with `s1` and `s2`, respectively.
|
||||
*/
|
||||
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
|
||||
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to `r1` and `r2`
|
||||
* labelled with `s1` and `s2`, respectively.
|
||||
*
|
||||
* We only consider transitions where the resulting states `(r1, r2)` are both
|
||||
* inside a repetition that might backtrack.
|
||||
*/
|
||||
pragma[noopt]
|
||||
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
|
||||
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
|
||||
deltaClosed(q1, s1, r1) and
|
||||
deltaClosed(q2, s2, r2) and
|
||||
// use noopt to force the join on `intersect` to happen last.
|
||||
exists(intersect(s1, s2))
|
||||
) and
|
||||
stateInsideBacktracking(r1) and
|
||||
stateInsideBacktracking(r2)
|
||||
}
|
||||
|
||||
private newtype TTrace =
|
||||
Nil() or
|
||||
Step(InputSymbol s1, InputSymbol s2, TTrace t) { isReachableFromFork(_, _, s1, s2, t, _) }
|
||||
|
||||
/**
|
||||
* A list of pairs of input symbols that describe a path in the product automaton
|
||||
* starting from some fork state.
|
||||
*/
|
||||
private class Trace extends TTrace {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() {
|
||||
this = Nil() and result = "Nil()"
|
||||
or
|
||||
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
|
||||
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
|
||||
* a path from `r` back to `(fork, fork)` with `rem` steps.
|
||||
*/
|
||||
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
|
||||
exists(InputSymbol s1, InputSymbol s2, Trace v |
|
||||
isReachableFromFork(fork, r, s1, s2, v, rem) and
|
||||
w = Step(s1, s2, v)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isReachableFromFork(
|
||||
State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
|
||||
) {
|
||||
// base case
|
||||
exists(State q1, State q2 |
|
||||
isFork(fork, s1, s2, q1, q2) and
|
||||
r = MkStatePair(q1, q2) and
|
||||
v = Nil() and
|
||||
rem = statePairDistToFork(r, MkStatePair(fork, fork))
|
||||
)
|
||||
or
|
||||
// recursive case
|
||||
exists(StatePair p |
|
||||
isReachableFromFork(fork, p, v, rem + 1) and
|
||||
step(p, s1, s2, r) and
|
||||
rem = statePairDistToFork(r, MkStatePair(fork, fork))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state in the product automaton from which `(fork, fork)` is
|
||||
* reachable in zero or more epsilon transitions.
|
||||
*/
|
||||
private StatePair getAForkPair(State fork) {
|
||||
isFork(fork, _, _, _, _) and
|
||||
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
|
||||
}
|
||||
|
||||
/** An implementation of a chain containing chars for use by `Concretizer`. */
|
||||
private module CharTreeImpl implements CharTree {
|
||||
class CharNode = Trace;
|
||||
|
||||
CharNode getPrev(CharNode t) { t = Step(_, _, result) }
|
||||
|
||||
/** Holds if `n` is a trace that is used by `concretize` in `isPumpable`. */
|
||||
predicate isARelevantEnd(CharNode n) {
|
||||
exists(State f | isReachableFromFork(f, getAForkPair(f), n, _))
|
||||
}
|
||||
|
||||
string getChar(CharNode t) {
|
||||
exists(InputSymbol s1, InputSymbol s2 | t = Step(s1, s2, _) | result = intersect(s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `fork` is a pumpable fork with word `w`.
|
||||
*/
|
||||
private predicate isPumpable(State fork, string w) {
|
||||
exists(StatePair q, Trace t |
|
||||
isReachableFromFork(fork, q, t, _) and
|
||||
q = getAForkPair(fork) and
|
||||
w = Concretizer<CharTreeImpl>::concretize(t)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `state` has exponential ReDoS */
|
||||
predicate hasReDoSResult = ReDoSPruning<isPumpable/2>::hasReDoSResult/4;
|
||||
1319
python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
Normal file
1319
python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the ReDoSUtil module.
|
||||
* Provides Python-specific definitions for use in the NfaUtils module.
|
||||
*/
|
||||
|
||||
import python
|
||||
157
python/ql/lib/semmle/python/security/regexp/RegexpMatching.qll
Normal file
157
python/ql/lib/semmle/python/security/regexp/RegexpMatching.qll
Normal file
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* Provides precicates for reasoning about which strings are matched by a regular expression,
|
||||
* and for testing which capture groups are filled when a particular regexp matches a string.
|
||||
*/
|
||||
|
||||
import NfaUtils
|
||||
|
||||
/** A root term */
|
||||
class RootTerm extends RegExpTerm {
|
||||
RootTerm() { this.isRootTerm() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if it should be tested whether `root` matches `str`.
|
||||
*
|
||||
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
|
||||
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
|
||||
* but if `ignorePrefix` is true, it will only match "foo".
|
||||
*
|
||||
* If `testWithGroups` is true, then the `RegexpMatching::fillsCaptureGroup` predicate can be used to determine which capture
|
||||
* groups are filled by a string.
|
||||
*/
|
||||
signature predicate isRegexpMatchingCandidateSig(
|
||||
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
|
||||
);
|
||||
|
||||
/**
|
||||
* A module for determining if a regexp matches a given string,
|
||||
* and reasoning about which capture groups are filled by a given string.
|
||||
*
|
||||
* The module parameter `isCandidate` determines which strings should be tested,
|
||||
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
|
||||
*/
|
||||
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
|
||||
/**
|
||||
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
|
||||
* The regular expression is modeled as a non-determistic finite automaton,
|
||||
* the regular expression can therefore be in multiple states after matching a character.
|
||||
*
|
||||
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
|
||||
*/
|
||||
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
|
||||
// start state, the -1 position before any chars have been matched
|
||||
i = -1 and
|
||||
isCandidate(reg, str, ignorePrefix, _) and
|
||||
result.getRepr().getRootTerm() = reg and
|
||||
isStartState(result)
|
||||
or
|
||||
// recursive case
|
||||
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next state after the `prev` state from `reg`.
|
||||
* `prev` is the state after matching `fromIndex` chars in `str`,
|
||||
* and the result is the state after matching `toIndex` chars in `str`.
|
||||
*
|
||||
* This predicate is used as a step relation in the forwards search (`getAState`),
|
||||
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
|
||||
*/
|
||||
private State getAStateAfterMatching(
|
||||
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
|
||||
) {
|
||||
// the basic recursive case - outlined into a noopt helper to make performance work out.
|
||||
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
|
||||
or
|
||||
// we can skip past word boundaries if the next char is a non-word char.
|
||||
fromIndex = toIndex and
|
||||
prev.getRepr() instanceof RegExpWordBoundary and
|
||||
prev = getAState(reg, toIndex, str, ignorePrefix) and
|
||||
after(prev.getRepr()) = result and
|
||||
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
|
||||
}
|
||||
|
||||
pragma[noopt]
|
||||
private State getAStateAfterMatchingAux(
|
||||
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
|
||||
) {
|
||||
prev = getAState(reg, fromIndex, str, ignorePrefix) and
|
||||
fromIndex = toIndex - 1 and
|
||||
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
|
||||
not discardedPrefixStep(prev, result, ignorePrefix)
|
||||
}
|
||||
|
||||
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
|
||||
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
|
||||
prev = mkMatch(any(RegExpRoot r)) and
|
||||
ignorePrefix = true and
|
||||
next = prev
|
||||
}
|
||||
|
||||
// The `deltaClosed` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
|
||||
private predicate specializedDeltaClosed(State prev, string char, State next) {
|
||||
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
|
||||
}
|
||||
|
||||
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
|
||||
pragma[noinline]
|
||||
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
|
||||
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
|
||||
* Starts with an accepting state as found by `getAState` and searches backwards
|
||||
* to the start state through the reachable states (as found by `getAState`).
|
||||
*
|
||||
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
|
||||
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
|
||||
* The result state is therefore always on a valid path where `reg` accepts `str`.
|
||||
*
|
||||
* This predicate is only used to find which capture groups a regular expression has filled,
|
||||
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
|
||||
*/
|
||||
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
|
||||
// base case, reaches an accepting state from the last state in `getAState(..)`
|
||||
isCandidate(reg, str, ignorePrefix, true) and
|
||||
i = str.length() - 1 and
|
||||
result = getAState(reg, i, str, ignorePrefix) and
|
||||
epsilonSucc*(result) = Accept(_)
|
||||
or
|
||||
// recursive case. `next` is the next state to be matched after matching `prev`.
|
||||
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
|
||||
exists(State next, State prev, int fromIndex, int toIndex |
|
||||
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
|
||||
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
|
||||
i = fromIndex and
|
||||
result = prev
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the capture group number that `term` belongs to. */
|
||||
private int group(RegExpTerm term) {
|
||||
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
|
||||
*/
|
||||
predicate matches(RootTerm reg, string str) {
|
||||
exists(State state | state = getAState(reg, str.length() - 1, str, _) |
|
||||
epsilonSucc*(state) = Accept(_)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if matching `str` against `reg` may fill capture group number `g`.
|
||||
* Only holds if `str` is in the `testWithGroups` predicate.
|
||||
*/
|
||||
predicate fillsCaptureGroup(RootTerm reg, string str, int g) {
|
||||
exists(State s |
|
||||
s = getAStateThatReachesAccept(reg, _, str, _) and
|
||||
g = group(s.getRepr())
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,418 @@
|
||||
/**
|
||||
* Provides classes for working with regular expressions that can
|
||||
* perform backtracking in superlinear time.
|
||||
*/
|
||||
|
||||
import NfaUtils
|
||||
|
||||
/*
|
||||
* This module implements the analysis described in the paper:
|
||||
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
|
||||
* Static Detection of DoS Vulnerabilities in
|
||||
* Programs that use Regular Expressions
|
||||
* (Extended Version).
|
||||
* (https://arxiv.org/pdf/1701.04045.pdf)
|
||||
*
|
||||
* Theorem 3 from the paper describes the basic idea.
|
||||
*
|
||||
* The following explains the idea using variables and predicate names that are used in the implementation:
|
||||
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
|
||||
*
|
||||
* We create a product automaton of 3-tuples of states (see `StateTuple`).
|
||||
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
|
||||
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
|
||||
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
|
||||
*
|
||||
* We start a search in the product automaton at `(pivot, pivot, succ)`,
|
||||
* and search for a series of transitions (a `Trace`), such that we end
|
||||
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
|
||||
*
|
||||
* For example, consider the regular expression `/^\d*5\w*$/`.
|
||||
* The search will start at the tuple `(\d*, \d*, \w*)` and search
|
||||
* for a path to `(\d*, \w*, \w*)`.
|
||||
* This path exists, and consists of a single transition in the product automaton,
|
||||
* where the three corresponding NFA edges all match the character `"5"`.
|
||||
*
|
||||
* The start-state in the NFA has an any-transition to itself, this allows us to
|
||||
* flag regular expressions such as `/a*$/` - which does not have a start anchor -
|
||||
* and can thus start matching anywhere.
|
||||
*
|
||||
* The implementation is not perfect.
|
||||
* It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
|
||||
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Gets any root (start) state of a regular expression.
|
||||
*/
|
||||
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
|
||||
|
||||
private newtype TStateTuple =
|
||||
MkStateTuple(State q1, State q2, State q3) {
|
||||
// starts at (pivot, pivot, succ)
|
||||
isStartLoops(q1, q3) and q1 = q2
|
||||
or
|
||||
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
|
||||
}
|
||||
|
||||
/**
|
||||
* A state in the product automaton.
|
||||
* The product automaton contains 3-tuples of states.
|
||||
*
|
||||
* We lazily only construct those states that we are actually
|
||||
* going to need.
|
||||
* Either a start state `(pivot, pivot, succ)`, or a state
|
||||
* where there exists a transition from an already existing state.
|
||||
*
|
||||
* The exponential variant of this query (`js/redos`) uses an optimization
|
||||
* trick where `q1 <= q2`. This trick cannot be used here as the order
|
||||
* of the elements matter.
|
||||
*/
|
||||
class StateTuple extends TStateTuple {
|
||||
State q1;
|
||||
State q2;
|
||||
State q3;
|
||||
|
||||
StateTuple() { this = MkStateTuple(q1, q2, q3) }
|
||||
|
||||
/**
|
||||
* Gest a string repesentation of this tuple.
|
||||
*/
|
||||
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
|
||||
|
||||
/**
|
||||
* Holds if this tuple is `(r1, r2, r3)`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
|
||||
}
|
||||
|
||||
/**
|
||||
* A module for determining feasible tuples for the product automaton.
|
||||
*
|
||||
* The implementation is split into many predicates for performance reasons.
|
||||
*/
|
||||
private module FeasibleTuple {
|
||||
/**
|
||||
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isFeasibleTuple(State r1, State r2, State r3) {
|
||||
// The first element is either inside a repetition (or the start state itself)
|
||||
isRepetitionOrStart(r1) and
|
||||
// The last element is inside a repetition
|
||||
stateInsideRepetition(r3) and
|
||||
// The states are reachable in the NFA in the order r1 -> r2 -> r3
|
||||
delta+(r1) = r2 and
|
||||
delta+(r2) = r3 and
|
||||
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
|
||||
canReachABeginning(r1) and
|
||||
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
|
||||
canReachATarget(r3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
|
||||
|
||||
/**
|
||||
* Holds if state `s` might be inside a backtracking repetition.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate stateInsideRepetition(State s) {
|
||||
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists a path in the NFA from `s` to a "pivot" state
|
||||
* (from a `(pivot, succ)` pair that starts the search).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate canReachABeginning(State s) {
|
||||
delta+(s) = any(State pivot | isStartLoops(pivot, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists a path in the NFA from `s` to a "succ" state
|
||||
* (from a `(pivot, succ)` pair that starts the search).
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
|
||||
*
|
||||
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
|
||||
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
|
||||
*/
|
||||
predicate isStartLoops(State pivot, State succ) {
|
||||
pivot != succ and
|
||||
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
|
||||
delta+(pivot) = succ and
|
||||
(
|
||||
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
|
||||
or
|
||||
pivot = mkMatch(any(RegExpRoot root))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state for which there exists a transition in the NFA from `s'.
|
||||
*/
|
||||
State delta(State s) { delta(s, _, result) }
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to the corresponding
|
||||
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
|
||||
exists(State r1, State r2, State r3 |
|
||||
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3
|
||||
* labelled with `s1`, `s2`, and `s3`, respectively.
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate step(
|
||||
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
|
||||
) {
|
||||
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
|
||||
deltaClosed(q1, s1, r1) and
|
||||
deltaClosed(q2, s2, r2) and
|
||||
deltaClosed(q3, s3, r3) and
|
||||
// use noopt to force the join on `getAThreewayIntersect` to happen last.
|
||||
exists(getAThreewayIntersect(s1, s2, s3))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
|
||||
*
|
||||
* The result is not complete, and might miss some combination of edges that share some character.
|
||||
*/
|
||||
pragma[noinline]
|
||||
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
|
||||
result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)]
|
||||
or
|
||||
result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)]
|
||||
or
|
||||
result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the minimum and maximum characters that intersect between `a` and `b`.
|
||||
* This predicate is used to limit the size of `getAThreewayIntersect`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
|
||||
result = [min(intersect(a, b)), max(intersect(a, b))]
|
||||
}
|
||||
|
||||
private newtype TTrace =
|
||||
Nil() or
|
||||
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
|
||||
isReachableFromStartTuple(_, _, t, s1, s2, s3, _, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* A list of tuples of input symbols that describe a path in the product automaton
|
||||
* starting from some start state.
|
||||
*/
|
||||
class Trace extends TTrace {
|
||||
/**
|
||||
* Gets a string representation of this Trace that can be used for debug purposes.
|
||||
*/
|
||||
string toString() {
|
||||
this = Nil() and result = "Nil()"
|
||||
or
|
||||
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
|
||||
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists a transition from `r` to `q` in the product automaton.
|
||||
* Notice that the arguments are flipped, and thus the direction is backwards.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
|
||||
|
||||
/**
|
||||
* Holds if `tuple` is an end state in our search.
|
||||
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
|
||||
*/
|
||||
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
|
||||
|
||||
/**
|
||||
* Gets the minimum length of a path from `r` to some an end state `end`.
|
||||
*
|
||||
* The implementation searches backwards from the end-tuple.
|
||||
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
|
||||
* The `end` argument must always be an end state.
|
||||
*/
|
||||
int distBackFromEnd(StateTuple r, StateTuple end) =
|
||||
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
|
||||
|
||||
/**
|
||||
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
|
||||
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
|
||||
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
|
||||
* and a path from a start-state to `tuple` follows the transitions in `trace`.
|
||||
*/
|
||||
private predicate isReachableFromStartTuple(
|
||||
State pivot, State succ, StateTuple tuple, Trace trace, int dist
|
||||
) {
|
||||
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace v |
|
||||
isReachableFromStartTuple(pivot, succ, v, s1, s2, s3, tuple, dist) and
|
||||
trace = Step(s1, s2, s3, v)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isReachableFromStartTuple(
|
||||
State pivot, State succ, Trace trace, InputSymbol s1, InputSymbol s2, InputSymbol s3,
|
||||
StateTuple tuple, int dist
|
||||
) {
|
||||
// base case.
|
||||
exists(State q1, State q2, State q3 |
|
||||
isStartLoops(pivot, succ) and
|
||||
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
|
||||
tuple = MkStateTuple(q1, q2, q3) and
|
||||
trace = Nil() and
|
||||
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
|
||||
)
|
||||
or
|
||||
// recursive case
|
||||
exists(StateTuple p |
|
||||
isReachableFromStartTuple(pivot, succ, p, trace, dist + 1) and
|
||||
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) and
|
||||
step(p, s1, s2, s3, tuple)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
|
||||
*/
|
||||
StateTuple getAnEndTuple(State pivot, State succ) {
|
||||
isStartLoops(pivot, succ) and
|
||||
result = MkStateTuple(pivot, succ, succ)
|
||||
}
|
||||
|
||||
/** An implementation of a chain containing chars for use by `Concretizer`. */
|
||||
private module CharTreeImpl implements CharTree {
|
||||
class CharNode = Trace;
|
||||
|
||||
CharNode getPrev(CharNode t) { t = Step(_, _, _, result) }
|
||||
|
||||
/** Holds if `n` is used in `isPumpable`. */
|
||||
predicate isARelevantEnd(CharNode n) {
|
||||
exists(State pivot, State succ |
|
||||
isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), n, _)
|
||||
)
|
||||
}
|
||||
|
||||
string getChar(CharNode t) {
|
||||
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | t = Step(s1, s2, s3, _) |
|
||||
result = getAThreewayIntersect(s1, s2, s3)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if matching repetitions of `pump` can:
|
||||
* 1) Transition from `pivot` back to `pivot`.
|
||||
* 2) Transition from `pivot` to `succ`.
|
||||
* 3) Transition from `succ` to `succ`.
|
||||
*
|
||||
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
|
||||
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
|
||||
*
|
||||
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
|
||||
* available in the `hasReDoSResult` predicate.
|
||||
*/
|
||||
predicate isPumpable(State pivot, State succ, string pump) {
|
||||
exists(StateTuple q, Trace t |
|
||||
isReachableFromStartTuple(pivot, succ, q, t, _) and
|
||||
q = getAnEndTuple(pivot, succ) and
|
||||
pump = Concretizer<CharTreeImpl>::concretize(t)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if states starting in `state` can have polynomial backtracking with the string `pump`.
|
||||
*/
|
||||
predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
|
||||
|
||||
/**
|
||||
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
|
||||
*/
|
||||
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
exists(State s, State pivot |
|
||||
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
|
||||
isPumpable(pivot, s, _) and
|
||||
prev = pivot.getRepr()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a message for why `term` can cause polynomial backtracking.
|
||||
*/
|
||||
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
|
||||
polynomialReDoS(term, pump, prefixMsg, prev) and
|
||||
result =
|
||||
"Strings " + prefixMsg + "with many repetitions of '" + pump +
|
||||
"' can start matching anywhere after the start of the preceeding " + prev
|
||||
}
|
||||
|
||||
/**
|
||||
* A term that may cause a regular expression engine to perform a
|
||||
* polynomial number of match attempts, relative to the input length.
|
||||
*/
|
||||
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
|
||||
string reason;
|
||||
string pump;
|
||||
string prefixMsg;
|
||||
RegExpTerm prev;
|
||||
|
||||
PolynomialBackTrackingTerm() {
|
||||
reason = getReasonString(this, pump, prefixMsg, prev) and
|
||||
// there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
|
||||
reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if all non-empty successors to the polynomial backtracking term matches the end of the line.
|
||||
*/
|
||||
predicate isAtEndLine() {
|
||||
forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
|
||||
succ instanceof RegExpDollar
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the string that should be repeated to cause this regular expression to perform polynomially.
|
||||
*/
|
||||
string getPumpString() { result = pump }
|
||||
|
||||
/**
|
||||
* Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking.
|
||||
*/
|
||||
string getPrefixMessage() { result = prefixMsg }
|
||||
|
||||
/**
|
||||
* Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
|
||||
*/
|
||||
RegExpTerm getPreviousLoop() { result = prev }
|
||||
|
||||
/**
|
||||
* Gets the reason for the number of match attempts.
|
||||
*/
|
||||
string getReason() { result = reason }
|
||||
}
|
||||
0
python/ql/lib/semmle/python/security/strings/Basic.qll
Executable file → Normal file
0
python/ql/lib/semmle/python/security/strings/Basic.qll
Executable file → Normal file
@@ -26,9 +26,21 @@ class ImportTimeScope extends Scope {
|
||||
|
||||
/** Gets the global variable that is used during lookup, should `var` be undefined. */
|
||||
GlobalVariable getOuterVariable(LocalVariable var) {
|
||||
this instanceof Class and
|
||||
var.getScope() = this and
|
||||
result.getScope() = this.getEnclosingModule() and
|
||||
var.getId() = result.getId()
|
||||
exists(string name |
|
||||
class_var_scope(this, name, var) and
|
||||
global_var_scope(name, this.getEnclosingModule(), result)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate global_var_scope(string name, Scope scope, GlobalVariable var) {
|
||||
var.getScope() = scope and
|
||||
var.getId() = name
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate class_var_scope(Class cls, string name, LocalVariable var) {
|
||||
var.getScope() = cls and
|
||||
var.getId() = name
|
||||
}
|
||||
|
||||
107
python/ql/lib/semmle/python/xml/XML.qll
Executable file → Normal file
107
python/ql/lib/semmle/python/xml/XML.qll
Executable file → Normal file
@@ -8,7 +8,7 @@ private class TXmlLocatable =
|
||||
@xmldtd or @xmlelement or @xmlattribute or @xmlnamespace or @xmlcomment or @xmlcharacters;
|
||||
|
||||
/** An XML element that has a location. */
|
||||
class XMLLocatable extends @xmllocatable, TXmlLocatable {
|
||||
class XmlLocatable extends @xmllocatable, TXmlLocatable {
|
||||
/** Gets the source location for this element. */
|
||||
Location getLocation() { xmllocations(this, result) }
|
||||
|
||||
@@ -32,13 +32,16 @@ class XMLLocatable extends @xmllocatable, TXmlLocatable {
|
||||
string toString() { none() } // overridden in subclasses
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlLocatable */
|
||||
deprecated class XMLLocatable = XmlLocatable;
|
||||
|
||||
/**
|
||||
* An `XMLParent` is either an `XMLElement` or an `XMLFile`,
|
||||
* An `XmlParent` is either an `XmlElement` or an `XmlFile`,
|
||||
* both of which can contain other elements.
|
||||
*/
|
||||
class XMLParent extends @xmlparent {
|
||||
XMLParent() {
|
||||
// explicitly restrict `this` to be either an `XMLElement` or an `XMLFile`;
|
||||
class XmlParent extends @xmlparent {
|
||||
XmlParent() {
|
||||
// explicitly restrict `this` to be either an `XmlElement` or an `XmlFile`;
|
||||
// the type `@xmlparent` currently also includes non-XML files
|
||||
this instanceof @xmlelement or xmlEncoding(this, _)
|
||||
}
|
||||
@@ -50,28 +53,28 @@ class XMLParent extends @xmlparent {
|
||||
string getName() { none() } // overridden in subclasses
|
||||
|
||||
/** Gets the file to which this XML parent belongs. */
|
||||
XMLFile getFile() { result = this or xmlElements(this, _, _, _, result) }
|
||||
XmlFile getFile() { result = this or xmlElements(this, _, _, _, result) }
|
||||
|
||||
/** Gets the child element at a specified index of this XML parent. */
|
||||
XMLElement getChild(int index) { xmlElements(result, _, this, index, _) }
|
||||
XmlElement getChild(int index) { xmlElements(result, _, this, index, _) }
|
||||
|
||||
/** Gets a child element of this XML parent. */
|
||||
XMLElement getAChild() { xmlElements(result, _, this, _, _) }
|
||||
XmlElement getAChild() { xmlElements(result, _, this, _, _) }
|
||||
|
||||
/** Gets a child element of this XML parent with the given `name`. */
|
||||
XMLElement getAChild(string name) { xmlElements(result, _, this, _, _) and result.hasName(name) }
|
||||
XmlElement getAChild(string name) { xmlElements(result, _, this, _, _) and result.hasName(name) }
|
||||
|
||||
/** Gets a comment that is a child of this XML parent. */
|
||||
XMLComment getAComment() { xmlComments(result, _, this, _) }
|
||||
XmlComment getAComment() { xmlComments(result, _, this, _) }
|
||||
|
||||
/** Gets a character sequence that is a child of this XML parent. */
|
||||
XMLCharacters getACharactersSet() { xmlChars(result, _, this, _, _, _) }
|
||||
XmlCharacters getACharactersSet() { xmlChars(result, _, this, _, _, _) }
|
||||
|
||||
/** Gets the depth in the tree. (Overridden in XMLElement.) */
|
||||
/** Gets the depth in the tree. (Overridden in XmlElement.) */
|
||||
int getDepth() { result = 0 }
|
||||
|
||||
/** Gets the number of child XML elements of this XML parent. */
|
||||
int getNumberOfChildren() { result = count(XMLElement e | xmlElements(e, _, this, _, _)) }
|
||||
int getNumberOfChildren() { result = count(XmlElement e | xmlElements(e, _, this, _, _)) }
|
||||
|
||||
/** Gets the number of places in the body of this XML parent where text occurs. */
|
||||
int getNumberOfCharacterSets() { result = count(int pos | xmlChars(_, _, this, pos, _, _)) }
|
||||
@@ -92,9 +95,12 @@ class XMLParent extends @xmlparent {
|
||||
string toString() { result = this.getName() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlParent */
|
||||
deprecated class XMLParent = XmlParent;
|
||||
|
||||
/** An XML file. */
|
||||
class XMLFile extends XMLParent, File {
|
||||
XMLFile() { xmlEncoding(this, _) }
|
||||
class XmlFile extends XmlParent, File {
|
||||
XmlFile() { xmlEncoding(this, _) }
|
||||
|
||||
/** Gets a printable representation of this XML file. */
|
||||
override string toString() { result = this.getName() }
|
||||
@@ -120,15 +126,21 @@ class XMLFile extends XMLParent, File {
|
||||
string getEncoding() { xmlEncoding(this, result) }
|
||||
|
||||
/** Gets the XML file itself. */
|
||||
override XMLFile getFile() { result = this }
|
||||
override XmlFile getFile() { result = this }
|
||||
|
||||
/** Gets a top-most element in an XML file. */
|
||||
XMLElement getARootElement() { result = this.getAChild() }
|
||||
XmlElement getARootElement() { result = this.getAChild() }
|
||||
|
||||
/** Gets a DTD associated with this XML file. */
|
||||
XMLDTD getADTD() { xmlDTDs(result, _, _, _, this) }
|
||||
XmlDtd getADtd() { xmlDTDs(result, _, _, _, this) }
|
||||
|
||||
/** DEPRECATED: Alias for getADtd */
|
||||
deprecated XmlDtd getADTD() { result = this.getADtd() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlFile */
|
||||
deprecated class XMLFile = XmlFile;
|
||||
|
||||
/**
|
||||
* An XML document type definition (DTD).
|
||||
*
|
||||
@@ -140,7 +152,7 @@ class XMLFile extends XMLParent, File {
|
||||
* <!ELEMENT lastName (#PCDATA)>
|
||||
* ```
|
||||
*/
|
||||
class XMLDTD extends XMLLocatable, @xmldtd {
|
||||
class XmlDtd extends XmlLocatable, @xmldtd {
|
||||
/** Gets the name of the root element of this DTD. */
|
||||
string getRoot() { xmlDTDs(this, result, _, _, _) }
|
||||
|
||||
@@ -154,7 +166,7 @@ class XMLDTD extends XMLLocatable, @xmldtd {
|
||||
predicate isPublic() { not xmlDTDs(this, _, "", _, _) }
|
||||
|
||||
/** Gets the parent of this DTD. */
|
||||
XMLParent getParent() { xmlDTDs(this, _, _, _, result) }
|
||||
XmlParent getParent() { xmlDTDs(this, _, _, _, result) }
|
||||
|
||||
override string toString() {
|
||||
this.isPublic() and
|
||||
@@ -165,6 +177,9 @@ class XMLDTD extends XMLLocatable, @xmldtd {
|
||||
}
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlDtd */
|
||||
deprecated class XMLDTD = XmlDtd;
|
||||
|
||||
/**
|
||||
* An XML element in an XML file.
|
||||
*
|
||||
@@ -176,7 +191,7 @@ class XMLDTD extends XMLLocatable, @xmldtd {
|
||||
* </manifest>
|
||||
* ```
|
||||
*/
|
||||
class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
|
||||
class XmlElement extends @xmlelement, XmlParent, XmlLocatable {
|
||||
/** Holds if this XML element has the given `name`. */
|
||||
predicate hasName(string name) { name = this.getName() }
|
||||
|
||||
@@ -184,10 +199,10 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
|
||||
override string getName() { xmlElements(this, result, _, _, _) }
|
||||
|
||||
/** Gets the XML file in which this XML element occurs. */
|
||||
override XMLFile getFile() { xmlElements(this, _, _, _, result) }
|
||||
override XmlFile getFile() { xmlElements(this, _, _, _, result) }
|
||||
|
||||
/** Gets the parent of this XML element. */
|
||||
XMLParent getParent() { xmlElements(this, _, result, _, _) }
|
||||
XmlParent getParent() { xmlElements(this, _, result, _, _) }
|
||||
|
||||
/** Gets the index of this XML element among its parent's children. */
|
||||
int getIndex() { xmlElements(this, _, _, result, _) }
|
||||
@@ -196,7 +211,7 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
|
||||
predicate hasNamespace() { xmlHasNs(this, _, _) }
|
||||
|
||||
/** Gets the namespace of this XML element, if any. */
|
||||
XMLNamespace getNamespace() { xmlHasNs(this, result, _) }
|
||||
XmlNamespace getNamespace() { xmlHasNs(this, result, _) }
|
||||
|
||||
/** Gets the index of this XML element among its parent's children. */
|
||||
int getElementPositionIndex() { xmlElements(this, _, _, result, _) }
|
||||
@@ -205,10 +220,10 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
|
||||
override int getDepth() { result = this.getParent().getDepth() + 1 }
|
||||
|
||||
/** Gets an XML attribute of this XML element. */
|
||||
XMLAttribute getAnAttribute() { result.getElement() = this }
|
||||
XmlAttribute getAnAttribute() { result.getElement() = this }
|
||||
|
||||
/** Gets the attribute with the specified `name`, if any. */
|
||||
XMLAttribute getAttribute(string name) { result.getElement() = this and result.getName() = name }
|
||||
XmlAttribute getAttribute(string name) { result.getElement() = this and result.getName() = name }
|
||||
|
||||
/** Holds if this XML element has an attribute with the specified `name`. */
|
||||
predicate hasAttribute(string name) { exists(this.getAttribute(name)) }
|
||||
@@ -220,6 +235,9 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
|
||||
override string toString() { result = this.getName() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlElement */
|
||||
deprecated class XMLElement = XmlElement;
|
||||
|
||||
/**
|
||||
* An attribute that occurs inside an XML element.
|
||||
*
|
||||
@@ -230,18 +248,18 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
|
||||
* android:versionCode="1"
|
||||
* ```
|
||||
*/
|
||||
class XMLAttribute extends @xmlattribute, XMLLocatable {
|
||||
class XmlAttribute extends @xmlattribute, XmlLocatable {
|
||||
/** Gets the name of this attribute. */
|
||||
string getName() { xmlAttrs(this, _, result, _, _, _) }
|
||||
|
||||
/** Gets the XML element to which this attribute belongs. */
|
||||
XMLElement getElement() { xmlAttrs(this, result, _, _, _, _) }
|
||||
XmlElement getElement() { xmlAttrs(this, result, _, _, _, _) }
|
||||
|
||||
/** Holds if this attribute has a namespace. */
|
||||
predicate hasNamespace() { xmlHasNs(this, _, _) }
|
||||
|
||||
/** Gets the namespace of this attribute, if any. */
|
||||
XMLNamespace getNamespace() { xmlHasNs(this, result, _) }
|
||||
XmlNamespace getNamespace() { xmlHasNs(this, result, _) }
|
||||
|
||||
/** Gets the value of this attribute. */
|
||||
string getValue() { xmlAttrs(this, _, _, result, _, _) }
|
||||
@@ -250,6 +268,9 @@ class XMLAttribute extends @xmlattribute, XMLLocatable {
|
||||
override string toString() { result = this.getName() + "=" + this.getValue() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlAttribute */
|
||||
deprecated class XMLAttribute = XmlAttribute;
|
||||
|
||||
/**
|
||||
* A namespace used in an XML file.
|
||||
*
|
||||
@@ -259,23 +280,29 @@ class XMLAttribute extends @xmlattribute, XMLLocatable {
|
||||
* xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
* ```
|
||||
*/
|
||||
class XMLNamespace extends XMLLocatable, @xmlnamespace {
|
||||
class XmlNamespace extends XmlLocatable, @xmlnamespace {
|
||||
/** Gets the prefix of this namespace. */
|
||||
string getPrefix() { xmlNs(this, result, _, _) }
|
||||
|
||||
/** Gets the URI of this namespace. */
|
||||
string getURI() { xmlNs(this, _, result, _) }
|
||||
string getUri() { xmlNs(this, _, result, _) }
|
||||
|
||||
/** DEPRECATED: Alias for getUri */
|
||||
deprecated string getURI() { result = this.getUri() }
|
||||
|
||||
/** Holds if this namespace has no prefix. */
|
||||
predicate isDefault() { this.getPrefix() = "" }
|
||||
|
||||
override string toString() {
|
||||
this.isDefault() and result = this.getURI()
|
||||
this.isDefault() and result = this.getUri()
|
||||
or
|
||||
not this.isDefault() and result = this.getPrefix() + ":" + this.getURI()
|
||||
not this.isDefault() and result = this.getPrefix() + ":" + this.getUri()
|
||||
}
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlNamespace */
|
||||
deprecated class XMLNamespace = XmlNamespace;
|
||||
|
||||
/**
|
||||
* A comment in an XML file.
|
||||
*
|
||||
@@ -285,17 +312,20 @@ class XMLNamespace extends XMLLocatable, @xmlnamespace {
|
||||
* <!-- This is a comment. -->
|
||||
* ```
|
||||
*/
|
||||
class XMLComment extends @xmlcomment, XMLLocatable {
|
||||
class XmlComment extends @xmlcomment, XmlLocatable {
|
||||
/** Gets the text content of this XML comment. */
|
||||
string getText() { xmlComments(this, result, _, _) }
|
||||
|
||||
/** Gets the parent of this XML comment. */
|
||||
XMLParent getParent() { xmlComments(this, _, result, _) }
|
||||
XmlParent getParent() { xmlComments(this, _, result, _) }
|
||||
|
||||
/** Gets a printable representation of this XML comment. */
|
||||
override string toString() { result = this.getText() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlComment */
|
||||
deprecated class XMLComment = XmlComment;
|
||||
|
||||
/**
|
||||
* A sequence of characters that occurs between opening and
|
||||
* closing tags of an XML element, excluding other elements.
|
||||
@@ -306,12 +336,12 @@ class XMLComment extends @xmlcomment, XMLLocatable {
|
||||
* <content>This is a sequence of characters.</content>
|
||||
* ```
|
||||
*/
|
||||
class XMLCharacters extends @xmlcharacters, XMLLocatable {
|
||||
class XmlCharacters extends @xmlcharacters, XmlLocatable {
|
||||
/** Gets the content of this character sequence. */
|
||||
string getCharacters() { xmlChars(this, result, _, _, _, _) }
|
||||
|
||||
/** Gets the parent of this character sequence. */
|
||||
XMLParent getParent() { xmlChars(this, _, result, _, _, _) }
|
||||
XmlParent getParent() { xmlChars(this, _, result, _, _, _) }
|
||||
|
||||
/** Holds if this character sequence is CDATA. */
|
||||
predicate isCDATA() { xmlChars(this, _, _, _, 1, _) }
|
||||
@@ -319,3 +349,6 @@ class XMLCharacters extends @xmlcharacters, XMLLocatable {
|
||||
/** Gets a printable representation of this XML character sequence. */
|
||||
override string toString() { result = this.getCharacters() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XmlCharacters */
|
||||
deprecated class XMLCharacters = XmlCharacters;
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
## 0.4.1
|
||||
|
||||
## 0.4.0
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
0
python/ql/src/Exceptions/EmptyExcept.ql
Executable file → Normal file
0
python/ql/src/Exceptions/EmptyExcept.ql
Executable file → Normal file
0
python/ql/src/Exceptions/UnguardedNextInGenerator.ql
Executable file → Normal file
0
python/ql/src/Exceptions/UnguardedNextInGenerator.ql
Executable file → Normal file
0
python/ql/src/Resources/FileNotAlwaysClosed.ql
Executable file → Normal file
0
python/ql/src/Resources/FileNotAlwaysClosed.ql
Executable file → Normal file
@@ -35,7 +35,7 @@ private import semmle.python.objects.ObjectInternal
|
||||
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
|
||||
// probably require some more work: for this query, it's totally ok to use
|
||||
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
|
||||
// figure out if that is desireable in general. I simply skipped a corner here!
|
||||
// figure out if that is desirable in general. I simply skipped a corner here!
|
||||
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
|
||||
/**
|
||||
* A callable that is considered a "safe" external API from a security perspective.
|
||||
@@ -131,7 +131,9 @@ class UntrustedExternalApiDataNode extends ExternalApiDataNode {
|
||||
/** DEPRECATED: Alias for UntrustedExternalApiDataNode */
|
||||
deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
|
||||
|
||||
/** An external API which is used with untrusted data. */
|
||||
private newtype TExternalApi =
|
||||
/** An untrusted API method `m` where untrusted data is passed at `index`. */
|
||||
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
|
||||
exists(UntrustedExternalApiDataNode n |
|
||||
callable = n.getCallable() and
|
||||
|
||||
65
python/ql/src/Security/CWE-020/OverlyLargeRange.qhelp
Normal file
65
python/ql/src/Security/CWE-020/OverlyLargeRange.qhelp
Normal file
@@ -0,0 +1,65 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
It's easy to write a regular expression range that matches a wider range of characters than you intended.
|
||||
For example, <code>/[a-zA-z]/</code> matches all lowercase and all uppercase letters,
|
||||
as you would expect, but it also matches the characters: <code>[ \ ] ^ _ `</code>.
|
||||
</p>
|
||||
<p>
|
||||
Another common problem is failing to escape the dash character in a regular
|
||||
expression. An unescaped dash is interpreted
|
||||
as part of a range. For example, in the character class <code>[a-zA-Z0-9%=.,-_]</code>
|
||||
the last character range matches the 55 characters between
|
||||
<code>,</code> and <code>_</code> (both included), which overlaps with the
|
||||
range <code>[0-9]</code> and is clearly not intended by the writer.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Avoid any confusion about which characters are included in the range by
|
||||
writing unambiguous regular expressions.
|
||||
Always check that character ranges match only the expected characters.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following example code is intended to check whether a string is a valid 6 digit hex color.
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
import re
|
||||
def is_valid_hex_color(color):
|
||||
return re.match(r'^#[0-9a-fA-f]{6}$', color) is not None
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
However, the <code>A-f</code> range is overly large and matches every uppercase character.
|
||||
It would parse a "color" like <code>#XXYYZZ</code> as valid.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The fix is to use an uppercase <code>A-F</code> range instead.
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
import re
|
||||
def is_valid_hex_color(color):
|
||||
return re.match(r'^#[0-9a-fA-F]{6}$', color) is not None
|
||||
</sample>
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>GitHub Advisory Database: <a href="https://github.com/advisories/GHSA-g4rg-993r-mgx7">CVE-2021-42740: Improper Neutralization of Special Elements used in a Command in Shell-quote</a></li>
|
||||
<li>wh0.github.io: <a href="https://wh0.github.io/2021/10/28/shell-quote-rce-exploiting.html">Exploiting CVE-2021-42740</a></li>
|
||||
<li>Yosuke Ota: <a href="https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-obscure-range.html">no-obscure-range</a></li>
|
||||
<li>Paul Boyd: <a href="https://pboyd.io/posts/comma-dash-dot/">The regex [,-.]</a></li>
|
||||
</references>
|
||||
</qhelp>
|
||||
19
python/ql/src/Security/CWE-020/OverlyLargeRange.ql
Normal file
19
python/ql/src/Security/CWE-020/OverlyLargeRange.ql
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @name Overly permissive regular expression range
|
||||
* @description Overly permissive regular expression ranges match a wider range of characters than intended.
|
||||
* This may allow an attacker to bypass a filter or sanitizer.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.0
|
||||
* @precision high
|
||||
* @id py/overly-large-range
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
import semmle.python.security.OverlyLargeRangeQuery
|
||||
|
||||
from RegExpCharacterRange range, string reason
|
||||
where problem(range, reason)
|
||||
select range, "Suspicious character range that " + reason + "."
|
||||
0
python/ql/src/Security/CWE-078/CommandInjection.ql
Executable file → Normal file
0
python/ql/src/Security/CWE-078/CommandInjection.ql
Executable file → Normal file
@@ -19,14 +19,14 @@ private API::Node unsafe_paramiko_policy(string name) {
|
||||
result = API::moduleImport("paramiko").getMember("client").getMember(name)
|
||||
}
|
||||
|
||||
private API::Node paramikoSSHClientInstance() {
|
||||
private API::Node paramikoSshClientInstance() {
|
||||
result = API::moduleImport("paramiko").getMember("client").getMember("SSHClient").getReturn()
|
||||
}
|
||||
|
||||
from DataFlow::CallCfgNode call, DataFlow::Node arg, string name
|
||||
where
|
||||
// see http://docs.paramiko.org/en/stable/api/client.html#paramiko.client.SSHClient.set_missing_host_key_policy
|
||||
call = paramikoSSHClientInstance().getMember("set_missing_host_key_policy").getACall() and
|
||||
call = paramikoSshClientInstance().getMember("set_missing_host_key_policy").getACall() and
|
||||
arg in [call.getArg(0), call.getArgByName("policy")] and
|
||||
(
|
||||
arg = unsafe_paramiko_policy(name).getAValueReachableFromSource() or
|
||||
|
||||
@@ -29,7 +29,7 @@ The example shows two unsafe calls to <a href="https://semmle.com">semmle.com</a
|
||||
|
||||
<references>
|
||||
<li>
|
||||
Python requests documentation: <a href="http://docs.python-requests.org/en/master/user/advanced/#ssl-cert-verification">SSL Cert Verification</a>.
|
||||
Python requests documentation: <a href="https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification">SSL Cert Verification</a>.
|
||||
</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -7,13 +7,13 @@ private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import TlsLibraryModel
|
||||
|
||||
class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
PyOpenSSLContextCreation() {
|
||||
class PyOpenSslContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
PyOpenSslContextCreation() {
|
||||
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Context").getACall()
|
||||
}
|
||||
|
||||
override string getProtocol() {
|
||||
exists(DataFlow::Node protocolArg, PyOpenSSL pyo |
|
||||
exists(DataFlow::Node protocolArg, PyOpenSsl pyo |
|
||||
protocolArg in [this.getArg(0), this.getArgByName("method")]
|
||||
|
|
||||
protocolArg in [
|
||||
@@ -51,12 +51,12 @@ class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificPyOpenSSLContextCreation extends PyOpenSSLContextCreation, UnspecificContextCreation {
|
||||
UnspecificPyOpenSSLContextCreation() { library instanceof PyOpenSSL }
|
||||
class UnspecificPyOpenSslContextCreation extends PyOpenSslContextCreation, UnspecificContextCreation {
|
||||
UnspecificPyOpenSslContextCreation() { library instanceof PyOpenSsl }
|
||||
}
|
||||
|
||||
class PyOpenSSL extends TlsLibrary {
|
||||
PyOpenSSL() { this = "pyOpenSSL" }
|
||||
class PyOpenSsl extends TlsLibrary {
|
||||
PyOpenSsl() { this = "pyOpenSSL" }
|
||||
|
||||
override string specific_version_name(ProtocolVersion version) { result = version + "_METHOD" }
|
||||
|
||||
@@ -70,7 +70,7 @@ class PyOpenSSL extends TlsLibrary {
|
||||
override ContextCreation default_context_creation() { none() }
|
||||
|
||||
override ContextCreation specific_context_creation() {
|
||||
result instanceof PyOpenSSLContextCreation
|
||||
result instanceof PyOpenSslContextCreation
|
||||
}
|
||||
|
||||
override DataFlow::Node insecure_connection_creation(ProtocolVersion version) { none() }
|
||||
@@ -80,6 +80,6 @@ class PyOpenSSL extends TlsLibrary {
|
||||
override ProtocolRestriction protocol_restriction() { result instanceof SetOptionsCall }
|
||||
|
||||
override ProtocolUnrestriction protocol_unrestriction() {
|
||||
result instanceof UnspecificPyOpenSSLContextCreation
|
||||
result instanceof UnspecificPyOpenSslContextCreation
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,8 +7,8 @@ private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import TlsLibraryModel
|
||||
|
||||
class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
|
||||
class SslContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
SslContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
|
||||
|
||||
override string getProtocol() {
|
||||
exists(DataFlow::Node protocolArg, Ssl ssl |
|
||||
@@ -27,8 +27,8 @@ class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
}
|
||||
}
|
||||
|
||||
class SSLDefaultContextCreation extends ContextCreation {
|
||||
SSLDefaultContextCreation() {
|
||||
class SslDefaultContextCreation extends ContextCreation {
|
||||
SslDefaultContextCreation() {
|
||||
this = API::moduleImport("ssl").getMember("create_default_context").getACall()
|
||||
}
|
||||
|
||||
@@ -161,8 +161,8 @@ class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, Data
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContextCreation {
|
||||
UnspecificSSLContextCreation() { library instanceof Ssl }
|
||||
class UnspecificSslContextCreation extends SslContextCreation, UnspecificContextCreation {
|
||||
UnspecificSslContextCreation() { library instanceof Ssl }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
result = UnspecificContextCreation.super.getUnrestriction() and
|
||||
@@ -172,7 +172,7 @@ class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContext
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificSSLDefaultContextCreation extends SSLDefaultContextCreation, ProtocolUnrestriction {
|
||||
class UnspecificSslDefaultContextCreation extends SslDefaultContextCreation, ProtocolUnrestriction {
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
// see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
|
||||
@@ -195,10 +195,10 @@ class Ssl extends TlsLibrary {
|
||||
override API::Node version_constants() { result = API::moduleImport("ssl") }
|
||||
|
||||
override ContextCreation default_context_creation() {
|
||||
result instanceof SSLDefaultContextCreation
|
||||
result instanceof SslDefaultContextCreation
|
||||
}
|
||||
|
||||
override ContextCreation specific_context_creation() { result instanceof SSLContextCreation }
|
||||
override ContextCreation specific_context_creation() { result instanceof SslContextCreation }
|
||||
|
||||
override DataFlow::CallCfgNode insecure_connection_creation(ProtocolVersion version) {
|
||||
result = API::moduleImport("ssl").getMember("wrap_socket").getACall() and
|
||||
@@ -220,8 +220,8 @@ class Ssl extends TlsLibrary {
|
||||
or
|
||||
result instanceof ContextSetVersion
|
||||
or
|
||||
result instanceof UnspecificSSLContextCreation
|
||||
result instanceof UnspecificSslContextCreation
|
||||
or
|
||||
result instanceof UnspecificSSLDefaultContextCreation
|
||||
result instanceof UnspecificSslDefaultContextCreation
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
import semmle.python.security.regexp.SuperlinearBackTracking
|
||||
import semmle.python.security.dataflow.PolynomialReDoSQuery
|
||||
import DataFlow::PathGraph
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.ExponentialBackTracking
|
||||
import semmle.python.security.regexp.ExponentialBackTracking
|
||||
|
||||
from RegExpTerm t, string pump, State s, string prefixMsg
|
||||
where
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: newQuery
|
||||
---
|
||||
* Added a new query, `py/suspicious-regexp-range`, to detect character ranges in regular expressions that seem to match
|
||||
too many characters.
|
||||
1
python/ql/src/change-notes/released/0.4.1.md
Normal file
1
python/ql/src/change-notes/released/0.4.1.md
Normal file
@@ -0,0 +1 @@
|
||||
## 0.4.1
|
||||
@@ -1,2 +1,2 @@
|
||||
---
|
||||
lastReleaseVersion: 0.4.0
|
||||
lastReleaseVersion: 0.4.1
|
||||
|
||||
@@ -17,8 +17,8 @@ import semmle.python.web.HttpRequest
|
||||
/* Sinks */
|
||||
import experimental.semmle.python.security.injection.XSLT
|
||||
|
||||
class XSLTInjectionConfiguration extends TaintTracking::Configuration {
|
||||
XSLTInjectionConfiguration() { this = "XSLT injection configuration" }
|
||||
class XsltInjectionConfiguration extends TaintTracking::Configuration {
|
||||
XsltInjectionConfiguration() { this = "XSLT injection configuration" }
|
||||
|
||||
deprecated override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof HttpRequestTaintSource
|
||||
@@ -29,7 +29,7 @@ class XSLTInjectionConfiguration extends TaintTracking::Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
from XSLTInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
from XsltInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(),
|
||||
"a user-provided value"
|
||||
|
||||
@@ -2,7 +2,6 @@ private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/**
|
||||
* A data flow source of the client ip obtained according to the remote endpoint identifier specified
|
||||
|
||||
@@ -182,7 +182,10 @@ module LdapBind {
|
||||
/**
|
||||
* Holds if the binding process use SSL.
|
||||
*/
|
||||
abstract predicate useSSL();
|
||||
abstract predicate useSsl();
|
||||
|
||||
/** DEPRECATED: Alias for useSsl */
|
||||
deprecated predicate useSSL() { useSsl() }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -213,7 +216,10 @@ class LdapBind extends DataFlow::Node {
|
||||
/**
|
||||
* Holds if the binding process use SSL.
|
||||
*/
|
||||
predicate useSSL() { range.useSSL() }
|
||||
predicate useSsl() { range.useSsl() }
|
||||
|
||||
/** DEPRECATED: Alias for useSsl */
|
||||
deprecated predicate useSSL() { useSsl() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for LdapBind */
|
||||
|
||||
@@ -12,13 +12,13 @@ private import semmle.python.ApiGraphs
|
||||
/**
|
||||
* Provides models for Python's ldap-related libraries.
|
||||
*/
|
||||
private module LDAP {
|
||||
private module Ldap {
|
||||
/**
|
||||
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
|
||||
*/
|
||||
private module LDAP2 {
|
||||
private module Ldap2 {
|
||||
/** Gets a reference to the `ldap` module. */
|
||||
API::Node ldap() { result = API::moduleImport("ldap") }
|
||||
|
||||
@@ -38,8 +38,8 @@ private module LDAP {
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
|
||||
*/
|
||||
private class LDAP2QueryMethods extends string {
|
||||
LDAP2QueryMethods() {
|
||||
private class Ldap2QueryMethods extends string {
|
||||
Ldap2QueryMethods() {
|
||||
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
|
||||
}
|
||||
}
|
||||
@@ -52,7 +52,7 @@ private module LDAP {
|
||||
/** Gets a reference to a `ldap` query. */
|
||||
private DataFlow::Node ldapQuery() {
|
||||
result = ldapOperation() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2QueryMethods
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof Ldap2QueryMethods
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -60,8 +60,8 @@ private module LDAP {
|
||||
*
|
||||
* See `LDAP2QueryMethods`
|
||||
*/
|
||||
private class LDAP2Query extends DataFlow::CallCfgNode, LdapQuery::Range {
|
||||
LDAP2Query() { this.getFunction() = ldapQuery() }
|
||||
private class Ldap2Query extends DataFlow::CallCfgNode, LdapQuery::Range {
|
||||
Ldap2Query() { this.getFunction() = ldapQuery() }
|
||||
|
||||
override DataFlow::Node getQuery() {
|
||||
result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")]
|
||||
@@ -73,8 +73,8 @@ private module LDAP {
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
|
||||
*/
|
||||
private class LDAP2BindMethods extends string {
|
||||
LDAP2BindMethods() {
|
||||
private class Ldap2BindMethods extends string {
|
||||
Ldap2BindMethods() {
|
||||
this in [
|
||||
"bind", "bind_s", "simple_bind", "simple_bind_s", "sasl_interactive_bind_s",
|
||||
"sasl_non_interactive_bind_s", "sasl_external_bind_s", "sasl_gssapi_bind_s"
|
||||
@@ -85,12 +85,12 @@ private module LDAP {
|
||||
/** Gets a reference to a `ldap` bind. */
|
||||
private DataFlow::Node ldapBind() {
|
||||
result = ldapOperation() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof Ldap2BindMethods
|
||||
}
|
||||
|
||||
/**List of SSL-demanding options */
|
||||
private class LDAPSSLOptions extends DataFlow::Node {
|
||||
LDAPSSLOptions() {
|
||||
private class LdapSslOptions extends DataFlow::Node {
|
||||
LdapSslOptions() {
|
||||
this = ldap().getMember("OPT_X_TLS_" + ["DEMAND", "HARD"]).getAValueReachableFromSource()
|
||||
}
|
||||
}
|
||||
@@ -100,8 +100,8 @@ private module LDAP {
|
||||
*
|
||||
* See `LDAP2BindMethods`
|
||||
*/
|
||||
private class LDAP2Bind extends DataFlow::CallCfgNode, LdapBind::Range {
|
||||
LDAP2Bind() { this.getFunction() = ldapBind() }
|
||||
private class Ldap2Bind extends DataFlow::CallCfgNode, LdapBind::Range {
|
||||
Ldap2Bind() { this.getFunction() = ldapBind() }
|
||||
|
||||
override DataFlow::Node getPassword() {
|
||||
result in [this.getArg(1), this.getArgByName("cred")]
|
||||
@@ -115,11 +115,11 @@ private module LDAP {
|
||||
)
|
||||
}
|
||||
|
||||
override predicate useSSL() {
|
||||
override predicate useSsl() {
|
||||
// use initialize to correlate `this` and so avoid FP in several instances
|
||||
exists(DataFlow::CallCfgNode initialize |
|
||||
// ldap.set_option(ldap.OPT_X_TLS_%s)
|
||||
ldap().getMember("set_option").getACall().getArg(_) instanceof LDAPSSLOptions
|
||||
ldap().getMember("set_option").getACall().getArg(_) instanceof LdapSslOptions
|
||||
or
|
||||
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
|
||||
initialize = ldapInitialize().getACall() and
|
||||
@@ -136,7 +136,7 @@ private module LDAP {
|
||||
setOption.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
|
||||
initialize and
|
||||
setOption.getFunction().(DataFlow::AttrRead).getAttributeName() = "set_option" and
|
||||
setOption.getArg(0) instanceof LDAPSSLOptions and
|
||||
setOption.getArg(0) instanceof LdapSslOptions and
|
||||
not DataFlow::exprNode(any(False falseExpr))
|
||||
.(DataFlow::LocalSourceNode)
|
||||
.flowsTo(setOption.getArg(1))
|
||||
@@ -144,6 +144,9 @@ private module LDAP {
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for useSsl */
|
||||
deprecated override predicate useSSL() { this.useSsl() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -151,8 +154,8 @@ private module LDAP {
|
||||
*
|
||||
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
|
||||
*/
|
||||
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
|
||||
private class Ldap2EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
Ldap2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
@@ -162,8 +165,8 @@ private module LDAP {
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
|
||||
*/
|
||||
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
LDAP2EscapeFilterCall() {
|
||||
private class Ldap2EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
Ldap2EscapeFilterCall() {
|
||||
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
|
||||
}
|
||||
|
||||
@@ -176,7 +179,7 @@ private module LDAP {
|
||||
*
|
||||
* See https://pypi.org/project/ldap3/
|
||||
*/
|
||||
private module LDAP3 {
|
||||
private module Ldap3 {
|
||||
/** Gets a reference to the `ldap3` module. */
|
||||
API::Node ldap3() { result = API::moduleImport("ldap3") }
|
||||
|
||||
@@ -192,8 +195,8 @@ private module LDAP {
|
||||
/**
|
||||
* A class to find `ldap3` methods executing a query.
|
||||
*/
|
||||
private class LDAP3Query extends DataFlow::CallCfgNode, LdapQuery::Range {
|
||||
LDAP3Query() {
|
||||
private class Ldap3Query extends DataFlow::CallCfgNode, LdapQuery::Range {
|
||||
Ldap3Query() {
|
||||
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
|
||||
ldap3Connection().getACall() and
|
||||
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "search"
|
||||
@@ -205,8 +208,8 @@ private module LDAP {
|
||||
/**
|
||||
* A class to find `ldap3` methods binding a connection.
|
||||
*/
|
||||
class LDAP3Bind extends DataFlow::CallCfgNode, LdapBind::Range {
|
||||
LDAP3Bind() { this = ldap3Connection().getACall() }
|
||||
class Ldap3Bind extends DataFlow::CallCfgNode, LdapBind::Range {
|
||||
Ldap3Bind() { this = ldap3Connection().getACall() }
|
||||
|
||||
override DataFlow::Node getPassword() {
|
||||
result in [this.getArg(2), this.getArgByName("password")]
|
||||
@@ -220,7 +223,7 @@ private module LDAP {
|
||||
)
|
||||
}
|
||||
|
||||
override predicate useSSL() {
|
||||
override predicate useSsl() {
|
||||
exists(DataFlow::CallCfgNode serverCall |
|
||||
serverCall = ldap3Server().getACall() and
|
||||
this.getArg(0).getALocalSource() = serverCall and
|
||||
@@ -236,6 +239,9 @@ private module LDAP {
|
||||
startTLS.getObject().getALocalSource() = this
|
||||
)
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for useSsl */
|
||||
deprecated override predicate useSSL() { this.useSsl() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -243,8 +249,8 @@ private module LDAP {
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
|
||||
*/
|
||||
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
|
||||
private class Ldap3EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
Ldap3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
@@ -254,8 +260,8 @@ private module LDAP {
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
|
||||
*/
|
||||
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
LDAP3EscapeFilterCall() {
|
||||
private class Ldap3EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
|
||||
Ldap3EscapeFilterCall() {
|
||||
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
|
||||
}
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ class LdapInsecureAuthConfig extends TaintTracking::Configuration {
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
exists(LdapBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
|
||||
exists(LdapBind ldapBind | not ldapBind.useSsl() and sink = ldapBind.getHost())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,12 +11,15 @@ import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.web.HttpRequest
|
||||
|
||||
/** Models XSLT Injection related classes and functions */
|
||||
module XSLTInjection {
|
||||
module XsltInjection {
|
||||
/** Returns a class value which refers to `lxml.etree` */
|
||||
Value etree() { result = Value::named("lxml.etree") }
|
||||
|
||||
/** A generic taint sink that is vulnerable to XSLT injection. */
|
||||
abstract class XSLTInjectionSink extends TaintSink { }
|
||||
abstract class XsltInjectionSink extends TaintSink { }
|
||||
|
||||
/** DEPRECATED: Alias for XsltInjectionSink */
|
||||
deprecated class XSLTInjectionSink = XsltInjectionSink;
|
||||
|
||||
/**
|
||||
* A kind of "taint", representing an untrusted XML string
|
||||
@@ -73,10 +76,10 @@ module XSLTInjection {
|
||||
* root = etree.XML("<xmlContent>")
|
||||
* find_text = etree.XSLT("`sink`")
|
||||
*/
|
||||
private class EtreeXSLTArgument extends XSLTInjectionSink {
|
||||
private class EtreeXsltArgument extends XsltInjectionSink {
|
||||
override string toString() { result = "lxml.etree.XSLT" }
|
||||
|
||||
EtreeXSLTArgument() {
|
||||
EtreeXsltArgument() {
|
||||
exists(CallNode call | call.getFunction().(AttrNode).getObject("XSLT").pointsTo(etree()) |
|
||||
call.getArg(0) = this
|
||||
)
|
||||
@@ -94,10 +97,10 @@ module XSLTInjection {
|
||||
* tree = etree.parse(f)
|
||||
* result_tree = tree.xslt(`sink`)
|
||||
*/
|
||||
private class ParseXSLTArgument extends XSLTInjectionSink {
|
||||
private class ParseXsltArgument extends XsltInjectionSink {
|
||||
override string toString() { result = "lxml.etree.parse.xslt" }
|
||||
|
||||
ParseXSLTArgument() {
|
||||
ParseXsltArgument() {
|
||||
exists(
|
||||
CallNode parseCall, CallNode xsltCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
||||
|
|
||||
@@ -113,3 +116,6 @@ module XSLTInjection {
|
||||
override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
|
||||
}
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for XsltInjection */
|
||||
deprecated module XSLTInjection = XsltInjection;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name: codeql/python-queries
|
||||
version: 0.4.1-dev
|
||||
version: 0.4.2-dev
|
||||
groups:
|
||||
- python
|
||||
- queries
|
||||
|
||||
@@ -330,6 +330,19 @@ abstract private class Expectation extends FailureLocatable {
|
||||
override Location getLocation() { result = comment.getLocation() }
|
||||
}
|
||||
|
||||
private predicate onSameLine(ValidExpectation a, ActualResult b) {
|
||||
exists(string fname, int line, Location la, Location lb |
|
||||
// Join order intent:
|
||||
// Take the locations of ActualResults,
|
||||
// join with locations in the same file / on the same line,
|
||||
// then match those against ValidExpectations.
|
||||
la = a.getLocation() and
|
||||
pragma[only_bind_into](lb) = b.getLocation() and
|
||||
pragma[only_bind_into](la).hasLocationInfo(fname, line, _, _, _) and
|
||||
lb.hasLocationInfo(fname, line, _, _, _)
|
||||
)
|
||||
}
|
||||
|
||||
private class ValidExpectation extends Expectation, TValidExpectation {
|
||||
string tag;
|
||||
string value;
|
||||
@@ -344,8 +357,7 @@ private class ValidExpectation extends Expectation, TValidExpectation {
|
||||
string getKnownFailure() { result = knownFailure }
|
||||
|
||||
predicate matchesActualResult(ActualResult actualResult) {
|
||||
getLocation().getStartLine() = actualResult.getLocation().getStartLine() and
|
||||
getLocation().getFile() = actualResult.getLocation().getFile() and
|
||||
onSameLine(pragma[only_bind_into](this), actualResult) and
|
||||
getTag() = actualResult.getTag() and
|
||||
getValue() = actualResult.getValue()
|
||||
}
|
||||
|
||||
@@ -84,8 +84,8 @@ class Assertion extends Comment {
|
||||
string tryExplainFailure() {
|
||||
exists(int i, API::Node nd, string prefix, string suffix |
|
||||
nd = this.lookup(i) and
|
||||
i < getPathLength() and
|
||||
not exists(this.lookup([i + 1 .. getPathLength()])) and
|
||||
i < this.getPathLength() and
|
||||
not exists(this.lookup([i + 1 .. this.getPathLength()])) and
|
||||
prefix = nd + " has no outgoing edge labelled " + this.getEdgeLabel(i) + ";" and
|
||||
if exists(nd.getASuccessor())
|
||||
then
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.XSLT
|
||||
|
||||
from XSLTInjection::XSLTInjectionSink sink, TaintKind kind
|
||||
from XsltInjection::XsltInjectionSink sink, TaintKind kind
|
||||
where sink.sinks(kind)
|
||||
select sink, kind
|
||||
|
||||
0
python/ql/test/library-tests/PointsTo/new/Dataflow.ql
Executable file → Normal file
0
python/ql/test/library-tests/PointsTo/new/Dataflow.ql
Executable file → Normal file
0
python/ql/test/library-tests/PointsTo/new/PointsToWithContext.ql
Executable file → Normal file
0
python/ql/test/library-tests/PointsTo/new/PointsToWithContext.ql
Executable file → Normal file
0
python/ql/test/library-tests/formatting/FormatArguments.expected
Executable file → Normal file
0
python/ql/test/library-tests/formatting/FormatArguments.expected
Executable file → Normal file
0
python/ql/test/library-tests/formatting/FormatFields.expected
Executable file → Normal file
0
python/ql/test/library-tests/formatting/FormatFields.expected
Executable file → Normal file
0
python/ql/test/library-tests/jump_to_defn/Remote.expected
Executable file → Normal file
0
python/ql/test/library-tests/jump_to_defn/Remote.expected
Executable file → Normal file
0
python/ql/test/library-tests/jump_to_defn/test.expected
Executable file → Normal file
0
python/ql/test/library-tests/jump_to_defn/test.expected
Executable file → Normal file
0
python/ql/test/query-tests/Exceptions/general/EmptyExcept.expected
Executable file → Normal file
0
python/ql/test/query-tests/Exceptions/general/EmptyExcept.expected
Executable file → Normal file
@@ -0,0 +1,10 @@
|
||||
| test.py:3:29:3:31 | 0-9 | Suspicious character range that overlaps with 3-5 in the same character class. |
|
||||
| test.py:5:31:5:33 | A-z | Suspicious character range that overlaps with A-Z in the same character class, and is equivalent to [A-Z\\[\\\\\\]^_`a-z]. |
|
||||
| test.py:7:28:7:30 | z-a | Suspicious character range that is empty. |
|
||||
| test.py:17:38:17:40 | A-f | Suspicious character range that overlaps with a-f in the same character class, and is equivalent to [A-Z\\[\\\\\\]^_`a-f]. |
|
||||
| test.py:19:30:19:32 | $-` | Suspicious character range that is equivalent to [$%&'()*+,\\-.\\/0-9:;<=>?@A-Z\\[\\\\\\]^_`]. |
|
||||
| test.py:21:43:21:45 | +-< | Suspicious character range that is equivalent to [+,\\-.\\/0-9:;<]. |
|
||||
| test.py:23:47:23:49 | .-_ | Suspicious character range that overlaps with 1-9 in the same character class, and is equivalent to [.\\/0-9:;<=>?@A-Z\\[\\\\\\]^_]. |
|
||||
| test.py:25:34:25:36 | 7-F | Suspicious character range that is equivalent to [7-9:;<=>?@A-F]. |
|
||||
| test.py:27:38:27:40 | 0-9 | Suspicious character range that overlaps with \\d in the same character class. |
|
||||
| test.py:29:41:29:43 | .-? | Suspicious character range that overlaps with \\w in the same character class, and is equivalent to [.\\/0-9:;<=>?]. |
|
||||
@@ -0,0 +1 @@
|
||||
Security/CWE-020/OverlyLargeRange.ql
|
||||
@@ -0,0 +1,29 @@
|
||||
import re
|
||||
|
||||
overlap1 = re.compile(r'^[0-93-5]$') # NOT OK
|
||||
|
||||
overlap2 = re.compile(r'[A-ZA-z]') # NOT OK
|
||||
|
||||
isEmpty = re.compile(r'^[z-a]$') # NOT OK
|
||||
|
||||
isAscii = re.compile(r'^[\x00-\x7F]*$') # OK
|
||||
|
||||
printable = re.compile(r'[!-~]') # OK - used to select most printable ASCII characters
|
||||
|
||||
codePoints = re.compile(r'[^\x21-\x7E]|[[\](){}<>/%]') # OK
|
||||
|
||||
NON_ALPHANUMERIC_REGEXP = re.compile(r'([^\#-~| |!])') # OK
|
||||
|
||||
smallOverlap = re.compile(r'[0-9a-fA-f]') # NOT OK
|
||||
|
||||
weirdRange = re.compile(r'[$-`]') # NOT OK
|
||||
|
||||
keywordOperator = re.compile(r'[!\~\*\/%+-<>\^|=&]') # NOT OK
|
||||
|
||||
notYoutube = re.compile(r'youtu\.be\/[a-z1-9.-_]+') # NOT OK
|
||||
|
||||
numberToLetter = re.compile(r'[7-F]') # NOT OK
|
||||
|
||||
overlapsWithClass1 = re.compile(r'[0-9\d]') # NOT OK
|
||||
|
||||
overlapsWithClass2 = re.compile(r'[\w,.-?:*+]') # NOT OK
|
||||
@@ -1,5 +1,5 @@
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
import semmle.python.security.regexp.SuperlinearBackTracking
|
||||
|
||||
from PolynomialBackTrackingTerm t
|
||||
select t.getRegex(), t, t.getReason()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
|
||||
| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
|
||||
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
|
||||
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
|
||||
@@ -31,7 +31,7 @@
|
||||
| redos.py:127:25:127:38 | ([a-z]\|[d-h])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
|
||||
| redos.py:130:25:130:40 | ([^a-z]\|[^0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
|
||||
| redos.py:133:25:133:35 | (\\d\|[0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| redos.py:136:25:136:32 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| redos.py:136:25:136:32 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
|
||||
| redos.py:139:25:139:31 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
|
||||
| redos.py:145:25:145:32 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| redos.py:148:25:148:31 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
|
||||
@@ -46,7 +46,7 @@
|
||||
| redos.py:175:26:175:30 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| redos.py:187:26:187:31 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
|
||||
| redos.py:190:27:190:29 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '\\n' and containing many repetitions of '\\n'. |
|
||||
| redos.py:193:28:193:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| redos.py:193:28:193:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
|
||||
| redos.py:196:78:196:89 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A:' and containing many repetitions of ' A:'. |
|
||||
| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
|
||||
| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
@@ -65,20 +65,20 @@
|
||||
| redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
|
||||
| redos.py:256:49:256:51 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
|
||||
| redos.py:256:61:256:63 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
|
||||
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
|
||||
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\tthisisagoddamnlongstringforstresstestingthequery'. |
|
||||
| redos.py:262:24:262:87 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
|
||||
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
|
||||
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of '0querythis'. |
|
||||
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
|
||||
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
|
||||
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"" a='. |
|
||||
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""\\t0='. |
|
||||
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
|
||||
| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
|
||||
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
|
||||
| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
|
||||
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
|
||||
| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import python
|
||||
|
||||
abstract class XmlBytecodeExpr extends XMLElement { }
|
||||
abstract class XmlBytecodeExpr extends XmlElement { }
|
||||
|
||||
/** DEPRECATED: Alias for XmlBytecodeExpr */
|
||||
deprecated class XMLBytecodeExpr = XmlBytecodeExpr;
|
||||
|
||||
@@ -4,14 +4,14 @@ import semmle.python.objects.Callables
|
||||
import lib.BytecodeExpr
|
||||
|
||||
/** The XML data for a recorded call (includes all data). */
|
||||
class XmlRecordedCall extends XMLElement {
|
||||
class XmlRecordedCall extends XmlElement {
|
||||
XmlRecordedCall() { this.hasName("recorded_call") }
|
||||
|
||||
/** Gets the XML data for the call. */
|
||||
XmlCall getXmlCall() { result.getParent() = this }
|
||||
|
||||
/** DEPRECATED: Alias for getXmlCall */
|
||||
deprecated XMLCall getXMLCall() { result = getXmlCall() }
|
||||
deprecated XMLCall getXMLCall() { result = this.getXmlCall() }
|
||||
|
||||
/** Gets a call matching the recorded information. */
|
||||
Call getACall() { result = this.getXmlCall().getACall() }
|
||||
@@ -20,7 +20,7 @@ class XmlRecordedCall extends XMLElement {
|
||||
XmlCallee getXmlCallee() { result.getParent() = this }
|
||||
|
||||
/** DEPRECATED: Alias for getXmlCallee */
|
||||
deprecated XMLCallee getXMLCallee() { result = getXmlCallee() }
|
||||
deprecated XMLCallee getXMLCallee() { result = this.getXmlCallee() }
|
||||
|
||||
/** Gets a python function matching the recorded information of the callee. */
|
||||
Function getAPythonCallee() { result = this.getXmlCallee().(XmlPythonCallee).getACallee() }
|
||||
@@ -61,7 +61,7 @@ class XmlRecordedCall extends XMLElement {
|
||||
deprecated class XMLRecordedCall = XmlRecordedCall;
|
||||
|
||||
/** The XML data for the call part a recorded call. */
|
||||
class XmlCall extends XMLElement {
|
||||
class XmlCall extends XmlElement {
|
||||
XmlCall() { this.hasName("Call") }
|
||||
|
||||
string get_filename_data() { result = this.getAChild("filename").getTextValue() }
|
||||
@@ -90,10 +90,10 @@ class XmlCall extends XMLElement {
|
||||
expr.(Name).getId() = bytecode.(XmlBytecodeVariableName).get_name_data()
|
||||
or
|
||||
expr.(Attribute).getName() = bytecode.(XmlBytecodeAttribute).get_attr_name_data() and
|
||||
matchBytecodeExpr(expr.(Attribute).getObject(),
|
||||
this.matchBytecodeExpr(expr.(Attribute).getObject(),
|
||||
bytecode.(XmlBytecodeAttribute).get_object_data())
|
||||
or
|
||||
matchBytecodeExpr(expr.(Call).getFunc(), bytecode.(XmlBytecodeCall).get_function_data())
|
||||
this.matchBytecodeExpr(expr.(Call).getFunc(), bytecode.(XmlBytecodeCall).get_function_data())
|
||||
//
|
||||
// I considered allowing a partial match as well. That is, if the bytecode
|
||||
// expression information only tells us `<unknown>.foo()`, and we find an AST
|
||||
@@ -114,7 +114,7 @@ class XmlCall extends XMLElement {
|
||||
deprecated class XMLCall = XmlCall;
|
||||
|
||||
/** The XML data for the callee part a recorded call. */
|
||||
abstract class XmlCallee extends XMLElement { }
|
||||
abstract class XmlCallee extends XmlElement { }
|
||||
|
||||
/** DEPRECATED: Alias for XmlCallee */
|
||||
deprecated class XMLCallee = XmlCallee;
|
||||
|
||||
Reference in New Issue
Block a user