Merge branch 'main' of https://github.com/github/codeql into python-dataflow/flow-summaries-from-scratch

synced files have changed
This commit is contained in:
yoff
2022-08-25 09:24:05 +00:00
2460 changed files with 213219 additions and 79450 deletions

View File

@@ -1,3 +1,9 @@
## 0.5.3
### Minor Analysis Improvements
* Change `.getASubclass()` on `API::Node` so that it follows subclasses even if the class has a class decorator (see the Python sketch below).
## 0.5.2
## 0.5.1
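For context, here is a minimal Python sketch of the pattern this improvement covers; the decorator and class names are illustrative, not taken from the CodeQL test suite:

```python
def register(cls):
    # A class decorator that returns the class unchanged.
    return cls

class Base:
    # Imagine `Base` is reachable as an `API::Node` in a model.
    pass

@register
class Child(Base):
    # `.getASubclass()` on the node for `Base` now follows `Child`
    # even though the subclass carries a class decorator.
    pass
```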

View File

@@ -0,0 +1,5 @@
---
category: deprecated
---
* The utility files previously in the `semmle.python.security.performance` package have been moved to the `semmle.python.security.regexp` package.
The previous files still exist as deprecated aliases.

View File

@@ -0,0 +1,6 @@
---
category: minorAnalysis
---
* Most deprecated predicates/classes/modules that have been deprecated for over a year have been
deleted.

View File

@@ -0,0 +1,5 @@
---
category: deprecated
---
* Many classes/predicates/modules with upper-case acronyms in their name have been renamed to follow our style-guide.
The old name still exists as a deprecated alias.

View File

@@ -1,4 +1,5 @@
---
category: minorAnalysis
---
## 0.5.3
### Minor Analysis Improvements
* Change `.getASubclass()` on `API::Node` so that it follows subclasses even if the class has a class decorator.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.5.2
lastReleaseVersion: 0.5.3

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.5.3-dev
version: 0.5.4-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -68,8 +68,6 @@ class ListComp extends ListComp_, Comp {
override Expr getIterable() { result = ListComp_.super.getIterable() }
override string toString() { result = ListComp_.super.toString() }
override Expr getElt() { result = Comp.super.getElt() }
}
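As a reminder of what these accessors refer to, an illustrative Python one-liner (not part of the diff):

```python
squares = [x * x for x in range(10)]
# For this list comprehension: `getElt()` is `x * x` and `getIterable()` is `range(10)`.
```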

View File

@@ -616,6 +616,9 @@ private string non_byte_prefix() {
not result.charAt(_) in ["b", "B"]
}
/** A string constant. This is a placeholder class -- use `StrConst` instead. */
class Str = StrConst;
/** A string constant. */
class StrConst extends Str_, ImmutableLiteral {
/* syntax: "hello" */

View File

@@ -21,7 +21,7 @@ class File extends Container, @file {
/** Whether this file is a source code file. */
predicate fromSource() {
/* If we start to analyse .pyc files, then this will have to change. */
/* If we start to analyze .pyc files, then this will have to change. */
any()
}

python/ql/lib/semmle/python/Flow.qll Executable file → Normal file
View File

View File

@@ -2,8 +2,6 @@ import python
class KeyValuePair extends KeyValuePair_, DictDisplayItem {
/* syntax: Expr : Expr */
override Location getLocation() { result = KeyValuePair_.super.getLocation() }
override string toString() { result = KeyValuePair_.super.toString() }
/** Gets the value of this dictionary unpacking. */
@@ -20,8 +18,6 @@ class KeyValuePair extends KeyValuePair_, DictDisplayItem {
/** A double-starred expression in a call or dict literal. */
class DictUnpacking extends DictUnpacking_, DictUnpackingOrKeyword, DictDisplayItem {
override Location getLocation() { result = DictUnpacking_.super.getLocation() }
override string toString() { result = DictUnpacking_.super.toString() }
/** Gets the value of this dictionary unpacking. */
@@ -47,8 +43,6 @@ abstract class DictDisplayItem extends DictItem {
/** A keyword argument in a call. For example `arg=expr` in `foo(0, arg=expr)` */
class Keyword extends Keyword_, DictUnpackingOrKeyword {
/* syntax: name = Expr */
override Location getLocation() { result = Keyword_.super.getLocation() }
override string toString() { result = Keyword_.super.toString() }
/** Gets the value of this keyword argument. */
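To make the three AST classes concrete, a small runnable Python sketch (names are invented for the example):

```python
def foo(a, arg=None, **kwargs):
    return a, arg, kwargs

other = {"x": 1}
d = {"k": 2, **other}   # "k": 2 is a KeyValuePair; **other is a DictUnpacking
foo(0, arg=3, **other)  # arg=3 is a Keyword; **other is dict unpacking in a call
```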

python/ql/lib/semmle/python/Scope.qll Executable file → Normal file
View File

View File

View File

View File

@@ -73,9 +73,7 @@ abstract class AttrWrite extends AttrRef {
* ```
* Also gives access to the `value` being written, by extending `DefinitionNode`.
*/
private class AttributeAssignmentNode extends DefinitionNode, AttrNode {
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
}
private class AttributeAssignmentNode extends DefinitionNode, AttrNode { }
/** A simple attribute assignment: `object.attr = value`. */
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {

View File

@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
else cc instanceof CallContextAny
) and
sc instanceof SummaryCtxNone and
ap instanceof AccessPathNil
ap = TAccessPathNil(node.getDataFlowType())
}
predicate isAtSink() {

View File

@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
else cc instanceof CallContextAny
) and
sc instanceof SummaryCtxNone and
ap instanceof AccessPathNil
ap = TAccessPathNil(node.getDataFlowType())
}
predicate isAtSink() {

View File

@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
else cc instanceof CallContextAny
) and
sc instanceof SummaryCtxNone and
ap instanceof AccessPathNil
ap = TAccessPathNil(node.getDataFlowType())
}
predicate isAtSink() {

View File

@@ -3061,7 +3061,7 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
else cc instanceof CallContextAny
) and
sc instanceof SummaryCtxNone and
ap instanceof AccessPathNil
ap = TAccessPathNil(node.getDataFlowType())
}
predicate isAtSink() {

View File

@@ -78,7 +78,7 @@ deprecated Node importNode(string name) {
// ```
//
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
// Because named imports are modeled as `AttrRead`s, the statement `from foo import bar as baz`
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
// reference to `foo.bar`, as desired.
exists(ImportExpr imp_expr |
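In concrete Python terms, the modeling described in this comment treats a named import as an attribute read plus an assignment; `os.path` is used here only so the snippet runs as-is:

```python
# The named import
from os import path as p

# is tracked as if it had been written
import os
p = os.path
```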

View File

View File

@@ -212,8 +212,8 @@ class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition {
/** Gets the SSA variable to which this refinement applies. */
EssaVariable getInput() {
exists(SsaSourceVariable var, EssaDefinition def |
var = this.getSourceVariable() and
var = def.getSourceVariable() and
pragma[only_bind_into](var) = this.getSourceVariable() and
pragma[only_bind_into](var) = def.getSourceVariable() and
def.reachesEndOfBlock(this.getPredecessor()) and
result.getDefinition() = def
)
@@ -632,7 +632,8 @@ class DeletionDefinition extends EssaNodeDefinition {
*/
class ScopeEntryDefinition extends EssaNodeDefinition {
ScopeEntryDefinition() {
this.getDefiningNode() = this.getSourceVariable().getScopeEntryDefinition() and
this.getDefiningNode() =
pragma[only_bind_out](this.getSourceVariable()).getScopeEntryDefinition() and
not this instanceof ImplicitSubModuleDefinition
}

View File

@@ -10,13 +10,6 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
import semmle.python.frameworks.internal.PEP249Impl
/**
* A module implementing PEP 249. Extend this class for implementations.
*
* DEPRECATED: Extend `PEP249::PEP249ModuleApiNode` instead.
*/
abstract deprecated class PEP249Module extends DataFlow::Node { }
/**
* DEPRECATED: Use `PEP249::PEP249ModuleApiNode` instead.
*/

View File

@@ -1854,16 +1854,22 @@ private module StdlibPrivate {
deprecated API::Node cgiHTTPServer() { result = cgiHttpServer() }
/** Provides models for the `CGIHTTPServer` module. */
module CGIHTTPServer {
module CgiHttpServer {
/**
* Provides models for the `CGIHTTPServer.CGIHTTPRequestHandler` class (Python 2 only).
*/
module CGIHTTPRequestHandler {
/** Gets a reference to the `CGIHTTPServer.CGIHTTPRequestHandler` class. */
module CgiHttpRequestHandler {
/** Gets a reference to the `CGIHTTPServer.CgiHttpRequestHandler` class. */
API::Node classRef() { result = cgiHttpServer().getMember("CGIHTTPRequestHandler") }
}
/** DEPRECATED: Alias for CgiHttpRequestHandler */
deprecated module CGIHTTPRequestHandler = CgiHttpRequestHandler;
}
/** DEPRECATED: Alias for CgiHttpServer */
deprecated module CGIHTTPServer = CgiHttpServer;
// ---------------------------------------------------------------------------
// http (Python 3 only)
// ---------------------------------------------------------------------------
@@ -1911,10 +1917,13 @@ private module StdlibPrivate {
*
* See https://docs.python.org/3.9/library/http.server.html#http.server.CGIHTTPRequestHandler.
*/
module CGIHTTPRequestHandler {
module CgiHttpRequestHandler {
/** Gets a reference to the `http.server.CGIHTTPRequestHandler` class. */
API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
}
/** DEPRECATED: Alias for CgiHttpRequestHandler */
deprecated module CGIHTTPRequestHandler = CgiHttpRequestHandler;
}
}
@@ -1933,11 +1942,11 @@ private module StdlibPrivate {
// Python 2
BaseHttpServer::BaseHttpRequestHandler::classRef(),
SimpleHttpServer::SimpleHttpRequestHandler::classRef(),
CGIHTTPServer::CGIHTTPRequestHandler::classRef(),
CgiHttpServer::CgiHttpRequestHandler::classRef(),
// Python 3
Http::Server::BaseHttpRequestHandler::classRef(),
Http::Server::SimpleHttpRequestHandler::classRef(),
Http::Server::CGIHTTPRequestHandler::classRef()
Http::Server::CgiHttpRequestHandler::classRef()
].getASubclass*()
}

View File

@@ -259,11 +259,6 @@ private module WerkzeugOld {
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
deprecated module MultiDict {
/**
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
*/
abstract deprecated class InstanceSource extends DataFlow::Node { }
/**
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
*
@@ -312,11 +307,6 @@ private module WerkzeugOld {
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
deprecated module FileStorage {
/**
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
*/
abstract deprecated class InstanceSource extends DataFlow::Node { }
/**
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
*

View File

@@ -42,7 +42,7 @@ private module NotExposed {
// Implementation below
// ---------------------------------------------------------------------------
//
// We are looking to find all subclassed of the already modelled classes, and ideally
// We are looking to find all subclasses of the already modeled classes, and ideally
// we would identify an `API::Node` for each (then `toString` would give the API
// path).
//

View File

@@ -273,6 +273,41 @@ predicate builtin_name_points_to(string name, Object value, ClassObject cls) {
value = Object::builtin(name) and cls.asBuiltin() = value.asBuiltin().getClass()
}
pragma[nomagic]
private predicate essa_var_scope(SsaSourceVariable var, Scope pred_scope, EssaVariable pred_var) {
BaseFlow::reaches_exit(pred_var) and
pred_var.getScope() = pred_scope and
var = pred_var.getSourceVariable()
}
pragma[nomagic]
private predicate scope_entry_def_scope(
SsaSourceVariable var, Scope succ_scope, ScopeEntryDefinition succ_def
) {
var = succ_def.getSourceVariable() and
succ_def.getScope() = succ_scope
}
pragma[nomagic]
private predicate step_through_init(Scope succ_scope, Scope pred_scope, Scope init) {
init.getName() = "__init__" and
init.precedes(succ_scope) and
pred_scope.precedes(init)
}
pragma[nomagic]
private predicate scope_entry_value_transfer_through_init(
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
) {
exists(SsaSourceVariable var, Scope init |
var instanceof GlobalVariable and
essa_var_scope(var, pragma[only_bind_into](pred_scope), pred_var) and
scope_entry_def_scope(var, succ_scope, succ_def) and
step_through_init(succ_scope, pragma[only_bind_into](pred_scope), init) and
not var.(Variable).getAStore().getScope() = init
)
}
module BaseFlow {
predicate reaches_exit(EssaVariable var) { var.getAUse() = var.getScope().getANormalExit() }
@@ -283,27 +318,15 @@ module BaseFlow {
) {
Stages::DataFlow::ref() and
exists(SsaSourceVariable var |
reaches_exit(pred_var) and
pred_var.getScope() = pred_scope and
var = pred_var.getSourceVariable() and
var = succ_def.getSourceVariable() and
succ_def.getScope() = succ_scope
essa_var_scope(var, pred_scope, pred_var) and
scope_entry_def_scope(var, succ_scope, succ_def)
|
pred_scope.precedes(succ_scope)
or
/*
* If an `__init__` method does not modify the global variable, then
* we can skip it and take the value directly from the module.
*/
exists(Scope init |
init.getName() = "__init__" and
init.precedes(succ_scope) and
pred_scope.precedes(init) and
not var.(Variable).getAStore().getScope() = init and
var instanceof GlobalVariable
)
)
or
// If an `__init__` method does not modify the global variable, then
// we can skip it and take the value directly from the module.
scope_entry_value_transfer_through_init(pred_var, pred_scope, succ_def, succ_scope)
}
}
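To make the `__init__` special case concrete, this is roughly the Python shape the refactored predicates reason about; the module and names are illustrative:

```python
# module mylib.py
GREETING = "hello"         # global defined at module scope

class Client:
    def __init__(self):
        self.ready = True  # never assigns GREETING

    def greet(self):
        # Because __init__ does not store to GREETING, the analysis can
        # transfer the module-level value directly into this scope,
        # skipping __init__.
        return GREETING
```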

View File

@@ -1271,12 +1271,21 @@ module InterProceduralPointsTo {
)
)
or
non_escaping_global_transfer(pred_var, pred_context, succ_def, succ_context)
}
pragma[nomagic]
private predicate non_escaping_global_transfer(
EssaVariable pred_var, PointsToContext pred_context, ScopeEntryDefinition succ_def,
PointsToContext succ_context
) {
exists(NonEscapingGlobalVariable var |
var = pred_var.getSourceVariable() and
var = succ_def.getSourceVariable() and
pred_var.getAUse() = succ_context.getRootCall() and
pred_context.isImport() and
succ_context.appliesToScope(succ_def.getScope())
pragma[only_bind_into](succ_context)
.appliesToScope(pragma[only_bind_into](succ_def).getScope())
)
}

View File

@@ -1,5 +1,4 @@
import python
deprecated import semmle.python.objects.ObjectInternal as OI
private import semmle.python.ApiGraphs
// Need to import since frameworks can extend the abstract `RegexString`
private import semmle.python.Frameworks
@@ -98,19 +97,6 @@ private DataFlow::Node re_flag_tracker(string flag_name) {
/** Gets a regular expression mode flag associated with the given data flow node. */
string mode_from_node(DataFlow::Node node) { node = re_flag_tracker(result) }
/**
* DEPRECATED 2021-02-24 -- use `mode_from_node` instead.
*
* Gets a regular expression mode flag associated with the given value.
*/
deprecated string mode_from_mode_object(Value obj) {
result in ["DEBUG", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "UNICODE", "VERBOSE"] and
exists(int flag |
flag = Value::named("sre_constants.SRE_FLAG_" + result).(OI::ObjectInternal).intValue() and
obj.(OI::ObjectInternal).intValue().bitAnd(flag) = flag
)
}
/** A StrConst used as a regular expression */
abstract class RegexString extends Expr {
RegexString() { (this instanceof Bytes or this instanceof Unicode) }

View File

@@ -2,196 +2,27 @@
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import performance.ReDoSUtil
import regexp.RegexpMatching
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
* Holds if the regexp `root` should be tested against `str`.
* Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`.
* `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored.
* `testWithGroups` toggles whether it's tested which groups are filled by a given input string.
*/
private module RegexpMatching {
/**
* A class to test whether a regular expression matches a string.
* Override this class and extend `test`/`testWithGroups` to configure which strings should be tested for acceptance by this regular expression.
* The result can afterwards be read from the `matches` predicate.
*
* Strings in the `testWithGroups` predicate are also tested for which capture groups are filled by the given string.
* The result is available in the `fillCaptureGroup` predicate.
*/
abstract class MatchedRegExp extends RegExpTerm {
MatchedRegExp() { this.isRootTerm() }
/**
* Holds if it should be tested whether this regular expression matches `str`.
*
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
* but if `ignorePrefix` is true, it will only match "foo".
*/
predicate test(string str, boolean ignorePrefix) {
none() // maybe overridden in subclasses
}
/**
* Same as `test(..)`, but where the `fillsCaptureGroup` afterwards tells which capture groups were filled by the given string.
*/
predicate testWithGroups(string str, boolean ignorePrefix) {
none() // maybe overridden in subclasses
}
/**
* Holds if this RegExp matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
*/
final predicate matches(string str) {
exists(State state | state = getAState(this, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
* Holds if matching `str` may fill capture group number `g`.
* Only holds if `str` is in the `testWithGroups` predicate.
*/
final predicate fillsCaptureGroup(string str, int g) {
exists(State s |
s = getAStateThatReachesAccept(this, _, str, _) and
g = group(s.getRepr())
)
}
}
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-deterministic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(MatchedRegExp reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
(
reg.test(str, ignorePrefix)
or
reg.testWithGroups(str, ignorePrefix)
) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
private predicate isBadTagFilterCandidate(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
) {
// the regexp must mention "<" and ">" explicitly.
forall(string angleBracket | angleBracket = ["<", ">"] |
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
root
) and
ignorePrefix = true and
(
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
testWithGroups = true
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
pragma[noopt]
private State getAStateAfterMatchingAux(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, MatchedRegExp r |
r.test(s, _)
or
r.testWithGroups(s, _)
|
char = s.charAt(_)
) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
*/
private State getAStateThatReachesAccept(
MatchedRegExp reg, int i, string str, boolean ignorePrefix
) {
// base case, reaches an accepting state from the last state in `getAState(..)`
reg.testWithGroups(str, ignorePrefix) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/** Gets the capture group number that `term` belongs to. */
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
}
/** A class to test whether a regular expression matches certain HTML tags. */
class HtmlMatchingRegExp extends RegexpMatching::MatchedRegExp {
HtmlMatchingRegExp() {
// the regexp must mention "<" and ">" explicitly.
forall(string angleBracket | angleBracket = ["<", ">"] |
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
this
)
}
override predicate testWithGroups(string str, boolean ignorePrefix) {
ignorePrefix = true and
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"]
}
override predicate test(string str, boolean ignorePrefix) {
ignorePrefix = true and
str =
[
"<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
@@ -200,7 +31,23 @@ class HtmlMatchingRegExp extends RegexpMatching::MatchedRegExp {
"<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
"<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
"<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
]
] and
testWithGroups = false
)
}
/**
* A regexp that matches some string from the `isBadTagFilterCandidate` predicate.
*/
class HtmlMatchingRegExp extends RootTerm {
HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) }
/** Holds if this regexp matches `str`, where `str` is one of the strings from `isBadTagFilterCandidate`. */
predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) }
/** Holds if this regexp fills capture group `g` when matching `str`, where `str` is one of the strings from `isBadTagFilterCandidate`. */
predicate fillsCaptureGroup(string str, int g) {
RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g)
}
}
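For orientation, a small Python illustration of the kind of bad tag filter these candidate strings probe; the regex is invented for the example and is not taken from the query:

```python
import re

# A naive filter meant to strip HTML comments. It handles the usual
# closing sequence "-->" but not the malformed "--!>" variant, which
# browsers still treat as closing the comment.
comment_filter = re.compile(r"<!--.*-->")

print(bool(comment_filter.match("<!-- foo -->")))   # True: filtered
print(bool(comment_filter.match("<!-- foo --!>")))  # False: slips through
```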

View File

@@ -0,0 +1,281 @@
/**
* Classes and predicates for working with suspicious character ranges.
*/
// We don't need the NFA utils, just the regexp tree.
// but the below is a nice shared library that exposes the API we need.
import regexp.NfaUtils
/**
* Gets a rank for `range` that is unique for ranges in the same file.
* Prioritizes ranges that match more characters.
*/
int rankRange(RegExpCharacterRange range) {
range =
rank[result](RegExpCharacterRange r, Location l, int low, int high |
r.getLocation() = l and
isRange(r, low, high)
|
r order by (high - low) desc, l.getStartLine(), l.getStartColumn()
)
}
/** Holds if `range` spans from the unicode code points `low` to `high` (both inclusive). */
predicate isRange(RegExpCharacterRange range, int low, int high) {
exists(string lowc, string highc |
range.isRange(lowc, highc) and
low.toUnicode() = lowc and
high.toUnicode() = highc
)
}
/** Holds if `char` is an alpha-numeric character. */
predicate isAlphanumeric(string char) {
// written like this to avoid having a bindingset for the predicate
char = [[48 .. 57], [65 .. 90], [97 .. 122]].toUnicode() // 0-9, A-Z, a-z
}
/**
* Holds if the given ranges are from the same character class
* and there exists at least one character matched by both ranges.
*/
predicate overlap(RegExpCharacterRange a, RegExpCharacterRange b) {
exists(RegExpCharacterClass clz |
a = clz.getAChild() and
b = clz.getAChild() and
a != b
|
exists(int alow, int ahigh, int blow, int bhigh |
isRange(a, alow, ahigh) and
isRange(b, blow, bhigh) and
alow <= bhigh and
blow <= ahigh
)
)
}
/**
* Holds if `range` overlaps with the char class `escape` from the same character class.
*/
predicate overlapsWithCharEscape(RegExpCharacterRange range, RegExpCharacterClassEscape escape) {
exists(RegExpCharacterClass clz, string low, string high |
range = clz.getAChild() and
escape = clz.getAChild() and
range.isRange(low, high)
|
escape.getValue() = "w" and
getInRange(low, high).regexpMatch("\\w")
or
escape.getValue() = "d" and
getInRange(low, high).regexpMatch("\\d")
or
escape.getValue() = "s" and
getInRange(low, high).regexpMatch("\\s")
)
}
/** Gets the unicode code point for a `char`. */
bindingset[char]
int toCodePoint(string char) { result.toUnicode() = char }
/** A character range that appears to be overly wide. */
class OverlyWideRange extends RegExpCharacterRange {
OverlyWideRange() {
exists(int low, int high, int numChars |
isRange(this, low, high) and
numChars = (1 + high - low) and
this.getRootTerm().isUsedAsRegExp() and
numChars >= 10
|
// across the Z-a range (which includes backticks)
toCodePoint("Z") >= low and
toCodePoint("a") <= high
or
// across the 9-A range (which includes e.g. ; and ?)
toCodePoint("9") >= low and
toCodePoint("A") <= high
or
// a non-alphanumeric char as part of the range boundaries
exists(int bound | bound = [low, high] | not isAlphanumeric(bound.toUnicode()))
) and
// allowlist for known ranges
not this = allowedWideRanges()
}
/** Gets a string representation of a character class that matches the same chars as this range. */
string printEquivalent() { result = RangePrinter::printEquivalentCharClass(this) }
}
/** Gets a range that should not be reported as an overly wide range. */
RegExpCharacterRange allowedWideRanges() {
// ~ is the last printable ASCII character; it is used as the upper bound in various wide ranges.
result.isRange(_, "~")
or
// the same with " " and "!". " " is the first printable character, and "!" is the first non-white-space printable character.
result.isRange([" ", "!"], _)
or
// the `[@-_]` range is intentional
result.isRange("@", "_")
or
// starting from the zero byte is a good indication that it's purposely matching a large range.
result.isRange(0.toUnicode(), _)
}
/** Gets a char between (and including) `low` and `high`. */
bindingset[low, high]
private string getInRange(string low, string high) {
result = [toCodePoint(low) .. toCodePoint(high)].toUnicode()
}
/** A module computing an equivalent character class for an overly wide range. */
module RangePrinter {
bindingset[char]
bindingset[result]
private string next(string char) {
exists(int prev, int next |
prev.toUnicode() = char and
next.toUnicode() = result and
next = prev + 1
)
}
/** Gets the points where the parts of the pretty printed range should be cut off. */
private string cutoffs() { result = ["A", "Z", "a", "z", "0", "9"] }
/** Gets the char to use in the low end of a range for a given `cut` */
private string lowCut(string cut) {
cut = ["A", "a", "0"] and
result = cut
or
cut = ["Z", "z", "9"] and
result = next(cut)
}
/** Gets the char to use in the high end of a range for a given `cut` */
private string highCut(string cut) {
cut = ["Z", "z", "9"] and
result = cut
or
cut = ["A", "a", "0"] and
next(result) = cut
}
/** Gets the cutoff char used for a given `part` of a range when pretty-printing it. */
private string cutoff(OverlyWideRange range, int part) {
exists(int low, int high | isRange(range, low, high) |
result =
rank[part + 1](string cut |
cut = cutoffs() and low < toCodePoint(cut) and toCodePoint(cut) < high
|
cut order by toCodePoint(cut)
)
)
}
/** Gets the number of parts we should print for a given `range`. */
private int parts(OverlyWideRange range) { result = 1 + strictcount(cutoff(range, _)) }
/** Holds if the given part of a range should span from `low` to `high`. */
private predicate part(OverlyWideRange range, int part, string low, string high) {
// first part.
part = 0 and
(
range.isRange(low, high) and
parts(range) = 1
or
parts(range) >= 2 and
range.isRange(low, _) and
high = highCut(cutoff(range, part))
)
or
// middle
part >= 1 and
part < parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
high = highCut(cutoff(range, part))
or
// last.
part = parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
range.isRange(_, high)
}
/** Gets an escaped `char` for use in a character class. */
bindingset[char]
private string escape(string char) {
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
if char.regexpMatch(reg) then result = "\\" + char else result = char
)
}
/** Gets a part of the equivalent range. */
private string printEquivalentCharClass(OverlyWideRange range, int part) {
exists(string low, string high | part(range, part, low, high) |
if
isAlphanumeric(low) and
isAlphanumeric(high)
then result = low + "-" + high
else
result =
strictconcat(string char | char = getInRange(low, high) | escape(char) order by char)
)
}
/** Gets the entire pretty printed equivalent range. */
string printEquivalentCharClass(OverlyWideRange range) {
result =
strictconcat(string r, int part |
r = "[" and part = -1 and exists(range)
or
r = printEquivalentCharClass(range, part)
or
r = "]" and part = parts(range)
|
r order by part
)
}
}
/** Gets a char range that is overly large because of `reason`. */
RegExpCharacterRange getABadRange(string reason, int priority) {
priority = 0 and
reason = "is equivalent to " + result.(OverlyWideRange).printEquivalent()
or
priority = 1 and
exists(RegExpCharacterRange other |
reason = "overlaps with " + other + " in the same character class" and
rankRange(result) < rankRange(other) and
overlap(result, other)
)
or
priority = 2 and
exists(RegExpCharacterClassEscape escape |
reason = "overlaps with " + escape + " in the same character class" and
overlapsWithCharEscape(result, escape)
)
or
reason = "is empty" and
priority = 3 and
exists(int low, int high |
isRange(result, low, high) and
low > high
)
}
/** Holds if `range` matches suspiciously many characters. */
predicate problem(RegExpCharacterRange range, string reason) {
reason =
strictconcat(string m, int priority |
range = getABadRange(m, priority)
|
m, ", and " order by priority desc
) and
// specifying a range using an escape is usually OK.
not range.getAChild() instanceof RegExpEscape and
// Unicode escapes in strings are interpreted before it turns into a regexp,
// so e.g. [\u0001-\uFFFF] will just turn up as a range between two constants.
// We therefore exclude these ranges.
range.getRootTerm().getParent() instanceof RegExpLiteral and
// is used as regexp (mostly for JS where regular expressions are parsed eagerly)
range.getRootTerm().isUsedAsRegExp()
}
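As a concrete illustration of why ranges crossing the `Z`-`a` or `9`-`A` boundaries are flagged, a short Python snippet (illustration only, not part of the query):

```python
import re

# [A-z] spans ASCII 65..122, so it also matches the six punctuation
# characters between 'Z' and 'a': [ \ ] ^ _ and the backtick.
print(re.fullmatch(r"[A-z]", "^"))     # matches, which is rarely intended
print(re.fullmatch(r"[A-Za-z]", "^"))  # None: explicit ranges avoid the problem
```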

python/ql/lib/semmle/python/security/TaintTracking.qll Executable file → Normal file
View File

View File

@@ -106,16 +106,6 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
}
/**
* DEPRECATED: Use `maybeSensitiveRegexp` instead.
*/
deprecated predicate maybeSensitive = maybeSensitiveRegexp/1;
/**
* DEPRECATED: Use `notSensitiveRegexp` instead.
*/
deprecated predicate notSensitive = notSensitiveRegexp/0;
/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since

View File

@@ -1,374 +1,4 @@
/**
* This library implements the analysis described in the following two papers:
*
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
* Regular Expression Denial-of-Service Attacks. NSS 2013.
* (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
* Exponential Runtime via Substructural Logics. 2014.
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
*
* The basic idea is to search for overlapping cycles in the NFA, that is,
* states `q` such that there are two distinct paths from `q` to itself
* that consume the same word `w`.
*
* For any such state `q`, an attack string can be constructed as follows:
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
* the word `w` that leads back to `q` along two different paths, followed
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
* implementation will need to explore at least 2^n different ways of going
* from `q` back to itself while trying to match the `n` copies of `w`
* before finally giving up.
*
* Now in order to identify overlapping cycles, all we have to do is find
* pumpable forks, that is, states `q` that can transition to two different
* states `r1` and `r2` on the same input symbol `c`, such that there are
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
* in the product NFA.
*
* This is what the library does. It makes a simple attempt to construct a
* prefix `v` leading into `q`, but only to improve the alert message.
* And the library tries to prove the existence of a suffix that ensures
* rejection. This check might fail, which can cause false positives.
*
* Finally, sometimes it depends on the translation whether the NFA generated
* for a regular expression has a pumpable fork or not. We implement one
* particular translation, which may result in false positives or negatives
* relative to some particular JavaScript engine.
*
* More precisely, the library constructs an NFA from a regular expression `r`
* as follows:
*
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
* the state of the automaton before attempting to match the `i`th character in `t`.
* * There is one accepting state `Accept(r)`.
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
* * Transitions between states may be labelled with epsilon, or an abstract
* input symbol.
* * Each abstract input symbol represents a set of concrete input characters:
* either a single character, a set of characters represented by a
* character class, or the set of all characters.
* * The product automaton is constructed lazily, starting with pair states
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
* step relation.
* * The over-approximate step relation allows transitions along pairs of
* abstract input symbols where the symbols have overlap in the characters they accept.
* * Once a trace of pairs of abstract input symbols that leads from a fork
* back to itself has been identified, we attempt to construct a concrete
* string corresponding to it, which may fail.
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
* a suffix `x` (possible empty) that is most likely __not__ accepted.
*/
/** DEPRECATED. Import `semmle.python.security.regexp.ExponentialBackTracking` instead. */
import ReDoSUtil
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
}
/**
* An infinitely repeating quantifier that might backtrack.
*/
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
MaybeBacktrackingRepetition() {
exists(RegExpTerm child |
child instanceof RegExpAlt or
child instanceof RegExpQuantifier
|
child.getParent+() = this
)
}
}
/**
* A state in the product automaton.
*/
private newtype TStatePair =
/**
* We lazily only construct those states that we are actually
* going to need: `(q, q)` for every fork state `q`, and any
* pair of states that can be reached from a pair that we have
* already constructed. To cut down on the number of states,
* we only represent states `(q1, q2)` where `q1` is lexicographically
* no bigger than `q2`.
*
* States are only constructed if both states in the pair are
* inside a repetition that might backtrack.
*/
MkStatePair(State q1, State q2) {
isFork(q1, _, _, _, _) and q2 = q1
or
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
rankState(q1) <= rankState(q2)
}
/**
* Gets a unique number for a `state`.
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
*/
private int rankState(State state) {
state =
rank[result](State s, Location l |
l = s.getRepr().getLocation()
|
s order by l.getStartLine(), l.getStartColumn(), s.toString()
)
}
/**
* A state in the product automaton.
*/
private class StatePair extends TStatePair {
State q1;
State q2;
StatePair() { this = MkStatePair(q1, q2) }
/** Gets a textual representation of this element. */
string toString() { result = "(" + q1 + ", " + q2 + ")" }
/** Gets the first component of the state pair. */
State getLeft() { result = q1 }
/** Gets the second component of the state pair. */
State getRight() { result = q2 }
}
/**
* Holds for `(fork, fork)` state pairs when `isFork(fork, _, _, _, _)` holds.
*
* Used in `statePairDistToFork`
*/
private predicate isStatePairFork(StatePair p) {
exists(State fork | p = MkStatePair(fork, fork) and isFork(fork, _, _, _, _))
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r`.
*
* Used in `statePairDistToFork`
*/
private predicate reverseStep(StatePair r, StatePair q) { step(q, _, _, r) }
/**
* Gets the minimum length of a path from `q` to `r` in the
* product automaton.
*/
private int statePairDistToFork(StatePair q, StatePair r) =
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
/**
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
* trivially have an empty intersection.
*
* This predicate only holds for states associated with regular expressions
* that have at least one repetition quantifier in them (otherwise the
* expression cannot be vulnerable to ReDoS attacks anyway).
*/
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
s1 != s2
or
r1 != r2
or
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/`, which only has polynomial runtime and is detected by `js/polynomial-redos`.
// The code below is therefore a heuristic that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
* one or the other is defined.
*/
private StatePair mkStatePair(State q1, State q2) {
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1` and `s2`, respectively.
*/
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
}
/**
* Holds if there are transitions from the components of `q` to `r1` and `r2`
* labelled with `s1` and `s2`, respectively.
*
* We only consider transitions where the resulting states `(r1, r2)` are both
* inside a repetition that might backtrack.
*/
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, TTrace t) { isReachableFromFork(_, _, s1, s2, t, _) }
/**
* A list of pairs of input symbols that describe a path in the product automaton
* starting from some fork state.
*/
private class Trace extends TTrace {
/** Gets a textual representation of this element. */
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
)
}
}
/**
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
* a path from `r` back to `(fork, fork)` with `rem` steps.
*/
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
exists(InputSymbol s1, InputSymbol s2, Trace v |
isReachableFromFork(fork, r, s1, s2, v, rem) and
w = Step(s1, s2, v)
)
}
private predicate isReachableFromFork(
State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
) {
// base case
exists(State q1, State q2 |
isFork(fork, s1, s2, q1, q2) and
r = MkStatePair(q1, q2) and
v = Nil() and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
or
// recursive case
exists(StatePair p |
isReachableFromFork(fork, p, v, rem + 1) and
step(p, s1, s2, r) and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
}
/**
* Gets a state in the product automaton from which `(fork, fork)` is
* reachable in zero or more epsilon transitions.
*/
private StatePair getAForkPair(State fork) {
isFork(fork, _, _, _, _) and
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
}
private predicate hasSuffix(Trace suffix, Trace t, int i) {
// Declaring `t` to be a `RelevantTrace` currently causes a redundant check in the
// recursive case, so instead we check it explicitly here.
t instanceof RelevantTrace and
i = 0 and
suffix = t
or
hasSuffix(Step(_, _, suffix), t, i - 1)
}
pragma[noinline]
private predicate hasTuple(InputSymbol s1, InputSymbol s2, Trace t, int i) {
hasSuffix(Step(s1, s2, _), t, i)
}
private class RelevantTrace extends Trace, Step {
RelevantTrace() {
exists(State fork, StatePair q |
isReachableFromFork(fork, q, this, _) and
q = getAForkPair(fork)
)
}
pragma[noinline]
private string intersect(int i) {
exists(InputSymbol s1, InputSymbol s2 |
hasTuple(s1, s2, this, i) and
result = intersect(s1, s2)
)
}
/** Gets a string corresponding to this trace. */
// the pragma is needed for the case where `intersect(s1, s2)` has multiple values,
// not for recursion
language[monotonicAggregates]
string concretise() {
result = strictconcat(int i | hasTuple(_, _, this, i) | this.intersect(i) order by i desc)
}
}
/**
* Holds if `fork` is a pumpable fork with word `w`.
*/
private predicate isPumpable(State fork, string w) {
exists(StatePair q, RelevantTrace t |
isReachableFromFork(fork, q, t, _) and
q = getAForkPair(fork) and
w = t.concretise()
)
}
/**
* An instantiation of `ReDoSConfiguration` for exponential backtracking.
*/
class ExponentialReDoSConfiguration extends ReDoSConfiguration {
ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" }
override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
}
deprecated import semmle.python.security.regexp.ExponentialBackTracking as Dep
import Dep

File diff suppressed because it is too large.

View File

@@ -1,454 +1,4 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
/** DEPRECATED. Import `semmle.python.security.regexp.SuperlinearBackTracking` instead. */
import ReDoSUtil
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
* Programs that use Regular Expressions
* (Extended Version).
* (https://arxiv.org/pdf/1701.04045.pdf)
*
* Theorem 3 from the paper describes the basic idea.
*
* The following explains the idea using variables and predicate names that are used in the implementation:
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
*
* We create a product automaton of 3-tuples of states (see `StateTuple`).
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
*
* We start a search in the product automaton at `(pivot, pivot, succ)`,
* and search for a series of transitions (a `Trace`), such that we end
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
*
* For example, consider the regular expression `/^\d*5\w*$/`.
* The search will start at the tuple `(\d*, \d*, \w*)` and search
* for a path to `(\d*, \w*, \w*)`.
* This path exists, and consists of a single transition in the product automaton,
* where the three corresponding NFA edges all match the character `"5"`.
*
* The start-state in the NFA has an any-transition to itself, this allows us to
* flag regular expressions such as `/a*$/` - which does not have a start anchor -
* and can thus start matching anywhere.
*
* The implementation is not perfect.
* It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
/**
* An instantiation of `ReDoSConfiguration` for superlinear ReDoS.
*/
class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" }
override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
}
/**
* Gets any root (start) state of a regular expression.
*/
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
or
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
}
/**
* A state in the product automaton.
* The product automaton contains 3-tuples of states.
*
* We lazily only construct those states that we are actually
* going to need.
* Either a start state `(pivot, pivot, succ)`, or a state
* where there exists a transition from an already existing state.
*
* The exponential variant of this query (`js/redos`) uses an optimization
* trick where `q1 <= q2`. This trick cannot be used here as the order
* of the elements matter.
*/
class StateTuple extends TStateTuple {
State q1;
State q2;
State q3;
StateTuple() { this = MkStateTuple(q1, q2, q3) }
/**
* Gets a string representation of this tuple.
*/
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
/**
* Holds if this tuple is `(r1, r2, r3)`.
*/
pragma[noinline]
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
}
/**
* A module for determining feasible tuples for the product automaton.
*
* The implementation is split into many predicates for performance reasons.
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
}
/**
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
*/
pragma[noinline]
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
}
/**
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
*
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
*/
predicate isStartLoops(State pivot, State succ) {
pivot != succ and
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = succ and
(
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
or
pivot = mkMatch(any(RegExpRoot root))
)
}
/**
* Gets a state for which there exists a transition in the NFA from `s`.
*/
State delta(State s) { delta(s, _, result) }
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
exists(State r1, State r2, State r3 |
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
)
}
/**
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3`
* labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noopt]
predicate step(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
deltaClosed(q3, s3, r3) and
// use noopt to force the join on `getAThreewayIntersect` to happen last.
exists(getAThreewayIntersect(s1, s2, s3))
)
}
/**
* Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
*
* The result is not complete, and might miss some combination of edges that share some character.
*/
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)]
or
result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)]
or
result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)]
}
/**
* Gets the minimum and maximum characters that intersect between `a` and `b`.
* This predicate is used to limit the size of `getAThreewayIntersect`.
*/
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
result = [min(intersect(a, b)), max(intersect(a, b))]
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
exists(StateTuple p |
isReachableFromStartTuple(_, _, p, t, _) and
step(p, s1, s2, s3, _)
)
or
exists(State pivot, State succ | isStartLoops(pivot, succ) |
t = Nil() and step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, _)
)
}
/**
* A list of tuples of input symbols that describe a path in the product automaton
* starting from some start state.
*/
class Trace extends TTrace {
/**
* Gets a string representation of this Trace that can be used for debug purposes.
*/
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
)
}
}
/**
* Holds if there exists a transition from `r` to `q` in the product automaton.
* Notice that the arguments are flipped, and thus the direction is backwards.
*/
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
/**
* Holds if `tuple` is an end state in our search.
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
*/
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
/**
* Gets the minimum length of a path from `r` to some an end state `end`.
*
* The implementation searches backwards from the end-tuple.
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
* The `end` argument must always be an end state.
*/
int distBackFromEnd(StateTuple r, StateTuple end) =
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
* and a path from a start-state to `tuple` follows the transitions in `trace`.
*/
predicate isReachableFromStartTuple(State pivot, State succ, StateTuple tuple, Trace trace, int dist) {
// base case. The first step is inlined to start the search after all possible 1-steps, and not just the ones with the shortest path.
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, State q1, State q2, State q3 |
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(q1, q2, q3) and
trace = Step(s1, s2, s3, Nil()) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
)
or
// recursive case
exists(StateTuple p, Trace v, InputSymbol s1, InputSymbol s2, InputSymbol s3 |
isReachableFromStartTuple(pivot, succ, p, v, dist + 1) and
dist = isReachableFromStartTupleHelper(pivot, succ, tuple, p, s1, s2, s3) and
trace = Step(s1, s2, s3, v)
)
}
/**
* Helper predicate for the recursive case in `isReachableFromStartTuple`.
*/
pragma[noinline]
private int isReachableFromStartTupleHelper(
State pivot, State succ, StateTuple r, StateTuple p, InputSymbol s1, InputSymbol s2,
InputSymbol s3
) {
result = distBackFromEnd(r, MkStateTuple(pivot, succ, succ)) and
step(p, s1, s2, s3, r)
}
/**
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
*/
StateTuple getAnEndTuple(State pivot, State succ) {
isStartLoops(pivot, succ) and
result = MkStateTuple(pivot, succ, succ)
}
private predicate hasSuffix(Trace suffix, Trace t, int i) {
// Declaring `t` to be a `RelevantTrace` currently causes a redundant check in the
// recursive case, so instead we check it explicitly here.
t instanceof RelevantTrace and
i = 0 and
suffix = t
or
hasSuffix(Step(_, _, _, suffix), t, i - 1)
}
pragma[noinline]
private predicate hasTuple(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t, int i) {
hasSuffix(Step(s1, s2, s3, _), t, i)
}
private class RelevantTrace extends Trace, Step {
RelevantTrace() {
exists(State pivot, State succ, StateTuple q |
isReachableFromStartTuple(pivot, succ, q, this, _) and
q = getAnEndTuple(pivot, succ)
)
}
pragma[noinline]
private string getAThreewayIntersect(int i) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 |
hasTuple(s1, s2, s3, this, i) and
result = getAThreewayIntersect(s1, s2, s3)
)
}
/** Gets a string corresponding to this trace. */
// the pragma is needed for the case where `getAThreewayIntersect(s1, s2, s3)` has multiple values,
// not for recursion
language[monotonicAggregates]
string concretise() {
result =
strictconcat(int i |
hasTuple(_, _, _, this, i)
|
this.getAThreewayIntersect(i) order by i desc
)
}
}
/**
* Holds if matching repetitions of `pump` can:
* 1) Transition from `pivot` back to `pivot`.
* 2) Transition from `pivot` to `succ`.
* 3) Transition from `succ` to `succ`.
*
* From theorem 3 in the paper linked at the top of this file we can therefore conclude that
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
*
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
* available in the `hasReDoSResult` predicate.
*/
predicate isPumpable(State pivot, State succ, string pump) {
exists(StateTuple q, RelevantTrace t |
isReachableFromStartTuple(pivot, succ, q, t, _) and
q = getAnEndTuple(pivot, succ) and
pump = t.concretise()
)
}
/**
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
*/
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot |
hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and
prev = pivot.getRepr()
)
}
/**
* Gets a message for why `term` can cause polynomial backtracking.
*/
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
polynomialReDoS(term, pump, prefixMsg, prev) and
result =
"Strings " + prefixMsg + "with many repetitions of '" + pump +
"' can start matching anywhere after the start of the preceding " + prev
}
/**
* A term that may cause a regular expression engine to perform a
* polynomial number of match attempts, relative to the input length.
*/
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
string reason;
string pump;
string prefixMsg;
RegExpTerm prev;
PolynomialBackTrackingTerm() {
reason = getReasonString(this, pump, prefixMsg, prev) and
// there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
}
/**
* Holds if all non-empty successors to the polynomial backtracking term match the end of the line.
*/
predicate isAtEndLine() {
forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
succ instanceof RegExpDollar
)
}
/**
* Gets the string that should be repeated to cause this regular expression to perform polynomially.
*/
string getPumpString() { result = pump }
/**
* Gets a message describing which prefix a matching string must start with for this term to cause polynomial backtracking.
*/
string getPrefixMessage() { result = prefixMsg }
/**
* Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
*/
RegExpTerm getPreviousLoop() { result = prev }
/**
* Gets the reason for the number of match attempts.
*/
string getReason() { result = reason }
}
deprecated import semmle.python.security.regexp.SuperlinearBackTracking as Dep
import Dep

View File

@@ -0,0 +1,344 @@
/**
* This library implements the analysis described in the following two papers:
*
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
* Regular Expression Denial-of-Service Attacks. NSS 2013.
* (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
* Exponential Runtime via Substructural Logics. 2014.
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
*
* The basic idea is to search for overlapping cycles in the NFA, that is,
* states `q` such that there are two distinct paths from `q` to itself
* that consume the same word `w`.
*
* For any such state `q`, an attack string can be constructed as follows:
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
* the word `w` that leads back to `q` along two different paths, followed
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
* implementation will need to explore at least 2^n different ways of going
* from `q` back to itself while trying to match the `n` copies of `w`
* before finally giving up.
*
* Now in order to identify overlapping cycles, all we have to do is find
* pumpable forks, that is, states `q` that can transition to two different
* states `r1` and `r2` on the same input symbol `c`, such that there are
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
* in the product NFA.
*
* This is what the library does. It makes a simple attempt to construct a
* prefix `v` leading into `q`, but only to improve the alert message.
* And the library tries to prove the existence of a suffix that ensures
* rejection. This check might fail, which can cause false positives.
*
* Finally, sometimes it depends on the translation whether the NFA generated
* for a regular expression has a pumpable fork or not. We implement one
* particular translation, which may result in false positives or negatives
* relative to some particular regular expression engine.
*
* More precisely, the library constructs an NFA from a regular expression `r`
* as follows:
*
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
* the state of the automaton before attempting to match the `i`th character in `t`.
* * There is one accepting state `Accept(r)`.
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
* * Transitions between states may be labelled with epsilon, or an abstract
* input symbol.
* * Each abstract input symbol represents a set of concrete input characters:
* either a single character, a set of characters represented by a
* character class, or the set of all characters.
* * The product automaton is constructed lazily, starting with pair states
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
* step relation.
* * The over-approximate step relation allows transitions along pairs of
* abstract input symbols where the symbols have overlap in the characters they accept.
* * Once a trace of pairs of abstract input symbols that leads from a fork
* back to itself has been identified, we attempt to construct a concrete
* string corresponding to it, which may fail.
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
* a suffix `x` (possibly empty) that is most likely __not__ accepted.
*/
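/*
* Illustration only (not part of this library): the attack construction described above
* can be observed directly on a backtracking engine such as CPython's `re` module. For
* the regular expression `^(a|a)*b$`, the prefix `v` is empty, the pumped word `w` is
* "a", and a trailing "c" serves as the rejecting suffix `x`; the match time roughly
* doubles with every extra copy of `w`:
*
*   import re
*   import time
*
*   for n in (18, 20, 22, 24):
*       start = time.perf_counter()
*       re.match(r'^(a|a)*b$', 'a' * n + 'c')  # ~2^n ways to re-split the run of a's
*       print(n, round(time.perf_counter() - start, 3))
*/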
import NfaUtils
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
}
/**
* An infinitely repeating quantifier that might backtrack.
*/
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
MaybeBacktrackingRepetition() {
exists(RegExpTerm child |
child instanceof RegExpAlt or
child instanceof RegExpQuantifier
|
child.getParent+() = this
)
}
}
/**
* A state in the product automaton.
*/
private newtype TStatePair =
/**
* We lazily only construct those states that we are actually
* going to need: `(q, q)` for every fork state `q`, and any
* pair of states that can be reached from a pair that we have
* already constructed. To cut down on the number of states,
* we only represent states `(q1, q2)` where `q1` is lexicographically
* no bigger than `q2`.
*
* States are only constructed if both states in the pair are
* inside a repetition that might backtrack.
*/
MkStatePair(State q1, State q2) {
isFork(q1, _, _, _, _) and q2 = q1
or
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
rankState(q1) <= rankState(q2)
}
/**
* Gets a unique number for a `state`.
* Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
*/
private int rankState(State state) {
state =
rank[result](State s, Location l |
l = s.getRepr().getLocation()
|
s order by l.getStartLine(), l.getStartColumn(), s.toString()
)
}
/**
* A state in the product automaton.
*/
private class StatePair extends TStatePair {
State q1;
State q2;
StatePair() { this = MkStatePair(q1, q2) }
/** Gets a textual representation of this element. */
string toString() { result = "(" + q1 + ", " + q2 + ")" }
/** Gets the first component of the state pair. */
State getLeft() { result = q1 }
/** Gets the second component of the state pair. */
State getRight() { result = q2 }
}
/**
* Holds for `(fork, fork)` state pairs when `isFork(fork, _, _, _, _)` holds.
*
* Used in `statePairDistToFork`
*/
private predicate isStatePairFork(StatePair p) {
exists(State fork | p = MkStatePair(fork, fork) and isFork(fork, _, _, _, _))
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r`.
*
* Used in `statePairDistToFork`
*/
private predicate reverseStep(StatePair r, StatePair q) { step(q, _, _, r) }
/**
* Gets the minimum length of a path from `q` to `r` in the
* product automaton.
*/
private int statePairDistToFork(StatePair q, StatePair r) =
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
/**
* Holds if there are transitions from `q` to `r1` and from `q` to `r2`
* labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
* trivially have an empty intersection.
*
* This predicate only holds for states associated with regular expressions
* that have at least one repetition quantifier in them (otherwise the
* expression cannot be vulnerable to ReDoS attacks anyway).
*/
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
s1 != s2
or
r1 != r2
or
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/`, which only has polynomial runtime and is detected by `py/polynomial-redos`.
// The below code is therefore a heuristic that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)*c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
* Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only
* one or the other is defined.
*/
private StatePair mkStatePair(State q1, State q2) {
result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1)
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1` and `s2`, respectively.
*/
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2))
}
/**
* Holds if there are transitions from the components of `q` to `r1` and `r2`
* labelled with `s1` and `s2`, respectively.
*
* We only consider transitions where the resulting states `(r1, r2)` are both
* inside a repetition that might backtrack.
*/
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, TTrace t) { isReachableFromFork(_, _, s1, s2, t, _) }
/**
* A list of pairs of input symbols that describe a path in the product automaton
* starting from some fork state.
*/
private class Trace extends TTrace {
/** Gets a textual representation of this element. */
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) |
result = "Step(" + s1 + ", " + s2 + ", " + t + ")"
)
}
}
/**
* Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
* a path from `r` back to `(fork, fork)` with `rem` steps.
*/
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
exists(InputSymbol s1, InputSymbol s2, Trace v |
isReachableFromFork(fork, r, s1, s2, v, rem) and
w = Step(s1, s2, v)
)
}
private predicate isReachableFromFork(
State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
) {
// base case
exists(State q1, State q2 |
isFork(fork, s1, s2, q1, q2) and
r = MkStatePair(q1, q2) and
v = Nil() and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
or
// recursive case
exists(StatePair p |
isReachableFromFork(fork, p, v, rem + 1) and
step(p, s1, s2, r) and
rem = statePairDistToFork(r, MkStatePair(fork, fork))
)
}
/**
* Gets a state in the product automaton from which `(fork, fork)` is
* reachable in zero or more epsilon transitions.
*/
private StatePair getAForkPair(State fork) {
isFork(fork, _, _, _, _) and
result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork))
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, result) }
/** Holds if `n` is a trace that is used by `concretize` in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State f | isReachableFromFork(f, getAForkPair(f), n, _))
}
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2 | t = Step(s1, s2, _) | result = intersect(s1, s2))
}
}
/**
* Holds if `fork` is a pumpable fork with word `w`.
*/
private predicate isPumpable(State fork, string w) {
exists(StatePair q, Trace t |
isReachableFromFork(fork, q, t, _) and
q = getAForkPair(fork) and
w = Concretizer<CharTreeImpl>::concretize(t)
)
}
/** Holds if `state` has exponential ReDoS */
predicate hasReDoSResult = ReDoSPruning<isPumpable/2>::hasReDoSResult/4;

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
/**
* Provides Python-specific definitions for use in the ReDoSUtil module.
* Provides Python-specific definitions for use in the NfaUtils module.
*/
import python

View File

@@ -0,0 +1,157 @@
/**
* Provides predicates for reasoning about which strings are matched by a regular expression,
* and for testing which capture groups are filled when a particular regexp matches a string.
*/
import NfaUtils
/** A root term */
class RootTerm extends RegExpTerm {
RootTerm() { this.isRootTerm() }
}
/**
* Holds if it should be tested whether `root` matches `str`.
*
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
* but if `ignorePrefix` is true, it will only match "foo".
*
* If `testWithGroups` is true, then the `RegexpMatching::fillsCaptureGroup` predicate can be used to determine which capture
* groups are filled by a string.
*/
signature predicate isRegexpMatchingCandidateSig(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
);
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*
* The module parameter `isCandidate` determines which strings should be tested,
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
*/
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-deterministic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
isCandidate(reg, str, ignorePrefix, _) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
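/*
* Illustration only (not part of this module): a word-boundary assertion can be stepped
* over without consuming input when the next character is a non-word character.
*
*   import re
*   print(re.fullmatch(r'foo\b!', 'foo!') is not None)  # True: '!' is a non-word character
*   print(re.fullmatch(r'foo\bx', 'foox') is not None)  # False: \b fails between 'o' and 'x'
*/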
pragma[noopt]
private State getAStateAfterMatchingAux(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exist in the strings tested via the `isCandidate` predicate.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exist in the strings tested via the `isCandidate` predicate.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the candidate strings that have `testWithGroups` set to true.
*/
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
// base case, reaches an accepting state from the last state in `getAState(..)`
isCandidate(reg, str, ignorePrefix, true) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/** Gets the capture group number that `term` belongs to. */
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
/**
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
*/
predicate matches(RootTerm reg, string str) {
exists(State state | state = getAState(reg, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
* Holds if matching `str` against `reg` may fill capture group number `g`.
* Only holds if `str` is a candidate string with `testWithGroups` set to true.
*/
predicate fillsCaptureGroup(RootTerm reg, string str, int g) {
exists(State s |
s = getAStateThatReachesAccept(reg, _, str, _) and
g = group(s.getRepr())
)
}
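/*
* Illustration only (not part of this module): the notion of "filled capture groups"
* computed above corresponds to the groups that receive a value when a backtracking
* engine accepts the string. For example, in Python:
*
*   import re
*   m = re.match(r'(foo)|(bar)', 'bar')
*   filled = [i + 1 for i, g in enumerate(m.groups()) if g is not None]
*   print(filled)  # [2] - only capture group 2 is filled
*/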
}

View File

@@ -0,0 +1,418 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
import NfaUtils
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
* Programs that use Regular Expressions
* (Extended Version).
* (https://arxiv.org/pdf/1701.04045.pdf)
*
* Theorem 3 from the paper describes the basic idea.
*
* The following explains the idea using variables and predicate names that are used in the implementation:
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
*
* We create a product automaton of 3-tuples of states (see `StateTuple`).
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
*
* We start a search in the product automaton at `(pivot, pivot, succ)`,
* and search for a series of transitions (a `Trace`), such that we end
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
*
* For example, consider the regular expression `/^\d*5\w*$/`.
* The search will start at the tuple `(\d*, \d*, \w*)` and search
* for a path to `(\d*, \w*, \w*)`.
* This path exists, and consists of a single transition in the product automaton,
* where the three corresponding NFA edges all match the character `"5"`.
*
* The start-state in the NFA has an any-transition to itself, which allows us to
* flag regular expressions such as `/a*$/` - which does not have a start anchor -
* and can thus start matching anywhere.
*
* The implementation is not perfect.
* It has the same suffix detection issue as the `py/redos` query, which can cause false positives.
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
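/*
* Illustration only (not part of this library): the behaviour described above can be
* observed on a backtracking engine such as CPython's `re` module using the example
* regular expression from this comment, `^\d*5\w*$`. A long run of "5"s followed by a
* rejecting suffix makes the match time grow roughly quadratically with the input length:
*
*   import re
*   import time
*
*   for n in (2000, 4000, 8000):
*       s = '5' * n + '!'  # '!' is not matched by \w, so the overall match must fail
*       start = time.perf_counter()
*       re.match(r'^\d*5\w*$', s)
*       print(n, round(time.perf_counter() - start, 3))
*/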
/**
* Gets any root (start) state of a regular expression.
*/
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
or
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
}
/**
* A state in the product automaton.
* The product automaton contains 3-tuples of states.
*
* We lazily only construct those states that we are actually
* going to need.
* Either a start state `(pivot, pivot, succ)`, or a state
* where there exists a transition from an already existing state.
*
* The exponential variant of this query (`py/redos`) uses an optimization
* trick where `q1 <= q2`. This trick cannot be used here as the order
* of the elements matters.
*/
class StateTuple extends TStateTuple {
State q1;
State q2;
State q3;
StateTuple() { this = MkStateTuple(q1, q2, q3) }
/**
* Gets a string representation of this tuple.
*/
string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
/**
* Holds if this tuple is `(r1, r2, r3)`.
*/
pragma[noinline]
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
}
/**
* A module for determining feasible tuples for the product automaton.
*
* The implementation is split into many predicates for performance reasons.
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
}
/**
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
*/
pragma[noinline]
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
/**
* Holds if state `s` might be inside a backtracking repetition.
*/
pragma[noinline]
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
}
/**
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
*
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
* The case where `pivot = succ` causes exponential backtracking and is handled by the `py/redos` query.
*/
predicate isStartLoops(State pivot, State succ) {
pivot != succ and
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = succ and
(
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
or
pivot = mkMatch(any(RegExpRoot root))
)
}
/**
* Gets a state for which there exists a transition in the NFA from `s`.
*/
State delta(State s) { delta(s, _, result) }
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
exists(State r1, State r2, State r3 |
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
)
}
/**
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3`
* labelled with `s1`, `s2`, and `s3`, respectively.
*/
pragma[noopt]
predicate step(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
deltaClosed(q3, s3, r3) and
// use noopt to force the join on `getAThreewayIntersect` to happen last.
exists(getAThreewayIntersect(s1, s2, s3))
)
}
/**
* Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
*
* The result is not complete, and might miss some combination of edges that share some character.
*/
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)]
or
result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)]
or
result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)]
}
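/*
* Illustration only (not part of this library): conceptually, a three-way intersection is
* a concrete character accepted by all three edges. For example, for edges matching `\d`,
* `[0-5]`, and `\w`, the character "5" is such a shared character; in Python:
*
*   import re
*   classes = [r'\d', r'[0-5]', r'\w']
*   shared = [c for c in map(chr, range(32, 127)) if all(re.fullmatch(p, c) for p in classes)]
*   print(shared)  # ['0', '1', '2', '3', '4', '5']
*/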
/**
* Gets the minimum and maximum characters that intersect between `a` and `b`.
* This predicate is used to limit the size of `getAThreewayIntersect`.
*/
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
result = [min(intersect(a, b)), max(intersect(a, b))]
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
isReachableFromStartTuple(_, _, t, s1, s2, s3, _, _)
}
/**
* A list of tuples of input symbols that describe a path in the product automaton
* starting from some start state.
*/
class Trace extends TTrace {
/**
* Gets a string representation of this Trace that can be used for debug purposes.
*/
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
)
}
}
/**
* Holds if there exists a transition from `r` to `q` in the product automaton.
* Notice that the arguments are flipped, and thus the direction is backwards.
*/
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
/**
* Holds if `tuple` is an end state in our search.
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
*/
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
/**
* Gets the minimum length of a path from `r` to some end state `end`.
*
* The implementation searches backwards from the end-tuple.
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
* The `end` argument must always be an end state.
*/
int distBackFromEnd(StateTuple r, StateTuple end) =
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
* and a path from a start-state to `tuple` follows the transitions in `trace`.
*/
private predicate isReachableFromStartTuple(
State pivot, State succ, StateTuple tuple, Trace trace, int dist
) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace v |
isReachableFromStartTuple(pivot, succ, v, s1, s2, s3, tuple, dist) and
trace = Step(s1, s2, s3, v)
)
}
private predicate isReachableFromStartTuple(
State pivot, State succ, Trace trace, InputSymbol s1, InputSymbol s2, InputSymbol s3,
StateTuple tuple, int dist
) {
// base case.
exists(State q1, State q2, State q3 |
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(q1, q2, q3) and
trace = Nil() and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
)
or
// recursive case
exists(StateTuple p |
isReachableFromStartTuple(pivot, succ, p, trace, dist + 1) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) and
step(p, s1, s2, s3, tuple)
)
}
/**
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
*/
StateTuple getAnEndTuple(State pivot, State succ) {
isStartLoops(pivot, succ) and
result = MkStateTuple(pivot, succ, succ)
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, _, result) }
/** Holds if `n` is used in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State pivot, State succ |
isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), n, _)
)
}
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | t = Step(s1, s2, s3, _) |
result = getAThreewayIntersect(s1, s2, s3)
)
}
}
/**
* Holds if matching repetitions of `pump` can:
* 1) Transition from `pivot` back to `pivot`.
* 2) Transition from `pivot` to `succ`.
* 3) Transition from `succ` to `succ`.
*
* From theorem 3 in the paper linked at the top of this file we can therefore conclude that
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
*
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
* available in the `hasReDoSResult` predicate.
*/
predicate isPumpable(State pivot, State succ, string pump) {
exists(StateTuple q, Trace t |
isReachableFromStartTuple(pivot, succ, q, t, _) and
q = getAnEndTuple(pivot, succ) and
pump = Concretizer<CharTreeImpl>::concretize(t)
)
}
/**
* Holds if states starting in `state` can have polynomial backtracking with the string `pump`.
*/
predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
/**
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
*/
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot |
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and
prev = pivot.getRepr()
)
}
/**
* Gets a message for why `term` can cause polynomial backtracking.
*/
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
polynomialReDoS(term, pump, prefixMsg, prev) and
result =
"Strings " + prefixMsg + "with many repetitions of '" + pump +
"' can start matching anywhere after the start of the preceding " + prev
}
/**
* A term that may cause a regular expression engine to perform a
* polynomial number of match attempts, relative to the input length.
*/
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
string reason;
string pump;
string prefixMsg;
RegExpTerm prev;
PolynomialBackTrackingTerm() {
reason = getReasonString(this, pump, prefixMsg, prev) and
// there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg)
}
/**
* Holds if all non-empty successors to the polynomial backtracking term match the end of the line.
*/
predicate isAtEndLine() {
forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) |
succ instanceof RegExpDollar
)
}
/**
* Gets the string that should be repeated to cause this regular expression to perform polynomially.
*/
string getPumpString() { result = pump }
/**
* Gets a message describing which prefix a matching string must start with for this term to cause polynomial backtracking.
*/
string getPrefixMessage() { result = prefixMsg }
/**
* Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
*/
RegExpTerm getPreviousLoop() { result = prev }
/**
* Gets the reason for the number of match attempts.
*/
string getReason() { result = reason }
}

0
python/ql/lib/semmle/python/security/strings/Basic.qll Executable file → Normal file
View File

View File

@@ -26,9 +26,21 @@ class ImportTimeScope extends Scope {
/** Gets the global variable that is used during lookup, should `var` be undefined. */
GlobalVariable getOuterVariable(LocalVariable var) {
this instanceof Class and
var.getScope() = this and
result.getScope() = this.getEnclosingModule() and
var.getId() = result.getId()
exists(string name |
class_var_scope(this, name, var) and
global_var_scope(name, this.getEnclosingModule(), result)
)
}
}
pragma[nomagic]
private predicate global_var_scope(string name, Scope scope, GlobalVariable var) {
var.getScope() = scope and
var.getId() = name
}
pragma[nomagic]
private predicate class_var_scope(Class cls, string name, LocalVariable var) {
var.getScope() = cls and
var.getId() = name
}

107
python/ql/lib/semmle/python/xml/XML.qll Executable file → Normal file
View File

@@ -8,7 +8,7 @@ private class TXmlLocatable =
@xmldtd or @xmlelement or @xmlattribute or @xmlnamespace or @xmlcomment or @xmlcharacters;
/** An XML element that has a location. */
class XMLLocatable extends @xmllocatable, TXmlLocatable {
class XmlLocatable extends @xmllocatable, TXmlLocatable {
/** Gets the source location for this element. */
Location getLocation() { xmllocations(this, result) }
@@ -32,13 +32,16 @@ class XMLLocatable extends @xmllocatable, TXmlLocatable {
string toString() { none() } // overridden in subclasses
}
/** DEPRECATED: Alias for XmlLocatable */
deprecated class XMLLocatable = XmlLocatable;
/**
* An `XMLParent` is either an `XMLElement` or an `XMLFile`,
* An `XmlParent` is either an `XmlElement` or an `XmlFile`,
* both of which can contain other elements.
*/
class XMLParent extends @xmlparent {
XMLParent() {
// explicitly restrict `this` to be either an `XMLElement` or an `XMLFile`;
class XmlParent extends @xmlparent {
XmlParent() {
// explicitly restrict `this` to be either an `XmlElement` or an `XmlFile`;
// the type `@xmlparent` currently also includes non-XML files
this instanceof @xmlelement or xmlEncoding(this, _)
}
@@ -50,28 +53,28 @@ class XMLParent extends @xmlparent {
string getName() { none() } // overridden in subclasses
/** Gets the file to which this XML parent belongs. */
XMLFile getFile() { result = this or xmlElements(this, _, _, _, result) }
XmlFile getFile() { result = this or xmlElements(this, _, _, _, result) }
/** Gets the child element at a specified index of this XML parent. */
XMLElement getChild(int index) { xmlElements(result, _, this, index, _) }
XmlElement getChild(int index) { xmlElements(result, _, this, index, _) }
/** Gets a child element of this XML parent. */
XMLElement getAChild() { xmlElements(result, _, this, _, _) }
XmlElement getAChild() { xmlElements(result, _, this, _, _) }
/** Gets a child element of this XML parent with the given `name`. */
XMLElement getAChild(string name) { xmlElements(result, _, this, _, _) and result.hasName(name) }
XmlElement getAChild(string name) { xmlElements(result, _, this, _, _) and result.hasName(name) }
/** Gets a comment that is a child of this XML parent. */
XMLComment getAComment() { xmlComments(result, _, this, _) }
XmlComment getAComment() { xmlComments(result, _, this, _) }
/** Gets a character sequence that is a child of this XML parent. */
XMLCharacters getACharactersSet() { xmlChars(result, _, this, _, _, _) }
XmlCharacters getACharactersSet() { xmlChars(result, _, this, _, _, _) }
/** Gets the depth in the tree. (Overridden in XMLElement.) */
/** Gets the depth in the tree. (Overridden in XmlElement.) */
int getDepth() { result = 0 }
/** Gets the number of child XML elements of this XML parent. */
int getNumberOfChildren() { result = count(XMLElement e | xmlElements(e, _, this, _, _)) }
int getNumberOfChildren() { result = count(XmlElement e | xmlElements(e, _, this, _, _)) }
/** Gets the number of places in the body of this XML parent where text occurs. */
int getNumberOfCharacterSets() { result = count(int pos | xmlChars(_, _, this, pos, _, _)) }
@@ -92,9 +95,12 @@ class XMLParent extends @xmlparent {
string toString() { result = this.getName() }
}
/** DEPRECATED: Alias for XmlParent */
deprecated class XMLParent = XmlParent;
/** An XML file. */
class XMLFile extends XMLParent, File {
XMLFile() { xmlEncoding(this, _) }
class XmlFile extends XmlParent, File {
XmlFile() { xmlEncoding(this, _) }
/** Gets a printable representation of this XML file. */
override string toString() { result = this.getName() }
@@ -120,15 +126,21 @@ class XMLFile extends XMLParent, File {
string getEncoding() { xmlEncoding(this, result) }
/** Gets the XML file itself. */
override XMLFile getFile() { result = this }
override XmlFile getFile() { result = this }
/** Gets a top-most element in an XML file. */
XMLElement getARootElement() { result = this.getAChild() }
XmlElement getARootElement() { result = this.getAChild() }
/** Gets a DTD associated with this XML file. */
XMLDTD getADTD() { xmlDTDs(result, _, _, _, this) }
XmlDtd getADtd() { xmlDTDs(result, _, _, _, this) }
/** DEPRECATED: Alias for getADtd */
deprecated XmlDtd getADTD() { result = this.getADtd() }
}
/** DEPRECATED: Alias for XmlFile */
deprecated class XMLFile = XmlFile;
/**
* An XML document type definition (DTD).
*
@@ -140,7 +152,7 @@ class XMLFile extends XMLParent, File {
* <!ELEMENT lastName (#PCDATA)>
* ```
*/
class XMLDTD extends XMLLocatable, @xmldtd {
class XmlDtd extends XmlLocatable, @xmldtd {
/** Gets the name of the root element of this DTD. */
string getRoot() { xmlDTDs(this, result, _, _, _) }
@@ -154,7 +166,7 @@ class XMLDTD extends XMLLocatable, @xmldtd {
predicate isPublic() { not xmlDTDs(this, _, "", _, _) }
/** Gets the parent of this DTD. */
XMLParent getParent() { xmlDTDs(this, _, _, _, result) }
XmlParent getParent() { xmlDTDs(this, _, _, _, result) }
override string toString() {
this.isPublic() and
@@ -165,6 +177,9 @@ class XMLDTD extends XMLLocatable, @xmldtd {
}
}
/** DEPRECATED: Alias for XmlDtd */
deprecated class XMLDTD = XmlDtd;
/**
* An XML element in an XML file.
*
@@ -176,7 +191,7 @@ class XMLDTD extends XMLLocatable, @xmldtd {
* </manifest>
* ```
*/
class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
class XmlElement extends @xmlelement, XmlParent, XmlLocatable {
/** Holds if this XML element has the given `name`. */
predicate hasName(string name) { name = this.getName() }
@@ -184,10 +199,10 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
override string getName() { xmlElements(this, result, _, _, _) }
/** Gets the XML file in which this XML element occurs. */
override XMLFile getFile() { xmlElements(this, _, _, _, result) }
override XmlFile getFile() { xmlElements(this, _, _, _, result) }
/** Gets the parent of this XML element. */
XMLParent getParent() { xmlElements(this, _, result, _, _) }
XmlParent getParent() { xmlElements(this, _, result, _, _) }
/** Gets the index of this XML element among its parent's children. */
int getIndex() { xmlElements(this, _, _, result, _) }
@@ -196,7 +211,7 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
predicate hasNamespace() { xmlHasNs(this, _, _) }
/** Gets the namespace of this XML element, if any. */
XMLNamespace getNamespace() { xmlHasNs(this, result, _) }
XmlNamespace getNamespace() { xmlHasNs(this, result, _) }
/** Gets the index of this XML element among its parent's children. */
int getElementPositionIndex() { xmlElements(this, _, _, result, _) }
@@ -205,10 +220,10 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
override int getDepth() { result = this.getParent().getDepth() + 1 }
/** Gets an XML attribute of this XML element. */
XMLAttribute getAnAttribute() { result.getElement() = this }
XmlAttribute getAnAttribute() { result.getElement() = this }
/** Gets the attribute with the specified `name`, if any. */
XMLAttribute getAttribute(string name) { result.getElement() = this and result.getName() = name }
XmlAttribute getAttribute(string name) { result.getElement() = this and result.getName() = name }
/** Holds if this XML element has an attribute with the specified `name`. */
predicate hasAttribute(string name) { exists(this.getAttribute(name)) }
@@ -220,6 +235,9 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
override string toString() { result = this.getName() }
}
/** DEPRECATED: Alias for XmlElement */
deprecated class XMLElement = XmlElement;
/**
* An attribute that occurs inside an XML element.
*
@@ -230,18 +248,18 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
* android:versionCode="1"
* ```
*/
class XMLAttribute extends @xmlattribute, XMLLocatable {
class XmlAttribute extends @xmlattribute, XmlLocatable {
/** Gets the name of this attribute. */
string getName() { xmlAttrs(this, _, result, _, _, _) }
/** Gets the XML element to which this attribute belongs. */
XMLElement getElement() { xmlAttrs(this, result, _, _, _, _) }
XmlElement getElement() { xmlAttrs(this, result, _, _, _, _) }
/** Holds if this attribute has a namespace. */
predicate hasNamespace() { xmlHasNs(this, _, _) }
/** Gets the namespace of this attribute, if any. */
XMLNamespace getNamespace() { xmlHasNs(this, result, _) }
XmlNamespace getNamespace() { xmlHasNs(this, result, _) }
/** Gets the value of this attribute. */
string getValue() { xmlAttrs(this, _, _, result, _, _) }
@@ -250,6 +268,9 @@ class XMLAttribute extends @xmlattribute, XMLLocatable {
override string toString() { result = this.getName() + "=" + this.getValue() }
}
/** DEPRECATED: Alias for XmlAttribute */
deprecated class XMLAttribute = XmlAttribute;
/**
* A namespace used in an XML file.
*
@@ -259,23 +280,29 @@ class XMLAttribute extends @xmlattribute, XMLLocatable {
* xmlns:android="http://schemas.android.com/apk/res/android"
* ```
*/
class XMLNamespace extends XMLLocatable, @xmlnamespace {
class XmlNamespace extends XmlLocatable, @xmlnamespace {
/** Gets the prefix of this namespace. */
string getPrefix() { xmlNs(this, result, _, _) }
/** Gets the URI of this namespace. */
string getURI() { xmlNs(this, _, result, _) }
string getUri() { xmlNs(this, _, result, _) }
/** DEPRECATED: Alias for getUri */
deprecated string getURI() { result = this.getUri() }
/** Holds if this namespace has no prefix. */
predicate isDefault() { this.getPrefix() = "" }
override string toString() {
this.isDefault() and result = this.getURI()
this.isDefault() and result = this.getUri()
or
not this.isDefault() and result = this.getPrefix() + ":" + this.getURI()
not this.isDefault() and result = this.getPrefix() + ":" + this.getUri()
}
}
/** DEPRECATED: Alias for XmlNamespace */
deprecated class XMLNamespace = XmlNamespace;
/**
* A comment in an XML file.
*
@@ -285,17 +312,20 @@ class XMLNamespace extends XMLLocatable, @xmlnamespace {
* <!-- This is a comment. -->
* ```
*/
class XMLComment extends @xmlcomment, XMLLocatable {
class XmlComment extends @xmlcomment, XmlLocatable {
/** Gets the text content of this XML comment. */
string getText() { xmlComments(this, result, _, _) }
/** Gets the parent of this XML comment. */
XMLParent getParent() { xmlComments(this, _, result, _) }
XmlParent getParent() { xmlComments(this, _, result, _) }
/** Gets a printable representation of this XML comment. */
override string toString() { result = this.getText() }
}
/** DEPRECATED: Alias for XmlComment */
deprecated class XMLComment = XmlComment;
/**
* A sequence of characters that occurs between opening and
* closing tags of an XML element, excluding other elements.
@@ -306,12 +336,12 @@ class XMLComment extends @xmlcomment, XMLLocatable {
* <content>This is a sequence of characters.</content>
* ```
*/
class XMLCharacters extends @xmlcharacters, XMLLocatable {
class XmlCharacters extends @xmlcharacters, XmlLocatable {
/** Gets the content of this character sequence. */
string getCharacters() { xmlChars(this, result, _, _, _, _) }
/** Gets the parent of this character sequence. */
XMLParent getParent() { xmlChars(this, _, result, _, _, _) }
XmlParent getParent() { xmlChars(this, _, result, _, _, _) }
/** Holds if this character sequence is CDATA. */
predicate isCDATA() { xmlChars(this, _, _, _, 1, _) }
@@ -319,3 +349,6 @@ class XMLCharacters extends @xmlcharacters, XMLLocatable {
/** Gets a printable representation of this XML character sequence. */
override string toString() { result = this.getCharacters() }
}
/** DEPRECATED: Alias for XmlCharacters */
deprecated class XMLCharacters = XmlCharacters;

View File

@@ -1,3 +1,5 @@
## 0.4.1
## 0.4.0
### Breaking Changes

0
python/ql/src/Exceptions/EmptyExcept.ql Executable file → Normal file
View File

0
python/ql/src/Exceptions/UnguardedNextInGenerator.ql Executable file → Normal file
View File

0
python/ql/src/Resources/FileNotAlwaysClosed.ql Executable file → Normal file
View File

View File

@@ -35,7 +35,7 @@ private import semmle.python.objects.ObjectInternal
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
// probably require some more work: for this query, it's totally ok to use
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
// figure out if that is desireable in general. I simply skipped a corner here!
// figure out if that is desirable in general. I simply skipped a corner here!
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/**
* A callable that is considered a "safe" external API from a security perspective.
@@ -131,7 +131,9 @@ class UntrustedExternalApiDataNode extends ExternalApiDataNode {
/** DEPRECATED: Alias for UntrustedExternalApiDataNode */
deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
/** An external API which is used with untrusted data. */
private newtype TExternalApi =
/** An untrusted API method `m` where untrusted data is passed at `index`. */
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalApiDataNode n |
callable = n.getCallable() and

View File

@@ -0,0 +1,65 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
It's easy to write a regular expression range that matches a wider range of characters than you intended.
For example, <code>/[a-zA-z]/</code> matches all lowercase and all uppercase letters,
as you would expect, but it also matches the characters: <code>[ \ ] ^ _ `</code>.
</p>
<p>
Another common problem is failing to escape the dash character in a regular
expression. An unescaped dash is interpreted
as part of a range. For example, in the character class <code>[a-zA-Z0-9%=.,-_]</code>
the last character range matches the 52 characters between
<code>,</code> and <code>_</code> (both included), which overlaps with the
range <code>[0-9]</code> and is clearly not intended by the writer.
</p>
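<p>
As a quick check (for illustration only), the following snippet confirms both of the
surprising matches described above:
</p>
<sample language="python">
import re

print(re.findall(r"[a-zA-z]", "AZaz[\\]^_`{"))  # also matches '[', '\', ']', '^', '_', '`'
print(len(re.findall(r"[,-_]", "".join(chr(c) for c in range(32, 127)))))  # 52 characters
</sample>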
</overview>
<recommendation>
<p>
Avoid any confusion about which characters are included in the range by
writing unambiguous regular expressions.
Always check that character ranges match only the expected characters.
</p>
</recommendation>
<example>
<p>
The following example code is intended to check whether a string is a valid 6 digit hex color.
</p>
<sample language="python">
import re
def is_valid_hex_color(color):
return re.match(r'^#[0-9a-fA-f]{6}$', color) is not None
</sample>
<p>
However, the <code>A-f</code> range is overly large and matches every uppercase character.
It would parse a "color" like <code>#XXYYZZ</code> as valid.
</p>
<p>
The fix is to use an uppercase <code>A-F</code> range instead.
</p>
<sample language="python">
import re
def is_valid_hex_color(color):
return re.match(r'^#[0-9a-fA-F]{6}$', color) is not None
</sample>
</example>
<references>
<li>GitHub Advisory Database: <a href="https://github.com/advisories/GHSA-g4rg-993r-mgx7">CVE-2021-42740: Improper Neutralization of Special Elements used in a Command in Shell-quote</a></li>
<li>wh0.github.io: <a href="https://wh0.github.io/2021/10/28/shell-quote-rce-exploiting.html">Exploiting CVE-2021-42740</a></li>
<li>Yosuke Ota: <a href="https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-obscure-range.html">no-obscure-range</a></li>
<li>Paul Boyd: <a href="https://pboyd.io/posts/comma-dash-dot/">The regex [,-.]</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,19 @@
/**
* @name Overly permissive regular expression range
* @description Overly permissive regular expression ranges match a wider range of characters than intended.
* This may allow an attacker to bypass a filter or sanitizer.
* @kind problem
* @problem.severity warning
* @security-severity 5.0
* @precision high
* @id py/overly-large-range
* @tags correctness
* security
* external/cwe/cwe-020
*/
import semmle.python.security.OverlyLargeRangeQuery
from RegExpCharacterRange range, string reason
where problem(range, reason)
select range, "Suspicious character range that " + reason + "."

0
python/ql/src/Security/CWE-078/CommandInjection.ql Executable file → Normal file
View File

View File

@@ -19,14 +19,14 @@ private API::Node unsafe_paramiko_policy(string name) {
result = API::moduleImport("paramiko").getMember("client").getMember(name)
}
private API::Node paramikoSSHClientInstance() {
private API::Node paramikoSshClientInstance() {
result = API::moduleImport("paramiko").getMember("client").getMember("SSHClient").getReturn()
}
from DataFlow::CallCfgNode call, DataFlow::Node arg, string name
where
// see http://docs.paramiko.org/en/stable/api/client.html#paramiko.client.SSHClient.set_missing_host_key_policy
call = paramikoSSHClientInstance().getMember("set_missing_host_key_policy").getACall() and
call = paramikoSshClientInstance().getMember("set_missing_host_key_policy").getACall() and
arg in [call.getArg(0), call.getArgByName("policy")] and
(
arg = unsafe_paramiko_policy(name).getAValueReachableFromSource() or

View File

@@ -29,7 +29,7 @@ The example shows two unsafe calls to <a href="https://semmle.com">semmle.com</a
<references>
<li>
Python requests documentation: <a href="http://docs.python-requests.org/en/master/user/advanced/#ssl-cert-verification">SSL Cert Verification</a>.
Python requests documentation: <a href="https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification">SSL Cert Verification</a>.
</li>
</references>
</qhelp>

View File

@@ -7,13 +7,13 @@ private import python
private import semmle.python.ApiGraphs
import TlsLibraryModel
class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
PyOpenSSLContextCreation() {
class PyOpenSslContextCreation extends ContextCreation, DataFlow::CallCfgNode {
PyOpenSslContextCreation() {
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Context").getACall()
}
override string getProtocol() {
exists(DataFlow::Node protocolArg, PyOpenSSL pyo |
exists(DataFlow::Node protocolArg, PyOpenSsl pyo |
protocolArg in [this.getArg(0), this.getArgByName("method")]
|
protocolArg in [
@@ -51,12 +51,12 @@ class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
}
}
class UnspecificPyOpenSSLContextCreation extends PyOpenSSLContextCreation, UnspecificContextCreation {
UnspecificPyOpenSSLContextCreation() { library instanceof PyOpenSSL }
class UnspecificPyOpenSslContextCreation extends PyOpenSslContextCreation, UnspecificContextCreation {
UnspecificPyOpenSslContextCreation() { library instanceof PyOpenSsl }
}
class PyOpenSSL extends TlsLibrary {
PyOpenSSL() { this = "pyOpenSSL" }
class PyOpenSsl extends TlsLibrary {
PyOpenSsl() { this = "pyOpenSSL" }
override string specific_version_name(ProtocolVersion version) { result = version + "_METHOD" }
@@ -70,7 +70,7 @@ class PyOpenSSL extends TlsLibrary {
override ContextCreation default_context_creation() { none() }
override ContextCreation specific_context_creation() {
result instanceof PyOpenSSLContextCreation
result instanceof PyOpenSslContextCreation
}
override DataFlow::Node insecure_connection_creation(ProtocolVersion version) { none() }
@@ -80,6 +80,6 @@ class PyOpenSSL extends TlsLibrary {
override ProtocolRestriction protocol_restriction() { result instanceof SetOptionsCall }
override ProtocolUnrestriction protocol_unrestriction() {
result instanceof UnspecificPyOpenSSLContextCreation
result instanceof UnspecificPyOpenSslContextCreation
}
}
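
A small sketch of the pyOpenSSL usage this model matches — a specific context creation plus a `set_options` protocol restriction; the particular constants are only illustrative:
<sample language="python">
from OpenSSL import SSL

# Specific context creation: the protocol is named via a *_METHOD constant.
context = SSL.Context(SSL.TLSv1_2_METHOD)
# Protocol restriction: disable an older protocol version on the context.
context.set_options(SSL.OP_NO_TLSv1)
</sample>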

View File

@@ -7,8 +7,8 @@ private import python
private import semmle.python.ApiGraphs
import TlsLibraryModel
class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
class SslContextCreation extends ContextCreation, DataFlow::CallCfgNode {
SslContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
override string getProtocol() {
exists(DataFlow::Node protocolArg, Ssl ssl |
@@ -27,8 +27,8 @@ class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
}
}
class SSLDefaultContextCreation extends ContextCreation {
SSLDefaultContextCreation() {
class SslDefaultContextCreation extends ContextCreation {
SslDefaultContextCreation() {
this = API::moduleImport("ssl").getMember("create_default_context").getACall()
}
@@ -161,8 +161,8 @@ class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, Data
}
}
class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContextCreation {
UnspecificSSLContextCreation() { library instanceof Ssl }
class UnspecificSslContextCreation extends SslContextCreation, UnspecificContextCreation {
UnspecificSslContextCreation() { library instanceof Ssl }
override ProtocolVersion getUnrestriction() {
result = UnspecificContextCreation.super.getUnrestriction() and
@@ -172,7 +172,7 @@ class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContext
}
}
class UnspecificSSLDefaultContextCreation extends SSLDefaultContextCreation, ProtocolUnrestriction {
class UnspecificSslDefaultContextCreation extends SslDefaultContextCreation, ProtocolUnrestriction {
override DataFlow::Node getContext() { result = this }
// see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
@@ -195,10 +195,10 @@ class Ssl extends TlsLibrary {
override API::Node version_constants() { result = API::moduleImport("ssl") }
override ContextCreation default_context_creation() {
result instanceof SSLDefaultContextCreation
result instanceof SslDefaultContextCreation
}
override ContextCreation specific_context_creation() { result instanceof SSLContextCreation }
override ContextCreation specific_context_creation() { result instanceof SslContextCreation }
override DataFlow::CallCfgNode insecure_connection_creation(ProtocolVersion version) {
result = API::moduleImport("ssl").getMember("wrap_socket").getACall() and
@@ -220,8 +220,8 @@ class Ssl extends TlsLibrary {
or
result instanceof ContextSetVersion
or
result instanceof UnspecificSSLContextCreation
result instanceof UnspecificSslContextCreation
or
result instanceof UnspecificSSLDefaultContextCreation
result instanceof UnspecificSslDefaultContextCreation
}
}
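
Likewise, a sketch of the `ssl` module calls this model distinguishes — specific and default context creation plus a version restriction; the attribute-based restriction assumes Python 3.7+:
<sample language="python">
import ssl

specific = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)   # specific context creation
default = ssl.create_default_context()               # default context creation
specific.minimum_version = ssl.TLSVersion.TLSv1_2    # restricts older protocol versions
</sample>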

View File

@@ -13,7 +13,7 @@
*/
import python
import semmle.python.security.performance.SuperlinearBackTracking
import semmle.python.security.regexp.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoSQuery
import DataFlow::PathGraph
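
As a reminder of the behaviour these backtracking queries target, a hypothetical pattern with super-linear matching; the input is only for demonstration and the slow call is left commented out:
<sample language="python">
import re

# Two adjacent quantifiers over the same character, as in a*a*, force
# quadratic backtracking on inputs that almost match but fail at the end.
pattern = re.compile(r'^a*a*$')
# pattern.match("a" * 100_000 + "!")  # noticeably slow: O(n^2) backtracking
</sample>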

View File

@@ -14,7 +14,7 @@
*/
import python
import semmle.python.security.performance.ExponentialBackTracking
import semmle.python.security.regexp.ExponentialBackTracking
from RegExpTerm t, string pump, State s, string prefixMsg
where

View File

@@ -0,0 +1,5 @@
---
category: newQuery
---
* Added a new query, `py/overly-large-range`, to detect character ranges in regular expressions that seem to match
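Placeholder-anchored note removed.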
too many characters.

View File

@@ -0,0 +1 @@
## 0.4.1

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.4.0
lastReleaseVersion: 0.4.1

View File

@@ -17,8 +17,8 @@ import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.XSLT
class XSLTInjectionConfiguration extends TaintTracking::Configuration {
XSLTInjectionConfiguration() { this = "XSLT injection configuration" }
class XsltInjectionConfiguration extends TaintTracking::Configuration {
XsltInjectionConfiguration() { this = "XSLT injection configuration" }
deprecated override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
@@ -29,7 +29,7 @@ class XSLTInjectionConfiguration extends TaintTracking::Configuration {
}
}
from XSLTInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
from XsltInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -2,7 +2,6 @@ private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.DataFlow
/**
* A data flow source of the client ip obtained according to the remote endpoint identifier specified

View File

@@ -182,7 +182,10 @@ module LdapBind {
/**
* Holds if the binding process uses SSL.
*/
abstract predicate useSSL();
abstract predicate useSsl();
/** DEPRECATED: Alias for useSsl */
deprecated predicate useSSL() { useSsl() }
}
}
@@ -213,7 +216,10 @@ class LdapBind extends DataFlow::Node {
/**
* Holds if the binding process uses SSL.
*/
predicate useSSL() { range.useSSL() }
predicate useSsl() { range.useSsl() }
/** DEPRECATED: Alias for useSsl */
deprecated predicate useSSL() { useSsl() }
}
/** DEPRECATED: Alias for LdapBind */
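
A sketch of what "the binding process uses SSL" can look like with python-ldap; the DN and password are placeholders:
<sample language="python">
import ldap

connection = ldap.initialize("ldap://ldap.example.com")
# Demanding TLS certificate validation before binding is the kind of
# evidence the useSsl() predicate is meant to capture.
connection.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_DEMAND)
connection.simple_bind_s("cn=admin,dc=example,dc=com", "placeholder-password")
</sample>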

View File

@@ -12,13 +12,13 @@ private import semmle.python.ApiGraphs
/**
* Provides models for Python's ldap-related libraries.
*/
private module LDAP {
private module Ldap {
/**
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
*/
private module LDAP2 {
private module Ldap2 {
/** Gets a reference to the `ldap` module. */
API::Node ldap() { result = API::moduleImport("ldap") }
@@ -38,8 +38,8 @@ private module LDAP {
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2QueryMethods extends string {
LDAP2QueryMethods() {
private class Ldap2QueryMethods extends string {
Ldap2QueryMethods() {
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
}
}
@@ -52,7 +52,7 @@ private module LDAP {
/** Gets a reference to a `ldap` query. */
private DataFlow::Node ldapQuery() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2QueryMethods
result.(DataFlow::AttrRead).getAttributeName() instanceof Ldap2QueryMethods
}
/**
@@ -60,8 +60,8 @@ private module LDAP {
*
* See `LDAP2QueryMethods`
*/
private class LDAP2Query extends DataFlow::CallCfgNode, LdapQuery::Range {
LDAP2Query() { this.getFunction() = ldapQuery() }
private class Ldap2Query extends DataFlow::CallCfgNode, LdapQuery::Range {
Ldap2Query() { this.getFunction() = ldapQuery() }
override DataFlow::Node getQuery() {
result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")]
@@ -73,8 +73,8 @@ private module LDAP {
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
*/
private class LDAP2BindMethods extends string {
LDAP2BindMethods() {
private class Ldap2BindMethods extends string {
Ldap2BindMethods() {
this in [
"bind", "bind_s", "simple_bind", "simple_bind_s", "sasl_interactive_bind_s",
"sasl_non_interactive_bind_s", "sasl_external_bind_s", "sasl_gssapi_bind_s"
@@ -85,12 +85,12 @@ private module LDAP {
/** Gets a reference to a `ldap` bind. */
private DataFlow::Node ldapBind() {
result = ldapOperation() and
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
result.(DataFlow::AttrRead).getAttributeName() instanceof Ldap2BindMethods
}
/** List of SSL-demanding options. */
private class LDAPSSLOptions extends DataFlow::Node {
LDAPSSLOptions() {
private class LdapSslOptions extends DataFlow::Node {
LdapSslOptions() {
this = ldap().getMember("OPT_X_TLS_" + ["DEMAND", "HARD"]).getAValueReachableFromSource()
}
}
@@ -100,8 +100,8 @@ private module LDAP {
*
* See `LDAP2BindMethods`
*/
private class LDAP2Bind extends DataFlow::CallCfgNode, LdapBind::Range {
LDAP2Bind() { this.getFunction() = ldapBind() }
private class Ldap2Bind extends DataFlow::CallCfgNode, LdapBind::Range {
Ldap2Bind() { this.getFunction() = ldapBind() }
override DataFlow::Node getPassword() {
result in [this.getArg(1), this.getArgByName("cred")]
@@ -115,11 +115,11 @@ private module LDAP {
)
}
override predicate useSSL() {
override predicate useSsl() {
// use initialize to correlate `this` and so avoid FP in several instances
exists(DataFlow::CallCfgNode initialize |
// ldap.set_option(ldap.OPT_X_TLS_%s)
ldap().getMember("set_option").getACall().getArg(_) instanceof LDAPSSLOptions
ldap().getMember("set_option").getACall().getArg(_) instanceof LdapSslOptions
or
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
initialize = ldapInitialize().getACall() and
@@ -136,7 +136,7 @@ private module LDAP {
setOption.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
initialize and
setOption.getFunction().(DataFlow::AttrRead).getAttributeName() = "set_option" and
setOption.getArg(0) instanceof LDAPSSLOptions and
setOption.getArg(0) instanceof LdapSslOptions and
not DataFlow::exprNode(any(False falseExpr))
.(DataFlow::LocalSourceNode)
.flowsTo(setOption.getArg(1))
@@ -144,6 +144,9 @@ private module LDAP {
)
)
}
/** DEPRECATED: Alias for useSsl */
deprecated override predicate useSSL() { this.useSsl() }
}
/**
@@ -151,8 +154,8 @@ private module LDAP {
*
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
*/
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
private class Ldap2EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
Ldap2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
@@ -162,8 +165,8 @@ private module LDAP {
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
*/
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP2EscapeFilterCall() {
private class Ldap2EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
Ldap2EscapeFilterCall() {
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
}
@@ -176,7 +179,7 @@ private module LDAP {
*
* See https://pypi.org/project/ldap3/
*/
private module LDAP3 {
private module Ldap3 {
/** Gets a reference to the `ldap3` module. */
API::Node ldap3() { result = API::moduleImport("ldap3") }
@@ -192,8 +195,8 @@ private module LDAP {
/**
* A class to find `ldap3` methods executing a query.
*/
private class LDAP3Query extends DataFlow::CallCfgNode, LdapQuery::Range {
LDAP3Query() {
private class Ldap3Query extends DataFlow::CallCfgNode, LdapQuery::Range {
Ldap3Query() {
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
ldap3Connection().getACall() and
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "search"
@@ -205,8 +208,8 @@ private module LDAP {
/**
* A class to find `ldap3` methods binding a connection.
*/
class LDAP3Bind extends DataFlow::CallCfgNode, LdapBind::Range {
LDAP3Bind() { this = ldap3Connection().getACall() }
class Ldap3Bind extends DataFlow::CallCfgNode, LdapBind::Range {
Ldap3Bind() { this = ldap3Connection().getACall() }
override DataFlow::Node getPassword() {
result in [this.getArg(2), this.getArgByName("password")]
@@ -220,7 +223,7 @@ private module LDAP {
)
}
override predicate useSSL() {
override predicate useSsl() {
exists(DataFlow::CallCfgNode serverCall |
serverCall = ldap3Server().getACall() and
this.getArg(0).getALocalSource() = serverCall and
@@ -236,6 +239,9 @@ private module LDAP {
startTLS.getObject().getALocalSource() = this
)
}
/** DEPRECATED: Alias for useSsl */
deprecated override predicate useSSL() { this.useSsl() }
}
/**
@@ -243,8 +249,8 @@ private module LDAP {
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
*/
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
private class Ldap3EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
Ldap3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
@@ -254,8 +260,8 @@ private module LDAP {
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
*/
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP3EscapeFilterCall() {
private class Ldap3EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
Ldap3EscapeFilterCall() {
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
}
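
For the ldap3 side, a sketch of a bind over SSL and an escaped search filter, roughly matching what `Ldap3Bind`, `Ldap3Query`, and `Ldap3EscapeFilterCall` model; the host, DN, password, and filter are placeholders:
<sample language="python">
import ssl

from ldap3 import Connection, Server, Tls
from ldap3.utils.conv import escape_filter_chars

server = Server("ldap.example.com", use_ssl=True,
                tls=Tls(validate=ssl.CERT_REQUIRED))
connection = Connection(server, "cn=admin,dc=example,dc=com",
                        "placeholder-password", auto_bind=True)

username = "someone"  # imagine this arrives from a request parameter
connection.search("dc=example,dc=com",
                  "(uid=%s)" % escape_filter_chars(username))
</sample>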

View File

@@ -122,7 +122,7 @@ class LdapInsecureAuthConfig extends TaintTracking::Configuration {
}
override predicate isSink(DataFlow::Node sink) {
exists(LdapBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
exists(LdapBind ldapBind | not ldapBind.useSsl() and sink = ldapBind.getHost())
}
}

View File

@@ -11,12 +11,15 @@ import semmle.python.dataflow.TaintTracking
import semmle.python.web.HttpRequest
/** Models XSLT Injection related classes and functions */
module XSLTInjection {
module XsltInjection {
/** Returns a class value which refers to `lxml.etree` */
Value etree() { result = Value::named("lxml.etree") }
/** A generic taint sink that is vulnerable to XSLT injection. */
abstract class XSLTInjectionSink extends TaintSink { }
abstract class XsltInjectionSink extends TaintSink { }
/** DEPRECATED: Alias for XsltInjectionSink */
deprecated class XSLTInjectionSink = XsltInjectionSink;
/**
* A kind of "taint", representing an untrusted XML string
@@ -73,10 +76,10 @@ module XSLTInjection {
* root = etree.XML("<xmlContent>")
* find_text = etree.XSLT("`sink`")
*/
private class EtreeXSLTArgument extends XSLTInjectionSink {
private class EtreeXsltArgument extends XsltInjectionSink {
override string toString() { result = "lxml.etree.XSLT" }
EtreeXSLTArgument() {
EtreeXsltArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("XSLT").pointsTo(etree()) |
call.getArg(0) = this
)
@@ -94,10 +97,10 @@ module XSLTInjection {
* tree = etree.parse(f)
* result_tree = tree.xslt(`sink`)
*/
private class ParseXSLTArgument extends XSLTInjectionSink {
private class ParseXsltArgument extends XsltInjectionSink {
override string toString() { result = "lxml.etree.parse.xslt" }
ParseXSLTArgument() {
ParseXsltArgument() {
exists(
CallNode parseCall, CallNode xsltCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
@@ -113,3 +116,6 @@ module XSLTInjection {
override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
}
}
/** DEPRECATED: Alias for XsltInjection */
deprecated module XSLTInjection = XsltInjection;
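
To make the sink concrete, a sketch of the lxml pattern both sink classes describe; `user_supplied_stylesheet` stands in for a hypothetical attacker-controlled value:
<sample language="python">
from lxml import etree

user_supplied_stylesheet = (  # imagine this arrives from an HTTP request
    '<xsl:stylesheet version="1.0" '
    'xmlns:xsl="http://www.w3.org/1999/XSL/Transform">'
    '<xsl:template match="/"><result/></xsl:template>'
    '</xsl:stylesheet>'
)

document = etree.XML("<root><item/></root>")
# The argument to etree.XSLT is the sink; tree.xslt(...) is the other form.
transform = etree.XSLT(etree.XML(user_supplied_stylesheet))
result = transform(document)
</sample>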

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.4.1-dev
version: 0.4.2-dev
groups:
- python
- queries

View File

@@ -330,6 +330,19 @@ abstract private class Expectation extends FailureLocatable {
override Location getLocation() { result = comment.getLocation() }
}
private predicate onSameLine(ValidExpectation a, ActualResult b) {
exists(string fname, int line, Location la, Location lb |
// Join order intent:
// Take the locations of ActualResults,
// join with locations in the same file / on the same line,
// then match those against ValidExpectations.
la = a.getLocation() and
pragma[only_bind_into](lb) = b.getLocation() and
pragma[only_bind_into](la).hasLocationInfo(fname, line, _, _, _) and
lb.hasLocationInfo(fname, line, _, _, _)
)
}
private class ValidExpectation extends Expectation, TValidExpectation {
string tag;
string value;
@@ -344,8 +357,7 @@ private class ValidExpectation extends Expectation, TValidExpectation {
string getKnownFailure() { result = knownFailure }
predicate matchesActualResult(ActualResult actualResult) {
getLocation().getStartLine() = actualResult.getLocation().getStartLine() and
getLocation().getFile() = actualResult.getLocation().getFile() and
onSameLine(pragma[only_bind_into](this), actualResult) and
getTag() = actualResult.getTag() and
getValue() = actualResult.getValue()
}

View File

@@ -84,8 +84,8 @@ class Assertion extends Comment {
string tryExplainFailure() {
exists(int i, API::Node nd, string prefix, string suffix |
nd = this.lookup(i) and
i < getPathLength() and
not exists(this.lookup([i + 1 .. getPathLength()])) and
i < this.getPathLength() and
not exists(this.lookup([i + 1 .. this.getPathLength()])) and
prefix = nd + " has no outgoing edge labelled " + this.getEdgeLabel(i) + ";" and
if exists(nd.getASuccessor())
then

View File

@@ -1,6 +1,6 @@
import python
import experimental.semmle.python.security.injection.XSLT
from XSLTInjection::XSLTInjectionSink sink, TaintKind kind
from XsltInjection::XsltInjectionSink sink, TaintKind kind
where sink.sinks(kind)
select sink, kind

0
python/ql/test/library-tests/PointsTo/new/Dataflow.ql Executable file → Normal file
View File

View File

@@ -0,0 +1,10 @@
| test.py:3:29:3:31 | 0-9 | Suspicious character range that overlaps with 3-5 in the same character class. |
| test.py:5:31:5:33 | A-z | Suspicious character range that overlaps with A-Z in the same character class, and is equivalent to [A-Z\\[\\\\\\]^_`a-z]. |
| test.py:7:28:7:30 | z-a | Suspicious character range that is empty. |
| test.py:17:38:17:40 | A-f | Suspicious character range that overlaps with a-f in the same character class, and is equivalent to [A-Z\\[\\\\\\]^_`a-f]. |
| test.py:19:30:19:32 | $-` | Suspicious character range that is equivalent to [$%&'()*+,\\-.\\/0-9:;<=>?@A-Z\\[\\\\\\]^_`]. |
| test.py:21:43:21:45 | +-< | Suspicious character range that is equivalent to [+,\\-.\\/0-9:;<]. |
| test.py:23:47:23:49 | .-_ | Suspicious character range that overlaps with 1-9 in the same character class, and is equivalent to [.\\/0-9:;<=>?@A-Z\\[\\\\\\]^_]. |
| test.py:25:34:25:36 | 7-F | Suspicious character range that is equivalent to [7-9:;<=>?@A-F]. |
| test.py:27:38:27:40 | 0-9 | Suspicious character range that overlaps with \\d in the same character class. |
| test.py:29:41:29:43 | .-? | Suspicious character range that overlaps with \\w in the same character class, and is equivalent to [.\\/0-9:;<=>?]. |

View File

@@ -0,0 +1 @@
Security/CWE-020/OverlyLargeRange.ql

View File

@@ -0,0 +1,29 @@
import re
overlap1 = re.compile(r'^[0-93-5]$') # NOT OK
overlap2 = re.compile(r'[A-ZA-z]') # NOT OK
isEmpty = re.compile(r'^[z-a]$') # NOT OK
isAscii = re.compile(r'^[\x00-\x7F]*$') # OK
printable = re.compile(r'[!-~]') # OK - used to select most printable ASCII characters
codePoints = re.compile(r'[^\x21-\x7E]|[[\](){}<>/%]') # OK
NON_ALPHANUMERIC_REGEXP = re.compile(r'([^\#-~| |!])') # OK
smallOverlap = re.compile(r'[0-9a-fA-f]') # NOT OK
weirdRange = re.compile(r'[$-`]') # NOT OK
keywordOperator = re.compile(r'[!\~\*\/%+-<>\^|=&]') # NOT OK
notYoutube = re.compile(r'youtu\.be\/[a-z1-9.-_]+') # NOT OK
numberToLetter = re.compile(r'[7-F]') # NOT OK
overlapsWithClass1 = re.compile(r'[0-9\d]') # NOT OK
overlapsWithClass2 = re.compile(r'[\w,.-?:*+]') # NOT OK

View File

@@ -1,5 +1,5 @@
import python
import semmle.python.security.performance.SuperlinearBackTracking
import semmle.python.security.regexp.SuperlinearBackTracking
from PolynomialBackTrackingTerm t
select t.getRegex(), t, t.getReason()

View File

@@ -1,4 +1,4 @@
| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
@@ -31,7 +31,7 @@
| redos.py:127:25:127:38 | ([a-z]\|[d-h])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
| redos.py:130:25:130:40 | ([^a-z]\|[^0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
| redos.py:133:25:133:35 | (\\d\|[0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:136:25:136:32 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:136:25:136:32 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:139:25:139:31 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
| redos.py:145:25:145:32 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:148:25:148:31 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
@@ -46,7 +46,7 @@
| redos.py:175:26:175:30 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:187:26:187:31 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
| redos.py:190:27:190:29 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '\\n' and containing many repetitions of '\\n'. |
| redos.py:193:28:193:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:193:28:193:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:196:78:196:89 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A:' and containing many repetitions of ' A:'. |
| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
@@ -65,20 +65,20 @@
| redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:256:49:256:51 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:256:61:256:63 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\tthisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:262:24:262:87 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of '0querythis'. |
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"" a='. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""\\t0='. |
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

View File

@@ -1,6 +1,6 @@
import python
abstract class XmlBytecodeExpr extends XMLElement { }
abstract class XmlBytecodeExpr extends XmlElement { }
/** DEPRECATED: Alias for XmlBytecodeExpr */
deprecated class XMLBytecodeExpr = XmlBytecodeExpr;

View File

@@ -4,14 +4,14 @@ import semmle.python.objects.Callables
import lib.BytecodeExpr
/** The XML data for a recorded call (includes all data). */
class XmlRecordedCall extends XMLElement {
class XmlRecordedCall extends XmlElement {
XmlRecordedCall() { this.hasName("recorded_call") }
/** Gets the XML data for the call. */
XmlCall getXmlCall() { result.getParent() = this }
/** DEPRECATED: Alias for getXmlCall */
deprecated XMLCall getXMLCall() { result = getXmlCall() }
deprecated XMLCall getXMLCall() { result = this.getXmlCall() }
/** Gets a call matching the recorded information. */
Call getACall() { result = this.getXmlCall().getACall() }
@@ -20,7 +20,7 @@ class XmlRecordedCall extends XMLElement {
XmlCallee getXmlCallee() { result.getParent() = this }
/** DEPRECATED: Alias for getXmlCallee */
deprecated XMLCallee getXMLCallee() { result = getXmlCallee() }
deprecated XMLCallee getXMLCallee() { result = this.getXmlCallee() }
/** Gets a python function matching the recorded information of the callee. */
Function getAPythonCallee() { result = this.getXmlCallee().(XmlPythonCallee).getACallee() }
@@ -61,7 +61,7 @@ class XmlRecordedCall extends XMLElement {
deprecated class XMLRecordedCall = XmlRecordedCall;
/** The XML data for the call part a recorded call. */
class XmlCall extends XMLElement {
class XmlCall extends XmlElement {
XmlCall() { this.hasName("Call") }
string get_filename_data() { result = this.getAChild("filename").getTextValue() }
@@ -90,10 +90,10 @@ class XmlCall extends XMLElement {
expr.(Name).getId() = bytecode.(XmlBytecodeVariableName).get_name_data()
or
expr.(Attribute).getName() = bytecode.(XmlBytecodeAttribute).get_attr_name_data() and
matchBytecodeExpr(expr.(Attribute).getObject(),
this.matchBytecodeExpr(expr.(Attribute).getObject(),
bytecode.(XmlBytecodeAttribute).get_object_data())
or
matchBytecodeExpr(expr.(Call).getFunc(), bytecode.(XmlBytecodeCall).get_function_data())
this.matchBytecodeExpr(expr.(Call).getFunc(), bytecode.(XmlBytecodeCall).get_function_data())
//
// I considered allowing a partial match as well. That is, if the bytecode
// expression information only tells us `<unknown>.foo()`, and we find an AST
@@ -114,7 +114,7 @@ class XmlCall extends XMLElement {
deprecated class XMLCall = XmlCall;
/** The XML data for the callee part a recorded call. */
abstract class XmlCallee extends XMLElement { }
abstract class XmlCallee extends XmlElement { }
/** DEPRECATED: Alias for XmlCallee */
deprecated class XMLCallee = XmlCallee;