Merge branch 'main' into setliterals

This commit is contained in:
Geoffrey White
2021-10-14 14:39:39 +01:00
216 changed files with 9196 additions and 2926 deletions

View File

@@ -55,7 +55,7 @@ module API {
/**
* Gets a call to the function represented by this API component.
*/
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() }
/**
* Gets a node representing member `m` of this API component.
@@ -67,21 +67,21 @@ module API {
*/
bindingset[m]
bindingset[result]
Node getMember(string m) { result = getASuccessor(Label::member(m)) }
Node getMember(string m) { result = this.getASuccessor(Label::member(m)) }
/**
* Gets a node representing a member of this API component where the name of the member is
* not known statically.
*/
Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
Node getUnknownMember() { result = this.getASuccessor(Label::unknownMember()) }
/**
* Gets a node representing a member of this API component where the name of the member may
* or may not be known statically.
*/
Node getAMember() {
result = getASuccessor(Label::member(_)) or
result = getUnknownMember()
result = this.getASuccessor(Label::member(_)) or
result = this.getUnknownMember()
}
/**
@@ -90,23 +90,25 @@ module API {
* This predicate may have multiple results when there are multiple invocations of this API component.
* Consider using `getACall()` if there is a need to distinguish between individual calls.
*/
Node getReturn() { result = getASuccessor(Label::return()) }
Node getReturn() { result = this.getASuccessor(Label::return()) }
/**
* Gets a node representing a subclass of the class represented by this node.
*/
Node getASubclass() { result = getASuccessor(Label::subclass()) }
Node getASubclass() { result = this.getASuccessor(Label::subclass()) }
/**
* Gets a node representing the result from awaiting this node.
*/
Node getAwaited() { result = getASuccessor(Label::await()) }
Node getAwaited() { result = this.getASuccessor(Label::await()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
*/
string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
string getPath() {
result = min(string p | p = this.getAPath(Impl::distanceFromRoot(this)) | p)
}
/**
* Gets a node such that there is an edge in the API graph between this node and the other
@@ -124,13 +126,13 @@ module API {
* Gets a node such that there is an edge in the API graph between this node and the other
* one.
*/
Node getAPredecessor() { result = getAPredecessor(_) }
Node getAPredecessor() { result = this.getAPredecessor(_) }
/**
* Gets a node such that there is an edge in the API graph between that other node and
* this one.
*/
Node getASuccessor() { result = getASuccessor(_) }
Node getASuccessor() { result = this.getASuccessor(_) }
/**
* Gets the data-flow node that gives rise to this node, if any.
@@ -147,11 +149,11 @@ module API {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or
// For nodes that do not have a meaningful location, `filepath` is the empty string and all other
// parameters are zero.
not exists(getInducingNode()) and
not exists(this.getInducingNode()) and
filepath = "" and
startline = 0 and
startcolumn = 0 and
@@ -202,7 +204,7 @@ module API {
or
this = Impl::MkModuleImport(_) and type = "ModuleImport "
|
result = type + getPath()
result = type + this.getPath()
or
not exists(this.getPath()) and result = type + "with no path"
)

View File

@@ -355,6 +355,53 @@ module SqlExecution {
}
}
/**
 * A data-flow node at which a regular expression is executed.
 *
 * Extend this class to refine existing API models. To model new APIs,
 * extend `RegexExecution::Range` instead.
 */
class RegexExecution extends DataFlow::Node {
  RegexExecution::Range range;

  RegexExecution() { range = this }

  /** Gets the data-flow node for the regex that this node executes. */
  DataFlow::Node getRegex() { range.getRegex() = result }

  /** Gets a data-flow node for the string that is searched or matched against. */
  DataFlow::Node getString() { range.getString() = result }

  /**
   * Gets the name of this regex execution, typically the name of the executing method.
   * The name is used in alert messages and should include the module where possible.
   */
  string getName() { range.getName() = result }
}
/**
 * Provides classes for modeling new regular-expression execution APIs.
 *
 * The `RegexExecution` class delegates `getRegex`, `getString` and `getName`
 * to the `Range` class declared here.
 */
module RegexExecution {
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*
* All three member predicates are abstract, so a concrete subclass has to
* implement each of them.
*/
abstract class Range extends DataFlow::Node {
/** Gets the data-flow node for the regex being executed by this node. */
abstract DataFlow::Node getRegex();
/** Gets a data-flow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
/**
* Gets the name of this regex execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
abstract string getName();
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
@@ -411,6 +458,9 @@ module Escaping {
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
/** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */
string getRegexKind() { result = "regex" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
@@ -427,6 +477,14 @@ class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/**
 * An escaping of a string that makes it safe to include in
 * the body of a regex.
 */
class RegexEscaping extends Escaping {
  RegexEscaping() { Escaping::getRegexKind() = range.getKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants

View File

@@ -240,7 +240,7 @@ class Call extends Call_ {
/** Gets the tuple (*) argument of this call, provided there is exactly one. */
Expr getStarArg() {
count(this.getStarargs()) < 2 and
result = getStarargs()
result = this.getStarargs()
}
}

View File

@@ -256,7 +256,7 @@ abstract class Container extends @container {
* </table>
*/
string getBaseName() {
result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
}
/**
@@ -282,7 +282,9 @@ abstract class Container extends @container {
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
* </table>
*/
string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) }
string getExtension() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
}
/**
* Gets the stem of this container, that is, the prefix of its base name up to
@@ -301,7 +303,9 @@ abstract class Container extends @container {
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
* </table>
*/
string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) }
string getStem() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
}
File getFile(string baseName) {
result = this.getAFile() and

View File

@@ -851,9 +851,9 @@ class ForNode extends ControlFlowNode {
/** Holds if this `for` statement causes iteration over `sequence` storing each step of the iteration in `target` */
predicate iterates(ControlFlowNode target, ControlFlowNode sequence) {
sequence = getSequence() and
target = possibleTarget() and
not target = unrolledSuffix().possibleTarget()
sequence = this.getSequence() and
target = this.possibleTarget() and
not target = this.unrolledSuffix().possibleTarget()
}
/** Gets the sequence node for this `for` statement. */

View File

@@ -31,7 +31,7 @@ class ImportExpr extends ImportExpr_ {
// relative imports are no longer allowed in Python 3
major_version() < 3 and
// and can be explicitly turned off in later versions of Python 2
not getEnclosingModule().hasFromFuture("absolute_import")
not this.getEnclosingModule().hasFromFuture("absolute_import")
}
/**
@@ -53,8 +53,8 @@ class ImportExpr extends ImportExpr_ {
* the name of the topmost module that will be imported.
*/
private string relativeTopName() {
getLevel() = -1 and
result = basePackageName(1) + "." + this.getTopName() and
this.getLevel() = -1 and
result = this.basePackageName(1) + "." + this.getTopName() and
valid_module_name(result)
}
@@ -62,7 +62,7 @@ class ImportExpr extends ImportExpr_ {
if this.getLevel() <= 0
then result = this.getTopName()
else (
result = basePackageName(this.getLevel()) and
result = this.basePackageName(this.getLevel()) and
valid_module_name(result)
)
}
@@ -73,17 +73,17 @@ class ImportExpr extends ImportExpr_ {
* which may not be the name of the module.
*/
string bottomModuleName() {
result = relativeTopName() + this.remainderOfName()
result = this.relativeTopName() + this.remainderOfName()
or
not exists(relativeTopName()) and
not exists(this.relativeTopName()) and
result = this.qualifiedTopName() + this.remainderOfName()
}
/** Gets the name of the topmost module or package being imported. */
string topModuleName() {
result = relativeTopName()
result = this.relativeTopName()
or
not exists(relativeTopName()) and
not exists(this.relativeTopName()) and
result = this.qualifiedTopName()
}
@@ -94,7 +94,7 @@ class ImportExpr extends ImportExpr_ {
*/
string getImportedModuleName() {
exists(string bottomName | bottomName = this.bottomModuleName() |
if this.isTop() then result = topModuleName() else result = bottomName
if this.isTop() then result = this.topModuleName() else result = bottomName
)
}

View File

@@ -86,13 +86,13 @@ class Module extends Module_, Scope, AstNode {
/** Gets the package containing this module (or parent package if this is a package) */
Module getPackage() {
this.getName().matches("%.%") and
result.getName() = getName().regexpReplaceAll("\\.[^.]*$", "")
result.getName() = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the name of the package containing this module */
string getPackageName() {
this.getName().matches("%.%") and
result = getName().regexpReplaceAll("\\.[^.]*$", "")
result = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the metrics for this module */

View File

@@ -49,16 +49,17 @@ newtype TRegExpParent =
* or another regular expression term.
*/
class RegExpParent extends TRegExpParent {
/** Gets a textual representation of this element. */
string toString() { result = "RegExpParent" }
/** Gets the `i`th child term. */
abstract RegExpTerm getChild(int i);
/** Gets a child term. */
RegExpTerm getAChild() { result = getChild(_) }
RegExpTerm getAChild() { result = this.getChild(_) }
/** Gets the number of child terms. */
int getNumChild() { result = count(getAChild()) }
int getNumChild() { result = count(this.getAChild()) }
/** Gets the associated regex. */
abstract Regex getRegex();
@@ -72,14 +73,18 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
/** Holds if dot, `.`, matches all characters, including newlines. */
predicate isDotAll() { re.getAMode() = "DOTALL" }
/** Holds if matching is case-insensitive for this regex. */
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
/** Gets a string representing all modes for this regex. */
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
override Regex getRegex() { result = re }
/** Gets the primary QL class for this regex. */
string getPrimaryQLClass() { result = "RegExpLiteral" }
}
@@ -117,7 +122,7 @@ class RegExpTerm extends RegExpParent {
RegExpTerm getRootTerm() {
this.isRootTerm() and result = this
or
result = getParent().(RegExpTerm).getRootTerm()
result = this.getParent().(RegExpTerm).getRootTerm()
}
/**
@@ -196,7 +201,7 @@ class RegExpTerm extends RegExpParent {
/** Gets the regular expression term that is matched (textually) before this one, if any. */
RegExpTerm getPredecessor() {
exists(RegExpTerm parent | parent = getParent() |
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).previousElement(this)
or
not exists(parent.(RegExpSequence).previousElement(this)) and
@@ -207,7 +212,7 @@ class RegExpTerm extends RegExpParent {
/** Gets the regular expression term that is matched (textually) after this one, if any. */
RegExpTerm getSuccessor() {
exists(RegExpTerm parent | parent = getParent() |
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).nextElement(this)
or
not exists(parent.(RegExpSequence).nextElement(this)) and
@@ -246,8 +251,10 @@ class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
result.getEnd() = part_end
}
/** Holds if this term may match an unlimited number of times. */
predicate mayRepeatForever() { may_repeat_forever = true }
/** Gets the qualifier for this term, e.g. "?" for "a?". */
string getQualifier() { result = re.getText().substring(part_end, end) }
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
@@ -322,8 +329,10 @@ class RegExpRange extends RegExpQuantifier {
RegExpRange() { re.multiples(part_end, end, lower, upper) }
/** Gets the string defining the upper bound of this range, if any. */
string getUpper() { result = upper }
/** Gets the string defining the lower bound of this range, if any. */
string getLower() { result = lower }
/**
@@ -358,7 +367,7 @@ class RegExpSequence extends RegExpTerm, TRegExpSequence {
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
/** Gets the element preceding `element` in this sequence. */
RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) }
RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
/** Gets the element following `element` in this sequence. */
RegExpTerm nextElement(RegExpTerm element) {
@@ -461,15 +470,17 @@ class RegExpEscape extends RegExpNormalChar {
// TODO: Find a way to include a formfeed character
// this.getUnescaped() = "f" and result = " "
// or
isUnicode() and
result = getUnicode()
this.isUnicode() and
result = this.getUnicode()
}
/** Holds if this term's name is given by the part following the escape character. */
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
override string getPrimaryQLClass() { result = "RegExpEscape" }
string getUnescaped() { result = this.getText().suffix(1) }
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
private string getUnescaped() { result = this.getText().suffix(1) }
/**
* Gets the text for this escape. That is e.g. "\w".
@@ -479,7 +490,7 @@ class RegExpEscape extends RegExpNormalChar {
/**
* Holds if this is a unicode escape.
*/
private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] }
private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
/**
* Gets the unicode char for this escape.
@@ -536,15 +547,8 @@ private int toHex(string hex) {
* ```
*/
class RegExpCharacterClassEscape extends RegExpEscape {
// string value;
RegExpCharacterClassEscape() {
// value = re.getText().substring(start + 1, end) and
// value in ["d", "D", "s", "S", "w", "W"]
this.getValue() in ["d", "D", "s", "S", "w", "W"]
}
RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
/** Gets the name of the character class; for example, `w` for `\w`. */
// override string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
@@ -563,19 +567,22 @@ class RegExpCharacterClassEscape extends RegExpEscape {
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
/** Holds if this character class is inverted, matching the opposite of its content. */
predicate isInverted() { re.getChar(start + 1) = "^" }
/** Gets the `i`th char inside this character class. */
string getCharThing(int i) { result = re.getChar(i + start) }
/** Holds if this character class can match anything. */
predicate isUniversalClass() {
// [^]
isInverted() and not exists(getAChild())
this.isInverted() and not exists(this.getAChild())
or
// [\w\W] and similar
not isInverted() and
not this.isInverted() and
exists(string cce1, string cce2 |
cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
|
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
)
@@ -620,6 +627,7 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
re.charRange(_, start, lower_end, upper_start, end)
}
/** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
predicate isRange(string lo, string hi) {
lo = re.getText().substring(start, lower_end) and
hi = re.getText().substring(upper_start, end)
@@ -653,8 +661,13 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the string representation of the char matched by this term. */
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) { none() }
@@ -684,15 +697,15 @@ class RegExpConstant extends RegExpTerm {
qstart <= start and end <= qend
) and
value = this.(RegExpNormalChar).getValue()
// This will never hold
// or
// this = TRegExpSpecialChar(re, start, end) and
// re.inCharSet(start) and
// value = this.(RegExpSpecialChar).getChar()
}
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the string matched by this constant term. */
string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
@@ -731,10 +744,6 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
/** Gets the name of this capture group, if any. */
string getName() { result = re.getGroupName(start, end) }
predicate isCharacter() { any() }
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) {
result.getRegex() = re and
i = 0 and
@@ -762,8 +771,13 @@ class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
re.specialCharacter(start, end, char)
}
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the char for this term. */
string getChar() { result = char }
override RegExpTerm getChild(int i) { none() }
@@ -828,8 +842,6 @@ class RegExpCaret extends RegExpSpecialChar {
class RegExpZeroWidthMatch extends RegExpGroup {
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
override predicate isCharacter() { any() }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }

View File

@@ -937,7 +937,7 @@ class CallContextSpecificCall extends CallContextCall, TSpecificCall {
}
override predicate relevantFor(DataFlowCallable callable) {
recordDataFlowCallSite(getCall(), callable)
recordDataFlowCallSite(this.getCall(), callable)
}
override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
@@ -1257,7 +1257,7 @@ abstract class AccessPathFront extends TAccessPathFront {
TypedContent getHead() { this = TFrontHead(result) }
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
}
class AccessPathFrontNil extends AccessPathFront, TFrontNil {

View File

@@ -610,11 +610,11 @@ class DataFlowLambda extends DataFlowCallable, TLambda {
override string toString() { result = lambda.toString() }
override CallNode getACall() { result = getCallableValue().getACall() }
override CallNode getACall() { result = this.getCallableValue().getACall() }
override Scope getScope() { result = lambda.getEvaluatingScope() }
override NameNode getParameter(int n) { result = getParameter(getCallableValue(), n) }
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
override string getName() { result = "Lambda callable" }

View File

@@ -62,12 +62,12 @@ class LocalSourceNode extends Node {
/**
* Gets a read of attribute `attrName` on this node.
*/
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
AttrRead getAnAttributeRead(string attrName) { result = this.getAnAttributeReference(attrName) }
/**
* Gets a write of attribute `attrName` on this node.
*/
AttrWrite getAnAttributeWrite(string attrName) { result = getAnAttributeReference(attrName) }
AttrWrite getAnAttributeWrite(string attrName) { result = this.getAnAttributeReference(attrName) }
/**
* Gets a reference (read or write) of any attribute on this node.
@@ -81,12 +81,12 @@ class LocalSourceNode extends Node {
/**
* Gets a read of any attribute on this node.
*/
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
AttrRead getAnAttributeRead() { result = this.getAnAttributeReference() }
/**
* Gets a write of any attribute on this node.
*/
AttrWrite getAnAttributeWrite() { result = getAnAttributeReference() }
AttrWrite getAnAttributeWrite() { result = this.getAnAttributeReference() }
/**
* Gets a call to this node.

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -225,9 +225,9 @@ class ModuleVariable extends SsaSourceVariable {
}
override ControlFlowNode getAnImplicitUse() {
result = global_variable_callnode()
result = this.global_variable_callnode()
or
result = global_variable_import()
result = this.global_variable_import()
or
exists(ImportTimeScope scope | scope.entryEdge(result, _) |
this = scope.getOuterVariable(_) or

View File

@@ -41,7 +41,7 @@ class EssaVariable extends TEssaDefinition {
*/
ControlFlowNode getASourceUse() {
exists(SsaSourceVariable var |
result = use_for_var(var) and
result = this.use_for_var(var) and
result = var.getASourceUse()
)
}
@@ -258,7 +258,7 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
/** Gets another definition of the same source variable that reaches this definition. */
private EssaDefinition reachingDefinition(BasicBlock pred) {
result.getScope() = this.getScope() and
result.getSourceVariable() = pred_var(pred) and
result.getSourceVariable() = this.pred_var(pred) and
result.reachesEndOfBlock(pred)
}

View File

@@ -424,7 +424,7 @@ module AiohttpWebModel {
override string getAttributeName() { none() }
override string getMethodName() { result in ["read_nowait"] }
override string getMethodName() { result = "read_nowait" }
override string getAsyncMethodName() {
result in [

View File

@@ -116,7 +116,7 @@ private module CryptodomeModel {
] and
this =
API::moduleImport(["Crypto", "Cryptodome"])
.getMember(["Cipher"])
.getMember("Cipher")
.getMember(cipherName)
.getMember("new")
.getReturn()
@@ -135,21 +135,21 @@ private module CryptodomeModel {
or
// for the following methods, method signatures can be found in
// https://pycryptodome.readthedocs.io/en/latest/src/cipher/modern.html
methodName in ["update"] and
methodName = "update" and
result in [this.getArg(0), this.getArgByName("data")]
or
// although `mac_tag` is used as the parameter name in the spec above, some implementations use `received_mac_tag`, for an example, see
// https://github.com/Legrandin/pycryptodome/blob/5dace638b70ac35bb5d9b565f3e75f7869c9d851/lib/Crypto/Cipher/ChaCha20_Poly1305.py#L207
methodName in ["verify"] and
methodName = "verify" and
result in [this.getArg(0), this.getArgByName(["mac_tag", "received_mac_tag"])]
or
methodName in ["hexverify"] and
methodName = "hexverify" and
result in [this.getArg(0), this.getArgByName("mac_tag_hex")]
or
methodName in ["encrypt_and_digest"] and
methodName = "encrypt_and_digest" and
result in [this.getArg(0), this.getArgByName("plaintext")]
or
methodName in ["decrypt_and_verify"] and
methodName = "decrypt_and_verify" and
result in [
this.getArg(0), this.getArgByName("ciphertext"), this.getArg(1),
this.getArgByName("mac_tag")
@@ -169,7 +169,7 @@ private module CryptodomeModel {
methodName in ["sign", "verify"] and
this =
API::moduleImport(["Crypto", "Cryptodome"])
.getMember(["Signature"])
.getMember("Signature")
.getMember(signatureName)
.getMember("new")
.getReturn()
@@ -185,11 +185,11 @@ private module CryptodomeModel {
methodName = "sign" and
result in [this.getArg(0), this.getArgByName("msg_hash")] // Cryptodome.Hash instance
or
methodName in ["verify"] and
methodName = "verify" and
(
result in [this.getArg(0), this.getArgByName(["msg_hash"])] // Cryptodome.Hash instance
result in [this.getArg(0), this.getArgByName("msg_hash")] // Cryptodome.Hash instance
or
result in [this.getArg(1), this.getArgByName(["signature"])]
result in [this.getArg(1), this.getArgByName("signature")]
)
}
}
@@ -204,7 +204,7 @@ private module CryptodomeModel {
CryptodomeGenericHashOperation() {
exists(API::Node hashModule |
hashModule =
API::moduleImport(["Crypto", "Cryptodome"]).getMember(["Hash"]).getMember(hashName)
API::moduleImport(["Crypto", "Cryptodome"]).getMember("Hash").getMember(hashName)
|
this = hashModule.getMember("new").getACall()
or

View File

@@ -1844,11 +1844,13 @@ private module PrivateDjango {
t.start() and
result.asCfgNode().(CallNode).getFunction() = this.asViewRef().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = asViewResult(t2).track(t2, t))
exists(DataFlow::TypeTracker t2 | result = this.asViewResult(t2).track(t2, t))
}
/** Gets a reference to the result of calling the `as_view` classmethod of this class. */
DataFlow::Node asViewResult() { asViewResult(DataFlow::TypeTracker::end()).flowsTo(result) }
DataFlow::Node asViewResult() {
this.asViewResult(DataFlow::TypeTracker::end()).flowsTo(result)
}
}
/** A class that we consider a django View class. */
@@ -1944,10 +1946,10 @@ private module PrivateDjango {
abstract DataFlow::Node getViewArg();
final override DjangoRouteHandler getARequestHandler() {
poorMansFunctionTracker(result) = getViewArg()
poorMansFunctionTracker(result) = this.getViewArg()
or
exists(DjangoViewClass vc |
getViewArg() = vc.asViewResult() and
this.getViewArg() = vc.asViewResult() and
result = vc.getARequestHandler()
)
}

View File

@@ -292,12 +292,12 @@ module Flask {
override Function getARequestHandler() {
exists(DataFlow::LocalSourceNode func_src |
func_src.flowsTo(getViewArg()) and
func_src.flowsTo(this.getViewArg()) and
func_src.asExpr().(CallableExpr) = result.getDefinition()
)
or
exists(FlaskViewClass vc |
getViewArg() = vc.asViewResult().getAUse() and
this.getViewArg() = vc.asViewResult().getAUse() and
result = vc.getARequestHandler()
)
}

View File

@@ -195,6 +195,101 @@ private module StdlibPrivate {
}
}
/**
 * A call to one of the `os.path` functions that probe the file system for a given path,
 * which makes the call a file system access.
 *
 * - `exists`, `lexists`, `isfile`, `isdir`, `islink` and `ismount` check if the file exists.
 *   (Although, on some platforms, the check may return `false` due to missing permissions.)
 * - `getatime`, `getmtime`, `getctime` and `getsize` raise an error if the file does not
 *   exist (for `getatime`: `OSError` if the file does not exist or is inaccessible).
 *
 * See:
 * - https://docs.python.org/3/library/os.path.html#os.path.exists
 * - https://docs.python.org/3/library/os.path.html#os.path.lexists
 * - https://docs.python.org/3/library/os.path.html#os.path.isfile
 * - https://docs.python.org/3/library/os.path.html#os.path.isdir
 * - https://docs.python.org/3/library/os.path.html#os.path.islink
 * - https://docs.python.org/3/library/os.path.html#os.path.ismount
 * - https://docs.python.org/3/library/os.path.html#os.path.getatime
 * - https://docs.python.org/3/library/os.path.html#os.path.getmtime
 * - https://docs.python.org/3/library/os.path.html#os.path.getctime
 * - https://docs.python.org/3/library/os.path.html#os.path.getsize
 */
private class OsPathProbingCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  OsPathProbingCall() {
    exists(string probe |
      // these check if the file exists
      probe in ["exists", "lexists", "isfile", "isdir", "islink", "ismount"]
      or
      // these raise errors if the file does not exist
      probe in ["getatime", "getmtime", "getctime", "getsize"]
    |
      this = os::path().getMember(probe).getACall()
    )
  }

  /** Gets the probed path: the first positional argument or the `path` keyword argument. */
  override DataFlow::Node getAPathArgument() {
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
  }
}
/**
 * A call to `os.path.samefile`, which will raise an exception if an `os.stat()` call
 * on either pathname fails. Both pathnames are therefore path arguments of this access.
 */
private class OsPathSamefileCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  OsPathSamefileCall() { os::path().getMember("samefile").getACall() = this }

  override DataFlow::Node getAPathArgument() {
    // first pathname
    result = this.getArg(0)
    or
    result = this.getArgByName("path1")
    or
    // second pathname
    result = this.getArg(1)
    or
    result = this.getArgByName("path2")
  }
}
// Functions with non-standard arguments:
// - os.path.join(path, *paths)
// - os.path.relpath(path, start=os.curdir)
// these functions need special treatment when computing `getPathArg`.
//
// Functions that are excluded because they can act as sanitizers:
// - os.path.commonpath(paths): takes a sequence
// - os.path.commonprefix(list): takes a list argument
// unless the user controls all arguments, we are comparing with a known value.
/**
 * Gets the name of a function in `os.path` that computes a new path from existing paths.
 *
 * `commonpath` and `commonprefix` are deliberately absent from this list, for the reason
 * given in the comment directly above this predicate.
 */
private string pathComputation() {
  result in [
      "abspath", "basename", "dirname", "expanduser", "expandvars", "join", "normcase",
      "normpath", "realpath", "relpath", "split", "splitdrive", "splitext"
    ]
}
/**
 * A call to one of the `os.path` functions named by `pathComputation()`, i.e. a function
 * that computes a new path from existing paths.
 * These should all propagate taint.
 */
private class OsPathComputation extends DataFlow::CallCfgNode {
  string methodName;

  OsPathComputation() {
    this = os::path().getMember(methodName).getACall() and
    methodName = pathComputation()
  }

  /** Gets an argument of this call that contributes to the computed path. */
  DataFlow::Node getPathArg() {
    // for every modelled function: the first positional argument or the `path` keyword
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
    or
    // os.path.join(path, *paths): any positional argument
    methodName = "join" and
    result = this.getArg(_)
    or
    // os.path.relpath(path, start=os.curdir): additionally the `start` argument
    methodName = "relpath" and
    (
      result = this.getArg(1)
      or
      result = this.getArgByName("start")
    )
  }
}
/**
 * An additional taint step for path computations: taint flows from any path argument
 * of an `OsPathComputation` call to the call itself.
 */
private class OsPathComputationAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathComputation).getPathArg()
  }
}
/**
* A call to `os.path.normpath`.
* See https://docs.python.org/3/library/os.path.html#os.path.normpath
@@ -205,16 +300,6 @@ private module StdlibPrivate {
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/**
 * An additional taint step for calls to `os.path.normpath`: taint flows from the
 * path argument of the call to the call itself.
 */
private class OsPathNormpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathNormpathCall).getPathArg()
  }
}
/**
* A call to `os.path.abspath`.
* See https://docs.python.org/3/library/os.path.html#os.path.abspath
@@ -225,16 +310,6 @@ private module StdlibPrivate {
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/**
 * An additional taint step for calls to `os.path.abspath`: taint flows from the
 * path argument of the call to the call itself.
 */
private class OsPathAbspathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathAbspathCall).getPathArg()
  }
}
/**
* A call to `os.path.realpath`.
* See https://docs.python.org/3/library/os.path.html#os.path.realpath
@@ -245,16 +320,6 @@ private module StdlibPrivate {
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/**
 * An additional taint step for calls to `os.path.realpath`: taint flows from the
 * path argument of the call to the call itself.
 */
private class OsPathRealpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathRealpathCall).getPathArg()
  }
}
/**
* A call to `os.system`.
* See https://docs.python.org/3/library/os.html#os.system
@@ -397,8 +462,8 @@ private module StdlibPrivate {
result = this.get_executable_arg()
or
exists(DataFlow::Node arg_args, boolean shell |
arg_args = get_args_arg() and
shell = get_shell_arg_value()
arg_args = this.get_args_arg() and
shell = this.get_shell_arg_value()
|
// When "executable" argument is set, and "shell" argument is `False`, the
// "args" argument will only be used to set the program name and arguments to
@@ -1212,7 +1277,7 @@ private module StdlibPrivate {
/**
* Gets a name of an attribute of a `pathlib.Path` object that is also a `pathlib.Path` object.
*/
private string pathlibPathAttribute() { result in ["parent"] }
private string pathlibPathAttribute() { result = "parent" }
/**
* Gets a name of a method of a `pathlib.Path` object that returns a `pathlib.Path` object.
@@ -1571,6 +1636,119 @@ private module StdlibPrivate {
result = this.getArg(any(int i | i >= msgIndex))
}
}
// ---------------------------------------------------------------------------
// re
// ---------------------------------------------------------------------------
/**
 * The name of a method in the `re` module that immediately executes a regular expression.
 *
 * See https://docs.python.org/3/library/re.html#module-contents
 */
private class RegexExecutionMethod extends string {
  RegexExecutionMethod() {
    this = ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
  }

  /** Gets the index of the argument representing the string to be searched by a regex. */
  int getStringArgIndex() {
    // `sub` and `subn` use index 2 for the searched string;
    // every other method covered by this class uses index 1.
    if this in ["sub", "subn"] then result = 2 else result = 1
  }
}
/**
 * A call to a function from the `re` module that immediately executes a regular expression.
 *
 * See `RegexExecutionMethod`
 */
private class DirectRegexExecution extends DataFlow::CallCfgNode, RegexExecution::Range {
  RegexExecutionMethod method;

  DirectRegexExecution() { API::moduleImport("re").getMember(method).getACall() = this }

  override DataFlow::Node getRegex() {
    result = this.getArg(0)
    or
    result = this.getArgByName("pattern")
  }

  override DataFlow::Node getString() {
    // the positional index of the searched string depends on the method being called
    exists(int stringIndex | stringIndex = method.getStringArgIndex() |
      result = this.getArg(stringIndex)
    )
    or
    result = this.getArgByName("string")
  }

  override string getName() { result = "re." + method }
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
/**
* Gets a type-tracking node, tracked with type tracker `t`, for the result of a call to
* `re.compile` whose pattern (first positional argument or `pattern` keyword argument)
* is `regex`.
*/
private DataFlow::TypeTrackingNode compiledRegex(DataFlow::TypeTracker t, DataFlow::Node regex) {
// base case: the `re.compile(...)` call itself, with `regex` bound to its pattern argument
t.start() and
result = API::moduleImport("re").getMember("compile").getACall() and
regex in [
result.(DataFlow::CallCfgNode).getArg(0),
result.(DataFlow::CallCfgNode).getArgByName("pattern")
]
or
// recursive case: follow one more type-tracking step from an already tracked node
exists(DataFlow::TypeTracker t2 | result = compiledRegex(t2, regex).track(t2, t))
}
/**
* Gets a data-flow node that the result of a `re.compile` call with pattern `regex`
* flows to, as determined by the type tracking above.
*/
DataFlow::Node compiledRegex(DataFlow::Node regex) {
compiledRegex(DataFlow::TypeTracker::end(), regex).flowsTo(result)
}
}
private import CompiledRegexes
/**
 * A method call on a compiled regular expression (obtained via `re.compile`) that
 * executes the regular expression.
 *
 * Given the following example:
 *
 * ```py
 * pattern = re.compile(input)
 * pattern.match(s)
 * ```
 *
 * This class will identify that `re.compile` compiles `input` and afterwards
 * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
 * and `this.getRegex()` will return the node for `input` (the pattern given to `re.compile`).
 *
 * See `RegexExecutionMethod`
 *
 * See https://docs.python.org/3/library/re.html#regular-expression-objects
 */
private class CompiledRegexExecution extends DataFlow::MethodCallNode, RegexExecution::Range {
  DataFlow::Node regexNode;
  RegexExecutionMethod method;

  CompiledRegexExecution() {
    exists(DataFlow::Node compiled | compiled = compiledRegex(regexNode) |
      this.calls(compiled, method)
    )
  }

  override DataFlow::Node getRegex() { regexNode = result }

  override DataFlow::Node getString() {
    // `getStringArgIndex()` is the index used for the direct `re.<method>(...)` calls;
    // here the string argument is looked up one position earlier.
    result = this.getArg(method.getStringArgIndex() - 1)
    or
    result = this.getArgByName("string")
  }

  override string getName() { result = "re." + method }
}
/**
 * A call to `re.escape`, modelled as an escaping whose kind is `Escaping::getRegexKind()`.
 * See https://docs.python.org/3/library/re.html#re.escape
 */
private class ReEscapeCall extends Escaping::Range, DataFlow::CallCfgNode {
  ReEscapeCall() { API::moduleImport("re").getMember("escape").getACall() = this }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("pattern")
  }

  // the call node itself is the escaped output
  override DataFlow::Node getOutput() { this = result }

  override string getKind() { Escaping::getRegexKind() = result }
}
}
// ---------------------------------------------------------------------------

View File

@@ -318,7 +318,7 @@ private module Tornado {
]
}
override string getMethodName() { result in ["full_url"] }
override string getMethodName() { result = "full_url" }
override string getAsyncMethodName() { none() }
}

View File

@@ -58,7 +58,7 @@ module Werkzeug {
override string getAttributeName() { none() }
override string getMethodName() { result in ["getlist"] }
override string getMethodName() { result = "getlist" }
override string getAsyncMethodName() { none() }
}

View File

@@ -68,7 +68,7 @@ module Yarl {
]
}
override string getMethodName() { result in ["human_repr"] }
override string getMethodName() { result = "human_repr" }
override string getAsyncMethodName() { none() }
}

View File

@@ -60,8 +60,8 @@ module PolynomialReDoS {
RegExpTerm t;
RegexExecutionAsSink() {
exists(CompiledRegexes::RegexExecution re |
re.getRegexNode().asExpr() = t.getRegex() and
exists(RegexExecution re |
re.getRegex().asExpr() = t.getRegex() and
this = re.getString()
) and
t.isRootTerm()
@@ -76,137 +76,3 @@ module PolynomialReDoS {
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
// TODO: This module should be refactored and merged with the experimental work done on detecting
// regex injections, such that this can be expressed from just using a concept.
/**
* A data-flow configuration for finding uses of compiled regexes: it tracks flow from
* `re.compile` calls (`RegexDefinitonSource`) to the objects on which a regex-executing
* method is called (`RegexDefinitionSink`).
*/
class RegexDefinitionConfiguration extends DataFlow2::Configuration {
RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
}
/**
* A regex compilation: a call to `re.compile`, together with the data-flow node of the
* pattern being compiled (first positional argument or `pattern` keyword argument).
*/
// NOTE(review): the class name is spelled `Definiton` (sic); it is kept as-is here because
// renaming it would require updating every reference.
class RegexDefinitonSource extends DataFlow::CallCfgNode {
DataFlow::Node regexNode;
RegexDefinitonSource() {
this = API::moduleImport("re").getMember("compile").getACall() and
regexNode in [this.getArg(0), this.getArgByName("pattern")]
}
/** Gets the root term of the regex that is being compiled by this node. */
RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() }
/** Gets the data flow node for the regex being compiled by this node. */
DataFlow::Node getRegexNode() { result = regexNode }
}
/**
* A use of a compiled regex: the object on which an attribute named like a
* `RegexExecutionMethod` is read and then called.
*/
class RegexDefinitionSink extends DataFlow::Node {
RegexExecutionMethod method;
DataFlow::CallCfgNode executingCall;
RegexDefinitionSink() {
exists(DataFlow::AttrRead reMethod |
executingCall.getFunction() = reMethod and
reMethod.getAttributeName() = method and
this = reMethod.getObject()
)
}
/** Gets the method used to execute the regex. */
RegexExecutionMethod getMethod() { result = method }
/** Gets the data flow node for the executing call. */
DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
}
/** A data flow node executing a regex. */
abstract class RegexExecution extends DataFlow::Node {
/** Gets the data flow node for the regex being executed by this node. */
abstract DataFlow::Node getRegexNode();
/** Gets a dataflow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
}
/** The name of a method that executes a regular expression. */
private class RegexExecutionMethod extends string {
RegexExecutionMethod() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/** Gets the index of the argument representing the string to be searched by a regex. */
int stringArg(RegexExecutionMethod method) {
method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
result = 1
or
method in ["sub", "subn"] and
result = 2
}
/**
* A class to find `re` methods immediately executing an expression.
*
* See `RegexExecutionMethod`
*/
class DirectRegex extends DataFlow::CallCfgNode, RegexExecution {
RegexExecutionMethod method;
DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }
override DataFlow::Node getRegexNode() {
result in [this.getArg(0), this.getArgByName("pattern")]
}
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method)), this.getArgByName("string")]
}
}
/**
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
*
*
* See `RegexExecutionMethod`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
DataFlow::Node regexNode;
RegexExecutionMethod method;
CompiledRegex() {
// a `re.compile` call flows to the object on which `method` is called by `this`
exists(
RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink
|
conf.hasFlow(source, sink) and
regexNode = source.getRegexNode() and
method = sink.getMethod() and
this = sink.getExecutingCall()
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override DataFlow::Node getString() {
// `stringArg` is the index used by `DirectRegex`; here the string argument is looked up
// one position earlier.
result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
}
}
}

View File

@@ -59,7 +59,7 @@ module ReflectedXSS {
class HtmlEscapingAsSanitizer extends Sanitizer {
HtmlEscapingAsSanitizer() {
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
// `TaintTracking::Configuration`, we treat the output as a taint-sanitizer. This
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
//

View File

@@ -0,0 +1,37 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `RegexInjection::Configuration` is needed, otherwise
* `RegexInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
module RegexInjection {
import RegexInjectionCustomizations::RegexInjection
/**
* A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
*
* Sources, sinks, sanitizers and sanitizer guards are the nodes that are instances of
* `Source`, `Sink`, `Sanitizer` and `SanitizerGuard` respectively.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,62 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
/**
 * Provides the default sources, sinks and sanitizers used for detecting
 * "regular expression injection" vulnerabilities,
 * as well as extension points for adding your own.
 */
module RegexInjection {
  /**
   * A data flow source for "regular expression injection" vulnerabilities.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A sink for "regular expression injection" vulnerabilities: the regular expression
   * that is executed by some `RegexExecution`.
   * If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
   */
  class Sink extends DataFlow::Node {
    RegexExecution regexExecution;

    Sink() { regexExecution.getRegex() = this }

    /** Gets the call that executes the regular expression marked by this sink. */
    RegexExecution getRegexExecution() { regexExecution = result }
  }

  /**
   * A sanitizer for "regular expression injection" vulnerabilities.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "regular expression injection" vulnerabilities.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * A regex escaping, considered as a sanitizer.
   */
  class RegexEscapingAsSanitizer extends Sanitizer {
    RegexEscapingAsSanitizer() {
      // Due to use-use flow, we want the output rather than an input
      // (so the input can still flow to other sinks).
      exists(RegexEscaping escaping | this = escaping.getOutput())
    }
  }
}

View File

@@ -477,7 +477,7 @@ private module CharacterClasses {
result = ["0", "9"]
or
cc.getValue() = "s" and
result = [" "]
result = " "
or
cc.getValue() = "w" and
result = ["a", "Z", "_", "0", "9"]
@@ -490,7 +490,7 @@ private module CharacterClasses {
result = "9"
or
cc.getValue() = "s" and
result = [" "]
result = " "
or
cc.getValue() = "w" and
result = "a"

View File

@@ -29,7 +29,7 @@ private predicate pyxl_tag(Call c, string name) {
}
class PyxlHtmlTag extends PyxlTag {
PyxlHtmlTag() { this.getPyxlTagName().prefix(2) = "x_" }
PyxlHtmlTag() { this.getPyxlTagName().matches("x\\_%") }
string getTagName() { result = this.getPyxlTagName().suffix(2) }

View File

@@ -33,7 +33,7 @@ class WsgiEnvironment extends TaintKind {
(
text = "QUERY_STRING" or
text = "PATH_INFO" or
text.prefix(5) = "HTTP_"
text.matches("HTTP\\_%")
)
)
}

View File

@@ -108,7 +108,7 @@ class XMLParent extends @xmlparent {
}
/** Gets the text value contained in this XML parent. */
string getTextValue() { result = allCharactersString() }
string getTextValue() { result = this.allCharactersString() }
/** Gets a printable representation of this XML parent. */
string toString() { result = this.getName() }
@@ -119,7 +119,7 @@ class XMLFile extends XMLParent, File {
XMLFile() { xmlEncoding(this, _) }
/** Gets a printable representation of this XML file. */
override string toString() { result = getName() }
override string toString() { result = this.getName() }
/** Gets the name of this XML file. */
override string getName() { result = File.super.getAbsolutePath() }
@@ -129,14 +129,14 @@ class XMLFile extends XMLParent, File {
*
* Gets the path of this XML file.
*/
deprecated string getPath() { result = getAbsolutePath() }
deprecated string getPath() { result = this.getAbsolutePath() }
/**
* DEPRECATED: Use `getParentContainer().getAbsolutePath()` instead.
*
* Gets the path of the folder that contains this XML file.
*/
deprecated string getFolder() { result = getParentContainer().getAbsolutePath() }
deprecated string getFolder() { result = this.getParentContainer().getAbsolutePath() }
/** Gets the encoding of this XML file. */
string getEncoding() { xmlEncoding(this, result) }
@@ -200,7 +200,7 @@ class XMLDTD extends XMLLocatable, @xmldtd {
*/
class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
/** Holds if this XML element has the given `name`. */
predicate hasName(string name) { name = getName() }
predicate hasName(string name) { name = this.getName() }
/** Gets the name of this XML element. */
override string getName() { xmlElements(this, result, _, _, _) }
@@ -239,7 +239,7 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
string getAttributeValue(string name) { result = this.getAttribute(name).getValue() }
/** Gets a printable representation of this XML element. */
override string toString() { result = getName() }
override string toString() { result = this.getName() }
}
/**