Merge branch 'main' into jorgectf/python/headerInjection

This commit is contained in:
jorgectf
2021-10-16 12:46:57 +02:00
1988 changed files with 125883 additions and 19969 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of SQL execution in the `Flask-SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of SQL execution in the `SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @mrthankyou](https://github.com/github/codeql/pull/5680).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Expanded the query _SQL query built from user-controlled sources_ (`py/sql-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The query "Regular expression injection" (`py/regex-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5442).

View File

@@ -0,0 +1,3 @@
lgtm,codescanning
* Added data-flow from both `x` and `y` to `x or y` and `x and y`, as a slight over-approximation of what is described in the
[Python Language Reference](https://docs.python.org/3/reference/expressions.html#boolean-operations).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Now we fully support `pickle.load`, `pickle.loads`, `pickle.Unpickler`, `marshal.load`, `marshal.loads`, `dill.load`, `dill.loads`, `shelve.open`.

View File

@@ -55,7 +55,7 @@ module API {
/**
* Gets a call to the function represented by this API component.
*/
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() }
/**
* Gets a node representing member `m` of this API component.
@@ -67,21 +67,21 @@ module API {
*/
bindingset[m]
bindingset[result]
Node getMember(string m) { result = getASuccessor(Label::member(m)) }
Node getMember(string m) { result = this.getASuccessor(Label::member(m)) }
/**
* Gets a node representing a member of this API component where the name of the member is
* not known statically.
*/
Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
Node getUnknownMember() { result = this.getASuccessor(Label::unknownMember()) }
/**
* Gets a node representing a member of this API component where the name of the member may
* or may not be known statically.
*/
Node getAMember() {
result = getASuccessor(Label::member(_)) or
result = getUnknownMember()
result = this.getASuccessor(Label::member(_)) or
result = this.getUnknownMember()
}
/**
@@ -90,23 +90,25 @@ module API {
* This predicate may have multiple results when there are multiple invocations of this API component.
* Consider using `getACall()` if there is a need to distinguish between individual calls.
*/
Node getReturn() { result = getASuccessor(Label::return()) }
Node getReturn() { result = this.getASuccessor(Label::return()) }
/**
* Gets a node representing a subclass of the class represented by this node.
*/
Node getASubclass() { result = getASuccessor(Label::subclass()) }
Node getASubclass() { result = this.getASuccessor(Label::subclass()) }
/**
* Gets a node representing the result from awaiting this node.
*/
Node getAwaited() { result = getASuccessor(Label::await()) }
Node getAwaited() { result = this.getASuccessor(Label::await()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
*/
string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
string getPath() {
result = min(string p | p = this.getAPath(Impl::distanceFromRoot(this)) | p)
}
/**
* Gets a node such that there is an edge in the API graph between this node and the other
@@ -124,13 +126,13 @@ module API {
* Gets a node such that there is an edge in the API graph between this node and the other
* one.
*/
Node getAPredecessor() { result = getAPredecessor(_) }
Node getAPredecessor() { result = this.getAPredecessor(_) }
/**
* Gets a node such that there is an edge in the API graph between that other node and
* this one.
*/
Node getASuccessor() { result = getASuccessor(_) }
Node getASuccessor() { result = this.getASuccessor(_) }
/**
* Gets the data-flow node that gives rise to this node, if any.
@@ -142,16 +144,16 @@ module API {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or
// For nodes that do not have a meaningful location, `path` is the empty string and all other
// parameters are zero.
not exists(getInducingNode()) and
not exists(this.getInducingNode()) and
filepath = "" and
startline = 0 and
startcolumn = 0 and
@@ -202,7 +204,7 @@ module API {
or
this = Impl::MkModuleImport(_) and type = "ModuleImport "
|
result = type + getPath()
result = type + this.getPath()
or
not exists(this.getPath()) and result = type + "with no path"
)

View File

@@ -67,7 +67,7 @@ class CommentBlock extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -355,6 +355,53 @@ module SqlExecution {
}
}
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
RegexExecution() { this = range }
/** Gets the data flow node for the regex being executed by this node. */
DataFlow::Node getRegex() { result = range.getRegex() }
/** Gets a dataflow node for the string to be searched or matched against. */
DataFlow::Node getString() { result = range.getString() }
/**
* Gets the name of this regex execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
string getName() { result = range.getName() }
}
/** Provides classes for modeling new regular-expression execution APIs. */
module RegexExecution {
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the data flow node for the regex being executed by this node. */
abstract DataFlow::Node getRegex();
/** Gets a dataflow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
/**
* Gets the name of this regex execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
abstract string getName();
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
@@ -411,6 +458,9 @@ module Escaping {
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
/** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */
string getRegexKind() { result = "regex" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
@@ -427,6 +477,14 @@ class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/**
* An escape of a string so it can be safely included in
* the body of a regex.
*/
class RegexEscaping extends Escaping {
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants

View File

@@ -17,7 +17,7 @@ class Expr extends Expr_, AstNode {
* Whether this expression defines variable `v`
* If doing dataflow, then consider using SsaVariable.getDefinition() for more precision.
*/
predicate defines(Variable v) { this.getASubExpression+().defines(v) }
predicate defines(Variable v) { this.getASubExpression().defines(v) }
/** Whether this expression may have a side effect (as determined purely from its syntax) */
predicate hasSideEffects() {
@@ -240,7 +240,7 @@ class Call extends Call_ {
/** Gets the tuple (*) argument of this call, provided there is exactly one. */
Expr getStarArg() {
count(this.getStarargs()) < 2 and
result = getStarargs()
result = this.getStarargs()
}
}

View File

@@ -1,9 +1,7 @@
import python
/** A file */
class File extends Container {
File() { files(this, _, _, _, _) }
class File extends Container, @file {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
@@ -15,7 +13,7 @@ class File extends Container {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -34,9 +32,7 @@ class File extends Container {
}
/** Gets a short name for this file (just the file name) */
string getShortName() {
exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
}
string getShortName() { result = this.getBaseName() }
private int lastLine() {
result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
@@ -55,7 +51,7 @@ class File extends Container {
)
}
override string getAbsolutePath() { files(this, result, _, _, _) }
override string getAbsolutePath() { files(this, result) }
/** Gets the URL of this file. */
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
@@ -118,21 +114,16 @@ private predicate occupied_line(File f, int n) {
}
/** A folder (directory) */
class Folder extends Container {
Folder() { folders(this, _, _) }
class Folder extends Container, @folder {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
/** DEPRECATED: Use `getBaseName` instead. */
deprecated string getSimple() { folders(this, _, result) }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -144,7 +135,7 @@ class Folder extends Container {
endcolumn = 0
}
override string getAbsolutePath() { folders(this, result, _) }
override string getAbsolutePath() { folders(this, result) }
/** Gets the URL of this folder. */
override string getURL() { result = "folder://" + this.getAbsolutePath() }
@@ -265,7 +256,7 @@ abstract class Container extends @container {
* </table>
*/
string getBaseName() {
result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
}
/**
@@ -291,7 +282,9 @@ abstract class Container extends @container {
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
* </table>
*/
string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) }
string getExtension() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
}
/**
* Gets the stem of this container, that is, the prefix of its base name up to
@@ -310,7 +303,9 @@ abstract class Container extends @container {
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
* </table>
*/
string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) }
string getStem() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
}
File getFile(string baseName) {
result = this.getAFile() and
@@ -332,7 +327,7 @@ abstract class Container extends @container {
/**
* Gets a URL representing the location of this container.
*
* For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls).
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
*/
abstract string getURL();
@@ -438,7 +433,7 @@ class Location extends @location {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -466,7 +461,7 @@ class Line extends @py_line {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
DefinitionNode() {
exists(Assign a | a.getATarget().getAFlowNode() = this)
or
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
or
exists(Alias a | a.getAsname().getAFlowNode() = this)
or
augstore(_, this)
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
/* lhs = result */
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
or
/* lhs : annotation = result */
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
or
/* import result as lhs */
exists(Alias a | a.getAsname() = lhs and result = a.getValue())
or
@@ -846,9 +851,9 @@ class ForNode extends ControlFlowNode {
/** Holds if this `for` statement causes iteration over `sequence` storing each step of the iteration in `target` */
predicate iterates(ControlFlowNode target, ControlFlowNode sequence) {
sequence = getSequence() and
target = possibleTarget() and
not target = unrolledSuffix().possibleTarget()
sequence = this.getSequence() and
target = this.possibleTarget() and
not target = this.unrolledSuffix().possibleTarget()
}
/** Gets the sequence node for this `for` statement. */
@@ -1106,7 +1111,7 @@ class BasicBlock extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -13,6 +13,7 @@ private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
@@ -20,13 +21,14 @@ private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml

View File

@@ -58,6 +58,7 @@ class Function extends Function_, Scope, AstNode {
/** Gets the name of the nth argument (for simple arguments) */
string getArgName(int index) { result = this.getArg(index).(Name).getId() }
/** Gets the parameter of this function with the name `name`. */
Parameter getArgByName(string name) {
(
result = this.getAnArg()

View File

@@ -9,6 +9,7 @@ class ConditionBlock extends BasicBlock {
}
/** Basic blocks controlled by this condition, i.e. those BBs for which the condition is testIsTrue */
pragma[nomagic]
predicate controls(BasicBlock controlled, boolean testIsTrue) {
/*
* For this block to control the block 'controlled' with 'testIsTrue' the following must be true:

View File

@@ -31,7 +31,7 @@ class ImportExpr extends ImportExpr_ {
// relative imports are no longer allowed in Python 3
major_version() < 3 and
// and can be explicitly turned off in later versions of Python 2
not getEnclosingModule().hasFromFuture("absolute_import")
not this.getEnclosingModule().hasFromFuture("absolute_import")
}
/**
@@ -53,8 +53,8 @@ class ImportExpr extends ImportExpr_ {
* the name of the topmost module that will be imported.
*/
private string relativeTopName() {
getLevel() = -1 and
result = basePackageName(1) + "." + this.getTopName() and
this.getLevel() = -1 and
result = this.basePackageName(1) + "." + this.getTopName() and
valid_module_name(result)
}
@@ -62,7 +62,7 @@ class ImportExpr extends ImportExpr_ {
if this.getLevel() <= 0
then result = this.getTopName()
else (
result = basePackageName(this.getLevel()) and
result = this.basePackageName(this.getLevel()) and
valid_module_name(result)
)
}
@@ -73,17 +73,17 @@ class ImportExpr extends ImportExpr_ {
* which may not be the name of the module.
*/
string bottomModuleName() {
result = relativeTopName() + this.remainderOfName()
result = this.relativeTopName() + this.remainderOfName()
or
not exists(relativeTopName()) and
not exists(this.relativeTopName()) and
result = this.qualifiedTopName() + this.remainderOfName()
}
/** Gets the name of topmost module or package being imported */
string topModuleName() {
result = relativeTopName()
result = this.relativeTopName()
or
not exists(relativeTopName()) and
not exists(this.relativeTopName()) and
result = this.qualifiedTopName()
}
@@ -94,7 +94,7 @@ class ImportExpr extends ImportExpr_ {
*/
string getImportedModuleName() {
exists(string bottomName | bottomName = this.bottomModuleName() |
if this.isTop() then result = topModuleName() else result = bottomName
if this.isTop() then result = this.topModuleName() else result = bottomName
)
}

View File

@@ -86,13 +86,13 @@ class Module extends Module_, Scope, AstNode {
/** Gets the package containing this module (or parent package if this is a package) */
Module getPackage() {
this.getName().matches("%.%") and
result.getName() = getName().regexpReplaceAll("\\.[^.]*$", "")
result.getName() = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the name of the package containing this module */
string getPackageName() {
this.getName().matches("%.%") and
result = getName().regexpReplaceAll("\\.[^.]*$", "")
result = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the metrics for this module */

View File

@@ -52,8 +52,7 @@ private newtype TPrintAstNode =
TStmtListNode(StmtList list) {
shouldPrint(list.getAnItem(), _) and
not list = any(Module mod).getBody() and
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
exists(list.getAnItem())
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child))
} or
TRegExpTermNode(RegExpTerm term) {
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))

View File

@@ -49,16 +49,17 @@ newtype TRegExpParent =
* or another regular expression term.
*/
class RegExpParent extends TRegExpParent {
/** Gets a textual representation of this element. */
string toString() { result = "RegExpParent" }
/** Gets the `i`th child term. */
abstract RegExpTerm getChild(int i);
/** Gets a child term. */
RegExpTerm getAChild() { result = getChild(_) }
RegExpTerm getAChild() { result = this.getChild(_) }
/** Gets the number of child terms. */
int getNumChild() { result = count(getAChild()) }
int getNumChild() { result = count(this.getAChild()) }
/** Gets the associated regex. */
abstract Regex getRegex();
@@ -72,14 +73,18 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
/** Holds if dot, `.`, matches all characters, including newlines. */
predicate isDotAll() { re.getAMode() = "DOTALL" }
/** Holds if matching with this regex is case-insensitive. */
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
/** Gets a string representing all modes for this regex, separated by ` | `. */
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
override Regex getRegex() { result = re }
/** Gets the primary QL class for this regex. */
string getPrimaryQLClass() { result = "RegExpLiteral" }
}
@@ -117,7 +122,7 @@ class RegExpTerm extends RegExpParent {
RegExpTerm getRootTerm() {
this.isRootTerm() and result = this
or
result = getParent().(RegExpTerm).getRootTerm()
result = this.getParent().(RegExpTerm).getRootTerm()
}
/**
@@ -196,7 +201,7 @@ class RegExpTerm extends RegExpParent {
/** Gets the regular expression term that is matched (textually) before this one, if any. */
RegExpTerm getPredecessor() {
exists(RegExpTerm parent | parent = getParent() |
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).previousElement(this)
or
not exists(parent.(RegExpSequence).previousElement(this)) and
@@ -207,7 +212,7 @@ class RegExpTerm extends RegExpParent {
/** Gets the regular expression term that is matched (textually) after this one, if any. */
RegExpTerm getSuccessor() {
exists(RegExpTerm parent | parent = getParent() |
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).nextElement(this)
or
not exists(parent.(RegExpSequence).nextElement(this)) and
@@ -246,8 +251,10 @@ class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
result.getEnd() = part_end
}
/** Holds if this term may match an unlimited number of times. */
predicate mayRepeatForever() { may_repeat_forever = true }
/** Gets the qualifier for this term. That is e.g "?" for "a?". */
string getQualifier() { result = re.getText().substring(part_end, end) }
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
@@ -322,8 +329,10 @@ class RegExpRange extends RegExpQuantifier {
RegExpRange() { re.multiples(part_end, end, lower, upper) }
/** Gets the string defining the upper bound of this range, if any. */
string getUpper() { result = upper }
/** Gets the string defining the lower bound of this range, if any. */
string getLower() { result = lower }
/**
@@ -358,7 +367,7 @@ class RegExpSequence extends RegExpTerm, TRegExpSequence {
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
/** Gets the element preceding `element` in this sequence. */
RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) }
RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
/** Gets the element following `element` in this sequence. */
RegExpTerm nextElement(RegExpTerm element) {
@@ -461,15 +470,17 @@ class RegExpEscape extends RegExpNormalChar {
// TODO: Find a way to include a formfeed character
// this.getUnescaped() = "f" and result = " "
// or
isUnicode() and
result = getUnicode()
this.isUnicode() and
result = this.getUnicode()
}
/** Holds if this term's name is given by the part following the escape character. */
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
override string getPrimaryQLClass() { result = "RegExpEscape" }
string getUnescaped() { result = this.getText().suffix(1) }
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
private string getUnescaped() { result = this.getText().suffix(1) }
/**
* Gets the text for this escape. That is e.g. "\w".
@@ -479,7 +490,7 @@ class RegExpEscape extends RegExpNormalChar {
/**
* Holds if this is a unicode escape.
*/
private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] }
private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
/**
* Gets the unicode char for this escape.
@@ -536,15 +547,8 @@ private int toHex(string hex) {
* ```
*/
class RegExpCharacterClassEscape extends RegExpEscape {
// string value;
RegExpCharacterClassEscape() {
// value = re.getText().substring(start + 1, end) and
// value in ["d", "D", "s", "S", "w", "W"]
this.getValue() in ["d", "D", "s", "S", "w", "W"]
}
RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
/** Gets the name of the character class; for example, `w` for `\w`. */
// override string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
@@ -563,19 +567,22 @@ class RegExpCharacterClassEscape extends RegExpEscape {
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
/** Holds if this character class is inverted, matching the opposite of its content. */
predicate isInverted() { re.getChar(start + 1) = "^" }
/** Gets the `i`th char inside this character class. */
string getCharThing(int i) { result = re.getChar(i + start) }
/** Holds if this character class can match anything. */
predicate isUniversalClass() {
// [^]
isInverted() and not exists(getAChild())
this.isInverted() and not exists(this.getAChild())
or
// [\w\W] and similar
not isInverted() and
not this.isInverted() and
exists(string cce1, string cce2 |
cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
|
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
)
@@ -620,6 +627,7 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
re.charRange(_, start, lower_end, upper_start, end)
}
/** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
predicate isRange(string lo, string hi) {
lo = re.getText().substring(start, lower_end) and
hi = re.getText().substring(upper_start, end)
@@ -653,8 +661,13 @@ class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the string representation of the char matched by this term. */
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) { none() }
@@ -684,15 +697,15 @@ class RegExpConstant extends RegExpTerm {
qstart <= start and end <= qend
) and
value = this.(RegExpNormalChar).getValue()
// This will never hold
// or
// this = TRegExpSpecialChar(re, start, end) and
// re.inCharSet(start) and
// value = this.(RegExpSpecialChar).getChar()
}
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the string matched by this constant term. */
string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
@@ -731,10 +744,6 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
/** Gets the name of this capture group, if any. */
string getName() { result = re.getGroupName(start, end) }
predicate isCharacter() { any() }
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) {
result.getRegex() = re and
i = 0 and
@@ -762,8 +771,13 @@ class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
re.specialCharacter(start, end, char)
}
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the char for this term. */
string getChar() { result = char }
override RegExpTerm getChild(int i) { none() }
@@ -828,8 +842,6 @@ class RegExpCaret extends RegExpSpecialChar {
class RegExpZeroWidthMatch extends RegExpGroup {
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
override predicate isCharacter() { any() }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }

View File

@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
override Stmt getASubStatement() { result = this.getAStmt() }
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
/**
 * Gets an expression for a type handled by this statement.
 *
 * When the type expression is a tuple, each element of the tuple is a
 * result, and the tuple itself is not.
 */
override Expr getType() {
result = super.getType() and not result instanceof Tuple
or
result = super.getType().(Tuple).getAnElt()
}
}
/** An assert statement, such as `assert a == b, "A is not equal to b"` */

View File

@@ -28,7 +28,11 @@ private module AlgorithmNames {
name = "SHA256" or
name = "SHA384" or
name = "SHA512" or
name = "SHA3"
name = "SHA3" or
name = "SHA3224" or
name = "SHA3256" or
name = "SHA3384" or
name = "SHA3512"
}
predicate isWeakHashingAlgorithm(string name) {

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -3690,8 +3695,8 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
arg.getASuccessor() = par and
arg.getASuccessor() = out and
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -3690,8 +3695,8 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
arg.getASuccessor() = par and
arg.getASuccessor() = out and
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -3690,8 +3695,8 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
arg.getASuccessor() = par and
arg.getASuccessor() = out and
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -3690,8 +3695,8 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
arg.getASuccessor() = par and
arg.getASuccessor() = out and
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -937,7 +937,7 @@ class CallContextSpecificCall extends CallContextCall, TSpecificCall {
}
override predicate relevantFor(DataFlowCallable callable) {
recordDataFlowCallSite(getCall(), callable)
recordDataFlowCallSite(this.getCall(), callable)
}
override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
@@ -1236,6 +1236,13 @@ class TypedContent extends MkTypedContent {
/** Gets a textual representation of this content. */
string toString() { result = c.toString() }
/**
* Holds if access paths with this `TypedContent` at their head should always
* be tracked at high precision. This disables adaptive access path precision
* for such access paths.
*
* Delegates to the one-argument `forceHighPrecision` predicate, applied to `c`.
*/
predicate forceHighPrecision() { forceHighPrecision(c) }
}
/**
@@ -1250,7 +1257,7 @@ abstract class AccessPathFront extends TAccessPathFront {
TypedContent getHead() { this = TFrontHead(result) }
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
}
class AccessPathFrontNil extends AccessPathFront, TFrontNil {

View File

@@ -152,6 +152,7 @@ class DataFlowExpr = Expr;
* Flow comes from definitions, uses and refinements.
*/
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
// If they have different enclosing callables, we get consistency errors.
module EssaFlow {
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
// Definition
@@ -200,6 +201,9 @@ module EssaFlow {
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
or
// boolean inline expressions such as `x or y` or `x and y`
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
or
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
@@ -225,35 +229,60 @@ module EssaFlow {
//--------
/**
* This is the local flow predicate that is used as a building block in global
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
* excludes SSA flow through instance fields.
* data flow.
*
* Local flow can happen either at import time, when the module is initialised
* or at runtime when callables in the module are called.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// If there is ESSA-flow out of a node `node`, we want flow
// If there is local flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
exists(Node node |
EssaFlow::essaFlowStep(node, nodeTo) and
nodeFrom = update(node) and
(
not node instanceof EssaNode or
not nodeTo instanceof EssaNode or
localEssaStep(node, nodeTo)
importTimeLocalFlowStep(node, nodeTo) or
runtimeLocalFlowStep(node, nodeTo)
)
)
}
/**
* Holds if there is an Essa flow step from `nodeFrom` to `nodeTo` that does not switch between
* local and global SSA variables.
* Holds if `node` is found at the top level of a module.
*/
private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
(
nodeFrom.getVar() instanceof GlobalSsaVariable and
nodeTo.getVar() instanceof GlobalSsaVariable
or
not nodeFrom.getVar() instanceof GlobalSsaVariable and
not nodeTo.getVar() instanceof GlobalSsaVariable
pragma[inline]
predicate isTopLevel(Node node) {
  // The scope reported for `node` must itself be a `Module`.
  exists(Module enclosing | enclosing = node.getScope())
}
/** Holds if `nodeFrom` flows locally to `nodeTo` while the module is being imported. */
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
  EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
  // Top-level placement of both ends is used as a proxy for "can execute at
  // import time". Statements inside functions that are called from the top
  // level are therefore missed.
  isTopLevel(nodeTo) and
  isTopLevel(nodeFrom)
}
/** Holds if `nodeFrom` flows locally to `nodeTo` when the code runs after import. */
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
  EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
  // Anything that is not at the top level can be executed at runtime, so
  // neither end of the step may be a top-level node.
  not (isTopLevel(nodeFrom) or isTopLevel(nodeTo))
}
/** Holds if `nodeFrom` reaches `nodeTo` through a module variable, which is accessed via jump steps at runtime. */
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
  // A read of the module variable represented by `nodeFrom`.
  nodeTo = nodeFrom.(ModuleVariableNode).getARead()
  or
  // A write to the module variable represented by `nodeTo`.
  nodeTo.(ModuleVariableNode).getAWrite() = nodeFrom
  or
  // Sets the possible values of the variable at the end of import time:
  // `nodeFrom` is the defining node of an ultimate definition of some SSA
  // variable, and `nodeTo` is the module variable node for the same variable.
  exists(SsaVariable var, SsaVariable def |
    def = var.getAnUltimateDefinition() and
    nodeFrom.asCfgNode() = def.getDefinition() and
    nodeTo.(ModuleVariableNode).getVariable() = def.getVariable()
  )
}
@@ -581,11 +610,11 @@ class DataFlowLambda extends DataFlowCallable, TLambda {
override string toString() { result = lambda.toString() }
override CallNode getACall() { result = getCallableValue().getACall() }
override CallNode getACall() { result = this.getCallableValue().getACall() }
override Scope getScope() { result = lambda.getEvaluatingScope() }
override NameNode getParameter(int n) { result = getParameter(getCallableValue(), n) }
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
override string getName() { result = "Lambda callable" }
@@ -857,11 +886,7 @@ string ppReprType(DataFlowType t) { none() }
* taken into account.
*/
predicate jumpStep(Node nodeFrom, Node nodeTo) {
// Module variable read
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
or
// Module variable write
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
runtimeJumpStep(nodeFrom, nodeTo)
or
// Read of module attribute:
exists(AttrRead r, ModuleValue mv |
@@ -1620,6 +1645,12 @@ predicate isImmutableOrUnobservable(Node n) { none() }
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head should always be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*
* The body is `none()`, so this holds for no content.
*/
predicate forceHighPrecision(Content c) { none() }
/**
* Holds if `n` should be hidden from path explanations.
*
* The body is `none()`, so no node is hidden by this predicate.
*/
predicate nodeIsHidden(Node n) { none() }

View File

@@ -102,7 +102,7 @@ class Node extends TNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -332,7 +332,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Scope getScope() { result = mod }
override string toString() {
result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
result = "ModuleVariableNode for " + mod.getName() + "." + var.getId()
}
/** Gets the module in which this variable appears. */

View File

@@ -62,12 +62,12 @@ class LocalSourceNode extends Node {
/**
* Gets a read of attribute `attrName` on this node.
*/
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
AttrRead getAnAttributeRead(string attrName) { result = this.getAnAttributeReference(attrName) }
/**
* Gets a write of attribute `attrName` on this node.
*/
AttrWrite getAnAttributeWrite(string attrName) { result = getAnAttributeReference(attrName) }
AttrWrite getAnAttributeWrite(string attrName) { result = this.getAnAttributeReference(attrName) }
/**
* Gets a reference (read or write) of any attribute on this node.
@@ -81,12 +81,12 @@ class LocalSourceNode extends Node {
/**
* Gets a read of any attribute on this node.
*/
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
AttrRead getAnAttributeRead() { result = this.getAnAttributeReference() }
/**
* Gets a write of any attribute on this node.
*/
AttrWrite getAnAttributeWrite() { result = getAnAttributeReference() }
AttrWrite getAnAttributeWrite() { result = this.getAnAttributeReference() }
/**
* Gets a call to this node.

View File

@@ -58,7 +58,6 @@ string prettyNode(DataFlow::Node node) {
*/
bindingset[node]
string prettyNodeForInlineTest(DataFlow::Node node) {
exists(node.asExpr()) and
result = prettyExpr(node.asExpr())
or
exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() |

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -75,24 +75,26 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
this.isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
@@ -101,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
this.isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}

View File

@@ -384,7 +384,7 @@ abstract class TaintSource extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -498,7 +498,7 @@ abstract class TaintSink extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -225,9 +225,9 @@ class ModuleVariable extends SsaSourceVariable {
}
override ControlFlowNode getAnImplicitUse() {
result = global_variable_callnode()
result = this.global_variable_callnode()
or
result = global_variable_import()
result = this.global_variable_import()
or
exists(ImportTimeScope scope | scope.entryEdge(result, _) |
this = scope.getOuterVariable(_) or

View File

@@ -41,7 +41,7 @@ class EssaVariable extends TEssaDefinition {
*/
ControlFlowNode getASourceUse() {
exists(SsaSourceVariable var |
result = use_for_var(var) and
result = this.use_for_var(var) and
result = var.getASourceUse()
)
}
@@ -258,7 +258,7 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
/** Gets another definition of the same source variable that reaches this definition. */
private EssaDefinition reachingDefinition(BasicBlock pred) {
result.getScope() = this.getScope() and
result.getSourceVariable() = pred_var(pred) and
result.getSourceVariable() = this.pred_var(pred) and
result.reachesEndOfBlock(pred)
}

View File

@@ -424,7 +424,7 @@ module AiohttpWebModel {
override string getAttributeName() { none() }
override string getMethodName() { result in ["read_nowait"] }
override string getMethodName() { result = "read_nowait" }
override string getAsyncMethodName() {
result in [

View File

@@ -116,7 +116,7 @@ private module CryptodomeModel {
] and
this =
API::moduleImport(["Crypto", "Cryptodome"])
.getMember(["Cipher"])
.getMember("Cipher")
.getMember(cipherName)
.getMember("new")
.getReturn()
@@ -135,21 +135,21 @@ private module CryptodomeModel {
or
// for the following methods, method signatures can be found in
// https://pycryptodome.readthedocs.io/en/latest/src/cipher/modern.html
methodName in ["update"] and
methodName = "update" and
result in [this.getArg(0), this.getArgByName("data")]
or
// although `mac_tag` is used as the parameter name in the spec above, some implementations use `received_mac_tag`, for an example, see
// https://github.com/Legrandin/pycryptodome/blob/5dace638b70ac35bb5d9b565f3e75f7869c9d851/lib/Crypto/Cipher/ChaCha20_Poly1305.py#L207
methodName in ["verify"] and
methodName = "verify" and
result in [this.getArg(0), this.getArgByName(["mac_tag", "received_mac_tag"])]
or
methodName in ["hexverify"] and
methodName = "hexverify" and
result in [this.getArg(0), this.getArgByName("mac_tag_hex")]
or
methodName in ["encrypt_and_digest"] and
methodName = "encrypt_and_digest" and
result in [this.getArg(0), this.getArgByName("plaintext")]
or
methodName in ["decrypt_and_verify"] and
methodName = "decrypt_and_verify" and
result in [
this.getArg(0), this.getArgByName("ciphertext"), this.getArg(1),
this.getArgByName("mac_tag")
@@ -169,7 +169,7 @@ private module CryptodomeModel {
methodName in ["sign", "verify"] and
this =
API::moduleImport(["Crypto", "Cryptodome"])
.getMember(["Signature"])
.getMember("Signature")
.getMember(signatureName)
.getMember("new")
.getReturn()
@@ -185,11 +185,11 @@ private module CryptodomeModel {
methodName = "sign" and
result in [this.getArg(0), this.getArgByName("msg_hash")] // Cryptodome.Hash instance
or
methodName in ["verify"] and
methodName = "verify" and
(
result in [this.getArg(0), this.getArgByName(["msg_hash"])] // Cryptodome.Hash instance
result in [this.getArg(0), this.getArgByName("msg_hash")] // Cryptodome.Hash instance
or
result in [this.getArg(1), this.getArgByName(["signature"])]
result in [this.getArg(1), this.getArgByName("signature")]
)
}
}
@@ -204,7 +204,7 @@ private module CryptodomeModel {
CryptodomeGenericHashOperation() {
exists(API::Node hashModule |
hashModule =
API::moduleImport(["Crypto", "Cryptodome"]).getMember(["Hash"]).getMember(hashName)
API::moduleImport(["Crypto", "Cryptodome"]).getMember("Hash").getMember(hashName)
|
this = hashModule.getMember("new").getACall()
or

View File

@@ -1,5 +1,5 @@
/**
* Provides classes modeling security-relevant aspects of the 'dill' package.
* Provides classes modeling security-relevant aspects of the `dill` PyPI package.
* See https://pypi.org/project/dill/.
*/
@@ -10,18 +10,41 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* A call to `dill.loads`
* See https://pypi.org/project/dill/ (which currently refers you
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
* Provides models for the `dill` PyPI package.
* See https://pypi.org/project/dill/.
*/
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
private module Dill {
/**
* A call to `dill.load`
* See https://pypi.org/project/dill/ (which currently refers you
* to https://docs.python.org/3/library/pickle.html#pickle.load)
*/
private class DillLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
DillLoadCall() { this = API::moduleImport("dill").getMember("load").getACall() }
override predicate mayExecuteInput() { any() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
override DataFlow::Node getOutput() { result = this }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "dill" }
override string getFormat() { result = "dill" }
}
/**
 * A call to `dill.loads`, modeled as a decoding step.
 *
 * See https://pypi.org/project/dill/ (which currently refers you
 * to https://docs.python.org/3/library/pickle.html#pickle.loads)
 */
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
  DillLoadsCall() {
    exists(API::Node loadsFunction |
      loadsFunction = API::moduleImport("dill").getMember("loads")
    |
      this = loadsFunction.getACall()
    )
  }

  /** Holds for every such call: this model always flags the input as potentially executed. */
  override predicate mayExecuteInput() { any() }

  /** Gets the decoded data: the first positional argument or the `str` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("str")
  }

  /** Gets the decoded value, which is the call node itself. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the name of the serialization format, `"dill"`. */
  override string getFormat() { result = "dill" }
}
}

View File

@@ -1844,11 +1844,13 @@ private module PrivateDjango {
t.start() and
result.asCfgNode().(CallNode).getFunction() = this.asViewRef().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = asViewResult(t2).track(t2, t))
exists(DataFlow::TypeTracker t2 | result = this.asViewResult(t2).track(t2, t))
}
/** Gets a reference to the result of calling the `as_view` classmethod of this class. */
DataFlow::Node asViewResult() { asViewResult(DataFlow::TypeTracker::end()).flowsTo(result) }
DataFlow::Node asViewResult() {
this.asViewResult(DataFlow::TypeTracker::end()).flowsTo(result)
}
}
/** A class that we consider a django View class. */
@@ -1944,10 +1946,10 @@ private module PrivateDjango {
abstract DataFlow::Node getViewArg();
final override DjangoRouteHandler getARequestHandler() {
poorMansFunctionTracker(result) = getViewArg()
poorMansFunctionTracker(result) = this.getViewArg()
or
exists(DjangoViewClass vc |
getViewArg() = vc.asViewResult() and
this.getViewArg() = vc.asViewResult() and
result = vc.getARequestHandler()
)
}

View File

@@ -292,12 +292,12 @@ module Flask {
override Function getARequestHandler() {
exists(DataFlow::LocalSourceNode func_src |
func_src.flowsTo(getViewArg()) and
func_src.flowsTo(this.getViewArg()) and
func_src.asExpr().(CallableExpr) = result.getDefinition()
)
or
exists(FlaskViewClass vc |
getViewArg() = vc.asViewResult().getAUse() and
this.getViewArg() = vc.asViewResult().getAUse() and
result = vc.getARequestHandler()
)
}

View File

@@ -0,0 +1,56 @@
/**
* Provides classes modeling security-relevant aspects of the `Flask-SQLAlchemy` PyPI package
* (imported by `flask_sqlalchemy`).
* See
* - https://pypi.org/project/Flask-SQLAlchemy/
* - https://flask-sqlalchemy.palletsprojects.com/en/2.x/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
private import semmle.python.frameworks.SqlAlchemy
/**
 * INTERNAL: Do not use.
 *
 * Provides models for the `Flask-SQLAlchemy` PyPI package (imported by `flask_sqlalchemy`).
 * The models hook into the `SqlAlchemy` module by contributing additional
 * `TextClauseConstruction`, `Engine::InstanceSource` and `Session::InstanceSource` nodes.
 *
 * See
 * - https://pypi.org/project/Flask-SQLAlchemy/
 * - https://flask-sqlalchemy.palletsprojects.com/en/2.x/
 */
private module FlaskSqlAlchemy {
  /** Gets an API node for the value returned by calling `flask_sqlalchemy.SQLAlchemy`. */
  private API::Node dbInstance() {
    exists(API::Node dbClass |
      dbClass = API::moduleImport("flask_sqlalchemy").getMember("SQLAlchemy")
    |
      result = dbClass.getReturn()
    )
  }

  /** A call to the `text` method on a DB, treated as a `TextClause` construction. */
  private class DbTextCall extends SqlAlchemy::TextClause::TextClauseConstruction {
    DbTextCall() {
      exists(API::Node textMethod | textMethod = dbInstance().getMember("text") |
        this = textMethod.getACall()
      )
    }
  }

  /** An access on a DB resulting in an Engine: the `engine` attribute or a `get_engine` call. */
  private class DbEngine extends SqlAlchemy::Engine::InstanceSource {
    DbEngine() {
      exists(API::Node db | db = dbInstance() |
        this = db.getMember("engine").getAUse()
        or
        this = db.getMember("get_engine").getACall()
      )
    }
  }

  /** An access on a DB resulting in a Session. */
  private class DbSession extends SqlAlchemy::Session::InstanceSource {
    DbSession() {
      exists(API::Node db | db = dbInstance() |
        // the `session` attribute
        this = db.getMember("session").getAUse()
        or
        // calling what `create_session` returned, or calling `begin` on it
        exists(API::Node sessionFactory |
          sessionFactory = db.getMember("create_session").getReturn()
        |
          this = sessionFactory.getACall()
          or
          this = sessionFactory.getMember("begin").getACall()
        )
        or
        // a direct call to `create_scoped_session`
        this = db.getMember("create_scoped_session").getACall()
      )
    }
  }
}

View File

@@ -0,0 +1,344 @@
/**
* Provides classes modeling security-relevant aspects of the `SQLAlchemy` PyPI package.
* See
* - https://pypi.org/project/SQLAlchemy/
* - https://docs.sqlalchemy.org/en/14/index.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
// This import is done like this to avoid importing the deprecated top-level things that
// would pollute the namespace
private import semmle.python.frameworks.PEP249::PEP249 as PEP249
/**
* INTERNAL: Do not use.
*
* Provides models for the `SQLAlchemy` PyPI package.
* See
* - https://pypi.org/project/SQLAlchemy/
* - https://docs.sqlalchemy.org/en/14/index.html
*/
module SqlAlchemy {
/**
 * Provides models for the `sqlalchemy.engine.Engine` and `sqlalchemy.future.Engine` classes.
 *
 * The two classes are similar enough that a single model covers both.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine
 * - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Engine
 */
module Engine {
  /** Gets a reference to a SQLAlchemy Engine class. */
  private API::Node classRef() {
    result = API::moduleImport("sqlalchemy").getMember(["engine", "future"]).getMember("Engine")
  }

  /**
   * A source of instances of a SQLAlchemy Engine, extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values of function calls,
   * or a special parameter that is set when a function is called by an external library.
   *
   * Use the predicate `Engine::instance()` to get references to instances of a SQLAlchemy Engine.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that produces an Engine. */
  private class EngineConstruction extends InstanceSource, DataFlow::CallCfgNode {
    EngineConstruction() {
      // direct instantiation, or one of the `create_engine` factory functions
      exists(API::Node factory |
        factory = classRef()
        or
        factory = API::moduleImport("sqlalchemy").getMember("create_engine")
        or
        factory = API::moduleImport("sqlalchemy").getMember("future").getMember("create_engine")
      |
        this = factory.getACall()
      )
      or
      // `execution_options` called on an already tracked Engine is again treated as an Engine
      this.(DataFlow::MethodCallNode).calls(instance(), "execution_options")
    }
  }

  /** Gets a reference to an instance of a SQLAlchemy Engine, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of a SQLAlchemy Engine. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode source |
      source = instance(DataFlow::TypeTracker::end())
    |
      source.flowsTo(result)
    )
  }
}
/**
 * Provides models for the `sqlalchemy.engine.Connection` (also reachable as
 * `sqlalchemy.engine.base.Connection`) and `sqlalchemy.future.Connection` classes.
 *
 * These are so similar that we model them in the same way.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection
 * - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection
 */
module Connection {
  /** Gets a reference to a SQLAlchemy Connection class. */
  private API::Node classRef() {
    exists(API::Node engineModule |
      engineModule = API::moduleImport("sqlalchemy").getMember("engine")
    |
      result = engineModule.getMember("base").getMember("Connection")
      or
      // The class is documented under the public path `sqlalchemy.engine.Connection`;
      // API graph paths follow the import path used in the analyzed code, so this path
      // has to be listed separately from the `base` one.
      result = engineModule.getMember("Connection")
    )
    or
    result = API::moduleImport("sqlalchemy").getMember("future").getMember("Connection")
  }

  /**
   * A source of instances of a SQLAlchemy Connection, extend this class to model new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an external
   * library.
   *
   * Use the predicate `Connection::instance()` to get references to instances of a SQLAlchemy Connection.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that produces a Connection. */
  private class ConnectionConstruction extends InstanceSource, DataFlow::CallCfgNode {
    ConnectionConstruction() {
      // direct instantiation of the class
      this = classRef().getACall()
      or
      // `begin`/`connect` called on an Engine
      this.(DataFlow::MethodCallNode).calls(Engine::instance(), ["begin", "connect"])
      or
      // `connect`/`execution_options` called on an already tracked Connection
      this.(DataFlow::MethodCallNode).calls(instance(), ["connect", "execution_options"])
    }
  }

  /** Gets a reference to an instance of a SQLAlchemy Connection, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    t.start() and
    result instanceof InstanceSource
    or
    exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
  }

  /** Gets a reference to an instance of a SQLAlchemy Connection. */
  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
 * Provides models for the underlying DB-API Connection of a SQLAlchemy Connection.
 *
 * See https://docs.sqlalchemy.org/en/14/core/connections.html#dbapi-connections.
 */
module DBAPIConnection {
  /**
   * A source of instances of DB-API Connections, extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values of function calls,
   * or a special parameter that is set when a function is called by an external library.
   *
   * Use the predicate `DBAPIConnection::instance()` to get references to instances of DB-API Connections.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /**
   * The built-in sources of DB-API Connections. They are also registered as
   * `PEP249::Connection::InstanceSource`s.
   */
  private class DBAPIConnectionSources extends InstanceSource, PEP249::Connection::InstanceSource {
    DBAPIConnectionSources() {
      // `engine.raw_connection()`
      exists(DataFlow::MethodCallNode rawConnectionCall | rawConnectionCall = this |
        rawConnectionCall.calls(Engine::instance(), "raw_connection")
      )
      or
      // a read of `connection.connection`
      exists(DataFlow::AttrRead connectionRead | connectionRead = this |
        connectionRead.accesses(Connection::instance(), "connection")
      )
    }
  }

  /** Gets a reference to an instance of DB-API Connections, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of DB-API Connections. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode source |
      source = instance(DataFlow::TypeTracker::end())
    |
      source.flowsTo(result)
    )
  }
}
/**
 * Provides models for the `sqlalchemy.orm.Session` class.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session
 * - https://docs.sqlalchemy.org/en/14/orm/session_basics.html
 */
module Session {
  /** Gets a reference to the `sqlalchemy.orm.Session` class. */
  private API::Node classRef() {
    exists(API::Node ormModule | ormModule = API::moduleImport("sqlalchemy").getMember("orm") |
      result = ormModule.getMember("Session")
    )
  }

  /**
   * A source of instances of `sqlalchemy.orm.Session`, extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values of function calls,
   * or a special parameter that is set when a function is called by an external library.
   *
   * Use the predicate `Session::instance()` to get references to instances of `sqlalchemy.orm.Session`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that produces a Session. */
  private class SessionConstruction extends InstanceSource, DataFlow::CallCfgNode {
    SessionConstruction() {
      // direct instantiation of the class
      this = classRef().getACall()
      or
      // calling the result of `sqlalchemy.orm.sessionmaker(...)`, or calling `begin` on it
      exists(API::Node sessionFactory |
        sessionFactory =
          API::moduleImport("sqlalchemy").getMember("orm").getMember("sessionmaker").getReturn()
      |
        this = sessionFactory.getACall()
        or
        this = sessionFactory.getMember("begin").getACall()
      )
    }
  }

  /** Gets a reference to an instance of `sqlalchemy.orm.Session`, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `sqlalchemy.orm.Session`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode source |
      source = instance(DataFlow::TypeTracker::end())
    |
      source.flowsTo(result)
    )
  }
}
/**
 * A call to `execute` on a SQLAlchemy Engine, Connection, or Session.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.execute
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execute
 * - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.execute
 * - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.execute
 */
private class SqlAlchemyExecuteCall extends DataFlow::MethodCallNode, SqlExecution::Range {
  SqlAlchemyExecuteCall() {
    exists(DataFlow::Node receiver |
      receiver = Engine::instance()
      or
      receiver = Connection::instance()
      or
      receiver = Session::instance()
    |
      this.calls(receiver, "execute")
    )
  }

  /** Gets the SQL being executed: the first positional argument or the `statement` keyword argument. */
  override DataFlow::Node getSql() {
    result = this.getArg(0)
    or
    result = this.getArgByName("statement")
  }
}
/**
 * A call to `exec_driver_sql` on a SQLAlchemy Connection.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.exec_driver_sql
 * - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.exec_driver_sql
 */
private class SqlAlchemyExecDriverSqlCall extends DataFlow::MethodCallNode, SqlExecution::Range {
  SqlAlchemyExecDriverSqlCall() {
    exists(DataFlow::Node connection | connection = Connection::instance() |
      this.calls(connection, "exec_driver_sql")
    )
  }

  /** Gets the SQL being executed: the first positional argument or the `statement` keyword argument. */
  override DataFlow::Node getSql() {
    result = this.getArg(0)
    or
    result = this.getArgByName("statement")
  }
}
/**
 * A call to `scalar` on a SQLAlchemy Engine, Connection, or Session.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.scalar
 * - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.scalar
 * - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.scalar
 * - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar
 */
private class SqlAlchemyScalarCall extends DataFlow::MethodCallNode, SqlExecution::Range {
  SqlAlchemyScalarCall() {
    exists(DataFlow::Node receiver |
      receiver = Engine::instance()
      or
      receiver = Connection::instance()
      or
      receiver = Session::instance()
    |
      this.calls(receiver, "scalar")
    )
  }

  /**
   * Gets the SQL being executed: the first positional argument, or the keyword
   * argument named `statement` or `object_`.
   */
  override DataFlow::Node getSql() {
    result = this.getArg(0)
    or
    result = this.getArgByName(["statement", "object_"])
  }
}
/**
 * Provides models for the `sqlalchemy.sql.expression.TextClause` class,
 * which represents a textual SQL string directly.
 *
 * ```py
 * session.query(For14).filter_by(description=sqlalchemy.text(f"'{user_input}'")).all()
 * ```
 *
 * An alternative design would have been to add many additional taint steps, so that
 * the normal SQL injection query could find cases like the one above, where an ORM
 * query includes a TextClause built directly from user input. That approach has two
 * problems:
 *
 * - It is unclear which part of the query construction above should be marked as SQL
 *   to fit our `SqlExecution` concept. Nothing fits well, since all the SQL execution
 *   happens under the hood.
 * - It would require a LOT of modeling for the additional taint steps, since there
 *   are very many constructs that would each need a model.
 *
 * So instead the SQL injection query was extended to treat TextClause construction
 * as a sink. As a result, no part of an ORM-constructed query like the one above is
 * highlighted as containing SQL, and the additional taint steps are not needed.
 *
 * See
 * - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause.
 * - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
 */
module TextClause {
  /**
   * A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
   * textual SQL string directly. Extend this class to model new constructions.
   */
  abstract class TextClauseConstruction extends DataFlow::CallCfgNode {
    /** Gets the argument that specifies the SQL text. */
    DataFlow::Node getTextArg() {
      result = this.getArg(0)
      or
      result = this.getArgByName("text")
    }
  }

  /** `TextClause` constructions from the `sqlalchemy` package. */
  private class DefaultTextClauseConstruction extends TextClauseConstruction {
    DefaultTextClauseConstruction() {
      exists(API::Node sqlalchemyModule, API::Node constructor |
        sqlalchemyModule = API::moduleImport("sqlalchemy") and
        (
          // `sqlalchemy.text`
          constructor = sqlalchemyModule.getMember("text")
          or
          // `sqlalchemy.sql.text`
          constructor = sqlalchemyModule.getMember("sql").getMember("text")
          or
          // `sqlalchemy.sql.expression.text` and `sqlalchemy.sql.expression.TextClause`
          constructor =
            sqlalchemyModule
                .getMember("sql")
                .getMember("expression")
                .getMember(["text", "TextClause"])
        )
      |
        this = constructor.getACall()
      )
    }
  }
}
}

View File

@@ -195,6 +195,101 @@ private module StdlibPrivate {
}
}
/**
 * A call to one of the `os.path` functions that probe the file system, either to check
 * whether a file exists or to read one of its properties. Such a call is a file system access.
 *
 * A call to `os.path.exists` or `os.path.lexists` will check if a file exists on the file system.
 * (Although, on some platforms, the check may return `false` due to missing permissions.)
 * A call to `os.path.getatime` will raise `OSError` if the file does not exist or is inaccessible.
 *
 * See:
 * - https://docs.python.org/3/library/os.path.html#os.path.exists
 * - https://docs.python.org/3/library/os.path.html#os.path.lexists
 * - https://docs.python.org/3/library/os.path.html#os.path.isfile
 * - https://docs.python.org/3/library/os.path.html#os.path.isdir
 * - https://docs.python.org/3/library/os.path.html#os.path.islink
 * - https://docs.python.org/3/library/os.path.html#os.path.ismount
 * - https://docs.python.org/3/library/os.path.html#os.path.getatime
 * - https://docs.python.org/3/library/os.path.html#os.path.getmtime
 * - https://docs.python.org/3/library/os.path.html#os.path.getctime
 * - https://docs.python.org/3/library/os.path.html#os.path.getsize
 */
private class OsPathProbingCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  OsPathProbingCall() {
    exists(string probe |
      // these check if the file exists
      probe in ["exists", "lexists", "isfile", "isdir", "islink", "ismount"]
      or
      // these raise errors if the file does not exist
      probe in ["getatime", "getmtime", "getctime", "getsize"]
    |
      this = os::path().getMember(probe).getACall()
    )
  }

  /** Gets the path being probed: the first positional argument or the `path` keyword argument. */
  override DataFlow::Node getAPathArgument() {
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
  }
}
/**
 * A call to `os.path.samefile`, which will raise an exception if an `os.stat()` call
 * on either pathname fails. Both pathnames are path arguments of the access.
 */
private class OsPathSamefileCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  OsPathSamefileCall() {
    exists(API::Node samefile | samefile = os::path().getMember("samefile") |
      this = samefile.getACall()
    )
  }

  /** Gets either pathname, passed positionally (index 0 or 1) or as `path1`/`path2`. */
  override DataFlow::Node getAPathArgument() {
    result = this.getArg([0, 1])
    or
    result = this.getArgByName(["path1", "path2"])
  }
}
// Functions with non-standard arguments:
// - os.path.join(path, *paths)
// - os.path.relpath(path, start=os.curdir)
// these functions need special treatment when computing `getPathArg`.
//
// Functions that are excluded because they can act as sanitizers:
// - os.path.commonpath(paths): takes a sequence
// - os.path.commonprefix(list): takes a list argument
// unless the user controls all arguments, we are comparing with a known value.
/**
 * Gets the name of an `os.path` function that computes a new path from its path
 * argument(s). The sanitizer-like functions named in the comment above are
 * deliberately not part of this list.
 */
private string pathComputation() {
  result in [
      "abspath", "basename", "dirname", "expanduser", "expandvars", "join", "normcase",
      "normpath", "realpath", "relpath", "split", "splitdrive", "splitext"
    ]
}
/**
 * A call to one of the `os.path` functions (see `pathComputation`) that compute a new
 * path from existing paths. These should all propagate taint.
 */
private class OsPathComputation extends DataFlow::CallCfgNode {
  /** The name of the `os.path` function being called. */
  string methodName;

  OsPathComputation() {
    this = os::path().getMember(methodName).getACall() and
    methodName = pathComputation()
  }

  /** Gets an argument that contributes to the computed path. */
  DataFlow::Node getPathArg() {
    // every function takes its primary path first, or as `path`
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
    or
    // os.path.join(path, *paths): every positional argument is a path
    methodName = "join" and
    exists(int i | result = this.getArg(i))
    or
    // os.path.relpath(path, start=os.curdir): `start` is a path as well
    methodName = "relpath" and
    (
      result = this.getArg(1)
      or
      result = this.getArgByName("start")
    )
  }
}
/**
 * An additional taint step for path computations: taint flows from any path argument
 * of an `OsPathComputation` call to the call itself.
 */
private class OsPathComputationAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathComputation).getPathArg()
  }
}
/**
* A call to `os.path.normpath`.
* See https://docs.python.org/3/library/os.path.html#os.path.normpath
@@ -205,16 +300,6 @@ private module StdlibPrivate {
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/** An additional taint step for calls to `os.path.normpath` */
private class OsPathNormpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(OsPathNormpathCall call |
nodeTo = call and
nodeFrom = call.getPathArg()
)
}
}
/**
* A call to `os.path.abspath`.
* See https://docs.python.org/3/library/os.path.html#os.path.abspath
@@ -225,16 +310,6 @@ private module StdlibPrivate {
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/** An additional taint step for calls to `os.path.abspath` */
private class OsPathAbspathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(OsPathAbspathCall call |
nodeTo = call and
nodeFrom = call.getPathArg()
)
}
}
/**
* A call to `os.path.realpath`.
* See https://docs.python.org/3/library/os.path.html#os.path.realpath
@@ -245,16 +320,6 @@ private module StdlibPrivate {
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/** An additional taint step for calls to `os.path.realpath` */
private class OsPathRealpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(OsPathRealpathCall call |
nodeTo = call and
nodeFrom = call.getPathArg()
)
}
}
/**
* A call to `os.system`.
* See https://docs.python.org/3/library/os.html#os.system
@@ -397,8 +462,8 @@ private module StdlibPrivate {
result = this.get_executable_arg()
or
exists(DataFlow::Node arg_args, boolean shell |
arg_args = get_args_arg() and
shell = get_shell_arg_value()
arg_args = this.get_args_arg() and
shell = this.get_shell_arg_value()
|
// When "executable" argument is set, and "shell" argument is `False`, the
// "args" argument will only be used to set the program name and arguments to
@@ -428,6 +493,22 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// marshal
// ---------------------------------------------------------------------------
/**
 * A call to `marshal.load`, modeled as a decoding step.
 *
 * See https://docs.python.org/3/library/marshal.html#marshal.load
 */
private class MarshalLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
  MarshalLoadCall() {
    exists(API::Node loadFunction |
      loadFunction = API::moduleImport("marshal").getMember("load")
    |
      this = loadFunction.getACall()
    )
  }

  /** Holds for every such call: this model always flags the input as potentially executed. */
  override predicate mayExecuteInput() { any() }

  /** Gets the decoded input, which is the first positional argument. */
  override DataFlow::Node getAnInput() {
    exists(DataFlow::Node firstArg | firstArg = this.getArg(0) | result = firstArg)
  }

  /** Gets the decoded value, which is the call node itself. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the name of the serialization format, `"marshal"`. */
  override string getFormat() { result = "marshal" }
}
/**
* A call to `marshal.loads`
* See https://docs.python.org/3/library/marshal.html#marshal.loads
@@ -447,15 +528,23 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// pickle
// ---------------------------------------------------------------------------
/** Gets a reference to the `pickle` module. */
DataFlow::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getAUse() }
/** Gets a reference to any of the `pickle` modules. */
API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) }
/** Provides models for the `pickle` module. */
module pickle {
/** Gets a reference to the `pickle.loads` function. */
DataFlow::Node loads() {
result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getMember("loads").getAUse()
}
/**
 * A call to `pickle.load`, modeled as a decoding step.
 *
 * See https://docs.python.org/3/library/pickle.html#pickle.load
 */
private class PickleLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
  PickleLoadCall() {
    exists(API::Node loadFunction | loadFunction = pickle().getMember("load") |
      this = loadFunction.getACall()
    )
  }

  /** Holds for every such call: this model always flags the input as potentially executed. */
  override predicate mayExecuteInput() { any() }

  /** Gets the decoded input: the first positional argument or the `file` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("file")
  }

  /** Gets the decoded value, which is the call node itself. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the name of the serialization format, `"pickle"`. */
  override string getFormat() { result = "pickle" }
}
/**
@@ -463,11 +552,63 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/pickle.html#pickle.loads
*/
private class PickleLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
PickleLoadsCall() { this.getFunction() = pickle::loads() }
PickleLoadsCall() { this = pickle().getMember("loads").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "pickle" }
}
/**
 * A construction of a `pickle.Unpickler`, modeled as a decoding step.
 *
 * See https://docs.python.org/3/library/pickle.html#pickle.Unpickler
 */
private class PickleUnpicklerCall extends Decoding::Range, DataFlow::CallCfgNode {
  PickleUnpicklerCall() {
    exists(API::Node unpicklerClass | unpicklerClass = pickle().getMember("Unpickler") |
      this = unpicklerClass.getACall()
    )
  }

  /** Holds for every such call: this model always flags the input as potentially executed. */
  override predicate mayExecuteInput() { any() }

  /** Gets the decoded input: the first positional argument or the `file` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("file")
  }

  /** Gets the decoded value: a call to the `load` method on the constructed object. */
  override DataFlow::Node getOutput() {
    exists(DataFlow::Node loadCall | loadCall = this.getAMethodCall("load") | result = loadCall)
  }

  /** Gets the name of the serialization format, `"pickle"`. */
  override string getFormat() { result = "pickle" }
}
// ---------------------------------------------------------------------------
// shelve
// ---------------------------------------------------------------------------
/**
* A call to `shelve.open`
* See https://docs.python.org/3/library/shelve.html#shelve.open
*
* Claiming there is decoding of the input to `shelve.open` is a bit questionable, since
* it's not the filename, but the contents of the file that is decoded.
*
* However, we definitely want to be able to alert if a user is able to control what
* file is used, since that can lead to code execution (even if that file is free of
* path injection).
*
* So right now the best way we have of modeling this seems to be to treat the filename
* argument as being deserialized...
*/
private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range,
DataFlow::CallCfgNode {
ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() {
result in [this.getArg(0), this.getArgByName("filename")]
}
override DataFlow::Node getAPathArgument() {
result in [this.getArg(0), this.getArgByName("filename")]
}
override DataFlow::Node getOutput() { result = this }
@@ -1136,7 +1277,7 @@ private module StdlibPrivate {
/**
* Gets a name of an attribute of a `pathlib.Path` object that is also a `pathlib.Path` object.
*/
private string pathlibPathAttribute() { result in ["parent"] }
private string pathlibPathAttribute() { result = "parent" }
/**
* Gets a name of a method of a `pathlib.Path` object that returns a `pathlib.Path` object.
@@ -1495,6 +1636,119 @@ private module StdlibPrivate {
result = this.getArg(any(int i | i >= msgIndex))
}
}
// ---------------------------------------------------------------------------
// re
// ---------------------------------------------------------------------------
/**
 * The name of a method in the `re` module that immediately executes a regular expression.
 *
 * See https://docs.python.org/3/library/re.html#module-contents
 */
private class RegexExecutionMethod extends string {
  RegexExecutionMethod() {
    this in ["match", "fullmatch", "search", "split", "findall", "finditer"]
    or
    this in ["sub", "subn"]
  }

  /**
   * Gets the index of the argument representing the string to be searched by a regex:
   * `2` for `sub` and `subn`, `1` for every other method.
   */
  int getStringArgIndex() { if this in ["sub", "subn"] then result = 2 else result = 1 }
}
/**
 * A call to a method from the `re` module that immediately executes a regular expression.
 *
 * See `RegexExecutionMethod`.
 */
private class DirectRegexExecution extends DataFlow::CallCfgNode, RegexExecution::Range {
  /** The name of the `re` function being called. */
  RegexExecutionMethod method;

  DirectRegexExecution() {
    exists(API::Node reFunction | reFunction = API::moduleImport("re").getMember(method) |
      this = reFunction.getACall()
    )
  }

  /** Gets the regex: the first positional argument or the `pattern` keyword argument. */
  override DataFlow::Node getRegex() {
    result = this.getArg(0)
    or
    result = this.getArgByName("pattern")
  }

  /** Gets the searched string: the positional argument at the method's string index, or `string`. */
  override DataFlow::Node getString() {
    exists(int stringIndex | stringIndex = method.getStringArgIndex() |
      result = this.getArg(stringIndex)
    )
    or
    result = this.getArgByName("string")
  }

  /** Gets the name of the executed function, prefixed with `re.`. */
  override string getName() { result = "re." + method }
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
  /**
   * Gets a reference to the result of an `re.compile` call whose pattern argument is
   * `regex`, tracked with type tracker `t`.
   */
  private DataFlow::TypeTrackingNode compiledRegex(DataFlow::TypeTracker t, DataFlow::Node regex) {
    t.start() and
    exists(DataFlow::CallCfgNode compileCall |
      compileCall = API::moduleImport("re").getMember("compile").getACall() and
      result = compileCall
    |
      // the pattern is the first positional argument or the `pattern` keyword argument
      regex = compileCall.getArg(0)
      or
      regex = compileCall.getArgByName("pattern")
    )
    or
    exists(DataFlow::TypeTracker prev | result = compiledRegex(prev, regex).track(prev, t))
  }

  /** Gets a reference to a regex object that was compiled by `re.compile` from `regex`. */
  DataFlow::Node compiledRegex(DataFlow::Node regex) {
    exists(DataFlow::TypeTrackingNode source |
      source = compiledRegex(DataFlow::TypeTracker::end(), regex)
    |
      source.flowsTo(result)
    )
  }
}
private import CompiledRegexes
/**
 * A method call on a compiled regular expression (obtained via `re.compile`) that
 * executes the regular expression.
 *
 * Given the following example:
 *
 * ```py
 * pattern = re.compile(input)
 * pattern.match(s)
 * ```
 *
 * This class identifies that `re.compile` compiles `input` and that the compiled
 * object's `match` method is called afterwards. As a result, `this` refers to
 * `pattern.match(s)` and `this.getRegex()` returns the node for `input`
 * (`re.compile`'s first argument).
 *
 * See `RegexExecutionMethod`.
 *
 * See https://docs.python.org/3/library/re.html#regular-expression-objects
 */
private class CompiledRegexExecution extends DataFlow::MethodCallNode, RegexExecution::Range {
  /** The pattern argument of the `re.compile` call that produced the receiver. */
  DataFlow::Node regexNode;
  /** The name of the method called on the compiled regex. */
  RegexExecutionMethod method;

  CompiledRegexExecution() {
    exists(DataFlow::Node compiled | compiled = compiledRegex(regexNode) |
      this.calls(compiled, method)
    )
  }

  /** Gets the node for the pattern that was passed to `re.compile`. */
  override DataFlow::Node getRegex() { result = regexNode }

  /**
   * Gets the searched string. The positional index is one lower than for the
   * module-level function, since the pattern is not passed as an argument here.
   */
  override DataFlow::Node getString() {
    exists(int stringIndex | stringIndex = method.getStringArgIndex() - 1 |
      result = this.getArg(stringIndex)
    )
    or
    result = this.getArgByName("string")
  }

  /** Gets the name of the executed method, prefixed with `re.`. */
  override string getName() { result = "re." + method }
}
/**
 * A call to `re.escape`, modeled as an escaping step of the regex kind.
 *
 * See https://docs.python.org/3/library/re.html#re.escape
 */
private class ReEscapeCall extends Escaping::Range, DataFlow::CallCfgNode {
  ReEscapeCall() {
    exists(API::Node escapeFunction | escapeFunction = API::moduleImport("re").getMember("escape") |
      this = escapeFunction.getACall()
    )
  }

  /** Gets the escaped input: the first positional argument or the `pattern` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("pattern")
  }

  /** Gets the escaped value, which is the call node itself. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the kind of escaping performed, as given by `Escaping::getRegexKind()`. */
  override string getKind() { result = Escaping::getRegexKind() }
}
}
// ---------------------------------------------------------------------------

View File

@@ -318,7 +318,7 @@ private module Tornado {
]
}
override string getMethodName() { result in ["full_url"] }
override string getMethodName() { result = "full_url" }
override string getAsyncMethodName() { none() }
}

View File

@@ -58,7 +58,7 @@ module Werkzeug {
override string getAttributeName() { none() }
override string getMethodName() { result in ["getlist"] }
override string getMethodName() { result = "getlist" }
override string getAsyncMethodName() { none() }
}

View File

@@ -68,7 +68,7 @@ module Yarl {
]
}
override string getMethodName() { result in ["human_repr"] }
override string getMethodName() { result = "human_repr" }
override string getAsyncMethodName() { none() }
}

View File

@@ -79,7 +79,7 @@ class Value extends TObject {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -300,7 +300,7 @@ module PointsToInternal {
ssa_definition_points_to(var.getDefinition(), context, value, origin)
or
exists(EssaVariable prev |
ssaShortCut+(prev, var) and
ssaShortCut(prev, var) and
variablePointsTo(prev, context, value, origin)
)
}

View File

@@ -773,15 +773,18 @@ abstract class RegexString extends Expr {
* string is empty.
*/
predicate multiples(int start, int end, string lower, string upper) {
this.getChar(start) = "{" and
this.getChar(end - 1) = "}" and
exists(string inner | inner = this.getText().substring(start + 1, end - 1) |
inner.regexpMatch("[0-9]+") and
exists(string text, string match, string inner |
text = this.getText() and
end = start + match.length() and
inner = match.substring(1, match.length() - 1)
|
match = text.regexpFind("\\{[0-9]+\\}", _, start) and
lower = inner and
upper = lower
or
inner.regexpMatch("[0-9]*,[0-9]*") and
exists(int commaIndex | commaIndex = inner.indexOf(",") |
match = text.regexpFind("\\{[0-9]*,[0-9]*\\}", _, start) and
exists(int commaIndex |
commaIndex = inner.indexOf(",") and
lower = inner.prefix(commaIndex) and
upper = inner.suffix(commaIndex + 1)
)

View File

@@ -46,7 +46,7 @@ class CustomPathNode extends TCustomPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -60,8 +60,8 @@ module PolynomialReDoS {
RegExpTerm t;
RegexExecutionAsSink() {
exists(CompiledRegexes::RegexExecution re |
re.getRegexNode().asExpr() = t.getRegex() and
exists(RegexExecution re |
re.getRegex().asExpr() = t.getRegex() and
this = re.getString()
) and
t.isRootTerm()
@@ -76,137 +76,3 @@ module PolynomialReDoS {
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
}
/** Helper module for tracking compiled regexes. */
private module CompiledRegexes {
// TODO: This module should be refactored and merged with the experimental work done on detecting
// regex injections, such that this can be expressed from just using a concept.
/** A configuration for finding uses of compiled regexes. */
class RegexDefinitionConfiguration extends DataFlow2::Configuration {
RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
}
/** A regex compilation. */
class RegexDefinitonSource extends DataFlow::CallCfgNode {
DataFlow::Node regexNode;
RegexDefinitonSource() {
this = API::moduleImport("re").getMember("compile").getACall() and
regexNode in [this.getArg(0), this.getArgByName("pattern")]
}
/** Gets the regex that is being compiled by this node. */
RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() }
/** Gets the data flow node for the regex being compiled by this node. */
DataFlow::Node getRegexNode() { result = regexNode }
}
/** A use of a compiled regex. */
class RegexDefinitionSink extends DataFlow::Node {
RegexExecutionMethod method;
DataFlow::CallCfgNode executingCall;
RegexDefinitionSink() {
exists(DataFlow::AttrRead reMethod |
executingCall.getFunction() = reMethod and
reMethod.getAttributeName() = method and
this = reMethod.getObject()
)
}
/** Gets the method used to execute the regex. */
RegexExecutionMethod getMethod() { result = method }
/** Gets the data flow node for the executing call. */
DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
}
/** A data flow node executing a regex. */
abstract class RegexExecution extends DataFlow::Node {
/** Gets the data flow node for the regex being compiled by this node. */
abstract DataFlow::Node getRegexNode();
/** Gets a dataflow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
}
private class RegexExecutionMethod extends string {
RegexExecutionMethod() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/** Gets the index of the argument representing the string to be searched by a regex. */
int stringArg(RegexExecutionMethod method) {
method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
result = 1
or
method in ["sub", "subn"] and
result = 2
}
/**
* A class to find `re` methods immediately executing an expression.
*
* See `RegexExecutionMethods`
*/
class DirectRegex extends DataFlow::CallCfgNode, RegexExecution {
RegexExecutionMethod method;
DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }
override DataFlow::Node getRegexNode() {
result in [this.getArg(0), this.getArgByName("pattern")]
}
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method)), this.getArgByName("string")]
}
}
/**
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
*
*
* See `RegexExecutionMethods`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
DataFlow::Node regexNode;
RegexExecutionMethod method;
CompiledRegex() {
exists(
RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink
|
conf.hasFlow(source, sink) and
regexNode = source.getRegexNode() and
method = sink.getMethod() and
this = sink.getExecutingCall()
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override DataFlow::Node getString() {
result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
}
}
}

View File

@@ -59,7 +59,7 @@ module ReflectedXSS {
class HtmlEscapingAsSanitizer extends Sanitizer {
HtmlEscapingAsSanitizer() {
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
// `TaintTracking::Configuration`, we treat the output as a taint-sanitizer. This
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
//

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.SqlAlchemy
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -48,6 +49,13 @@ module SqlInjection {
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
}
/**
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
*/
class TextArgAsSink extends Sink {
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -0,0 +1,37 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `RegexInjection::Configuration` is needed, otherwise
* `RegexInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
module RegexInjection {
// Brings `Source`, `Sink`, `Sanitizer` and `SanitizerGuard` into scope for the configuration below.
import RegexInjectionCustomizations::RegexInjection
/**
* A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,62 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module RegexInjection {
/**
* A data flow source for "regular expression injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A sink for "regular expression injection" vulnerabilities is the execution of a regular expression.
* If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
*/
class Sink extends DataFlow::Node {
// The execution whose regex node is this sink; bound by the characteristic predicate below.
RegexExecution regexExecution;
Sink() { this = regexExecution.getRegex() }
/** Gets the call that executes the regular expression marked by this sink. */
RegexExecution getRegexExecution() { result = regexExecution }
}
/**
* A sanitizer for "regular expression injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "regular expression injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A regex escaping, considered as a sanitizer.
*/
class RegexEscapingAsSanitizer extends Sanitizer {
RegexEscapingAsSanitizer() {
// Due to use-use flow, we want the output rather than an input
// (so the input can still flow to other sinks).
this = any(RegexEscaping esc).getOutput()
}
}
}

View File

@@ -139,8 +139,6 @@ class RegExpRoot extends RegExpTerm {
predicate isRelevant() {
// there is at least one repetition
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
// there are no lookbehinds
not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
// is actually used as a RegExp
isUsedAsRegExp() and
// not excluded for library specific reasons
@@ -479,7 +477,7 @@ private module CharacterClasses {
result = ["0", "9"]
or
cc.getValue() = "s" and
result = [" "]
result = " "
or
cc.getValue() = "w" and
result = ["a", "Z", "_", "0", "9"]
@@ -492,7 +490,7 @@ private module CharacterClasses {
result = "9"
or
cc.getValue() = "s" and
result = [" "]
result = " "
or
cc.getValue() = "w" and
result = "a"

View File

@@ -29,7 +29,7 @@ private predicate pyxl_tag(Call c, string name) {
}
class PyxlHtmlTag extends PyxlTag {
PyxlHtmlTag() { this.getPyxlTagName().prefix(2) = "x_" }
PyxlHtmlTag() { this.getPyxlTagName().matches("x\\_%") }
string getTagName() { result = this.getPyxlTagName().suffix(2) }

View File

@@ -69,7 +69,7 @@ class Object extends @py_object {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -33,7 +33,7 @@ class WsgiEnvironment extends TaintKind {
(
text = "QUERY_STRING" or
text = "PATH_INFO" or
text.prefix(5) = "HTTP_"
text.matches("HTTP\\_%")
)
)
}

View File

@@ -24,7 +24,7 @@ class XMLLocatable extends @xmllocatable, TXMLLocatable {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -108,7 +108,7 @@ class XMLParent extends @xmlparent {
}
/** Gets the text value contained in this XML parent. */
string getTextValue() { result = allCharactersString() }
string getTextValue() { result = this.allCharactersString() }
/** Gets a printable representation of this XML parent. */
string toString() { result = this.getName() }
@@ -119,7 +119,7 @@ class XMLFile extends XMLParent, File {
XMLFile() { xmlEncoding(this, _) }
/** Gets a printable representation of this XML file. */
override string toString() { result = getName() }
override string toString() { result = this.getName() }
/** Gets the name of this XML file. */
override string getName() { result = File.super.getAbsolutePath() }
@@ -129,14 +129,14 @@ class XMLFile extends XMLParent, File {
*
* Gets the path of this XML file.
*/
deprecated string getPath() { result = getAbsolutePath() }
deprecated string getPath() { result = this.getAbsolutePath() }
/**
* DEPRECATED: Use `getParentContainer().getAbsolutePath()` instead.
*
* Gets the path of the folder that contains this XML file.
*/
deprecated string getFolder() { result = getParentContainer().getAbsolutePath() }
deprecated string getFolder() { result = this.getParentContainer().getAbsolutePath() }
/** Gets the encoding of this XML file. */
string getEncoding() { xmlEncoding(this, result) }
@@ -200,7 +200,7 @@ class XMLDTD extends XMLLocatable, @xmldtd {
*/
class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
/** Holds if this XML element has the given `name`. */
predicate hasName(string name) { name = getName() }
predicate hasName(string name) { name = this.getName() }
/** Gets the name of this XML element. */
override string getName() { xmlElements(this, result, _, _, _) }
@@ -239,7 +239,7 @@ class XMLElement extends @xmlelement, XMLParent, XMLLocatable {
string getAttributeValue(string name) { result = this.getAttribute(name).getValue() }
/** Gets a printable representation of this XML element. */
override string toString() { result = getName() }
override string toString() { result = this.getName() }
}
/**

View File

@@ -120,16 +120,11 @@ svnchurn(
Python dbscheme
****************************/
/* fromSource is ignored */
files(unique int id: @file,
varchar(900) name: string ref,
varchar(900) simple: string ref,
varchar(900) ext: string ref,
int fromSource: int ref);
varchar(900) name: string ref);
folders(unique int id: @folder,
varchar(900) name: string ref,
varchar(900) simple: string ref);
varchar(900) name: string ref);
@container = @folder | @file;

View File

@@ -4331,18 +4331,6 @@
<k>name</k>
<v>3066</v>
</e>
<e>
<k>simple</k>
<v>1294</v>
</e>
<e>
<k>ext</k>
<v>1</v>
</e>
<e>
<k>fromSource</k>
<v>1</v>
</e>
</columnsizes>
<dependencies>
<dep>
@@ -4362,54 +4350,6 @@
</val>
</dep>
<dep>
<src>id</src>
<trg>simple</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>3066</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>id</src>
<trg>ext</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>3066</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>id</src>
<trg>fromSource</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>3066</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>name</src>
<trg>id</trg>
<val>
@@ -4425,276 +4365,6 @@
</hist>
</val>
</dep>
<dep>
<src>name</src>
<trg>simple</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>3066</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>name</src>
<trg>ext</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>3066</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>name</src>
<trg>fromSource</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>3066</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>simple</src>
<trg>id</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>1058</v>
</b>
<b>
<a>2</a>
<b>3</b>
<v>132</v>
</b>
<b>
<a>3</a>
<b>38</b>
<v>98</v>
</b>
<b>
<a>47</a>
<b>646</b>
<v>6</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>simple</src>
<trg>name</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>1058</v>
</b>
<b>
<a>2</a>
<b>3</b>
<v>132</v>
</b>
<b>
<a>3</a>
<b>38</b>
<v>98</v>
</b>
<b>
<a>47</a>
<b>646</b>
<v>6</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>simple</src>
<trg>ext</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>1294</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>simple</src>
<trg>fromSource</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>1294</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>ext</src>
<trg>id</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>3066</a>
<b>3067</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>ext</src>
<trg>name</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>3066</a>
<b>3067</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>ext</src>
<trg>simple</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1294</a>
<b>1295</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>ext</src>
<trg>fromSource</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>fromSource</src>
<trg>id</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>3066</a>
<b>3067</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>fromSource</src>
<trg>name</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>3066</a>
<b>3067</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>fromSource</src>
<trg>simple</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1294</a>
<b>1295</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>fromSource</src>
<trg>ext</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>1</v>
</b>
</bs>
</hist>
</val>
</dep>
</dependencies>
</relation>
<relation>
@@ -4709,10 +4379,6 @@
<k>name</k>
<v>686</v>
</e>
<e>
<k>simple</k>
<v>538</v>
</e>
</columnsizes>
<dependencies>
<dep>
@@ -4732,22 +4398,6 @@
</val>
</dep>
<dep>
<src>id</src>
<trg>simple</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>686</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>name</src>
<trg>id</trg>
<val>
@@ -4763,74 +4413,6 @@
</hist>
</val>
</dep>
<dep>
<src>name</src>
<trg>simple</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>686</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>simple</src>
<trg>id</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>481</v>
</b>
<b>
<a>2</a>
<b>4</b>
<v>45</v>
</b>
<b>
<a>4</a>
<b>27</b>
<v>12</v>
</b>
</bs>
</hist>
</val>
</dep>
<dep>
<src>simple</src>
<trg>name</trg>
<val>
<hist>
<budget>12</budget>
<bs>
<b>
<a>1</a>
<b>2</b>
<v>481</v>
</b>
<b>
<a>2</a>
<b>4</b>
<v>45</v>
</b>
<b>
<a>4</a>
<b>27</b>
<v>12</v>
</b>
</bs>
</hist>
</val>
</dep>
</dependencies>
</relation>
<relation>

1207
python/ql/lib/tutorial.qll Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -12,88 +12,12 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.functions.ModificationOfParameterWithDefault
import DataFlow::PathGraph
predicate safe_method(string name) {
name = "count" or
name = "index" or
name = "copy" or
name = "get" or
name = "has_key" or
name = "items" or
name = "keys" or
name = "values" or
name = "iteritems" or
name = "iterkeys" or
name = "itervalues" or
name = "__contains__" or
name = "__getitem__" or
name = "__getattribute__"
}
/** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */
private boolean mutableDefaultValue(Parameter p) {
exists(Dict d | p.getDefault() = d |
exists(d.getAKey()) and result = true
or
not exists(d.getAKey()) and result = false
)
or
exists(List l | p.getDefault() = l |
exists(l.getAnElt()) and result = true
or
not exists(l.getAnElt()) and result = false
)
}
class NonEmptyMutableValue extends TaintKind {
NonEmptyMutableValue() { this = "non-empty mutable value" }
}
class EmptyMutableValue extends TaintKind {
EmptyMutableValue() { this = "empty mutable value" }
override boolean booleanValue() { result = false }
}
class MutableDefaultValue extends TaintSource {
boolean nonEmpty;
MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.(NameNode).getNode()) }
override string toString() { result = "mutable default value" }
override predicate isSourceOf(TaintKind kind) {
nonEmpty = false and kind instanceof EmptyMutableValue
or
nonEmpty = true and kind instanceof NonEmptyMutableValue
}
}
private ClassValue mutable_class() {
result = Value::named("list") or
result = Value::named("dict")
}
class Mutation extends TaintSink {
Mutation() {
exists(AugAssign a | a.getTarget().getAFlowNode() = this)
or
exists(Call c, Attribute a | c.getFunc() = a |
a.getObject().getAFlowNode() = this and
not safe_method(a.getName()) and
this.(ControlFlowNode).pointsTo().getClass() = mutable_class()
)
}
override predicate sinks(TaintKind kind) {
kind instanceof EmptyMutableValue
or
kind instanceof NonEmptyMutableValue
}
}
from TaintedPathSource src, TaintedPathSink sink
where src.flowsTo(sink)
select sink.getSink(), src, sink, "$@ flows to here and is mutated.", src.getSource(),
from
ModificationOfParameterWithDefault::Configuration config, DataFlow::PathNode source,
DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is mutated.", source.getNode(),
"Default value"

View File

@@ -132,12 +132,12 @@ predicate incorrect_special_method_defn(
else
if required < func.minParameters()
then message = "Too many parameters" and show_counts = true
else
if func.minParameters() < required and not func.getScope().hasVarArg()
then
message = (required - func.minParameters()) + " default values(s) will never be used" and
show_counts = false
else none()
else (
func.minParameters() < required and
not func.getScope().hasVarArg() and
message = (required - func.minParameters()) + " default values(s) will never be used" and
show_counts = false
)
)
}

View File

@@ -197,7 +197,7 @@ class CommentedOutCodeBlock extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -20,7 +20,7 @@ class RangeFunction extends Function {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -40,7 +40,7 @@ class RangeClass extends Class {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -9,6 +9,13 @@ If a database query (such as a SQL or NoSQL query) is built from
user-provided data without sufficient sanitization, a user
may be able to run malicious database queries.
</p>
<p>
This also includes using the <code>TextClause</code> class in the
<code><a href="https://pypi.org/project/SQLAlchemy/">SQLAlchemy</a></code> PyPI package,
which is used to represent a literal SQL fragment and is inserted directly into the
final SQL when used in a query built using the ORM.
</p>
</overview>
<recommendation>
@@ -52,5 +59,6 @@ vulnerable to SQL injection attacks. In this example, if <code>username</code> w
<references>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
<li><a href="https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text.params.text">SQLAlchemy documentation for TextClause</a>.</li>
</references>
</qhelp>

View File

@@ -5,25 +5,24 @@
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
// determine precision above
import python
import experimental.semmle.python.security.injection.RegexInjection
private import semmle.python.Concepts
import semmle.python.security.injection.RegexInjection
import DataFlow::PathGraph
from
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
RegexInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexExecution regexExecution
where
config.hasFlowPath(source, sink) and
regexInjectionSink = sink.getNode() and
methodAttribute = regexInjectionSink.getRegexMethod()
regexExecution = sink.getNode().(RegexInjection::Sink).getRegexExecution()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", methodAttribute,
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
source.getNode(), "user-provided value", regexExecution, regexExecution.getName()

View File

@@ -88,7 +88,7 @@ class CredentialSink extends TaintSink {
CredentialSink() {
exists(string name |
name.regexpMatch(getACredentialRegex()) and
not name.suffix(name.length() - 4) = "file"
not name.matches("%file")
|
any(FunctionValue func).getNamedArgumentForCall(_, name) = this
or

View File

@@ -19,6 +19,7 @@ predicate unused_local(Name unused, LocalVariable v) {
def.getVariable() = v and
def.isUnused() and
not exists(def.getARedef()) and
not exists(annotation_without_assignment(v)) and
def.isRelevant() and
not v = any(Nonlocal n).getAVariable() and
not exists(def.getNode().getParentNode().(FunctionDef).getDefinedFunction().getADecorator()) and
@@ -26,6 +27,17 @@ predicate unused_local(Name unused, LocalVariable v) {
)
}
/**
* Gets any annotation of the local variable `v` that does not also reassign its value.
*
* TODO: This predicate should not be needed. Rather, annotated "assignments" that do not actually
* assign a value should not result in the creation of an SSA variable (which then goes unused).
*/
private AnnAssign annotation_without_assignment(LocalVariable v) {
result.getTarget() = v.getAStore() and
not exists(result.getValue())
}
from Name unused, LocalVariable v
where
unused_local(unused, v) and

View File

@@ -88,7 +88,7 @@ class SuppressionScope extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -141,7 +141,7 @@ predicate builtin_object_consistency(string clsname, string problem, string what
or
not exists(o.toString()) and
problem = "no toString" and
not exists(string name | name.prefix(7) = "_semmle" | py_special_objects(o, name)) and
not exists(string name | name.matches("\\_semmle%") | py_special_objects(o, name)) and
not o = unknownValue()
)
}

View File

@@ -477,7 +477,7 @@ class NiceLocationExpr extends @py_expr {
* The location spans column `bc` of line `bl` to
* column `ec` of line `el` in file `f`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(string f, int bl, int bc, int el, int ec) {
/* Attribute location for x.y is that of 'y' so that url does not overlap with that of 'x' */

View File

@@ -0,0 +1,49 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>
<p>Forgery can occur if a user provides some input creating the appearance of multiple
log entries. This can include unescaped new-line characters, or HTML or other markup.</p>
</overview>
<recommendation>
<p>
User input should be suitably sanitized before it is logged.
</p>
<p>
If the log entries are plain text then line breaks should be removed from user input, using for example
<code>replace(old, new)</code> or similar. Care should also be taken that user input is clearly marked
in log entries, and that a malicious user cannot cause confusion in other ways.
</p>
<p>
For log entries that will be displayed in HTML, user input should be HTML encoded before being logged, to prevent forgery and
other forms of HTML injection.
</p>
</recommendation>
<example>
<p>
In the example, the name provided by the user is recorded using a log output function (<code>logging.info</code>, <code>app.logger.info</code>, etc.).
In all four cases, the name provided by the user is logged without any processing. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
as a parameter, the log entry will be split into two lines: the first line is <code>User name: Guest</code> and the second line is <code>User name: Admin</code>.
</p>
<sample src="LogInjectionBad.py" />
<p>
In the good example, the program uses the <code>replace</code> function to process the parameter provided by the user, replacing <code>\r\n</code> and <code>\n</code>
with empty strings. This reduces the risk of log injection to a certain extent.
</p>
<sample src="LogInjectionGood.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/Log_Injection">Log Injection</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,20 @@
/**
* @name Log Injection
* @description Building log entries from user-controlled data is vulnerable to
* insertion of forged log entries by a malicious user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/log-injection
* @tags security
* external/cwe/cwe-117
*/
import python
import experimental.semmle.python.security.injection.LogInjection
import DataFlow::PathGraph
// Report every path along which a user-provided value reaches a log entry.
from LogInjectionFlowConfig cfg, DataFlow::PathNode src, DataFlow::PathNode snk
where cfg.hasFlowPath(src, snk)
select snk.getNode(), src, snk, "$@ flows to log entry.", src.getNode(), "User-provided value"

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc Log Injection
"""
from flask import Flask
from flask import request
from django.utils.log import request_logger
import logging
logging.basicConfig(level=logging.DEBUG)
app = Flask(__name__)
@app.route('/bad1')
def bad1():
    """Write the raw ``name`` query parameter to the Flask application logger."""
    user_name = request.args.get('name')
    message = 'User name: ' + user_name
    app.logger.info(message)  # Bad
    return 'bad1'
@app.route('/bad2')
def bad2():
    """Write the raw ``name`` query parameter to the root logger via ``logging.info``."""
    user_name = request.args.get('name')
    message = 'User name: ' + user_name
    logging.info(message)  # Bad
    return 'bad2'
@app.route('/bad3')
def bad3():
    """Write the raw ``name`` query parameter to Django's ``request_logger``."""
    name = request.args.get('name')
    # `warn` is a deprecated alias of `warning`; call the supported method.
    # The unsanitized value is still logged on purpose: this is the bad example.
    request_logger.warning('User name: ' + name)  # Bad
    return 'bad3'
@app.route('/bad4')
def bad4():
    """Write the raw ``name`` query parameter to a logger obtained from ``logging.getLogger``."""
    user_name = request.args.get('name')
    message = 'User name: ' + user_name
    logging.getLogger('test').debug(message)  # Bad
    return 'bad4'
if __name__ == '__main__':
    # Run the sample application directly, sending application log records to the file `log`.
    app.debug = True
    app.logger.addHandler(logging.FileHandler('log'))
    app.run()

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc Log Injection
"""
from flask import Flask
from flask import request
import logging
logging.basicConfig(level=logging.DEBUG)
app = Flask(__name__)
@app.route('/good1')
def good1():
    """Log the ``name`` query parameter after removing every line-break character."""
    # Default to an empty string so a missing parameter does not raise on `.replace`.
    name = request.args.get('name', '')
    # Remove carriage returns and line feeds separately: stripping only '\r\n' and '\n'
    # would leave a lone '\r' in the entry, which some log viewers render as a new line.
    name = name.replace('\r', '').replace('\n', '')
    logging.info('User name: ' + name)  # Good
    return 'good1'
if __name__ == '__main__':
    # Run the sample application directly, sending application log records to the file `log`.
    app.debug = True
    app.logger.addHandler(logging.FileHandler('log'))
    app.run()

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc ip address spoofing
"""
from flask import Flask
from flask import request
app = Flask(__name__)
@app.route('/bad1')
def bad1():
    """Accept the request only when the client-supplied header starts with ``192.168.``."""
    forwarded_for = request.headers.get('x-forwarded-for')
    if forwarded_for.startswith('192.168.'):
        return 'bad1'
    raise Exception('ip illegal')
@app.route('/bad2')
def bad2():
    """Accept the request only when the client-supplied header equals ``127.0.0.1``."""
    forwarded_for = request.headers.get('x-forwarded-for')
    if forwarded_for != '127.0.0.1':
        raise Exception('ip illegal')
    return 'bad2'
@app.route('/good1')
def good1():
    """Check only the last ``X-Forwarded-For`` entry, the one appended by the nearest proxy."""
    # Default to an empty string so a missing header fails the check instead of raising.
    client_ip = request.headers.get('x-forwarded-for', '')
    # Python lists have no `.length` attribute; index from the end instead.
    # Entries are usually separated by ", ", so surrounding whitespace is stripped.
    client_ip = client_ip.split(',')[-1].strip()
    if not client_ip == '127.0.0.1':
        raise Exception('ip illegal')
    return 'good1'
# Run the sample application directly with Flask's debug flag enabled.
if __name__ == '__main__':
app.debug = True
app.run()

View File

@@ -0,0 +1,35 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>An original client IP address is retrieved from an http header (<code>X-Forwarded-For</code> or <code>X-Real-IP</code> or <code>Proxy-Client-IP</code>
etc.), which is used to ensure security. Attackers can forge the value of these identifiers to
bypass a ban-list, for example.</p>
</overview>
<recommendation>
<p>Do not trust the values of HTTP headers allegedly identifying the originating IP. If you are aware your application will run behind some reverse proxies then the last entry of a <code>X-Forwarded-For</code> header value may be more trustworthy than the rest of it because some reverse proxies append the IP address they observed to the end of any remote-supplied header.</p>
</recommendation>
<example>
<p>The following examples show the bad case and the good case respectively.
In the <code>bad1</code> and <code>bad2</code> methods, the client IP is taken from the <code>X-Forwarded-For</code> header exactly as the client supplied it, so the less-trustworthy leading entries are used in the check. Both of these examples could be deceived by providing a forged HTTP header. The method
<code>good1</code> similarly splits an <code>X-Forwarded-For</code> value, but uses the last, more-trustworthy entry.</p>
<sample src="ClientSuppliedIpUsedInSecurityCheck.py" />
</example>
<references>
<li>Dennis Schneider: <a href="https://www.dennis-schneider.com/blog/prevent-ip-address-spoofing-with-x-forwarded-for-header-and-aws-elb-in-clojure-ring/">
Prevent IP address spoofing with X-Forwarded-For header when using AWS ELB and Clojure Ring</a>
</li>
<li>Security Rule Zero: <a href="https://www.f5.com/company/blog/security-rule-zero-a-warning-about-x-forwarded-for">A Warning about X-Forwarded-For</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,56 @@
/**
* @name IP address spoofing
* @description A remote endpoint identifier is read from an HTTP header. Attackers can modify the value
* of the identifier to forge the client ip.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/ip-address-spoofing
* @tags security
* external/cwe/cwe-348
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs
import ClientSuppliedIpUsedInSecurityCheckLib
import DataFlow::PathGraph
/**
 * A taint-tracking configuration that follows a client IP address read from an HTTP header
 * to a place where it may be used as a security check.
 */
class ClientSuppliedIpUsedInSecurityCheckConfig extends TaintTracking::Configuration {
  ClientSuppliedIpUsedInSecurityCheckConfig() { this = "ClientSuppliedIpUsedInSecurityCheckConfig" }

  override predicate isSource(DataFlow::Node source) {
    source instanceof ClientSuppliedIpUsedInSecurityCheck
  }

  override predicate isSink(DataFlow::Node sink) { sink instanceof PossibleSecurityCheck }

  override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
    // `netaddr.IPAddress(pred)`: the resulting call node carries the taint of its first argument.
    succ = API::moduleImport("netaddr").getMember("IPAddress").getACall() and
    pred = succ.(DataFlow::CallCfgNode).getArg(0)
  }

  override predicate isSanitizer(DataFlow::Node node) {
    // `client_supplied_ip.split(",")[n]` where the index is not the literal `0`
    exists(Subscript subscript, Call splitCall |
      subscript = node.asExpr() and
      splitCall = subscript.getObject() and
      splitCall.getFunc().(Attribute).getName() = "split" and
      splitCall.getAnArg().(StrConst).getText() = "," and
      not subscript.getIndex().(IntegerLiteral).getText() = "0"
    )
  }
}
// Report every path from a client-supplied IP header value to a possible security check.
from ClientSuppliedIpUsedInSecurityCheckConfig cfg, DataFlow::PathNode src, DataFlow::PathNode snk
where cfg.hasFlowPath(src, snk)
select snk.getNode(), src, snk, "IP address spoofing might include code from $@.", src.getNode(),
  "this user input"

View File

@@ -0,0 +1,152 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
/**
* A data flow source of the client ip obtained according to the remote endpoint identifier specified
* (`X-Forwarded-For`, `X-Real-IP`, `Proxy-Client-IP`, etc.) in the header.
*
* For example: `request.headers.get("X-Forwarded-For")`.
*
* This class is abstract: each web framework contributes its own sources by extending it.
*/
abstract class ClientSuppliedIpUsedInSecurityCheck extends DataFlow::CallCfgNode { }
/**
 * A call such as `request.headers.get("X-Forwarded-For")` on a Flask request, where the
 * requested header name is one of the client ip header names.
 */
private class FlaskClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
  FlaskClientSuppliedIpUsedInSecurityCheck() {
    exists(RemoteFlowSource request, DataFlow::AttrRead headers, DataFlow::AttrRead getter |
      request.getSourceType() = "flask.request" and
      // `headers` is `request.headers`
      headers.getObject().getALocalSource() = request and
      headers.getAttributeName() = "headers" and
      // `getter` is `request.headers.get`, `request.headers.get_all` or `request.headers.getlist`
      getter.getObject() = headers and
      getter.getAttributeName() in ["get", "get_all", "getlist"] and
      // `this` is the call to `getter`, asking for a client ip header
      this.getFunction() = getter and
      this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
    )
  }
}
/**
 * A call such as `request.headers.get("X-Forwarded-For")` or
 * `request.META.get("HTTP_X_FORWARDED_FOR")` on a Django request, where the requested
 * name is one of the client ip header names.
 */
private class DjangoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
  DjangoClientSuppliedIpUsedInSecurityCheck() {
    exists(RemoteFlowSource request, DataFlow::AttrRead container, DataFlow::AttrRead getter |
      request.getSourceType() = "django.http.request.HttpRequest" and
      // `container` is `request.headers` or `request.META`
      container.getObject().getALocalSource() = request and
      container.getAttributeName() in ["headers", "META"] and
      // `getter` is the `get` method of `container`
      getter.getObject() = container and
      getter.getAttributeName() = "get" and
      // `this` is the call to `getter`, asking for a client ip header
      this.getFunction() = getter and
      this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
    )
  }
}
/**
 * A call such as `self.request.headers.get("X-Forwarded-For")` in a Tornado request handler,
 * where the requested header name is one of the client ip header names.
 */
private class TornadoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
  TornadoClientSuppliedIpUsedInSecurityCheck() {
    exists(
      RemoteFlowSource handler, DataFlow::AttrRead request, DataFlow::AttrRead headers,
      DataFlow::AttrRead getter
    |
      handler.getSourceType() = "tornado.web.RequestHandler" and
      // `request` is an attribute read on `handler` (for example `handler.request`)
      request.getObject().getALocalSource() = handler and
      // `headers` is `handler.request.headers`
      headers.getObject() = request and
      headers.getAttributeName() = "headers" and
      // `getter` is `handler.request.headers.get` or `handler.request.headers.get_list`
      getter.getObject() = headers and
      getter.getAttributeName() in ["get", "get_list"] and
      // `this` is the call to `getter`, asking for a client ip header
      this.getFunction() = getter and
      this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
    )
  }
}
/**
 * Gets a lower-case header (or `META` key) name that is used to convey the client ip.
 */
private string clientIpParameterName() {
  result =
    [
      // `X-Forwarded-For` spellings
      "x-forwarded-for", "x_forwarded_for", "http_x_forwarded_for", "http-x-forwarded-for",
      // `X-Real-IP` and proxy client ip spellings
      "x-real-ip", "x_real_ip", "proxy-client-ip", "proxy_client_ip", "wl-proxy-client-ip",
      "wl_proxy_client_ip",
      // other CGI-style names
      "http_x_forwarded", "http_x_cluster_client_ip", "http_client_ip", "http_forwarded_for",
      "http_forwarded", "http_via", "remote_addr"
    ]
}
/**
* A data flow sink for ip address forgery vulnerabilities.
*
* This class is abstract: each kind of possible security check is added by extending it.
*/
abstract class PossibleSecurityCheck extends DataFlow::Node { }
/** A SQL statement passed to a SQL execution, considered as a possible security check. */
private class SqlOperationAsSecurityCheck extends PossibleSecurityCheck {
  SqlOperationAsSecurityCheck() { exists(SqlExecution execution | this = execution.getSql()) }
}
/**
* A data flow sink for remote client ip comparison.
*
* For example: `if not ipAddr.startswith('192.168.') : ...` determines whether the client ip starts
* with `192.168.`, and the program can be deceived by forging the ip address.
*
* Three shapes are recognized:
* - the receiver of a `startswith` call whose first argument matches `getIpAddressRegex()`;
* - either operand of an `==`/`!=` comparison whose other operand is a `PrivateHostName` string;
* - either operand of an `in`/`not in` comparison.
*/
private class CompareSink extends PossibleSecurityCheck {
CompareSink() {
// `this.startswith("<ip prefix>")`
exists(Call call |
call.getFunc().(Attribute).getName() = "startswith" and
call.getArg(0).(StrConst).getText().regexpMatch(getIpAddressRegex()) and
not call.getArg(0).(StrConst).getText() = "0:0:0:0:0:0:0:1" and
call.getFunc().(Attribute).getObject() = this.asExpr()
)
or
// `this == "<private host>"` or `"<private host>" == this` (likewise for `!=`)
exists(Compare compare |
(
compare.getOp(0) instanceof Eq or
compare.getOp(0) instanceof NotEq
) and
(
compare.getLeft() = this.asExpr() and
compare.getComparator(0).(StrConst).getText() instanceof PrivateHostName and
not compare.getComparator(0).(StrConst).getText() = "0:0:0:0:0:0:0:1"
or
compare.getComparator(0) = this.asExpr() and
compare.getLeft().(StrConst).getText() instanceof PrivateHostName and
not compare.getLeft().(StrConst).getText() = "0:0:0:0:0:0:0:1"
)
)
or
// `this in x` or `x in this` (likewise for `not in`)
exists(Compare compare |
(
compare.getOp(0) instanceof In or
compare.getOp(0) instanceof NotIn
) and
(
compare.getLeft() = this.asExpr()
or
compare.getComparator(0) = this.asExpr() and
// a left operand that is one of these separator strings is not treated as a check
not compare.getLeft().(StrConst).getText() in ["%", ",", "."]
)
)
}
}
/**
* Gets a regular expression for (possibly partial) IPv4 address strings that start with
* `10.`, `192.168.` or `172.16.` through `172.31.`, optionally followed by further octets.
*/
string getIpAddressRegex() {
result =
"^((10\\.((1\\d{2})?|(2[0-4]\\d)?|(25[0-5])?|([1-9]\\d|[0-9])?)(\\.)?)|(192\\.168\\.)|172\\.(1[6789]|2[0-9]|3[01])\\.)((1\\d{2})?|(2[0-4]\\d)?|(25[0-5])?|([1-9]\\d|[0-9])?)(\\.)?((1\\d{2})?|(2[0-4]\\d)?|(25[0-5])?|([1-9]\\d|[0-9])?)$"
}
/**
 * A string matching private host names of IPv4 and IPv6. Only the host portion is matched,
 * so checking for a port is not necessary.
 *
 * Examples are `localhost`, reserved IPv4 addresses including `127.0.0.1`, `10.x.x.x`,
 * `172.16.x.x` through `172.31.x.x` and `192.168.x.x`, and reserved IPv6 addresses including
 * `[0:0:0:0:0:0:0:1]` and `[::1]`.
 */
private class PrivateHostName extends string {
  bindingset[this]
  PrivateHostName() {
    // The dot in `192\.168` is escaped so that it cannot match an arbitrary character, and the
    // `172.` alternative covers the whole 172.16.0.0/12 block, as `getIpAddressRegex()` does.
    this.regexpMatch("(?i)localhost(?:[:/?#].*)?|127\\.0\\.0\\.1(?:[:/?#].*)?|10(?:\\.[0-9]+){3}(?:[:/?#].*)?|172\\.(?:1[6-9]|2[0-9]|3[01])(?:\\.[0-9]+){2}(?:[:/?#].*)?|192\\.168(?:\\.[0-9]+){2}(?:[:/?#].*)?|\\[?0:0:0:0:0:0:0:1\\]?(?:[:/?#].*)?|\\[?::1\\]?(?:[:/?#].*)?")
  }
}

View File

@@ -0,0 +1,23 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Failing to ensure the utilization of SSL in an LDAP connection can cause the entire communication
to be sent in cleartext making it easier for an attacker to intercept it.</p>
</overview>
<recommendation>
<p>Always set <code>use_ssl</code> to <code>True</code>, call <code>start_tls_s()</code> or set a proper option flag (<code>ldap.OPT_X_TLS_XXXXXX</code>).</p>
</recommendation>
<example>
<p>This example shows both good and bad ways to deal with this issue under Python 3.</p>
<p>The first one sets <code>use_ssl</code> to <code>True</code> as a keyword argument, whereas the second one fails to provide a value for it, so
the default value (<code>False</code>) is used.</p>
<sample src="examples/LDAPInsecureAuth.py" />
</example>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Python Insecure LDAP Authentication
* @description Python LDAP Insecure LDAP Authentication
* @kind path-problem
* @problem.severity error
* @id py/insecure-ldap-auth
* @tags experimental
* security
* external/cwe/cwe-522
* external/cwe/cwe-523
*/
// determine precision above
import python
import DataFlow::PathGraph
import experimental.semmle.python.security.LDAPInsecureAuth
// Report every path that ends at an LDAP host which is authenticated against insecurely.
from LDAPInsecureAuthConfig cfg, DataFlow::PathNode src, DataFlow::PathNode snk
where cfg.hasFlowPath(src, snk)
select snk.getNode(), src, snk, "$@ is authenticated insecurely.", snk.getNode(), "This LDAP host"

View File

@@ -0,0 +1,20 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
app = Flask(__name__)
@app.route("/good")
def good():
    """Search the directory over a connection whose server is created with ``use_ssl=True``."""
    server = Server(host, port, use_ssl=True)
    connection = Connection(server, dn, password)
    connection.search(dn, search_filter)
    return connection.response
@app.route("/bad")
def bad():
    """Search the directory over a connection whose server is created without ``use_ssl``."""
    server = Server(host, port)
    connection = Connection(server, dn, password)
    connection.search(dn, search_filter)
    return connection.response

View File

@@ -0,0 +1,33 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
import XpathInjection::XpathInjection
import DataFlow::PathGraph
/**
 * A taint-tracking configuration for finding XPath queries that are built from
 * user-controlled sources.
 */
class XpathInjectionConfiguration extends TaintTracking::Configuration {
  // The string value is the identity of the configuration, so it is named after this class
  // rather than reusing the name of an unrelated configuration.
  XpathInjectionConfiguration() { this = "XpathInjectionConfiguration" }

  override predicate isSource(DataFlow::Node source) { source instanceof Source }

  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
}
// Report every path from a user-provided value to an XPath query.
from XpathInjectionConfiguration cfg, DataFlow::PathNode src, DataFlow::PathNode snk
where cfg.hasFlowPath(src, snk)
select snk, src, snk, "This Xpath query depends on $@.", src, "a user-provided value"

View File

@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `XpathInjection::Configuration` is needed, otherwise
* `XpathInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
 * Contains the taint-tracking configuration used to detect "Xpath Injection" vulnerabilities.
 */
module XpathInjection {
  import XpathInjectionCustomizations::XpathInjection

  /**
   * A taint-tracking configuration whose sources, sinks, sanitizers and sanitizer guards are
   * the ones declared for "Xpath Injection" vulnerabilities.
   */
  class Configuration extends TaintTracking::Configuration {
    Configuration() { this = "Xpath Injection" }

    override predicate isSink(DataFlow::Node node) { node instanceof Sink }

    override predicate isSource(DataFlow::Node node) { node instanceof Source }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard barrierGuard) {
      barrierGuard instanceof SanitizerGuard
    }

    override predicate isSanitizer(DataFlow::Node sanitized) { sanitized instanceof Sanitizer }
  }
}

View File

@@ -0,0 +1,105 @@
/**
* Provides class and predicates to track external data that
* may represent malicious xpath query objects.
*
* This module is intended to be imported into a taint-tracking query.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/** Models Xpath Injection related classes and functions */
module XpathInjection {
/**
* A data flow source for "XPath injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "XPath injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "XPath injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "XPath injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/** Returns an API node referring to `lxml.etree` */
API::Node etree() { result = API::moduleImport("lxml").getMember("etree") }
/** Returns an API node referring to `lxml.etree.fromstring` */
API::Node etreeFromString() { result = etree().getMember("fromstring") }
/** Returns an API node referring to `lxml.etree.parse` */
API::Node etreeParse() { result = etree().getMember("parse") }
/** Returns an API node referring to `libxml2.parseFile` */
API::Node libxml2parseFile() { result = API::moduleImport("libxml2").getMember("parseFile") }
/**
* A Sink representing an argument to `etree.XPath` or `etree.ETXPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
* find_text = etree.ETXPath("`sink`")
*/
private class EtreeXpathArgument extends Sink {
EtreeXpathArgument() { this = etree().getMember(["XPath", "ETXPath"]).getACall().getArg(0) }
}
/**
* A Sink representing an argument to the `xpath` call on a tree returned by `etree.fromstring`.
*
* from lxml import etree
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
* find_text = root.xpath("`sink`")
*/
private class EtreeFromstringXpathArgument extends Sink {
EtreeFromstringXpathArgument() {
this = etreeFromString().getReturn().getMember("xpath").getACall().getArg(0)
}
}
/**
* A Sink representing an argument to the `xpath` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends Sink {
ParseXpathArgument() { this = etreeParse().getReturn().getMember("xpath").getACall().getArg(0) }
}
/**
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends Sink {
ParseFileXpathEvalArgument() {
this = libxml2parseFile().getReturn().getMember("xpathEval").getACall().getArg(0)
}
}
}

View File

@@ -1,36 +0,0 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
import python
import semmle.python.security.Paths
import semmle.python.security.strings.Untrusted
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.Xpath
class XpathInjectionConfiguration extends TaintTracking::Configuration {
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XpathInjection::XpathInjectionSink
}
}
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -14,71 +14,34 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling Regular Expression-related APIs. */
module RegexExecution {
/** Provides classes for modeling log related APIs. */
module LogOutput {
/**
* A data-flow node that executes a regular expression.
* A data flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the executed expression.
* Get the parameter value of the log output function.
*/
abstract DataFlow::Node getRegexNode();
/**
* Gets the library used to execute the regular expression.
*/
abstract string getRegexModule();
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that executes a regular expression.
* A data flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
* extend `LogOutput::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
class LogOutput extends DataFlow::Node {
LogOutput::Range range;
RegexExecution() { this = range }
LogOutput() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
string getRegexModule() { result = range.getRegexModule() }
}
/** Provides classes for modeling Regular Expression escape-related APIs. */
module RegexEscape {
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the escaped expression.
*/
abstract DataFlow::Node getRegexNode();
}
}
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexEscape::Range` instead.
*/
class RegexEscape extends DataFlow::Node {
RegexEscape::Range range;
RegexEscape() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
@@ -156,10 +119,20 @@ module LDAPBind {
* extend `LDAPBind` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the binding host.
*/
abstract DataFlow::Node getHost();
/**
* Gets the argument containing the binding expression.
*/
abstract DataFlow::Node getPassword();
/**
* Holds if the binding process use SSL.
*/
abstract predicate useSSL();
}
}
@@ -174,7 +147,20 @@ class LDAPBind extends DataFlow::Node {
LDAPBind() { this = range }
/**
* Gets the argument containing the binding host.
*/
DataFlow::Node getHost() { result = range.getHost() }
/**
* Gets the argument containing the binding expression.
*/
DataFlow::Node getPassword() { result = range.getPassword() }
/**
* Holds if the binding process use SSL.
*/
predicate useSSL() { range.useSSL() }
}
/** Provides classes for modeling SQL sanitization libraries. */

View File

@@ -8,3 +8,4 @@ private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log

View File

@@ -88,6 +88,11 @@ private module LDAP {
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
}
/**
* A use of the `OPT_X_TLS_DEMAND` or `OPT_X_TLS_HARD` member of the API node returned by `ldap()`,
* that is, one of the option values treated here as demanding SSL/TLS.
*/
private class LDAPSSLOptions extends DataFlow::Node {
LDAPSSLOptions() { this = ldap().getMember("OPT_X_TLS_" + ["DEMAND", "HARD"]).getAUse() }
}
/**
* A class to find `ldap` methods binding a connection.
*
@@ -99,6 +104,44 @@ private module LDAP {
override DataFlow::Node getPassword() {
result in [this.getArg(1), this.getArgByName("cred")]
}
/**
 * Gets the first argument of the `ldapInitialize()` call that created the connection object
 * this bind method is called on.
 */
override DataFlow::Node getHost() {
  exists(DataFlow::CallCfgNode initCall |
    initCall = ldapInitialize().getACall() and
    initCall = this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource()
  |
    result = initCall.getArg(0)
  )
}
/**
* Holds if a TLS-demanding option is set through `ldap.set_option`, or if the connection this
* bind is called on has `start_tls_s()` called on it or a TLS-demanding option set on it.
*/
override predicate useSSL() {
// use initialize to correlate `this` and so avoid FP in several instances
exists(DataFlow::CallCfgNode initialize |
// ldap.set_option(ldap.OPT_X_TLS_%s)
// NOTE(review): this disjunct does not mention `initialize` or `this`, so it holds for every
// bind as soon as any such `set_option` call exists - confirm that this is intended.
ldap().getMember("set_option").getACall().getArg(_) instanceof LDAPSSLOptions
or
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
initialize = ldapInitialize().getACall() and
(
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getObject().getALocalSource() = initialize and
startTLS.getMethodName() = "start_tls_s"
)
or
// ldap_connection.set_option(ldap.OPT_X_TLS_%s, True)
exists(DataFlow::CallCfgNode setOption |
setOption.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
initialize and
setOption.getFunction().(DataFlow::AttrRead).getAttributeName() = "set_option" and
setOption.getArg(0) instanceof LDAPSSLOptions and
// the option must not be explicitly switched off with a `False` value
not DataFlow::exprNode(any(False falseExpr))
.(DataFlow::LocalSourceNode)
.flowsTo(setOption.getArg(1))
)
)
)
}
}
/**
@@ -166,6 +209,31 @@ private module LDAP {
override DataFlow::Node getPassword() {
result in [this.getArg(2), this.getArgByName("password")]
}
/**
 * Gets the first argument of the `ldap3Server()` call whose result is passed as the first
 * argument of this connection.
 */
override DataFlow::Node getHost() {
  exists(DataFlow::CallCfgNode server |
    this.getArg(0).getALocalSource() = server and
    server = ldap3Server().getACall()
  |
    result = server.getArg(0)
  )
}
/**
 * Holds if the server passed to this connection is created with `use_ssl` set to `True`, or if
 * TLS is started on this connection.
 */
override predicate useSSL() {
  // `Server(host, port, True)` or `Server(host, port, use_ssl=True)`
  exists(DataFlow::CallCfgNode serverCall |
    serverCall = ldap3Server().getACall() and
    this.getArg(0).getALocalSource() = serverCall and
    DataFlow::exprNode(any(True trueExpr))
        .(DataFlow::LocalSourceNode)
        .flowsTo([serverCall.getArg(2), serverCall.getArgByName("use_ssl")])
  )
  or
  // `connection.start_tls()`: the ldap3 `Connection` method is named `start_tls`.
  // `start_tls_s` (the python-ldap spelling) is still accepted for backward compatibility.
  exists(DataFlow::MethodCallNode startTLS |
    startTLS.getMethodName() in ["start_tls", "start_tls_s"] and
    startTLS.getObject().getALocalSource() = this
  )
}
}
/**

View File

@@ -0,0 +1,118 @@
/**
* Provides classes modeling security-relevant aspects of the log libraries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.frameworks.Flask
private import semmle.python.ApiGraphs
/**
 * Provides models for Python's log-related libraries.
 */
private module log {
  /**
   * Log output method list.
   *
   * See https://docs.python.org/3/library/logging.html#logger-objects
   */
  private class LogOutputMethods extends string {
    LogOutputMethods() {
      this in ["info", "error", "warn", "warning", "debug", "critical", "exception", "log"]
    }
  }

  /**
   * A call to a log output method, together with the name of the method that is called.
   *
   * The name comes from the API graph member rather than from the syntactic form of the callee,
   * so calls through a directly imported function (`from logging import info; info(msg)`) have
   * inputs too.
   */
  abstract private class LogMethodCall extends DataFlow::CallCfgNode, LogOutput::Range {
    /** Gets the name of the log output method that this call invokes. */
    abstract string getLogMethodName();

    override DataFlow::Node getAnInput() {
      this.getLogMethodName() != "log" and
      result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
      or
      // `log(level, msg, ...)`: the first positional argument and the `level` keyword are skipped
      this.getLogMethodName() = "log" and
      result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
    }
  }

  /**
   * The class used to find the log output method of the `logging` module.
   *
   * See `LogOutputMethods`
   */
  private class LoggingCall extends LogMethodCall {
    LogOutputMethods methodName;

    LoggingCall() { this = API::moduleImport("logging").getMember(methodName).getACall() }

    override string getLogMethodName() { result = methodName }
  }

  /**
   * The class used to find log output methods related to the `logging.getLogger` instance.
   *
   * See `LogOutputMethods`
   */
  private class LoggerCall extends LogMethodCall {
    LogOutputMethods methodName;

    LoggerCall() {
      this =
        API::moduleImport("logging")
            .getMember("getLogger")
            .getReturn()
            .getMember(methodName)
            .getACall()
    }

    override string getLogMethodName() { result = methodName }
  }

  /**
   * The class used to find the relevant log output method of the `flask.Flask.logger` instance (flask application).
   *
   * See `LogOutputMethods`
   */
  private class FlaskLoggingCall extends LogMethodCall {
    LogOutputMethods methodName;

    FlaskLoggingCall() {
      this = Flask::FlaskApp::instance().getMember("logger").getMember(methodName).getACall()
    }

    override string getLogMethodName() { result = methodName }
  }

  /**
   * The class used to find the relevant log output method of the `django.utils.log.request_logger` instance (django application).
   *
   * See `LogOutputMethods`
   */
  private class DjangoLoggingCall extends LogMethodCall {
    LogOutputMethods methodName;

    DjangoLoggingCall() {
      this =
        API::moduleImport("django")
            .getMember("utils")
            .getMember("log")
            .getMember("request_logger")
            .getMember(methodName)
            .getACall()
    }

    override string getLogMethodName() { result = methodName }
  }
}

Some files were not shown because too many files have changed in this diff Show More