mirror of
https://github.com/github/codeql.git
synced 2026-04-27 01:35:13 +02:00
Merge branch 'main' into jb1/16-cryptography-models-libraries-and-queries-migration
This commit is contained in:
4
python/ql/lib/change-notes/2023-09-22-regex-prefix.md
Normal file
4
python/ql/lib/change-notes/2023-09-22-regex-prefix.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: fix
|
||||
---
|
||||
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Regular expression fragments residing inside implicitly concatenated strings now have better location information.
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Django Rest Framework: functions defined on custom `ModelViewSet` classes are now handled better.
|
||||
@@ -154,6 +154,28 @@ class StringPart extends StringPart_, AstNode {
|
||||
override string toString() { result = StringPart_.super.toString() }
|
||||
|
||||
override Location getLocation() { result = StringPart_.super.getLocation() }
|
||||
|
||||
/**
|
||||
* Holds if the content of string `StringPart` is surrounded by
|
||||
* a prefix (including a quote) of length `prefixLength` and
|
||||
* a quote of length `quoteLength`.
|
||||
*/
|
||||
predicate contextSize(int prefixLength, int quoteLength) {
|
||||
exists(int occurrenceOffset |
|
||||
quoteLength = this.getText().regexpFind("\"{3}|\"{1}|'{3}|'{1}", 0, occurrenceOffset).length() and
|
||||
prefixLength = occurrenceOffset + quoteLength
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the length of the content, that is the text between the prefix and the quote.
|
||||
* See `contextSize` for obtaining the lengths of the prefix and the quote.
|
||||
*/
|
||||
int getContentLength() {
|
||||
exists(int prefixLength, int quoteLength | this.contextSize(prefixLength, quoteLength) |
|
||||
result = this.getText().length() - prefixLength - quoteLength
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class StringPartList extends StringPartList_ { }
|
||||
|
||||
@@ -378,6 +378,68 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides a class for modeling NoSQL execution APIs. */
|
||||
module NoSqlExecution {
|
||||
/**
|
||||
* A data-flow node that executes NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSqlExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the NoSQL query to be executed. */
|
||||
abstract DataFlow::Node getQuery();
|
||||
|
||||
/** Holds if this query will unpack/interpret a dictionary */
|
||||
abstract predicate interpretsDict();
|
||||
|
||||
/** Holds if this query can be dangerous when run on a user-controlled string */
|
||||
abstract predicate vulnerableToStrings();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes NoSQL queries.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `NoSqlExecution::Range` instead.
|
||||
*/
|
||||
class NoSqlExecution extends DataFlow::Node instanceof NoSqlExecution::Range {
|
||||
/** Gets the argument that specifies the NoSQL query to be executed. */
|
||||
DataFlow::Node getQuery() { result = super.getQuery() }
|
||||
|
||||
/** Holds if this query will unpack/interpret a dictionary */
|
||||
predicate interpretsDict() { super.interpretsDict() }
|
||||
|
||||
/** Holds if this query can be dangerous when run on a user-controlled string */
|
||||
predicate vulnerableToStrings() { super.vulnerableToStrings() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling NoSql sanitization-related APIs. */
|
||||
module NoSqlSanitizer {
|
||||
/**
|
||||
* A data-flow node that collects functions sanitizing NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSqlSanitizer` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the NoSql query to be sanitized. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions sanitizing NoSQL queries.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `NoSqlSanitizer::Range` instead.
|
||||
*/
|
||||
class NoSqlSanitizer extends DataFlow::Node instanceof NoSqlSanitizer::Range {
|
||||
/** Gets the argument that specifies the NoSql query to be sanitized. */
|
||||
DataFlow::Node getAnInput() { result = super.getAnInput() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
|
||||
@@ -1,7 +1,32 @@
|
||||
/** Provides classes for working with files and folders. */
|
||||
|
||||
import python
|
||||
private import codeql.util.FileSystem
|
||||
|
||||
private module Input implements InputSig {
|
||||
abstract class ContainerBase extends @container {
|
||||
abstract string getAbsolutePath();
|
||||
|
||||
ContainerBase getParentContainer() { containerparent(result, this) }
|
||||
|
||||
string toString() { result = this.getAbsolutePath() }
|
||||
}
|
||||
|
||||
class FolderBase extends ContainerBase, @folder {
|
||||
override string getAbsolutePath() { folders(this, result) }
|
||||
}
|
||||
|
||||
class FileBase extends ContainerBase, @file {
|
||||
override string getAbsolutePath() { files(this, result) }
|
||||
}
|
||||
|
||||
predicate hasSourceLocationPrefix = sourceLocationPrefix/1;
|
||||
}
|
||||
|
||||
private module Impl = Make<Input>;
|
||||
|
||||
/** A file */
|
||||
class File extends Container, @file {
|
||||
class File extends Container, Impl::File {
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
@@ -45,11 +70,6 @@ class File extends Container, @file {
|
||||
)
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { files(this, result) }
|
||||
|
||||
/** Gets the URL of this file. */
|
||||
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
|
||||
|
||||
override Container getImportRoot(int n) {
|
||||
/* File stem must be a legal Python identifier */
|
||||
this.getStem().regexpMatch("[^\\d\\W]\\w*") and
|
||||
@@ -108,7 +128,7 @@ private predicate occupied_line(File f, int n) {
|
||||
}
|
||||
|
||||
/** A folder (directory) */
|
||||
class Folder extends Container, @folder {
|
||||
class Folder extends Container, Impl::Folder {
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
@@ -126,11 +146,6 @@ class Folder extends Container, @folder {
|
||||
endcolumn = 0
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { folders(this, result) }
|
||||
|
||||
/** Gets the URL of this folder. */
|
||||
override string getURL() { result = "folder://" + this.getAbsolutePath() }
|
||||
|
||||
override Container getImportRoot(int n) {
|
||||
this.isImportRoot(n) and result = this
|
||||
or
|
||||
@@ -144,34 +159,8 @@ class Folder extends Container, @folder {
|
||||
* A container is an abstract representation of a file system object that can
|
||||
* hold elements of interest.
|
||||
*/
|
||||
abstract class Container extends @container {
|
||||
Container getParent() { containerparent(result, this) }
|
||||
|
||||
/**
|
||||
* Gets a textual representation of the path of this container.
|
||||
*
|
||||
* This is the absolute path of the container.
|
||||
*/
|
||||
string toString() { result = this.getAbsolutePath() }
|
||||
|
||||
/**
|
||||
* Gets the relative path of this file or folder from the root folder of the
|
||||
* analyzed source location. The relative path of the root folder itself is
|
||||
* the empty string.
|
||||
*
|
||||
* This has no result if the container is outside the source root, that is,
|
||||
* if the root folder is not a reflexive, transitive parent of this container.
|
||||
*/
|
||||
string getRelativePath() {
|
||||
exists(string absPath, string pref |
|
||||
absPath = this.getAbsolutePath() and sourceLocationPrefix(pref)
|
||||
|
|
||||
absPath = pref and result = ""
|
||||
or
|
||||
absPath = pref.regexpReplaceAll("/$", "") + "/" + result and
|
||||
not result.matches("/%")
|
||||
)
|
||||
}
|
||||
class Container extends Impl::Container {
|
||||
Container getParent() { result = this.getParentContainer() }
|
||||
|
||||
/** Whether this file or folder is part of the standard library */
|
||||
predicate inStdlib() { this.inStdlib(_, _) }
|
||||
@@ -187,135 +176,13 @@ abstract class Container extends @container {
|
||||
)
|
||||
}
|
||||
|
||||
/* Standard cross-language API */
|
||||
/** Gets a file or sub-folder in this container. */
|
||||
Container getAChildContainer() { containerparent(this, result) }
|
||||
|
||||
/** Gets a file in this container. */
|
||||
File getAFile() { result = this.getAChildContainer() }
|
||||
|
||||
/** Gets a sub-folder in this container. */
|
||||
Folder getAFolder() { result = this.getAChildContainer() }
|
||||
|
||||
/**
|
||||
* Gets the absolute, canonical path of this container, using forward slashes
|
||||
* as path separator.
|
||||
*
|
||||
* The path starts with a _root prefix_ followed by zero or more _path
|
||||
* segments_ separated by forward slashes.
|
||||
*
|
||||
* The root prefix is of one of the following forms:
|
||||
*
|
||||
* 1. A single forward slash `/` (Unix-style)
|
||||
* 2. An upper-case drive letter followed by a colon and a forward slash,
|
||||
* such as `C:/` (Windows-style)
|
||||
* 3. Two forward slashes, a computer name, and then another forward slash,
|
||||
* such as `//FileServer/` (UNC-style)
|
||||
*
|
||||
* Path segments are never empty (that is, absolute paths never contain two
|
||||
* contiguous slashes, except as part of a UNC-style root prefix). Also, path
|
||||
* segments never contain forward slashes, and no path segment is of the
|
||||
* form `.` (one dot) or `..` (two dots).
|
||||
*
|
||||
* Note that an absolute path never ends with a forward slash, except if it is
|
||||
* a bare root prefix, that is, the path has no path segments. A container
|
||||
* whose absolute path has no segments is always a `Folder`, not a `File`.
|
||||
*/
|
||||
abstract string getAbsolutePath();
|
||||
|
||||
/**
|
||||
* Gets the base name of this container including extension, that is, the last
|
||||
* segment of its absolute path, or the empty string if it has no segments.
|
||||
*
|
||||
* Here are some examples of absolute paths and the corresponding base names
|
||||
* (surrounded with quotes to avoid ambiguity):
|
||||
*
|
||||
* <table border="1">
|
||||
* <tr><th>Absolute path</th><th>Base name</th></tr>
|
||||
* <tr><td>"/tmp/tst.py"</td><td>"tst.py"</td></tr>
|
||||
* <tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
|
||||
* <tr><td>"/"</td><td>""</td></tr>
|
||||
* <tr><td>"C:/"</td><td>""</td></tr>
|
||||
* <tr><td>"D:/"</td><td>""</td></tr>
|
||||
* <tr><td>"//FileServer/"</td><td>""</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getBaseName() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the extension of this container, that is, the suffix of its base name
|
||||
* after the last dot character, if any.
|
||||
*
|
||||
* In particular,
|
||||
*
|
||||
* - if the name does not include a dot, there is no extension, so this
|
||||
* predicate has no result;
|
||||
* - if the name ends in a dot, the extension is the empty string;
|
||||
* - if the name contains multiple dots, the extension follows the last dot.
|
||||
*
|
||||
* Here are some examples of absolute paths and the corresponding extensions
|
||||
* (surrounded with quotes to avoid ambiguity):
|
||||
*
|
||||
* <table border="1">
|
||||
* <tr><th>Absolute path</th><th>Extension</th></tr>
|
||||
* <tr><td>"/tmp/tst.py"</td><td>"py"</td></tr>
|
||||
* <tr><td>"/tmp/.gitignore"</td><td>"gitignore"</td></tr>
|
||||
* <tr><td>"/bin/bash"</td><td>not defined</td></tr>
|
||||
* <tr><td>"/tmp/tst2."</td><td>""</td></tr>
|
||||
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getExtension() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stem of this container, that is, the prefix of its base name up to
|
||||
* (but not including) the last dot character if there is one, or the entire
|
||||
* base name if there is not.
|
||||
*
|
||||
* Here are some examples of absolute paths and the corresponding stems
|
||||
* (surrounded with quotes to avoid ambiguity):
|
||||
*
|
||||
* <table border="1">
|
||||
* <tr><th>Absolute path</th><th>Stem</th></tr>
|
||||
* <tr><td>"/tmp/tst.py"</td><td>"tst"</td></tr>
|
||||
* <tr><td>"/tmp/.gitignore"</td><td>""</td></tr>
|
||||
* <tr><td>"/bin/bash"</td><td>"bash"</td></tr>
|
||||
* <tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
|
||||
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getStem() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
|
||||
}
|
||||
|
||||
File getFile(string baseName) {
|
||||
result = this.getAFile() and
|
||||
result.getBaseName() = baseName
|
||||
}
|
||||
|
||||
Folder getFolder(string baseName) {
|
||||
result = this.getAFolder() and
|
||||
result.getBaseName() = baseName
|
||||
}
|
||||
|
||||
Container getParentContainer() { this = result.getAChildContainer() }
|
||||
override Container getParentContainer() { result = super.getParentContainer() }
|
||||
|
||||
Container getChildContainer(string baseName) {
|
||||
result = this.getAChildContainer() and
|
||||
result.getBaseName() = baseName
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a URL representing the location of this container.
|
||||
*
|
||||
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
|
||||
*/
|
||||
abstract string getURL();
|
||||
|
||||
/** Holds if this folder is on the import path. */
|
||||
predicate isImportRoot() { this.isImportRoot(_) }
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ private import semmle.python.frameworks.Aiomysql
|
||||
private import semmle.python.frameworks.Aiosqlite
|
||||
private import semmle.python.frameworks.Aiopg
|
||||
private import semmle.python.frameworks.Asyncpg
|
||||
private import semmle.python.frameworks.BSon
|
||||
private import semmle.python.frameworks.CassandraDriver
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
private import semmle.python.frameworks.Cryptodome
|
||||
@@ -42,6 +43,7 @@ private import semmle.python.frameworks.Phoenixdb
|
||||
private import semmle.python.frameworks.Psycopg2
|
||||
private import semmle.python.frameworks.Pycurl
|
||||
private import semmle.python.frameworks.Pydantic
|
||||
private import semmle.python.frameworks.PyMongo
|
||||
private import semmle.python.frameworks.Pymssql
|
||||
private import semmle.python.frameworks.PyMySQL
|
||||
private import semmle.python.frameworks.Pyodbc
|
||||
|
||||
@@ -1639,13 +1639,3 @@ private module OutNodes {
|
||||
* `kind`.
|
||||
*/
|
||||
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
|
||||
|
||||
/**
|
||||
* Holds if flow from `call`'s argument `arg` to parameter `p` is permissible.
|
||||
*
|
||||
* This is a temporary hook to support technical debt in the Go language; do not use.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate golangSpecificParamArgFilter(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
any()
|
||||
}
|
||||
|
||||
@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
|
||||
@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
|
||||
@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
|
||||
@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
|
||||
@@ -1002,12 +1002,3 @@ class ContentApprox = Unit;
|
||||
/** Gets an approximated value for content `c`. */
|
||||
pragma[inline]
|
||||
ContentApprox getContentApprox(Content c) { any() }
|
||||
|
||||
/**
|
||||
* Gets an additional term that is added to the `join` and `branch` computations to reflect
|
||||
* an additional forward or backwards branching factor that is not taken into account
|
||||
* when calculating the (virtual) dispatch cost.
|
||||
*
|
||||
* Argument `arg` is part of a path from a source to a sink, and `p` is the target parameter.
|
||||
*/
|
||||
int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }
|
||||
|
||||
38
python/ql/lib/semmle/python/frameworks/BSon.qll
Normal file
38
python/ql/lib/semmle/python/frameworks/BSon.qll
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `bson` PyPI package.
|
||||
* See
|
||||
* - https://pypi.org/project/bson/
|
||||
* - https://github.com/py-bson/bson
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `bson` PyPI package.
|
||||
* See
|
||||
* - https://pypi.org/project/bson/
|
||||
* - https://github.com/py-bson/bson
|
||||
*/
|
||||
private module BSon {
|
||||
/**
|
||||
* ObjectId returns a string representing an id.
|
||||
* If at any time ObjectId can't parse its input (like when a tainted dict is passed in),
|
||||
* then ObjectId will throw an error preventing the query from running.
|
||||
*/
|
||||
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
|
||||
BsonObjectIdCall() {
|
||||
exists(API::Node mod |
|
||||
mod = API::moduleImport("bson")
|
||||
or
|
||||
mod = API::moduleImport("bson").getMember(["objectid", "json_util"])
|
||||
|
|
||||
this = mod.getMember("ObjectId").getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
299
python/ql/lib/semmle/python/frameworks/PyMongo.qll
Normal file
299
python/ql/lib/semmle/python/frameworks/PyMongo.qll
Normal file
@@ -0,0 +1,299 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the PyMongo bindings.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
private module PyMongo {
|
||||
// API Nodes returning `Mongo` instances.
|
||||
/** Gets a reference to `pymongo.MongoClient` */
|
||||
private API::Node pyMongo() {
|
||||
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
|
||||
result =
|
||||
API::moduleImport("pymongo").getMember("mongo_client").getMember("MongoClient").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to `flask_pymongo.PyMongo` */
|
||||
private API::Node flask_PyMongo() {
|
||||
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to `mongoengine` */
|
||||
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
|
||||
|
||||
/** Gets a reference to `flask_mongoengine.MongoEngine` */
|
||||
private API::Node flask_MongoEngine() {
|
||||
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to an initialized `Mongo` instance.
|
||||
* See `pyMongo()`, `flask_PyMongo()`
|
||||
*/
|
||||
private API::Node mongoClientInstance() {
|
||||
result = pyMongo() or
|
||||
result = flask_PyMongo()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` DB instance.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db` would be a `Mongo` instance.
|
||||
*/
|
||||
private API::Node mongoDBInstance() {
|
||||
result = mongoClientInstance().getASubscript()
|
||||
or
|
||||
result = mongoClientInstance().getAMember()
|
||||
or
|
||||
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
|
||||
or
|
||||
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
|
||||
or
|
||||
result = flask_MongoEngine().getMember("get_db").getReturn()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
|
||||
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user` would be a `Mongo` collection.
|
||||
*/
|
||||
private API::Node mongoCollection() {
|
||||
result = mongoDBInstance().getASubscript()
|
||||
or
|
||||
result = mongoDBInstance().getAMember()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
|
||||
result = mongoDBInstance().getMember(["get_collection", "create_collection"]).getReturn()
|
||||
}
|
||||
|
||||
/** Gets the name of a find_* relevant `Mongo` collection-level operation method. */
|
||||
private string mongoCollectionMethodName() {
|
||||
result in [
|
||||
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
|
||||
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection method call
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
|
||||
*/
|
||||
private class MongoCollectionCall extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoCollectionCall() {
|
||||
this = mongoCollection().getMember(mongoCollectionMethodName()).getACall()
|
||||
}
|
||||
|
||||
/** Gets the argument that specifies the NoSQL query to be executed, as an `API::Node`. */
|
||||
pragma[inline]
|
||||
API::Node getQueryAsApiNode() {
|
||||
// 'filter' is an allowed keyword argument in pymongo, see https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
|
||||
result = this.getParameter(0, "filter")
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getQueryAsApiNode().asSink() }
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.aggregate
|
||||
*/
|
||||
private class MongoCollectionAggregation extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoCollectionAggregation() { this = mongoCollection().getMember("aggregate").getACall() }
|
||||
|
||||
override DataFlow::Node getQuery() {
|
||||
result = this.getParameter(0, "pipeline").getASubscript().asSink()
|
||||
}
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
private class MongoMapReduce extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoMapReduce() { this = mongoCollection().getMember("map_reduce").getACall() }
|
||||
|
||||
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
|
||||
|
||||
override predicate interpretsDict() { none() }
|
||||
|
||||
override predicate vulnerableToStrings() { any() }
|
||||
}
|
||||
|
||||
private class MongoMapReduceQuery extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoMapReduceQuery() { this = mongoCollection().getMember("map_reduce").getACall() }
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArgByName("query") }
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
/** The `$where` query operator executes a string as JavaScript. */
|
||||
private class WhereQueryOperator extends DataFlow::Node, Decoding::Range {
|
||||
DataFlow::Node query;
|
||||
|
||||
WhereQueryOperator() {
|
||||
exists(API::Node dictionary |
|
||||
dictionary = any(MongoCollectionCall c).getQueryAsApiNode() and
|
||||
query = dictionary.getSubscript("$where").asSink() and
|
||||
this = dictionary.getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = query }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "NoSQL" }
|
||||
|
||||
override predicate mayExecuteInput() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The `$function` query operator executes its `body` string as JavaScript.
|
||||
*
|
||||
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/function/#mongodb-expression-exp.-function
|
||||
*/
|
||||
private class FunctionQueryOperator extends DataFlow::Node, Decoding::Range {
|
||||
DataFlow::Node query;
|
||||
|
||||
FunctionQueryOperator() {
|
||||
exists(API::Node dictionary |
|
||||
dictionary =
|
||||
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$function") and
|
||||
query = dictionary.getSubscript("body").asSink() and
|
||||
this = dictionary.getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = query }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "NoSQL" }
|
||||
|
||||
override predicate mayExecuteInput() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The `$accumulator` query operator executes strings in some of its fields as JavaScript.
|
||||
*
|
||||
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/accumulator/#mongodb-group-grp.-accumulator
|
||||
*/
|
||||
private class AccumulatorQueryOperator extends DataFlow::Node, Decoding::Range {
|
||||
DataFlow::Node query;
|
||||
|
||||
AccumulatorQueryOperator() {
|
||||
exists(API::Node dictionary |
|
||||
dictionary =
|
||||
mongoCollection()
|
||||
.getMember("aggregate")
|
||||
.getACall()
|
||||
.getParameter(0)
|
||||
.getASubscript*()
|
||||
.getSubscript("$accumulator") and
|
||||
query = dictionary.getSubscript(["init", "accumulate", "merge", "finalize"]).asSink() and
|
||||
this = dictionary.getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = query }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "NoSQL" }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
|
||||
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
|
||||
*
|
||||
* ```py
|
||||
* from flask_mongoengine import MongoEngine
|
||||
* db = MongoEngine(app)
|
||||
* class Movie(db.Document):
|
||||
* title = db.StringField(required=True)
|
||||
*
|
||||
* Movie.objects(__raw__=json_search)
|
||||
* ```
|
||||
*
|
||||
* `Movie.objects(__raw__=json_search)` would be the result.
|
||||
*/
|
||||
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlExecution::Range {
|
||||
MongoEngineObjectsCall() {
|
||||
this =
|
||||
[mongoEngine(), flask_MongoEngine()]
|
||||
.getMember(["Document", "EmbeddedDocument"])
|
||||
.getASubclass()
|
||||
.getMember("objects")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
/** A call to `mongosanitizer.sanitizer.sanitize`, which sanitizes its first argument. */
|
||||
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
|
||||
MongoSanitizerCall() {
|
||||
this =
|
||||
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* An equality operator can protect against dictionary interpretation.
|
||||
* For instance, in `{'password': {"$eq": password} }`, if a dictionary is injected into
|
||||
* `password`, it will not match.
|
||||
*/
|
||||
private class EqualityOperator extends DataFlow::Node, NoSqlSanitizer::Range {
|
||||
EqualityOperator() {
|
||||
this =
|
||||
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$eq").asSink()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this }
|
||||
}
|
||||
}
|
||||
@@ -131,7 +131,10 @@ private module RestFramework {
|
||||
"initial", "http_method_not_allowed", "permission_denied", "throttled",
|
||||
"get_authenticate_header", "perform_content_negotiation", "perform_authentication",
|
||||
"check_permissions", "check_object_permissions", "check_throttles", "determine_version",
|
||||
"initialize_request", "finalize_response", "dispatch", "options"
|
||||
"initialize_request", "finalize_response", "dispatch", "options",
|
||||
// ModelViewSet
|
||||
// https://github.com/encode/django-rest-framework/blob/master/rest_framework/viewsets.py
|
||||
"create", "retrieve", "update", "partial_update", "destroy", "list"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -223,14 +223,54 @@ module Impl implements RegexTreeViewSig {
|
||||
*/
|
||||
Location getLocation() { result = re.getLocation() }
|
||||
|
||||
/** Gets the accumulated length of string parts with lower index than `index`, if any. */
|
||||
private int getPartOffset(int index) {
|
||||
index = 0 and result = 0
|
||||
or
|
||||
index > 0 and
|
||||
exists(int previousOffset | previousOffset = this.getPartOffset(index - 1) |
|
||||
result =
|
||||
previousOffset + re.(StrConst).getImplicitlyConcatenatedPart(index - 1).getContentLength()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `StringPart` in which this `RegExpTerm` resides, if any.
|
||||
* `localOffset` will be the offset of this `RegExpTerm` inside `result`.
|
||||
*/
|
||||
StringPart getPart(int localOffset) {
|
||||
exists(int index, int prefixLength | index = max(int i | this.getPartOffset(i) <= start) |
|
||||
result = re.(StrConst).getImplicitlyConcatenatedPart(index) and
|
||||
result.contextSize(prefixLength, _) and
|
||||
// Example:
|
||||
// re.compile('...' r"""...this..""")
|
||||
// - `start` is the offset from `(` to `this` as counted after concatenating all parts.
|
||||
// - we subtract the length of the previous `StringPart`s, `'...'`, to know how far into this `StringPart` we go.
|
||||
// - as the prefix 'r"""' is part of the `StringPart`, `this` is found that much further in.
|
||||
localOffset = start - this.getPartOffset(index) + prefixLength
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if this term is found at the specified location offsets. */
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
exists(int re_start |
|
||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
|
||||
startcolumn = re_start + start + 4 and
|
||||
endcolumn = re_start + end + 3
|
||||
not exists(this.getPart(_)) and
|
||||
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
|
||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, _, _) and
|
||||
startcolumn = re_start + start + prefix_len and
|
||||
endline = startline and
|
||||
endcolumn = re_start + end + prefix_len - 1
|
||||
/* inclusive vs exclusive */
|
||||
)
|
||||
or
|
||||
exists(StringPart part, int localOffset, int partStartColumn |
|
||||
part = this.getPart(localOffset)
|
||||
|
|
||||
part.getLocation().hasLocationInfo(filepath, startline, partStartColumn, _, _) and
|
||||
startcolumn = partStartColumn + localOffset and
|
||||
endline = startline and
|
||||
endcolumn = (end - start) + startcolumn
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ private module FindRegexMode {
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `Regex` instead.
|
||||
* DEPRECATED: Use `RegExp` instead.
|
||||
*/
|
||||
deprecated class Regex = RegExp;
|
||||
|
||||
@@ -327,6 +327,17 @@ class RegExp extends Expr instanceof StrConst {
|
||||
/** Gets the text of this regex */
|
||||
string getText() { result = super.getText() }
|
||||
|
||||
/**
|
||||
* Gets the prefix of this regex
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* - The prefix of `'x*y'` is `'`.
|
||||
* - The prefix of `r''` is `r'`.
|
||||
* - The prefix of `r"""x*y"""` is `r"""`.
|
||||
*/
|
||||
string getPrefix() { result = super.getPrefix() }
|
||||
|
||||
/** Gets the `i`th character of this regex */
|
||||
string getChar(int i) { result = this.getText().charAt(i) }
|
||||
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for detecting
|
||||
* "NoSql injection"
|
||||
* vulnerabilities, as well as extension points for adding your own.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
import semmle.python.Concepts
|
||||
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for detecting
|
||||
* "NoSql injection"
|
||||
* vulnerabilities, as well as extension points for adding your own.
|
||||
*/
|
||||
module NoSqlInjection {
|
||||
private newtype TFlowState =
|
||||
TString() or
|
||||
TDict()
|
||||
|
||||
/** A flow state, tracking the structure of the data. */
|
||||
abstract class FlowState extends TFlowState {
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
}
|
||||
|
||||
/** A state where the tracked data is only a string. */
|
||||
class String extends FlowState, TString {
|
||||
override string toString() { result = "String" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A state where the tracked data has been converted to
|
||||
* a dictionary.
|
||||
*
|
||||
* We include cases where data represent JSON objects, so
|
||||
* it could actually still be just a string. It could
|
||||
* also contain query operators, or even JavaScript code.
|
||||
*/
|
||||
class Dict extends FlowState, TDict {
|
||||
override string toString() { result = "Dict" }
|
||||
}
|
||||
|
||||
/** A source allowing string inputs. */
|
||||
abstract class StringSource extends DataFlow::Node { }
|
||||
|
||||
/** A source allowing dictionary inputs. */
|
||||
abstract class DictSource extends DataFlow::Node { }
|
||||
|
||||
/** A sink vulnerable to user controlled strings. */
|
||||
abstract class StringSink extends DataFlow::Node { }
|
||||
|
||||
/** A sink vulnerable to user controlled dictionaries. */
|
||||
abstract class DictSink extends DataFlow::Node { }
|
||||
|
||||
/** A data flow node where a string is converted into a dictionary. */
|
||||
abstract class StringToDictConversion extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the string to be converted. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
|
||||
/** Gets the resulting dictionary. */
|
||||
abstract DataFlow::Node getOutput();
|
||||
}
|
||||
|
||||
/** A remote flow source considered a source of user controlled strings. */
|
||||
class RemoteFlowSourceAsStringSource extends RemoteFlowSource, StringSource { }
|
||||
|
||||
/** A NoSQL query that is vulnerable to user controlled strings. */
|
||||
class NoSqlExecutionAsStringSink extends StringSink {
|
||||
NoSqlExecutionAsStringSink() {
|
||||
exists(NoSqlExecution noSqlExecution | this = noSqlExecution.getQuery() |
|
||||
noSqlExecution.vulnerableToStrings()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A NoSQL query that is vulnerable to user controlled dictionaries. */
|
||||
class NoSqlExecutionAsDictSink extends DictSink {
|
||||
NoSqlExecutionAsDictSink() {
|
||||
exists(NoSqlExecution noSqlExecution | this = noSqlExecution.getQuery() |
|
||||
noSqlExecution.interpretsDict()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A JSON decoding converts a string to a dictionary. */
|
||||
class JsonDecoding extends Decoding, StringToDictConversion {
|
||||
JsonDecoding() { this.getFormat() = "JSON" }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = Decoding.super.getAnInput() }
|
||||
|
||||
override DataFlow::Node getOutput() { result = Decoding.super.getOutput() }
|
||||
}
|
||||
|
||||
/** A NoSQL decoding interprets a string as a dictionary. */
|
||||
class NoSqlDecoding extends Decoding, StringToDictConversion {
|
||||
NoSqlDecoding() { this.getFormat() = "NoSQL" }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = Decoding.super.getAnInput() }
|
||||
|
||||
override DataFlow::Node getOutput() { result = Decoding.super.getOutput() }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting NoSQL injection vulnerabilities.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.Concepts
|
||||
private import NoSqlInjectionCustomizations::NoSqlInjection as C
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting NoSQL injection vulnerabilities.
|
||||
*/
|
||||
module NoSqlInjectionConfig implements DataFlow::StateConfigSig {
|
||||
class FlowState = C::FlowState;
|
||||
|
||||
predicate isSource(DataFlow::Node source, FlowState state) {
|
||||
source instanceof C::StringSource and
|
||||
state instanceof C::String
|
||||
or
|
||||
source instanceof C::DictSource and
|
||||
state instanceof C::Dict
|
||||
}
|
||||
|
||||
predicate isSink(DataFlow::Node sink, FlowState state) {
|
||||
sink instanceof C::StringSink and
|
||||
(
|
||||
state instanceof C::String
|
||||
or
|
||||
// since Dicts can include strings,
|
||||
// e.g. JSON objects can encode strings.
|
||||
state instanceof C::Dict
|
||||
)
|
||||
or
|
||||
sink instanceof C::DictSink and
|
||||
state instanceof C::Dict
|
||||
}
|
||||
|
||||
predicate isBarrier(DataFlow::Node node, FlowState state) {
|
||||
// Block `String` paths here, since they change state to `Dict`
|
||||
exists(C::StringToDictConversion c | node = c.getOutput()) and
|
||||
state instanceof C::String
|
||||
}
|
||||
|
||||
predicate isAdditionalFlowStep(
|
||||
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
|
||||
) {
|
||||
exists(C::StringToDictConversion c |
|
||||
nodeFrom = c.getAnInput() and
|
||||
nodeTo = c.getOutput()
|
||||
) and
|
||||
stateFrom instanceof C::String and
|
||||
stateTo instanceof C::Dict
|
||||
}
|
||||
|
||||
predicate isBarrier(DataFlow::Node node) {
|
||||
node = any(NoSqlSanitizer noSqlSanitizer).getAnInput()
|
||||
}
|
||||
}
|
||||
|
||||
module NoSqlInjectionFlow = TaintTracking::GlobalWithState<NoSqlInjectionConfig>;
|
||||
Reference in New Issue
Block a user