Merge branch 'main' into jb1/16-cryptography-models-libraries-and-queries-migration

This commit is contained in:
Josh Brown
2023-10-04 07:24:29 +11:00
committed by GitHub
738 changed files with 80490 additions and 82021 deletions

View File

@@ -0,0 +1,4 @@
---
category: fix
---
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Regular expression fragments residing inside implicitly concatenated strings now have better location information.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Django Rest Framework better handles custom `ModelViewSet` classes functions

View File

@@ -154,6 +154,28 @@ class StringPart extends StringPart_, AstNode {
override string toString() { result = StringPart_.super.toString() }
override Location getLocation() { result = StringPart_.super.getLocation() }
/**
* Holds if the content of string `StringPart` is surrounded by
* a prefix (including a quote) of length `prefixLength` and
* a quote of length `quoteLength`.
*/
predicate contextSize(int prefixLength, int quoteLength) {
exists(int occurrenceOffset |
quoteLength = this.getText().regexpFind("\"{3}|\"{1}|'{3}|'{1}", 0, occurrenceOffset).length() and
prefixLength = occurrenceOffset + quoteLength
)
}
/**
* Gets the length of the content, that is the text between the prefix and the quote.
* See `context` for obtaining the prefix and the quote.
*/
int getContentLength() {
exists(int prefixLength, int quoteLength | this.contextSize(prefixLength, quoteLength) |
result = this.getText().length() - prefixLength - quoteLength
)
}
}
class StringPartList extends StringPartList_ { }

View File

@@ -378,6 +378,68 @@ module SqlExecution {
}
}
/** Provides a class for modeling NoSQL execution APIs. */
module NoSqlExecution {
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSqlExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be executed. */
abstract DataFlow::Node getQuery();
/** Holds if this query will unpack/interpret a dictionary */
abstract predicate interpretsDict();
/** Holds if this query can be dangerous when run on a user-controlled string */
abstract predicate vulnerableToStrings();
}
}
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSqlExecution::Range` instead.
*/
class NoSqlExecution extends DataFlow::Node instanceof NoSqlExecution::Range {
/** Gets the argument that specifies the NoSQL query to be executed. */
DataFlow::Node getQuery() { result = super.getQuery() }
/** Holds if this query will unpack/interpret a dictionary */
predicate interpretsDict() { super.interpretsDict() }
/** Holds if this query can be dangerous when run on a user-controlled string */
predicate vulnerableToStrings() { super.vulnerableToStrings() }
}
/** Provides classes for modeling NoSql sanitization-related APIs. */
module NoSqlSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSql query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer::Range` instead.
*/
class NoSqlSanitizer extends DataFlow::Node instanceof NoSqlSanitizer::Range {
/** Gets the argument that specifies the NoSql query to be sanitized. */
DataFlow::Node getAnInput() { result = super.getAnInput() }
}
/**
* A data-flow node that executes a regular expression.
*

View File

@@ -1,7 +1,32 @@
/** Provides classes for working with files and folders. */
import python
private import codeql.util.FileSystem
private module Input implements InputSig {
abstract class ContainerBase extends @container {
abstract string getAbsolutePath();
ContainerBase getParentContainer() { containerparent(result, this) }
string toString() { result = this.getAbsolutePath() }
}
class FolderBase extends ContainerBase, @folder {
override string getAbsolutePath() { folders(this, result) }
}
class FileBase extends ContainerBase, @file {
override string getAbsolutePath() { files(this, result) }
}
predicate hasSourceLocationPrefix = sourceLocationPrefix/1;
}
private module Impl = Make<Input>;
/** A file */
class File extends Container, @file {
class File extends Container, Impl::File {
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
@@ -45,11 +70,6 @@ class File extends Container, @file {
)
}
override string getAbsolutePath() { files(this, result) }
/** Gets the URL of this file. */
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
override Container getImportRoot(int n) {
/* File stem must be a legal Python identifier */
this.getStem().regexpMatch("[^\\d\\W]\\w*") and
@@ -108,7 +128,7 @@ private predicate occupied_line(File f, int n) {
}
/** A folder (directory) */
class Folder extends Container, @folder {
class Folder extends Container, Impl::Folder {
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
@@ -126,11 +146,6 @@ class Folder extends Container, @folder {
endcolumn = 0
}
override string getAbsolutePath() { folders(this, result) }
/** Gets the URL of this folder. */
override string getURL() { result = "folder://" + this.getAbsolutePath() }
override Container getImportRoot(int n) {
this.isImportRoot(n) and result = this
or
@@ -144,34 +159,8 @@ class Folder extends Container, @folder {
* A container is an abstract representation of a file system object that can
* hold elements of interest.
*/
abstract class Container extends @container {
Container getParent() { containerparent(result, this) }
/**
* Gets a textual representation of the path of this container.
*
* This is the absolute path of the container.
*/
string toString() { result = this.getAbsolutePath() }
/**
* Gets the relative path of this file or folder from the root folder of the
* analyzed source location. The relative path of the root folder itself is
* the empty string.
*
* This has no result if the container is outside the source root, that is,
* if the root folder is not a reflexive, transitive parent of this container.
*/
string getRelativePath() {
exists(string absPath, string pref |
absPath = this.getAbsolutePath() and sourceLocationPrefix(pref)
|
absPath = pref and result = ""
or
absPath = pref.regexpReplaceAll("/$", "") + "/" + result and
not result.matches("/%")
)
}
class Container extends Impl::Container {
Container getParent() { result = this.getParentContainer() }
/** Whether this file or folder is part of the standard library */
predicate inStdlib() { this.inStdlib(_, _) }
@@ -187,135 +176,13 @@ abstract class Container extends @container {
)
}
/* Standard cross-language API */
/** Gets a file or sub-folder in this container. */
Container getAChildContainer() { containerparent(this, result) }
/** Gets a file in this container. */
File getAFile() { result = this.getAChildContainer() }
/** Gets a sub-folder in this container. */
Folder getAFolder() { result = this.getAChildContainer() }
/**
* Gets the absolute, canonical path of this container, using forward slashes
* as path separator.
*
* The path starts with a _root prefix_ followed by zero or more _path
* segments_ separated by forward slashes.
*
* The root prefix is of one of the following forms:
*
* 1. A single forward slash `/` (Unix-style)
* 2. An upper-case drive letter followed by a colon and a forward slash,
* such as `C:/` (Windows-style)
* 3. Two forward slashes, a computer name, and then another forward slash,
* such as `//FileServer/` (UNC-style)
*
* Path segments are never empty (that is, absolute paths never contain two
* contiguous slashes, except as part of a UNC-style root prefix). Also, path
* segments never contain forward slashes, and no path segment is of the
* form `.` (one dot) or `..` (two dots).
*
* Note that an absolute path never ends with a forward slash, except if it is
* a bare root prefix, that is, the path has no path segments. A container
* whose absolute path has no segments is always a `Folder`, not a `File`.
*/
abstract string getAbsolutePath();
/**
* Gets the base name of this container including extension, that is, the last
* segment of its absolute path, or the empty string if it has no segments.
*
* Here are some examples of absolute paths and the corresponding base names
* (surrounded with quotes to avoid ambiguity):
*
* <table border="1">
* <tr><th>Absolute path</th><th>Base name</th></tr>
* <tr><td>"/tmp/tst.py"</td><td>"tst.py"</td></tr>
* <tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
* <tr><td>"/"</td><td>""</td></tr>
* <tr><td>"C:/"</td><td>""</td></tr>
* <tr><td>"D:/"</td><td>""</td></tr>
* <tr><td>"//FileServer/"</td><td>""</td></tr>
* </table>
*/
string getBaseName() {
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
}
/**
* Gets the extension of this container, that is, the suffix of its base name
* after the last dot character, if any.
*
* In particular,
*
* - if the name does not include a dot, there is no extension, so this
* predicate has no result;
* - if the name ends in a dot, the extension is the empty string;
* - if the name contains multiple dots, the extension follows the last dot.
*
* Here are some examples of absolute paths and the corresponding extensions
* (surrounded with quotes to avoid ambiguity):
*
* <table border="1">
* <tr><th>Absolute path</th><th>Extension</th></tr>
* <tr><td>"/tmp/tst.py"</td><td>"py"</td></tr>
* <tr><td>"/tmp/.gitignore"</td><td>"gitignore"</td></tr>
* <tr><td>"/bin/bash"</td><td>not defined</td></tr>
* <tr><td>"/tmp/tst2."</td><td>""</td></tr>
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
* </table>
*/
string getExtension() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
}
/**
* Gets the stem of this container, that is, the prefix of its base name up to
* (but not including) the last dot character if there is one, or the entire
* base name if there is not.
*
* Here are some examples of absolute paths and the corresponding stems
* (surrounded with quotes to avoid ambiguity):
*
* <table border="1">
* <tr><th>Absolute path</th><th>Stem</th></tr>
* <tr><td>"/tmp/tst.py"</td><td>"tst"</td></tr>
* <tr><td>"/tmp/.gitignore"</td><td>""</td></tr>
* <tr><td>"/bin/bash"</td><td>"bash"</td></tr>
* <tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
* </table>
*/
string getStem() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
}
File getFile(string baseName) {
result = this.getAFile() and
result.getBaseName() = baseName
}
Folder getFolder(string baseName) {
result = this.getAFolder() and
result.getBaseName() = baseName
}
Container getParentContainer() { this = result.getAChildContainer() }
override Container getParentContainer() { result = super.getParentContainer() }
Container getChildContainer(string baseName) {
result = this.getAChildContainer() and
result.getBaseName() = baseName
}
/**
* Gets a URL representing the location of this container.
*
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
*/
abstract string getURL();
/** Holds if this folder is on the import path. */
predicate isImportRoot() { this.isImportRoot(_) }

View File

@@ -10,6 +10,7 @@ private import semmle.python.frameworks.Aiomysql
private import semmle.python.frameworks.Aiosqlite
private import semmle.python.frameworks.Aiopg
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.BSon
private import semmle.python.frameworks.CassandraDriver
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
@@ -42,6 +43,7 @@ private import semmle.python.frameworks.Phoenixdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.Pycurl
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.PyMongo
private import semmle.python.frameworks.Pymssql
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Pyodbc

View File

@@ -1639,13 +1639,3 @@ private module OutNodes {
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
/**
* Holds if flow from `call`'s argument `arg` to parameter `p` is permissible.
*
* This is a temporary hook to support technical debt in the Go language; do not use.
*/
pragma[inline]
predicate golangSpecificParamArgFilter(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
any()
}

View File

@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isBarrierIn(Node node, FlowState state) { none() }
predicate isBarrierOut(Node node, FlowState state) { none() }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)

View File

@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isBarrierIn(Node node, FlowState state) { none() }
predicate isBarrierOut(Node node, FlowState state) { none() }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)

View File

@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isBarrierIn(Node node, FlowState state) { none() }
predicate isBarrierOut(Node node, FlowState state) { none() }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)

View File

@@ -297,6 +297,10 @@ private module Config implements FullStateConfigSig {
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isBarrierIn(Node node, FlowState state) { none() }
predicate isBarrierOut(Node node, FlowState state) { none() }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)

View File

@@ -1002,12 +1002,3 @@ class ContentApprox = Unit;
/** Gets an approximated value for content `c`. */
pragma[inline]
ContentApprox getContentApprox(Content c) { any() }
/**
* Gets an additional term that is added to the `join` and `branch` computations to reflect
* an additional forward or backwards branching factor that is not taken into account
* when calculating the (virtual) dispatch cost.
*
* Argument `arg` is part of a path from a source to a sink, and `p` is the target parameter.
*/
int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }

View File

@@ -0,0 +1,38 @@
/**
* Provides classes modeling security-relevant aspects of the `bson` PyPI package.
* See
* - https://pypi.org/project/bson/
* - https://github.com/py-bson/bson
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `bson` PyPI package.
* See
* - https://pypi.org/project/bson/
* - https://github.com/py-bson/bson
*/
private module BSon {
/**
* ObjectId returns a string representing an id.
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
BsonObjectIdCall() {
exists(API::Node mod |
mod = API::moduleImport("bson")
or
mod = API::moduleImport("bson").getMember(["objectid", "json_util"])
|
this = mod.getMember("ObjectId").getACall()
)
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}

View File

@@ -0,0 +1,299 @@
/**
* Provides classes modeling security-relevant aspects of the PyMongo bindings.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private module PyMongo {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
result =
API::moduleImport("pymongo").getMember("mongo_client").getMember("MongoClient").getReturn()
}
/** Gets a reference to `flask_pymongo.PyMongo` */
private API::Node flask_PyMongo() {
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
}
/** Gets a reference to `mongoengine` */
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
/** Gets a reference to `flask_mongoengine.MongoEngine` */
private API::Node flask_MongoEngine() {
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
}
/**
* Gets a reference to an initialized `Mongo` instance.
* See `pyMongo()`, `flask_PyMongo()`
*/
private API::Node mongoClientInstance() {
result = pyMongo() or
result = flask_PyMongo()
}
/**
* Gets a reference to a `Mongo` DB instance.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db` would be a `Mongo` instance.
*/
private API::Node mongoDBInstance() {
result = mongoClientInstance().getASubscript()
or
result = mongoClientInstance().getAMember()
or
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
or
result = flask_MongoEngine().getMember("get_db").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
}
/**
* Gets a reference to a `Mongo` collection.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user` would be a `Mongo` collection.
*/
private API::Node mongoCollection() {
result = mongoDBInstance().getASubscript()
or
result = mongoDBInstance().getAMember()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
result = mongoDBInstance().getMember(["get_collection", "create_collection"]).getReturn()
}
/** Gets the name of a find_* relevant `Mongo` collection-level operation method. */
private string mongoCollectionMethodName() {
result in [
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
]
}
/**
* Gets a reference to a `Mongo` collection method call
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
*/
private class MongoCollectionCall extends API::CallNode, NoSqlExecution::Range {
MongoCollectionCall() {
this = mongoCollection().getMember(mongoCollectionMethodName()).getACall()
}
/** Gets the argument that specifies the NoSQL query to be executed, as an API::node */
pragma[inline]
API::Node getQueryAsApiNode() {
// 'filter' is allowed keyword in pymongo, see https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
result = this.getParameter(0, "filter")
}
override DataFlow::Node getQuery() { result = this.getQueryAsApiNode().asSink() }
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
/**
* See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.aggregate
*/
private class MongoCollectionAggregation extends API::CallNode, NoSqlExecution::Range {
MongoCollectionAggregation() { this = mongoCollection().getMember("aggregate").getACall() }
override DataFlow::Node getQuery() {
result = this.getParameter(0, "pipeline").getASubscript().asSink()
}
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
private class MongoMapReduce extends API::CallNode, NoSqlExecution::Range {
MongoMapReduce() { this = mongoCollection().getMember("map_reduce").getACall() }
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
override predicate interpretsDict() { none() }
override predicate vulnerableToStrings() { any() }
}
private class MongoMapReduceQuery extends API::CallNode, NoSqlExecution::Range {
MongoMapReduceQuery() { this = mongoCollection().getMember("map_reduce").getACall() }
override DataFlow::Node getQuery() { result = this.getArgByName("query") }
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
/** The `$where` query operator executes a string as JavaScript. */
private class WhereQueryOperator extends DataFlow::Node, Decoding::Range {
DataFlow::Node query;
WhereQueryOperator() {
exists(API::Node dictionary |
dictionary = any(MongoCollectionCall c).getQueryAsApiNode() and
query = dictionary.getSubscript("$where").asSink() and
this = dictionary.getAValueReachingSink()
)
}
override DataFlow::Node getAnInput() { result = query }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "NoSQL" }
override predicate mayExecuteInput() { none() }
}
/**
* The `$function` query operator executes its `body` string as JavaScript.
*
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/function/#mongodb-expression-exp.-function
*/
private class FunctionQueryOperator extends DataFlow::Node, Decoding::Range {
DataFlow::Node query;
FunctionQueryOperator() {
exists(API::Node dictionary |
dictionary =
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$function") and
query = dictionary.getSubscript("body").asSink() and
this = dictionary.getAValueReachingSink()
)
}
override DataFlow::Node getAnInput() { result = query }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "NoSQL" }
override predicate mayExecuteInput() { none() }
}
/**
* The `$accumulator` query operator executes strings in some of its fields as JavaScript.
*
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/accumulator/#mongodb-group-grp.-accumulator
*/
private class AccumulatorQueryOperator extends DataFlow::Node, Decoding::Range {
DataFlow::Node query;
AccumulatorQueryOperator() {
exists(API::Node dictionary |
dictionary =
mongoCollection()
.getMember("aggregate")
.getACall()
.getParameter(0)
.getASubscript*()
.getSubscript("$accumulator") and
query = dictionary.getSubscript(["init", "accumulate", "merge", "finalize"]).asSink() and
this = dictionary.getAValueReachingSink()
)
}
override DataFlow::Node getAnInput() { result = query }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "NoSQL" }
override predicate mayExecuteInput() { any() }
}
/**
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
*
* ```py
* from flask_mongoengine import MongoEngine
* db = MongoEngine(app)
* class Movie(db.Document):
* title = db.StringField(required=True)
*
* Movie.objects(__raw__=json_search)
* ```
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlExecution::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
.getMember(["Document", "EmbeddedDocument"])
.getASubclass()
.getMember("objects")
.getACall()
}
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* An equality operator can protect against dictionary interpretation.
* For instance, in `{'password': {"$eq": password} }`, if a dictionary is injected into
* `password`, it will not match.
*/
private class EqualityOperator extends DataFlow::Node, NoSqlSanitizer::Range {
EqualityOperator() {
this =
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$eq").asSink()
}
override DataFlow::Node getAnInput() { result = this }
}
}

View File

@@ -131,7 +131,10 @@ private module RestFramework {
"initial", "http_method_not_allowed", "permission_denied", "throttled",
"get_authenticate_header", "perform_content_negotiation", "perform_authentication",
"check_permissions", "check_object_permissions", "check_throttles", "determine_version",
"initialize_request", "finalize_response", "dispatch", "options"
"initialize_request", "finalize_response", "dispatch", "options",
// ModelViewSet
// https://github.com/encode/django-rest-framework/blob/master/rest_framework/viewsets.py
"create", "retrieve", "update", "partial_update", "destroy", "list"
]
}
}

View File

@@ -223,14 +223,54 @@ module Impl implements RegexTreeViewSig {
*/
Location getLocation() { result = re.getLocation() }
/** Gets the accumulated length of string parts with lower index than `index`, if any. */
private int getPartOffset(int index) {
index = 0 and result = 0
or
index > 0 and
exists(int previousOffset | previousOffset = this.getPartOffset(index - 1) |
result =
previousOffset + re.(StrConst).getImplicitlyConcatenatedPart(index - 1).getContentLength()
)
}
/**
* Gets the `StringPart` in which this `RegExpTerm` resides, if any.
* `localOffset` will be the offset of this `RegExpTerm` inside `result`.
*/
StringPart getPart(int localOffset) {
exists(int index, int prefixLength | index = max(int i | this.getPartOffset(i) <= start) |
result = re.(StrConst).getImplicitlyConcatenatedPart(index) and
result.contextSize(prefixLength, _) and
// Example:
// re.compile('...' r"""...this..""")
// - `start` is the offset from `(` to `this` as counted after concatenating all parts.
// - we subtract the length of the previous `StringPart`s, `'...'`, to know how far into this `StringPart` we go.
// - as the prefix 'r"""' is part of the `StringPart`, `this` is found that much further in.
localOffset = start - this.getPartOffset(index) + prefixLength
)
}
/** Holds if this term is found at the specified location offsets. */
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int re_start |
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
startcolumn = re_start + start + 4 and
endcolumn = re_start + end + 3
not exists(this.getPart(_)) and
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
re.getLocation().hasLocationInfo(filepath, startline, re_start, _, _) and
startcolumn = re_start + start + prefix_len and
endline = startline and
endcolumn = re_start + end + prefix_len - 1
/* inclusive vs exclusive */
)
or
exists(StringPart part, int localOffset, int partStartColumn |
part = this.getPart(localOffset)
|
part.getLocation().hasLocationInfo(filepath, startline, partStartColumn, _, _) and
startcolumn = partStartColumn + localOffset and
endline = startline and
endcolumn = (end - start) + startcolumn
)
}

View File

@@ -101,7 +101,7 @@ private module FindRegexMode {
}
/**
* DEPRECATED: Use `Regex` instead.
* DEPRECATED: Use `RegExp` instead.
*/
deprecated class Regex = RegExp;
@@ -327,6 +327,17 @@ class RegExp extends Expr instanceof StrConst {
/** Gets the text of this regex */
string getText() { result = super.getText() }
/**
* Gets the prefix of this regex
*
* Examples:
*
* - The prefix of `'x*y'` is `'`.
* - The prefix of `r''` is `r'`.
* - The prefix of `r"""x*y"""` is `r"""`.
*/
string getPrefix() { result = super.getPrefix() }
/** Gets the `i`th character of this regex */
string getChar(int i) { result = this.getText().charAt(i) }

View File

@@ -0,0 +1,104 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "NoSql injection"
* vulnerabilities, as well as extension points for adding your own.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.Concepts
/**
* Provides default sources, sinks and sanitizers for detecting
* "NoSql injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module NoSqlInjection {
private newtype TFlowState =
TString() or
TDict()
/** A flow state, tracking the structure of the data. */
abstract class FlowState extends TFlowState {
/** Gets a textual representation of this element. */
abstract string toString();
}
/** A state where the tracked data is only a string. */
class String extends FlowState, TString {
override string toString() { result = "String" }
}
/**
* A state where the tracked data has been converted to
* a dictionary.
*
* We include cases where data represent JSON objects, so
* it could actually still be just a string. It could
* also contain query operators, or even JavaScript code.
*/
class Dict extends FlowState, TDict {
override string toString() { result = "Dict" }
}
/** A source allowing string inputs. */
abstract class StringSource extends DataFlow::Node { }
/** A source of allowing dictionaries. */
abstract class DictSource extends DataFlow::Node { }
/** A sink vulnerable to user controlled strings. */
abstract class StringSink extends DataFlow::Node { }
/** A sink vulnerable to user controlled dictionaries. */
abstract class DictSink extends DataFlow::Node { }
/** A data flow node where a string is converted into a dictionary. */
abstract class StringToDictConversion extends DataFlow::Node {
/** Gets the argument that specifies the string to be converted. */
abstract DataFlow::Node getAnInput();
/** Gets the resulting dictionary. */
abstract DataFlow::Node getOutput();
}
/** A remote flow source considered a source of user controlled strings. */
class RemoteFlowSourceAsStringSource extends RemoteFlowSource, StringSource { }
/** A NoSQL query that is vulnerable to user controlled strings. */
class NoSqlExecutionAsStringSink extends StringSink {
NoSqlExecutionAsStringSink() {
exists(NoSqlExecution noSqlExecution | this = noSqlExecution.getQuery() |
noSqlExecution.vulnerableToStrings()
)
}
}
/** A NoSQL query that is vulnerable to user controlled dictionaries. */
class NoSqlExecutionAsDictSink extends DictSink {
NoSqlExecutionAsDictSink() {
exists(NoSqlExecution noSqlExecution | this = noSqlExecution.getQuery() |
noSqlExecution.interpretsDict()
)
}
}
/** A JSON decoding converts a string to a dictionary. */
class JsonDecoding extends Decoding, StringToDictConversion {
JsonDecoding() { this.getFormat() = "JSON" }
override DataFlow::Node getAnInput() { result = Decoding.super.getAnInput() }
override DataFlow::Node getOutput() { result = Decoding.super.getOutput() }
}
/** A NoSQL decoding interprets a string as a dictionary. */
class NoSqlDecoding extends Decoding, StringToDictConversion {
NoSqlDecoding() { this.getFormat() = "NoSQL" }
override DataFlow::Node getAnInput() { result = Decoding.super.getAnInput() }
override DataFlow::Node getOutput() { result = Decoding.super.getOutput() }
}
}

View File

@@ -0,0 +1,61 @@
/**
* Provides a taint-tracking configuration for detecting NoSQL injection vulnerabilities
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
private import NoSqlInjectionCustomizations::NoSqlInjection as C
/**
* A taint-tracking configuration for detecting NoSQL injection vulnerabilities.
*/
module NoSqlInjectionConfig implements DataFlow::StateConfigSig {
class FlowState = C::FlowState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof C::StringSource and
state instanceof C::String
or
source instanceof C::DictSource and
state instanceof C::Dict
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof C::StringSink and
(
state instanceof C::String
or
// since Dicts can include strings,
// e.g. JSON objects can encode strings.
state instanceof C::Dict
)
or
sink instanceof C::DictSink and
state instanceof C::Dict
}
predicate isBarrier(DataFlow::Node node, FlowState state) {
// Block `String` paths here, since they change state to `Dict`
exists(C::StringToDictConversion c | node = c.getOutput()) and
state instanceof C::String
}
predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
exists(C::StringToDictConversion c |
nodeFrom = c.getAnInput() and
nodeTo = c.getOutput()
) and
stateFrom instanceof C::String and
stateTo instanceof C::Dict
}
predicate isBarrier(DataFlow::Node node) {
node = any(NoSqlSanitizer noSqlSanitizer).getAnInput()
}
}
module NoSqlInjectionFlow = TaintTracking::GlobalWithState<NoSqlInjectionConfig>;