Merge pull request #5612 from jty-team/jty/python/nosqlInjection

Python: CWE-943 - Add NoSQL injection query
This commit is contained in:
Rasmus Wriedt Larsen
2021-08-24 11:29:25 +02:00
committed by GitHub
19 changed files with 807 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Passing user-controlled sources into NoSQL queries can result in a NoSQL injection flaw.
This tainted NoSQL query containing a user-controlled source can then execute a malicious query in a NoSQL database such as MongoDB.
In order for the user-controlled source to taint the NoSQL query, the user-controller source must be converted into a Python object using something like <code>json.loads</code> or <code>xmltodict.parse</code>.
</p>
<p>
Because a user-controlled source is passed into the query, the malicious user can have complete control over the query itself.
When the tainted query is executed, the malicious user can commit malicious actions such as bypassing role restrictions or accessing and modifying restricted data in the NoSQL database.
</p>
</overview>
<recommendation>
<p>
NoSQL injections can be prevented by escaping user-input's special characters that are passed into the NoSQL query from the user-supplied source.
Alternatively, using a sanitize library such as MongoSanitizer will ensure that user-supplied sources can not act as a malicious query.
</p>
</recommendation>
<example>
<p>In the example below, the user-supplied source is passed to a MongoDB function that queries the MongoDB database.</p>
<sample src="examples/NoSQLInjection-bad.py" />
<p> This can be fixed by using a sanitizer library like MongoSanitizer as shown in this annotated code version below.</p>
<sample src="examples/NoSQLInjection-good.py" />
</example>
<references>
<li>Mongoengine: <a href="http://mongoengine.org/">Documentation</a>.</li>
<li>Flask-Mongoengine: <a href="http://docs.mongoengine.org/projects/flask-mongoengine/en/latest/">Documentation</a>.</li>
<li>PyMongo: <a href="https://pypi.org/project/pymongo/">Documentation</a>.</li>
<li>Flask-PyMongo: <a href="https://flask-pymongo.readthedocs.io/en/latest/">Documentation</a>.</li>
<li>OWASP: <a href="https://owasp.org/www-pdf-archive/GOD16-NOSQL.pdf">NoSQL Injection</a>.</li>
<li>Security Stack Exchange Discussion: <a href="https://security.stackexchange.com/questions/83231/mongodb-nosql-injection-in-python-code">Question 83231</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,19 @@
/**
* @name NoSQL Injection
* @description Building a NoSQL query from user-controlled sources is vulnerable to insertion of
* malicious NoSQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/nosql-injection
* @tags experimental
* security
* external/cwe/cwe-943
*/
import python
import experimental.semmle.python.security.injection.NoSQLInjection
from CustomPathNode source, CustomPathNode sink
where noSQLInjectionFlow(source, sink)
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
"user-provided value"

View File

@@ -0,0 +1,13 @@
from flask import Flask, request
from flask_pymongo import PyMongo
import json
mongo = PyMongo(app)
@app.route("/")
def home_page():
unsanitized_search = request.args['search']
json_search = json.loads(unsanitized_search)
result = mongo.db.user.find({'name': json_search})

View File

@@ -0,0 +1,15 @@
from flask import Flask, request
from flask_pymongo import PyMongo
from mongosanitizer.sanitizer import sanitize
import json
mongo = PyMongo(app)
@app.route("/")
def home_page():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
safe_search = sanitize(unsanitized_search)
result = client.db.collection.find_one({'data': safe_search})

View File

@@ -209,3 +209,61 @@ class SQLEscape extends DataFlow::Node {
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling NoSQL execution APIs. */
module NoSQLQuery {
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be executed. */
abstract DataFlow::Node getQuery();
}
}
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSQLQuery::Range` instead.
*/
class NoSQLQuery extends DataFlow::Node {
NoSQLQuery::Range range;
NoSQLQuery() { this = range }
/** Gets the argument that specifies the NoSQL query to be executed. */
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** Provides classes for modeling NoSQL sanitization-related APIs. */
module NoSQLSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer::Range` instead.
*/
class NoSQLSanitizer extends DataFlow::Node {
NoSQLSanitizer::Range range;
NoSQLSanitizer() { this = range }
/** Gets the argument that specifies the NoSQL query to be sanitized. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}

View File

@@ -4,3 +4,4 @@
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL

View File

@@ -0,0 +1,215 @@
/**
* Provides classes modeling security-relevant aspects of the standard libraries.
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module NoSQL {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
}
/** Gets a reference to `flask_pymongo.PyMongo` */
private API::Node flask_PyMongo() {
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
}
/** Gets a reference to `mongoengine` */
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
/** Gets a reference to `flask_mongoengine.MongoEngine` */
private API::Node flask_MongoEngine() {
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
}
/**
* Gets a reference to an initialized `Mongo` instance.
* See `pyMongo()`, `flask_PyMongo()`
*/
private API::Node mongoInstance() {
result = pyMongo() or
result = flask_PyMongo()
}
/**
* Gets a reference to an initialized `Mongo` DB instance.
* See `mongoEngine()`, `flask_MongoEngine()`
*/
private API::Node mongoDBInstance() {
result = mongoEngine().getMember(["get_db", "connect"]).getReturn() or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn() or
result = flask_MongoEngine().getMember("get_db").getReturn()
}
/**
* Gets a reference to a `Mongo` DB use.
*
* See `mongoInstance()`, `mongoDBInstance()`.
*/
private DataFlow::LocalSourceNode mongoDB(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript |
subscript.getObject() = mongoInstance().getAUse().asCfgNode() and
result.asCfgNode() = subscript
)
or
result.(DataFlow::AttrRead).getObject() = mongoInstance().getAUse()
or
result = mongoDBInstance().getAUse()
)
or
exists(DataFlow::TypeTracker t2 | result = mongoDB(t2).track(t2, t))
}
/**
* Gets a reference to a `Mongo` DB use.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db` would be a use of a `Mongo` instance, and so the result.
*/
private DataFlow::Node mongoDB() { mongoDB(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Gets a reference to a `Mongo` collection use.
*
* See `mongoDB()`.
*/
private DataFlow::LocalSourceNode mongoCollection(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript | result.asCfgNode() = subscript |
subscript.getObject() = mongoDB().asCfgNode()
)
or
result.(DataFlow::AttrRead).getObject() = mongoDB()
)
or
exists(DataFlow::TypeTracker t2 | result = mongoCollection(t2).track(t2, t))
}
/**
* Gets a reference to a `Mongo` collection use.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user` would be a use of a `Mongo` collection, and so the result.
*/
private DataFlow::Node mongoCollection() {
mongoCollection(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
private class MongoCollectionMethodNames extends string {
MongoCollectionMethodNames() {
this in [
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
]
}
}
/**
* Gets a reference to a `Mongo` collection method.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find` would be a collection method, and so the result.
*/
private DataFlow::Node mongoCollectionMethod() {
mongoCollection() = result.(DataFlow::AttrRead).getObject() and
result.(DataFlow::AttrRead).getAttributeName() instanceof MongoCollectionMethodNames
}
/**
* Gets a reference to a `Mongo` collection method call
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
*/
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
override DataFlow::Node getQuery() { result = this.getArg(0) }
}
/**
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
*
* ```py
* from flask_mongoengine import MongoEngine
* db = MongoEngine(app)
* class Movie(db.Document):
* title = db.StringField(required=True)
*
* Movie.objects(__raw__=json_search)
* ```
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
.getMember(["Document", "EmbeddedDocument"])
.getASubclass()
.getMember("objects")
.getACall()
}
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
}
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* ObjectId returns a string representing an id.
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
BsonObjectIdCall() {
this =
API::moduleImport(["bson", "bson.objectid", "bson.json_util"])
.getMember("ObjectId")
.getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}

View File

@@ -0,0 +1,34 @@
/**
* Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package.
* See https://pypi.org/project/xmltodict/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `xmltodict` PyPI package.
* See https://pypi.org/project/xmltodict/
*/
private module XmlToDictModel {
/** Gets a reference to the `xmltodict` module. */
API::Node xmltodict() { result = API::moduleImport("xmltodict") }
/**
* A call to `xmltodict.parse`
* See https://github.com/martinblech/xmltodict/blob/ae19c452ca000bf243bfc16274c060bf3bf7cf51/xmltodict.py#L198
*/
private class XmlToDictParseCall extends Decoding::Range, DataFlow::CallCfgNode {
XmlToDictParseCall() { this = xmltodict().getMember("parse").getACall() }
override predicate mayExecuteInput() { none() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "XML" }
}
}

View File

@@ -0,0 +1,57 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow2
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.TaintTracking2
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.security.dataflow.ChainedConfigs12
import experimental.semmle.python.Concepts
import semmle.python.Concepts
/**
* A taint-tracking configuration for detecting string-to-dict conversions.
*/
class RFSToDictConfig extends TaintTracking::Configuration {
RFSToDictConfig() { this = "RFSToDictConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(Decoding decoding | decoding.getFormat() = "JSON" and sink = decoding.getOutput())
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
}
}
/**
* A taint-tracking configuration for detecting NoSQL injections (previously converted to a dict).
*/
class FromDataDictToSink extends TaintTracking2::Configuration {
FromDataDictToSink() { this = "FromDataDictToSink" }
override predicate isSource(DataFlow::Node source) {
exists(Decoding decoding | decoding.getFormat() = "JSON" and source = decoding.getOutput())
}
override predicate isSink(DataFlow::Node sink) { sink = any(NoSQLQuery noSQLQuery).getQuery() }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
}
}
/**
* A predicate checking string-to-dict conversion and its arrival to a NoSQL injection sink.
*/
predicate noSQLInjectionFlow(CustomPathNode source, CustomPathNode sink) {
exists(
RFSToDictConfig config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
FromDataDictToSink config2
|
config.hasFlowPath(source.asNode1(), mid1) and
config2.hasFlowPath(mid2, sink.asNode2()) and
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
)
}