Merge pull request #14070 from yoff/python/promote-nosql-query

Python: promote nosql query
This commit is contained in:
yoff
2023-09-29 14:21:22 +02:00
committed by GitHub
34 changed files with 883 additions and 311 deletions

View File

@@ -378,6 +378,68 @@ module SqlExecution {
}
}
/** Provides a class for modeling NoSQL execution APIs. */
module NoSqlExecution {
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSqlExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be executed. */
abstract DataFlow::Node getQuery();
/** Holds if this query will unpack/interpret a dictionary */
abstract predicate interpretsDict();
/** Holds if this query can be dangerous when run on a user-controlled string */
abstract predicate vulnerableToStrings();
}
}
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSqlExecution::Range` instead.
*/
class NoSqlExecution extends DataFlow::Node instanceof NoSqlExecution::Range {
/** Gets the argument that specifies the NoSQL query to be executed. */
DataFlow::Node getQuery() { result = super.getQuery() }
/** Holds if this query will unpack/interpret a dictionary */
predicate interpretsDict() { super.interpretsDict() }
/** Holds if this query can be dangerous when run on a user-controlled string */
predicate vulnerableToStrings() { super.vulnerableToStrings() }
}
/** Provides classes for modeling NoSql sanitization-related APIs. */
module NoSqlSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSql query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer::Range` instead.
*/
class NoSqlSanitizer extends DataFlow::Node instanceof NoSqlSanitizer::Range {
/** Gets the argument that specifies the NoSql query to be sanitized. */
DataFlow::Node getAnInput() { result = super.getAnInput() }
}
/**
* A data-flow node that executes a regular expression.
*

View File

@@ -10,6 +10,7 @@ private import semmle.python.frameworks.Aiomysql
private import semmle.python.frameworks.Aiosqlite
private import semmle.python.frameworks.Aiopg
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.BSon
private import semmle.python.frameworks.CassandraDriver
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
@@ -42,6 +43,7 @@ private import semmle.python.frameworks.Phoenixdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.Pycurl
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.PyMongo
private import semmle.python.frameworks.Pymssql
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Pyodbc

View File

@@ -0,0 +1,38 @@
/**
* Provides classes modeling security-relevant aspects of the `bson` PyPI package.
* See
* - https://pypi.org/project/bson/
* - https://github.com/py-bson/bson
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `bson` PyPI package.
* See
* - https://pypi.org/project/bson/
* - https://github.com/py-bson/bson
*/
private module BSon {
/**
* ObjectId returns a string representing an id.
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
BsonObjectIdCall() {
exists(API::Node mod |
mod = API::moduleImport("bson")
or
mod = API::moduleImport("bson").getMember(["objectid", "json_util"])
|
this = mod.getMember("ObjectId").getACall()
)
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}

View File

@@ -0,0 +1,299 @@
/**
* Provides classes modeling security-relevant aspects of the PyMongo bindings.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private module PyMongo {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
result =
API::moduleImport("pymongo").getMember("mongo_client").getMember("MongoClient").getReturn()
}
/** Gets a reference to `flask_pymongo.PyMongo` */
private API::Node flask_PyMongo() {
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
}
/** Gets a reference to `mongoengine` */
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
/** Gets a reference to `flask_mongoengine.MongoEngine` */
private API::Node flask_MongoEngine() {
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
}
/**
* Gets a reference to an initialized `Mongo` instance.
* See `pyMongo()`, `flask_PyMongo()`
*/
private API::Node mongoClientInstance() {
result = pyMongo() or
result = flask_PyMongo()
}
/**
* Gets a reference to a `Mongo` DB instance.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db` would be a `Mongo` instance.
*/
private API::Node mongoDBInstance() {
result = mongoClientInstance().getASubscript()
or
result = mongoClientInstance().getAMember()
or
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
or
result = flask_MongoEngine().getMember("get_db").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
}
/**
* Gets a reference to a `Mongo` collection.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user` would be a `Mongo` collection.
*/
private API::Node mongoCollection() {
result = mongoDBInstance().getASubscript()
or
result = mongoDBInstance().getAMember()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
result = mongoDBInstance().getMember(["get_collection", "create_collection"]).getReturn()
}
/** Gets the name of a find_* relevant `Mongo` collection-level operation method. */
private string mongoCollectionMethodName() {
result in [
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
]
}
/**
* Gets a reference to a `Mongo` collection method call
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
*/
private class MongoCollectionCall extends API::CallNode, NoSqlExecution::Range {
MongoCollectionCall() {
this = mongoCollection().getMember(mongoCollectionMethodName()).getACall()
}
/** Gets the argument that specifies the NoSQL query to be executed, as an API::node */
pragma[inline]
API::Node getQueryAsApiNode() {
// 'filter' is allowed keyword in pymongo, see https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
result = this.getParameter(0, "filter")
}
override DataFlow::Node getQuery() { result = this.getQueryAsApiNode().asSink() }
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
/**
* See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.aggregate
*/
private class MongoCollectionAggregation extends API::CallNode, NoSqlExecution::Range {
MongoCollectionAggregation() { this = mongoCollection().getMember("aggregate").getACall() }
override DataFlow::Node getQuery() {
result = this.getParameter(0, "pipeline").getASubscript().asSink()
}
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
private class MongoMapReduce extends API::CallNode, NoSqlExecution::Range {
MongoMapReduce() { this = mongoCollection().getMember("map_reduce").getACall() }
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
override predicate interpretsDict() { none() }
override predicate vulnerableToStrings() { any() }
}
private class MongoMapReduceQuery extends API::CallNode, NoSqlExecution::Range {
MongoMapReduceQuery() { this = mongoCollection().getMember("map_reduce").getACall() }
override DataFlow::Node getQuery() { result = this.getArgByName("query") }
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
/** The `$where` query operator executes a string as JavaScript. */
private class WhereQueryOperator extends DataFlow::Node, Decoding::Range {
DataFlow::Node query;
WhereQueryOperator() {
exists(API::Node dictionary |
dictionary = any(MongoCollectionCall c).getQueryAsApiNode() and
query = dictionary.getSubscript("$where").asSink() and
this = dictionary.getAValueReachingSink()
)
}
override DataFlow::Node getAnInput() { result = query }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "NoSQL" }
override predicate mayExecuteInput() { none() }
}
/**
* The `$function` query operator executes its `body` string as JavaScript.
*
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/function/#mongodb-expression-exp.-function
*/
private class FunctionQueryOperator extends DataFlow::Node, Decoding::Range {
DataFlow::Node query;
FunctionQueryOperator() {
exists(API::Node dictionary |
dictionary =
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$function") and
query = dictionary.getSubscript("body").asSink() and
this = dictionary.getAValueReachingSink()
)
}
override DataFlow::Node getAnInput() { result = query }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "NoSQL" }
override predicate mayExecuteInput() { none() }
}
/**
* The `$accumulator` query operator executes strings in some of its fields as JavaScript.
*
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/accumulator/#mongodb-group-grp.-accumulator
*/
private class AccumulatorQueryOperator extends DataFlow::Node, Decoding::Range {
DataFlow::Node query;
AccumulatorQueryOperator() {
exists(API::Node dictionary |
dictionary =
mongoCollection()
.getMember("aggregate")
.getACall()
.getParameter(0)
.getASubscript*()
.getSubscript("$accumulator") and
query = dictionary.getSubscript(["init", "accumulate", "merge", "finalize"]).asSink() and
this = dictionary.getAValueReachingSink()
)
}
override DataFlow::Node getAnInput() { result = query }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "NoSQL" }
override predicate mayExecuteInput() { any() }
}
/**
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
*
* ```py
* from flask_mongoengine import MongoEngine
* db = MongoEngine(app)
* class Movie(db.Document):
* title = db.StringField(required=True)
*
* Movie.objects(__raw__=json_search)
* ```
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlExecution::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
.getMember(["Document", "EmbeddedDocument"])
.getASubclass()
.getMember("objects")
.getACall()
}
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
override predicate interpretsDict() { any() }
override predicate vulnerableToStrings() { none() }
}
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* An equality operator can protect against dictionary interpretation.
* For instance, in `{'password': {"$eq": password} }`, if a dictionary is injected into
* `password`, it will not match.
*/
private class EqualityOperator extends DataFlow::Node, NoSqlSanitizer::Range {
EqualityOperator() {
this =
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$eq").asSink()
}
override DataFlow::Node getAnInput() { result = this }
}
}

View File

@@ -0,0 +1,104 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "NoSql injection"
* vulnerabilities, as well as extension points for adding your own.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.Concepts
/**
* Provides default sources, sinks and sanitizers for detecting
* "NoSql injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module NoSqlInjection {
private newtype TFlowState =
TString() or
TDict()
/** A flow state, tracking the structure of the data. */
abstract class FlowState extends TFlowState {
/** Gets a textual representation of this element. */
abstract string toString();
}
/** A state where the tracked data is only a string. */
class String extends FlowState, TString {
override string toString() { result = "String" }
}
/**
* A state where the tracked data has been converted to
* a dictionary.
*
* We include cases where data represent JSON objects, so
* it could actually still be just a string. It could
* also contain query operators, or even JavaScript code.
*/
class Dict extends FlowState, TDict {
override string toString() { result = "Dict" }
}
/** A source allowing string inputs. */
abstract class StringSource extends DataFlow::Node { }
/** A source of allowing dictionaries. */
abstract class DictSource extends DataFlow::Node { }
/** A sink vulnerable to user controlled strings. */
abstract class StringSink extends DataFlow::Node { }
/** A sink vulnerable to user controlled dictionaries. */
abstract class DictSink extends DataFlow::Node { }
/** A data flow node where a string is converted into a dictionary. */
abstract class StringToDictConversion extends DataFlow::Node {
/** Gets the argument that specifies the string to be converted. */
abstract DataFlow::Node getAnInput();
/** Gets the resulting dictionary. */
abstract DataFlow::Node getOutput();
}
/** A remote flow source considered a source of user controlled strings. */
class RemoteFlowSourceAsStringSource extends RemoteFlowSource, StringSource { }
/** A NoSQL query that is vulnerable to user controlled strings. */
class NoSqlExecutionAsStringSink extends StringSink {
NoSqlExecutionAsStringSink() {
exists(NoSqlExecution noSqlExecution | this = noSqlExecution.getQuery() |
noSqlExecution.vulnerableToStrings()
)
}
}
/** A NoSQL query that is vulnerable to user controlled dictionaries. */
class NoSqlExecutionAsDictSink extends DictSink {
NoSqlExecutionAsDictSink() {
exists(NoSqlExecution noSqlExecution | this = noSqlExecution.getQuery() |
noSqlExecution.interpretsDict()
)
}
}
/** A JSON decoding converts a string to a dictionary. */
class JsonDecoding extends Decoding, StringToDictConversion {
JsonDecoding() { this.getFormat() = "JSON" }
override DataFlow::Node getAnInput() { result = Decoding.super.getAnInput() }
override DataFlow::Node getOutput() { result = Decoding.super.getOutput() }
}
/** A NoSQL decoding interprets a string as a dictionary. */
class NoSqlDecoding extends Decoding, StringToDictConversion {
NoSqlDecoding() { this.getFormat() = "NoSQL" }
override DataFlow::Node getAnInput() { result = Decoding.super.getAnInput() }
override DataFlow::Node getOutput() { result = Decoding.super.getOutput() }
}
}

View File

@@ -0,0 +1,61 @@
/**
* Provides a taint-tracking configuration for detecting NoSQL injection vulnerabilities
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
private import NoSqlInjectionCustomizations::NoSqlInjection as C
/**
* A taint-tracking configuration for detecting NoSQL injection vulnerabilities.
*/
module NoSqlInjectionConfig implements DataFlow::StateConfigSig {
class FlowState = C::FlowState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof C::StringSource and
state instanceof C::String
or
source instanceof C::DictSource and
state instanceof C::Dict
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof C::StringSink and
(
state instanceof C::String
or
// since Dicts can include strings,
// e.g. JSON objects can encode strings.
state instanceof C::Dict
)
or
sink instanceof C::DictSink and
state instanceof C::Dict
}
predicate isBarrier(DataFlow::Node node, FlowState state) {
// Block `String` paths here, since they change state to `Dict`
exists(C::StringToDictConversion c | node = c.getOutput()) and
state instanceof C::String
}
predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
exists(C::StringToDictConversion c |
nodeFrom = c.getAnInput() and
nodeTo = c.getOutput()
) and
stateFrom instanceof C::String and
stateTo instanceof C::Dict
}
predicate isBarrier(DataFlow::Node node) {
node = any(NoSqlSanitizer noSqlSanitizer).getAnInput()
}
}
module NoSqlInjectionFlow = TaintTracking::GlobalWithState<NoSqlInjectionConfig>;

View File

@@ -4,17 +4,17 @@
* malicious NoSQL code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 8.8
* @id py/nosql-injection
* @tags security
* experimental
* external/cwe/cwe-943
*/
import python
import experimental.semmle.python.security.injection.NoSQLInjection
import DataFlow::PathGraph
import semmle.python.security.dataflow.NoSqlInjectionQuery
import NoSqlInjectionFlow::PathGraph
from NoSqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "This NoSQL query contains an unsanitized $@.", source,
from NoSqlInjectionFlow::PathNode source, NoSqlInjectionFlow::PathNode sink
where NoSqlInjectionFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "This NoSQL query contains an unsanitized $@.", source,
"user-provided value"

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* The query `py/nosql-injection` for finding NoSQL injection vulnerabilities is now available in the default security suite.

View File

@@ -216,56 +216,6 @@ class SqlEscape extends DataFlow::Node instanceof SqlEscape::Range {
DataFlow::Node getAnInput() { result = super.getAnInput() }
}
/** Provides a class for modeling NoSql execution APIs. */
module NoSqlQuery {
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSql query to be executed. */
abstract DataFlow::Node getQuery();
}
}
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSQLQuery::Range` instead.
*/
class NoSqlQuery extends DataFlow::Node instanceof NoSqlQuery::Range {
/** Gets the argument that specifies the NoSql query to be executed. */
DataFlow::Node getQuery() { result = super.getQuery() }
}
/** Provides classes for modeling NoSql sanitization-related APIs. */
module NoSqlSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSql query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer::Range` instead.
*/
class NoSqlSanitizer extends DataFlow::Node instanceof NoSqlSanitizer::Range {
/** Gets the argument that specifies the NoSql query to be sanitized. */
DataFlow::Node getAnInput() { result = super.getAnInput() }
}
/** Provides classes for modeling HTTP Header APIs. */
module HeaderDeclaration {
/**

View File

@@ -7,7 +7,6 @@ private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.frameworks.Csv
private import experimental.semmle.python.libraries.PyJWT

View File

@@ -1,179 +0,0 @@
/**
* Provides classes modeling security-relevant aspects of the standard libraries.
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module NoSql {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
result =
API::moduleImport("pymongo").getMember("mongo_client").getMember("MongoClient").getReturn()
}
/** Gets a reference to `flask_pymongo.PyMongo` */
private API::Node flask_PyMongo() {
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
}
/** Gets a reference to `mongoengine` */
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
/** Gets a reference to `flask_mongoengine.MongoEngine` */
private API::Node flask_MongoEngine() {
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
}
/**
* Gets a reference to an initialized `Mongo` instance.
* See `pyMongo()`, `flask_PyMongo()`
*/
private API::Node mongoClientInstance() {
result = pyMongo() or
result = flask_PyMongo()
}
/**
* Gets a reference to a `Mongo` DB instance.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db` would be a `Mongo` instance.
*/
private API::Node mongoDBInstance() {
result = mongoClientInstance().getASubscript()
or
result = mongoClientInstance().getAMember()
or
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
or
result = flask_MongoEngine().getMember("get_db").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
}
/**
* Gets a reference to a `Mongo` collection.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user` would be a `Mongo` collection.
*/
private API::Node mongoCollection() {
result = mongoDBInstance().getASubscript()
or
result = mongoDBInstance().getAMember()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
result = mongoDBInstance().getMember(["get_collection", "create_collection"]).getReturn()
}
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
private class MongoCollectionMethodNames extends string {
MongoCollectionMethodNames() {
this in [
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
]
}
}
/**
* Gets a reference to a `Mongo` collection method call
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
*/
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoCollectionCall() {
this = mongoCollection().getMember(any(MongoCollectionMethodNames m)).getACall()
}
override DataFlow::Node getQuery() { result = this.getArg(0) }
}
/**
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
*
* ```py
* from flask_mongoengine import MongoEngine
* db = MongoEngine(app)
* class Movie(db.Document):
* title = db.StringField(required=True)
*
* Movie.objects(__raw__=json_search)
* ```
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
.getMember(["Document", "EmbeddedDocument"])
.getASubclass()
.getMember("objects")
.getACall()
}
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
}
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
/**
* ObjectId returns a string representing an id.
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
BsonObjectIdCall() {
exists(API::Node mod |
mod = API::moduleImport("bson")
or
mod = API::moduleImport("bson").getMember(["objectid", "json_util"])
|
this = mod.getMember("ObjectId").getACall()
)
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}

View File

@@ -1,54 +0,0 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import experimental.semmle.python.Concepts
import semmle.python.Concepts
module NoSqlInjection {
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "NoSQLInjection" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof RemoteFlowSource and
state instanceof RemoteInput
}
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink = any(NoSqlQuery noSqlQuery).getQuery() and
state instanceof ConvertedToDict
}
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
// Block `RemoteInput` paths here, since they change state to `ConvertedToDict`
exists(Decoding decoding | decoding.getFormat() = "JSON" and node = decoding.getOutput()) and
state instanceof RemoteInput
}
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
exists(Decoding decoding | decoding.getFormat() = "JSON" |
nodeFrom = decoding.getAnInput() and
nodeTo = decoding.getOutput()
) and
stateFrom instanceof RemoteInput and
stateTo instanceof ConvertedToDict
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSqlSanitizer noSqlSanitizer).getAnInput()
}
}
/** A flow state signifying remote input. */
class RemoteInput extends DataFlow::FlowState {
RemoteInput() { this = "RemoteInput" }
}
/** A flow state signifying remote input converted to a dictionary. */
class ConvertedToDict extends DataFlow::FlowState {
ConvertedToDict() { this = "ConvertedToDict" }
}
}

View File

@@ -17,6 +17,7 @@ import semmle.python.security.dataflow.CodeInjectionCustomizations
import semmle.python.security.dataflow.CommandInjectionCustomizations
import semmle.python.security.dataflow.LdapInjectionCustomizations
import semmle.python.security.dataflow.LogInjectionCustomizations
import semmle.python.security.dataflow.NoSqlInjectionCustomizations
import semmle.python.security.dataflow.PathInjectionCustomizations
import semmle.python.security.dataflow.PolynomialReDoSCustomizations
import semmle.python.security.dataflow.ReflectedXSSCustomizations
@@ -57,6 +58,10 @@ DataFlow::Node relevantTaintSink(string kind) {
or
kind = "RegexInjection" and result instanceof RegexInjection::Sink
or
kind = "NoSqlInjection (string sink)" and result instanceof NoSqlInjection::StringSink
or
kind = "NoSqlInjection (dict sink)" and result instanceof NoSqlInjection::DictSink
or
kind = "ServerSideRequestForgery" and result instanceof ServerSideRequestForgery::Sink
or
kind = "SqlInjection" and result instanceof SqlInjection::Sink

View File

@@ -1 +0,0 @@
experimental/Security/CWE-943/NoSQLInjection.ql

View File

@@ -0,0 +1,3 @@
failures
missingAnnotationOnSink
testFailures

View File

@@ -0,0 +1,4 @@
import python
import experimental.dataflow.TestUtil.DataflowQueryTest
import semmle.python.security.dataflow.NoSqlInjectionQuery
import FromTaintTrackingStateConfig<NoSqlInjectionConfig>

View File

@@ -1,4 +1,34 @@
edges
| PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:1:26:1:32 | GSSA Variable request |
| PoC/server.py:1:26:1:32 | GSSA Variable request | PoC/server.py:26:21:26:27 | ControlFlowNode for request |
| PoC/server.py:1:26:1:32 | GSSA Variable request | PoC/server.py:43:14:43:20 | ControlFlowNode for request |
| PoC/server.py:1:26:1:32 | GSSA Variable request | PoC/server.py:52:14:52:20 | ControlFlowNode for request |
| PoC/server.py:1:26:1:32 | GSSA Variable request | PoC/server.py:77:14:77:20 | ControlFlowNode for request |
| PoC/server.py:1:26:1:32 | GSSA Variable request | PoC/server.py:98:14:98:20 | ControlFlowNode for request |
| PoC/server.py:26:5:26:17 | SSA variable author_string | PoC/server.py:27:25:27:37 | ControlFlowNode for author_string |
| PoC/server.py:26:21:26:27 | ControlFlowNode for request | PoC/server.py:26:5:26:17 | SSA variable author_string |
| PoC/server.py:27:5:27:10 | SSA variable author | PoC/server.py:30:27:30:44 | ControlFlowNode for Dict |
| PoC/server.py:27:5:27:10 | SSA variable author | PoC/server.py:31:34:31:51 | ControlFlowNode for Dict |
| PoC/server.py:27:14:27:38 | ControlFlowNode for Attribute() | PoC/server.py:27:5:27:10 | SSA variable author |
| PoC/server.py:27:25:27:37 | ControlFlowNode for author_string | PoC/server.py:27:14:27:38 | ControlFlowNode for Attribute() |
| PoC/server.py:43:5:43:10 | SSA variable author | PoC/server.py:47:38:47:67 | ControlFlowNode for BinaryExpr |
| PoC/server.py:43:14:43:20 | ControlFlowNode for request | PoC/server.py:43:5:43:10 | SSA variable author |
| PoC/server.py:47:38:47:67 | ControlFlowNode for BinaryExpr | PoC/server.py:47:27:47:68 | ControlFlowNode for Dict |
| PoC/server.py:52:5:52:10 | SSA variable author | PoC/server.py:54:17:54:70 | ControlFlowNode for BinaryExpr |
| PoC/server.py:52:14:52:20 | ControlFlowNode for request | PoC/server.py:52:5:52:10 | SSA variable author |
| PoC/server.py:53:5:53:10 | SSA variable search | PoC/server.py:61:27:61:58 | ControlFlowNode for Dict |
| PoC/server.py:53:14:57:5 | ControlFlowNode for Dict | PoC/server.py:53:5:53:10 | SSA variable search |
| PoC/server.py:54:17:54:70 | ControlFlowNode for BinaryExpr | PoC/server.py:53:14:57:5 | ControlFlowNode for Dict |
| PoC/server.py:77:5:77:10 | SSA variable author | PoC/server.py:80:23:80:101 | ControlFlowNode for BinaryExpr |
| PoC/server.py:77:14:77:20 | ControlFlowNode for request | PoC/server.py:77:5:77:10 | SSA variable author |
| PoC/server.py:78:5:78:15 | SSA variable accumulator | PoC/server.py:84:5:84:9 | SSA variable group |
| PoC/server.py:78:19:83:5 | ControlFlowNode for Dict | PoC/server.py:78:5:78:15 | SSA variable accumulator |
| PoC/server.py:80:23:80:101 | ControlFlowNode for BinaryExpr | PoC/server.py:78:19:83:5 | ControlFlowNode for Dict |
| PoC/server.py:84:5:84:9 | SSA variable group | PoC/server.py:91:29:91:47 | ControlFlowNode for Dict |
| PoC/server.py:84:5:84:9 | SSA variable group | PoC/server.py:92:38:92:56 | ControlFlowNode for Dict |
| PoC/server.py:98:5:98:10 | SSA variable author | PoC/server.py:99:5:99:10 | SSA variable mapper |
| PoC/server.py:98:14:98:20 | ControlFlowNode for request | PoC/server.py:98:5:98:10 | SSA variable author |
| PoC/server.py:99:5:99:10 | SSA variable mapper | PoC/server.py:102:9:102:14 | ControlFlowNode for mapper |
| flask_mongoengine_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_mongoengine_bad.py:1:26:1:32 | GSSA Variable request |
| flask_mongoengine_bad.py:1:26:1:32 | GSSA Variable request | flask_mongoengine_bad.py:19:21:19:27 | ControlFlowNode for request |
| flask_mongoengine_bad.py:1:26:1:32 | GSSA Variable request | flask_mongoengine_bad.py:26:21:26:27 | ControlFlowNode for request |
@@ -60,20 +90,64 @@ edges
| pymongo_test.py:1:26:1:32 | GSSA Variable request | pymongo_test.py:12:21:12:27 | ControlFlowNode for request |
| pymongo_test.py:1:26:1:32 | GSSA Variable request | pymongo_test.py:29:27:29:33 | ControlFlowNode for request |
| pymongo_test.py:1:26:1:32 | GSSA Variable request | pymongo_test.py:39:27:39:33 | ControlFlowNode for request |
| pymongo_test.py:1:26:1:32 | GSSA Variable request | pymongo_test.py:52:26:52:32 | ControlFlowNode for request |
| pymongo_test.py:12:5:12:17 | SSA variable unsafe_search | pymongo_test.py:13:30:13:42 | ControlFlowNode for unsafe_search |
| pymongo_test.py:12:21:12:27 | ControlFlowNode for request | pymongo_test.py:12:5:12:17 | SSA variable unsafe_search |
| pymongo_test.py:13:5:13:15 | SSA variable json_search | pymongo_test.py:15:42:15:62 | ControlFlowNode for Dict |
| pymongo_test.py:13:19:13:43 | ControlFlowNode for Attribute() | pymongo_test.py:13:5:13:15 | SSA variable json_search |
| pymongo_test.py:13:30:13:42 | ControlFlowNode for unsafe_search | pymongo_test.py:13:19:13:43 | ControlFlowNode for Attribute() |
| pymongo_test.py:29:5:29:12 | SSA variable event_id | pymongo_test.py:33:34:33:73 | ControlFlowNode for Dict |
| pymongo_test.py:29:5:29:12 | SSA variable event_id | pymongo_test.py:33:45:33:72 | ControlFlowNode for Fstring |
| pymongo_test.py:29:16:29:51 | ControlFlowNode for Attribute() | pymongo_test.py:29:5:29:12 | SSA variable event_id |
| pymongo_test.py:29:27:29:33 | ControlFlowNode for request | pymongo_test.py:29:27:29:50 | ControlFlowNode for Subscript |
| pymongo_test.py:29:27:29:50 | ControlFlowNode for Subscript | pymongo_test.py:29:16:29:51 | ControlFlowNode for Attribute() |
| pymongo_test.py:39:5:39:12 | SSA variable event_id | pymongo_test.py:43:34:43:73 | ControlFlowNode for Dict |
| pymongo_test.py:33:45:33:72 | ControlFlowNode for Fstring | pymongo_test.py:33:34:33:73 | ControlFlowNode for Dict |
| pymongo_test.py:39:5:39:12 | SSA variable event_id | pymongo_test.py:43:45:43:72 | ControlFlowNode for Fstring |
| pymongo_test.py:39:16:39:51 | ControlFlowNode for Attribute() | pymongo_test.py:39:5:39:12 | SSA variable event_id |
| pymongo_test.py:39:27:39:33 | ControlFlowNode for request | pymongo_test.py:39:27:39:50 | ControlFlowNode for Subscript |
| pymongo_test.py:39:27:39:50 | ControlFlowNode for Subscript | pymongo_test.py:39:16:39:51 | ControlFlowNode for Attribute() |
| pymongo_test.py:43:45:43:72 | ControlFlowNode for Fstring | pymongo_test.py:43:34:43:73 | ControlFlowNode for Dict |
| pymongo_test.py:52:5:52:11 | SSA variable decoded | pymongo_test.py:55:17:55:23 | ControlFlowNode for decoded |
| pymongo_test.py:52:15:52:50 | ControlFlowNode for Attribute() | pymongo_test.py:52:5:52:11 | SSA variable decoded |
| pymongo_test.py:52:26:52:32 | ControlFlowNode for request | pymongo_test.py:52:26:52:49 | ControlFlowNode for Subscript |
| pymongo_test.py:52:26:52:49 | ControlFlowNode for Subscript | pymongo_test.py:52:15:52:50 | ControlFlowNode for Attribute() |
| pymongo_test.py:54:5:54:10 | SSA variable search | pymongo_test.py:59:25:59:56 | ControlFlowNode for Dict |
| pymongo_test.py:54:14:58:5 | ControlFlowNode for Dict | pymongo_test.py:54:5:54:10 | SSA variable search |
| pymongo_test.py:55:17:55:23 | ControlFlowNode for decoded | pymongo_test.py:54:14:58:5 | ControlFlowNode for Dict |
| pymongo_test.py:55:17:55:23 | ControlFlowNode for decoded | pymongo_test.py:61:25:61:57 | ControlFlowNode for Dict |
| pymongo_test.py:55:17:55:23 | ControlFlowNode for decoded | pymongo_test.py:62:25:62:42 | ControlFlowNode for Dict |
| pymongo_test.py:55:17:55:23 | ControlFlowNode for decoded | pymongo_test.py:63:25:63:31 | ControlFlowNode for decoded |
nodes
| PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| PoC/server.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
| PoC/server.py:26:5:26:17 | SSA variable author_string | semmle.label | SSA variable author_string |
| PoC/server.py:26:21:26:27 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| PoC/server.py:27:5:27:10 | SSA variable author | semmle.label | SSA variable author |
| PoC/server.py:27:14:27:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| PoC/server.py:27:25:27:37 | ControlFlowNode for author_string | semmle.label | ControlFlowNode for author_string |
| PoC/server.py:30:27:30:44 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:31:34:31:51 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:43:5:43:10 | SSA variable author | semmle.label | SSA variable author |
| PoC/server.py:43:14:43:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| PoC/server.py:47:27:47:68 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:47:38:47:67 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| PoC/server.py:52:5:52:10 | SSA variable author | semmle.label | SSA variable author |
| PoC/server.py:52:14:52:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| PoC/server.py:53:5:53:10 | SSA variable search | semmle.label | SSA variable search |
| PoC/server.py:53:14:57:5 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:54:17:54:70 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| PoC/server.py:61:27:61:58 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:77:5:77:10 | SSA variable author | semmle.label | SSA variable author |
| PoC/server.py:77:14:77:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| PoC/server.py:78:5:78:15 | SSA variable accumulator | semmle.label | SSA variable accumulator |
| PoC/server.py:78:19:83:5 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:80:23:80:101 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| PoC/server.py:84:5:84:9 | SSA variable group | semmle.label | SSA variable group |
| PoC/server.py:91:29:91:47 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:92:38:92:56 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| PoC/server.py:98:5:98:10 | SSA variable author | semmle.label | SSA variable author |
| PoC/server.py:98:14:98:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| PoC/server.py:99:5:99:10 | SSA variable mapper | semmle.label | SSA variable mapper |
| PoC/server.py:102:9:102:14 | ControlFlowNode for mapper | semmle.label | ControlFlowNode for mapper |
| flask_mongoengine_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| flask_mongoengine_bad.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
| flask_mongoengine_bad.py:19:5:19:17 | SSA variable unsafe_search | semmle.label | SSA variable unsafe_search |
@@ -147,13 +221,33 @@ nodes
| pymongo_test.py:29:27:29:33 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| pymongo_test.py:29:27:29:50 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| pymongo_test.py:33:34:33:73 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| pymongo_test.py:33:45:33:72 | ControlFlowNode for Fstring | semmle.label | ControlFlowNode for Fstring |
| pymongo_test.py:39:5:39:12 | SSA variable event_id | semmle.label | SSA variable event_id |
| pymongo_test.py:39:16:39:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| pymongo_test.py:39:27:39:33 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| pymongo_test.py:39:27:39:50 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| pymongo_test.py:43:34:43:73 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| pymongo_test.py:43:45:43:72 | ControlFlowNode for Fstring | semmle.label | ControlFlowNode for Fstring |
| pymongo_test.py:52:5:52:11 | SSA variable decoded | semmle.label | SSA variable decoded |
| pymongo_test.py:52:15:52:50 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| pymongo_test.py:52:26:52:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| pymongo_test.py:52:26:52:49 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| pymongo_test.py:54:5:54:10 | SSA variable search | semmle.label | SSA variable search |
| pymongo_test.py:54:14:58:5 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| pymongo_test.py:55:17:55:23 | ControlFlowNode for decoded | semmle.label | ControlFlowNode for decoded |
| pymongo_test.py:59:25:59:56 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| pymongo_test.py:61:25:61:57 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| pymongo_test.py:62:25:62:42 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| pymongo_test.py:63:25:63:31 | ControlFlowNode for decoded | semmle.label | ControlFlowNode for decoded |
subpaths
#select
| PoC/server.py:30:27:30:44 | ControlFlowNode for Dict | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:30:27:30:44 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| PoC/server.py:31:34:31:51 | ControlFlowNode for Dict | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:31:34:31:51 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| PoC/server.py:47:27:47:68 | ControlFlowNode for Dict | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:47:27:47:68 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| PoC/server.py:61:27:61:58 | ControlFlowNode for Dict | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:61:27:61:58 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| PoC/server.py:91:29:91:47 | ControlFlowNode for Dict | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:91:29:91:47 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| PoC/server.py:92:38:92:56 | ControlFlowNode for Dict | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:92:38:92:56 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| PoC/server.py:102:9:102:14 | ControlFlowNode for mapper | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | PoC/server.py:102:9:102:14 | ControlFlowNode for mapper | This NoSQL query contains an unsanitized $@. | PoC/server.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| flask_mongoengine_bad.py:22:34:22:44 | ControlFlowNode for json_search | flask_mongoengine_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_mongoengine_bad.py:22:34:22:44 | ControlFlowNode for json_search | This NoSQL query contains an unsanitized $@. | flask_mongoengine_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| flask_mongoengine_bad.py:30:39:30:59 | ControlFlowNode for Dict | flask_mongoengine_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_mongoengine_bad.py:30:39:30:59 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | flask_mongoengine_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| flask_pymongo_bad.py:14:31:14:51 | ControlFlowNode for Dict | flask_pymongo_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_pymongo_bad.py:14:31:14:51 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | flask_pymongo_bad.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
@@ -166,3 +260,7 @@ subpaths
| pymongo_test.py:15:42:15:62 | ControlFlowNode for Dict | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:15:42:15:62 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| pymongo_test.py:33:34:33:73 | ControlFlowNode for Dict | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:33:34:33:73 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| pymongo_test.py:43:34:43:73 | ControlFlowNode for Dict | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:43:34:43:73 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| pymongo_test.py:59:25:59:56 | ControlFlowNode for Dict | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:59:25:59:56 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| pymongo_test.py:61:25:61:57 | ControlFlowNode for Dict | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:61:25:61:57 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| pymongo_test.py:62:25:62:42 | ControlFlowNode for Dict | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:62:25:62:42 | ControlFlowNode for Dict | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| pymongo_test.py:63:25:63:31 | ControlFlowNode for decoded | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | pymongo_test.py:63:25:63:31 | ControlFlowNode for decoded | This NoSQL query contains an unsanitized $@. | pymongo_test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -0,0 +1 @@
Security/CWE-943/NoSqlInjection.ql

View File

@@ -0,0 +1,16 @@
from pymongo import MongoClient
client = MongoClient()
db = client.test_database
import datetime
post = {
"author": "Mike",
"text": "My first blog post!",
"tags": ["mongodb", "python", "pymongo"],
"date": datetime.datetime.now(tz=datetime.timezone.utc),
}
posts = db.posts
post_id = posts.insert_one(post).inserted_id
post_id

View File

@@ -0,0 +1,19 @@
Tutorials:
- [pymongo](https://pymongo.readthedocs.io/en/stable/tutorial.html)
- [install mongodb](https://www.mongodb.com/docs/manual/tutorial/install-mongodb-on-os-x/#std-label-install-mdb-community-macos)
I recommend creating a virtual environment with venv and then installing dependencies via
```
python -m pip --install -r requirements.txt
```
Start mongodb:
```
mongod --config /usr/local/etc/mongod.conf --fork
```
run flask app:
```
flask --app server run
```
Navigate to the root to see routes. For each route try to get the system to reveal the person in the database. If you know the name, you can just input it, but in some cases you can get to the person without knowing the name!

View File

@@ -0,0 +1,2 @@
flask
pymongo==3.9

View File

@@ -0,0 +1,119 @@
from flask import Flask, request
from pymongo import MongoClient
import json
client = MongoClient()
db = client.test_database
posts = db.posts
app = Flask(__name__)
def show_post(post, query):
if post:
return "You found " + post['author'] + "!"
else:
return "You did not find " + query
@app.route('/plain', methods=['GET'])
def plain():
author = request.args['author']
post = posts.find_one({'author': author}) # $ result=OK
return show_post(post, author)
@app.route('/dict', methods=['GET'])
def as_dict():
author_string = request.args['author']
author = json.loads(author_string)
# Use {"$ne": 1} as author
# Found by http://127.0.0.1:5000/dict?author={%22$ne%22:1}
post = posts.find_one({'author': author}) # $ result=BAD
post = posts.find_one(filter={'author': author}) # $ result=BAD
return show_post(post, author)
@app.route('/dictHardened', methods=['GET'])
def as_dict_hardened():
author_string = request.args['author']
author = json.loads(author_string)
post = posts.find_one({'author': {"$eq": author}}) # $ result=OK
return show_post(post, author)
@app.route('/byWhere', methods=['GET'])
def by_where():
author = request.args['author']
# Use `" | "a" === "a` as author
# making the query `this.author === "" | "a" === "a"`
# Found by http://127.0.0.1:5000/byWhere?author=%22%20|%20%22a%22%20===%20%22a
post = posts.find_one({'$where': 'this.author === "'+author+'"'}) # $ result=BAD
return show_post(post, author)
@app.route('/byFunction', methods=['GET'])
def by_function():
author = request.args['author']
search = {
"body": 'function(author) { return(author === "'+author+'") }',
"args": [ "$author" ],
"lang": "js"
}
# Use `" | "a" === "a` as author
# making the query `this.author === "" | "a" === "a"`
# Found by http://127.0.0.1:5000/byFunction?author=%22%20|%20%22a%22%20===%20%22a
post = posts.find_one({'$expr': {'$function': search}}) # $ result=BAD
return show_post(post, author)
@app.route('/byFunctionArg', methods=['GET'])
def by_function_arg():
author = request.args['author']
search = {
"body": 'function(author, target) { return(author === target) }',
"args": [ "$author", author ],
"lang": "js"
}
post = posts.find_one({'$expr': {'$function': search}}) # $ result=OK
return show_post(post, author)
@app.route('/byGroup', methods=['GET'])
def by_group():
author = request.args['author']
accumulator = {
"init": 'function() { return "Not found" }',
"accumulate": 'function(state, author) { return (author === "'+author+'") ? author : state }',
"accumulateArgs": ["$author"],
"merge": 'function(state1, state2) { return (state1 === "Not found") ? state2 : state1 }'
}
group = {
"_id": "null",
"author": { "$accumulator": accumulator }
}
# Use `" | "a" === "a` as author
# making the query `this.author === "" | "a" === "a"`
# Found by http://127.0.0.1:5000/byGroup?author=%22%20|%20%22a%22%20===%20%22a
post = posts.aggregate([{ "$group": group }]).next() # $ result=BAD
post = posts.aggregate(pipeline=[{ "$group": group }]).next() # $ result=BAD
return show_post(post, author)
# works with pymongo 3.9, `map_reduce` is removed in pymongo 4.0
@app.route('/byMapReduce', methods=['GET'])
def by_map_reduce():
author = request.args['author']
mapper = 'function() { emit(this.author, this.author === "'+author+'") }'
reducer = "function(key, values) { return values.some( x => x ) }"
results = posts.map_reduce(
mapper, # $ result=BAD
reducer, # $ result=OK
"results")
# Use `" | "a" === "a` as author
# making the query `this.author === "" | "a" === "a"`
# Found by http://127.0.0.1:5000/byMapReduce?author=%22%20|%20%22a%22%20===%20%22a
post = results.find_one({'value': True})
if(post):
post["author"] = post["_id"]
return show_post(post, author)
@app.route('/', methods=['GET'])
def show_routes():
links = []
for rule in app.url_map.iter_rules():
if "GET" in rule.methods:
links.append((rule.rule, rule.endpoint))
return links

View File

@@ -19,7 +19,7 @@ def subclass_objects():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
return Movie.objects(__raw__=json_search)
return Movie.objects(__raw__=json_search) #$ result=BAD
@app.route("/get_db_find")
def get_db_find():
@@ -27,7 +27,7 @@ def get_db_find():
json_search = json.loads(unsafe_search)
retrieved_db = db.get_db()
return retrieved_db["Movie"].find({'name': json_search})
return retrieved_db["Movie"].find({'name': json_search}) #$ result=BAD
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -21,7 +21,7 @@ def subclass_objects():
json_search = json.loads(unsafe_search)
safe_search = sanitize(json_search)
return Movie.objects(__raw__=safe_search)
return Movie.objects(__raw__=safe_search) #$ result=OK
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -11,7 +11,7 @@ def home_page():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
return mongo.db.user.find({'name': json_search})
return mongo.db.user.find({'name': json_search}) #$ result=BAD
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -13,7 +13,7 @@ def home_page():
json_search = json.loads(unsafe_search)
safe_search = sanitize(json_search)
return mongo.db.user.find({'name': safe_search})
return mongo.db.user.find({'name': safe_search}) #$ result=OK
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -18,8 +18,8 @@ def connect_find():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
db = me.connect('mydb')
return db.movie.find({'name': json_search})
db = me.connect('mydb')
return db.movie.find({'name': json_search}) #$ result=BAD
@app.route("/connection_connect_find")
def connection_connect_find():
@@ -27,7 +27,7 @@ def connection_connect_find():
json_search = json.loads(unsafe_search)
db = connect('mydb')
return db.movie.find({'name': json_search})
return db.movie.find({'name': json_search}) #$ result=BAD
@app.route("/get_db_find")
def get_db_find():
@@ -35,7 +35,7 @@ def get_db_find():
json_search = json.loads(unsafe_search)
db = me.get_db()
return db.movie.find({'name': json_search})
return db.movie.find({'name': json_search}) #$ result=BAD
@app.route("/connection_get_db_find")
def connection_get_db_find():
@@ -43,22 +43,22 @@ def connection_get_db_find():
json_search = json.loads(unsafe_search)
db = get_db()
return db.movie.find({'name': json_search})
return db.movie.find({'name': json_search}) #$ result=BAD
@app.route("/subclass_objects")
def subclass_objects():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
return Movie.objects(__raw__=json_search)
return Movie.objects(__raw__=json_search) #$ result=BAD
@app.route("/subscript_find")
def subscript_find():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
db = me.connect('mydb')
return db['movie'].find({'name': json_search})
db = me.connect('mydb')
return db['movie'].find({'name': json_search}) #$ result=BAD
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -21,7 +21,7 @@ def connect_find():
safe_search = sanitize(json_search)
db = me.connect('mydb')
return db.movie.find({'name': safe_search})
return db.movie.find({'name': safe_search}) #$ result=OK
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 -r PoC

View File

@@ -12,7 +12,7 @@ def bad():
unsafe_search = request.args['search']
json_search = json.loads(unsafe_search)
return client.db.collection.find_one({'data': json_search})
return client.db.collection.find_one({'data': json_search}) #$ result=BAD
@app.route("/good")
@@ -21,7 +21,7 @@ def good():
json_search = json.loads(unsafe_search)
safe_search = sanitize(json_search)
return client.db.collection.find_one({'data': safe_search})
return client.db.collection.find_one({'data': safe_search}) #$ result=OK
@app.route("/bad2")
@@ -30,7 +30,7 @@ def bad2():
client = MongoClient("localhost", 27017, maxPoolSize=50)
db = client.localhost
collection = db['collection']
cursor = collection.find_one({"$where": f"this._id == '${event_id}'"})
cursor = collection.find_one({"$where": f"this._id == '${event_id}'"}) #$ result=BAD
@app.route("/bad3")
@@ -40,8 +40,27 @@ def bad3():
client = MongoClient("localhost", 27017, maxPoolSize=50)
db = client.get_database(name="localhost")
collection = db.get_collection("collection")
cursor = collection.find_one({"$where": f"this._id == '${event_id}'"})
cursor = collection.find_one({"$where": f"this._id == '${event_id}'"}) #$ result=BAD
@app.route("/bad4")
def bad4():
client = MongoClient("localhost", 27017, maxPoolSize=50)
db = client.get_database(name="localhost")
collection = db.get_collection("collection")
decoded = json.loads(request.args['event_id'])
search = {
"body": decoded,
"args": [ "$event_id" ],
"lang": "js"
}
collection.find_one({'$expr': {'$function': search}}) # $ result=BAD
collection.find_one({'$expr': {'$function': decoded}}) # $ result=BAD
collection.find_one({'$expr': decoded}) # $ result=BAD
collection.find_one(decoded) # $ result=BAD
if __name__ == "__main__":
app.run(debug=True)