Merge pull request #4476 from yoff/python-port-sql-injection

Python: Port SqlInjection
This commit is contained in:
Rasmus Wriedt Larsen
2020-10-21 15:55:19 +02:00
committed by GitHub
10 changed files with 470 additions and 1 deletions

View File

@@ -0,0 +1,32 @@
/**
* @name SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/sql-injection
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * A taint-tracking configuration for SQL injection: it tracks flow from remote flow
 * sources to the SQL argument of any `SqlExecution`.
 */
class SQLInjectionConfiguration extends TaintTracking::Configuration {
  SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }

  /** Holds if `source` is a remote flow source. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is the SQL argument of some `SqlExecution`. */
  override predicate isSink(DataFlow::Node sink) {
    exists(SqlExecution execution | execution.getSql() = sink)
  }
}
// Report every SQL argument reached by a remote flow source, together with the path.
from DataFlow::PathNode source, DataFlow::PathNode sink, SQLInjectionConfiguration cfg
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
  "a user-provided value"

View File

@@ -126,6 +126,35 @@ module CodeExecution {
}
}
/**
 * A data-flow node that executes SQL statements.
 *
 * This class is the one to extend when refining existing API models. To model new
 * APIs, extend `SqlExecution::Range` instead.
 */
class SqlExecution extends DataFlow::Node {
  /** The underlying `Range` value that this node is equal to. */
  SqlExecution::Range range;

  SqlExecution() { range = this }

  /** Gets the argument that specifies the SQL statements to be executed. */
  DataFlow::Node getSql() { range.getSql() = result }
}
/** Provides a class for modeling new SQL execution APIs. */
module SqlExecution {
/**
* A data-flow node that executes SQL statements.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlExecution` instead.
*
* Every value of this class is also a `SqlExecution`, whose `getSql()` delegates to
* the `getSql()` implementation provided by the concrete subclass.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument that specifies the SQL statements to be executed.
*
* This predicate is abstract: each concrete subclass must provide it.
*/
abstract DataFlow::Node getSql();
}
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
/** Provides classes for modeling HTTP servers. */

View File

@@ -34,7 +34,7 @@ private module Django {
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node django_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["urls", "http"] and
attr_name in ["db", "urls", "http"] and
(
t.start() and
result = DataFlow::importNode("django" + "." + attr_name)
@@ -70,6 +70,309 @@ private module Django {
/** Provides models for the `django` module. */
module django {
// -------------------------------------------------------------------------
// django.db
// -------------------------------------------------------------------------
/** Gets a reference to the `django.db` module (the `db` attribute of `django`). */
DataFlow::Node db() { django_attr("db") = result }
/** Provides models for the `django.db` module. */
module db {
/**
 * Gets a reference to the `django.db.connection` object, tracked with type tracker `t`.
 */
private DataFlow::Node connection(DataFlow::TypeTracker t) {
  // An import node for `django.db.connection`.
  result = DataFlow::importNode("django.db.connection") and
  t.start()
  or
  // The `connection` attribute of a reference to `django.db`.
  result = db() and
  t.startInAttr("connection")
  or
  // Continue tracking from an earlier tracker state.
  exists(DataFlow::TypeTracker prev | result = connection(prev).track(prev, t))
}

/** Gets a reference to the `django.db.connection` object. */
DataFlow::Node connection() { connection(DataFlow::TypeTracker::end()) = result }
/** Provides models for the `django.db.connection.cursor` method. */
module cursor {
  /**
   * Gets a reference to the `django.db.connection.cursor` method, tracked with type
   * tracker `t`.
   */
  private DataFlow::Node methodRef(DataFlow::TypeTracker t) {
    // An import node for `django.db.connection.cursor`.
    result = DataFlow::importNode("django.db.connection.cursor") and
    t.start()
    or
    // The `cursor` attribute of a reference to `django.db.connection`.
    result = connection() and
    t.startInAttr("cursor")
    or
    // Continue tracking from an earlier tracker state.
    exists(DataFlow::TypeTracker prev | result = methodRef(prev).track(prev, t))
  }

  /** Gets a reference to the `django.db.connection.cursor` method. */
  DataFlow::Node methodRef() { methodRef(DataFlow::TypeTracker::end()) = result }

  /**
   * Gets a reference to a result of calling `django.db.connection.cursor`, tracked
   * with type tracker `t`.
   */
  private DataFlow::Node methodResult(DataFlow::TypeTracker t) {
    // A call whose callee is a reference to the `cursor` method.
    exists(CallNode call | call = result.asCfgNode() |
      call.getFunction() = methodRef().asCfgNode()
    ) and
    t.start()
    or
    // Continue tracking from an earlier tracker state.
    exists(DataFlow::TypeTracker prev | result = methodResult(prev).track(prev, t))
  }

  /** Gets a reference to a result of calling `django.db.connection.cursor`. */
  DataFlow::Node methodResult() { methodResult(DataFlow::TypeTracker::end()) = result }
}
/**
 * Gets a reference to the `django.db.connection.cursor.execute` function, tracked with
 * type tracker `t`.
 */
private DataFlow::Node execute(DataFlow::TypeTracker t) {
  // The `execute` attribute of a value returned by calling `connection.cursor`.
  result = cursor::methodResult() and
  t.startInAttr("execute")
  or
  // Continue tracking from an earlier tracker state.
  exists(DataFlow::TypeTracker prev | result = execute(prev).track(prev, t))
}

/** Gets a reference to the `django.db.connection.cursor.execute` function. */
DataFlow::Node execute() { execute(DataFlow::TypeTracker::end()) = result }
// -------------------------------------------------------------------------
// django.db.models
// -------------------------------------------------------------------------
// NOTE: The modelling of django models is currently fairly incomplete.
// It does not fully take `Model`s, `Manager`s, and `QuerySet`s into account.
// It simply identifies some common dangerous cases.
/**
 * Gets a reference to the `django.db.models` module, tracked with type tracker `t`.
 */
private DataFlow::Node models(DataFlow::TypeTracker t) {
  // An import node for `django.db.models`.
  t.start() and
  result = DataFlow::importNode("django.db.models")
  or
  // `models` is an attribute of the `django.db` module, not of `django` itself, so the
  // attribute read must start from a reference to `django.db` (as `connection` does).
  t.startInAttr("models") and
  result = db()
  or
  // Continue tracking from an earlier tracker state.
  exists(DataFlow::TypeTracker t2 | result = models(t2).track(t2, t))
}

/** Gets a reference to the `django.db.models` module. */
DataFlow::Node models() { result = models(DataFlow::TypeTracker::end()) }
/** Provides models for the `django.db.models` module. */
module models {
/** Provides models for the `django.db.models.Model` class. */
module Model {
  /**
   * Gets a reference to the `django.db.models.Model` class, tracked with type
   * tracker `t`.
   */
  private DataFlow::Node classRef(DataFlow::TypeTracker t) {
    // An import node for `django.db.models.Model`.
    result = DataFlow::importNode("django.db.models.Model") and
    t.start()
    or
    // The `Model` attribute of a reference to `django.db.models`.
    result = models() and
    t.startInAttr("Model")
    or
    // Continue tracking from an earlier tracker state.
    exists(DataFlow::TypeTracker prev | result = classRef(prev).track(prev, t))
  }

  /** Gets a reference to the `django.db.models.Model` class. */
  DataFlow::Node classRef() { classRef(DataFlow::TypeTracker::end()) = result }

  /**
   * A definition of a subclass of the `django.db.models.Model` class, that is, a class
   * expression with a reference to `Model` among its base expressions.
   */
  class SubclassDef extends ControlFlowNode {
    /** The name of the defined class. */
    string name;

    SubclassDef() {
      exists(ClassExpr classExpr | classExpr = this.getNode() |
        classExpr.getName() = name and
        classExpr.getABase() = classRef().asExpr()
      )
    }

    /** Gets the name of the defined class. */
    string getName() { name = result }
  }

  /**
   * Gets a reference to a class that is a subclass of the `django.db.models.Model`
   * class, tracked with type tracker `t`.
   * This is an approximation, since it simply matches identifiers.
   */
  private DataFlow::Node subclassRef(DataFlow::TypeTracker t) {
    // Any name node whose identifier equals the name of some `SubclassDef`.
    exists(SubclassDef subclass |
      result.asCfgNode().(NameNode).getId() = subclass.getName()
    ) and
    t.start()
    or
    // Continue tracking from an earlier tracker state.
    exists(DataFlow::TypeTracker prev | result = subclassRef(prev).track(prev, t))
  }

  /**
   * Gets a reference to a class that is a subclass of the `django.db.models.Model` class.
   * This is an approximation, since it simply matches identifiers.
   */
  DataFlow::Node subclassRef() { subclassRef(DataFlow::TypeTracker::end()) = result }
}
/**
 * Gets a reference to the `objects` object of a django model, tracked with type
 * tracker `t`.
 */
private DataFlow::Node objects(DataFlow::TypeTracker t) {
  // The `objects` attribute of a reference to a `Model` subclass.
  result = Model::subclassRef() and
  t.startInAttr("objects")
  or
  // Continue tracking from an earlier tracker state.
  exists(DataFlow::TypeTracker prev | result = objects(prev).track(prev, t))
}

/** Gets a reference to the `objects` object of a django model. */
DataFlow::Node objects() { objects(DataFlow::TypeTracker::end()) = result }
/**
* Gets a reference to the attribute `attr_name` of an `objects` object, tracked with
* type tracker `t`.
* WARNING: Only holds for a few predefined attributes (`annotate`, `extra` and `raw`).
*/
private DataFlow::Node objects_attr(DataFlow::TypeTracker t, string attr_name) {
// Base case: one of the supported attributes, read from a reference to `objects`.
attr_name in ["annotate", "extra", "raw"] and
t.startInAttr(attr_name) and
result = objects()
or
// Due to bad performance when using normal setup with `objects_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
// (the step itself is computed in `objects_attr_first_join`; the resulting summary is
// appended to the previous tracker state `t2` here).
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
objects_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `DataFlow::StepSummary::step` relates `objects_attr(t2, attr_name)` to `res`
* with step summary `summary`.
*
* Helper for the recursive case of `objects_attr`. It is kept as a separate
* `pragma[nomagic]` predicate as part of the performance workaround described there.
*/
pragma[nomagic]
private predicate objects_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(objects_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of an `objects` object.
* WARNING: Only holds for a few predefined attributes (`annotate`, `extra` and `raw`).
*/
DataFlow::Node objects_attr(string attr_name) {
result = objects_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
 * Gets a reference to the `django.db.models.expressions` module, tracked with type
 * tracker `t`.
 */
private DataFlow::Node expressions(DataFlow::TypeTracker t) {
  // An import node for `django.db.models.expressions`.
  result = DataFlow::importNode("django.db.models.expressions") and
  t.start()
  or
  // The `expressions` attribute of a reference to `django.db.models`.
  result = models() and
  t.startInAttr("expressions")
  or
  // Continue tracking from an earlier tracker state.
  exists(DataFlow::TypeTracker prev | result = expressions(prev).track(prev, t))
}

/** Gets a reference to the `django.db.models.expressions` module. */
DataFlow::Node expressions() { expressions(DataFlow::TypeTracker::end()) = result }
/** Provides models for the `django.db.models.expressions` module. */
module expressions {
/** Provides models for the `django.db.models.expressions.RawSQL` class. */
module RawSQL {
  /**
   * Gets a reference to the `django.db.models.expressions.RawSQL` class, tracked with
   * type tracker `t`.
   */
  private DataFlow::Node classRef(DataFlow::TypeTracker t) {
    t.start() and
    result = DataFlow::importNode("django.db.models.expressions.RawSQL")
    or
    t.start() and
    result = DataFlow::importNode("django.db.models.RawSQL") // Commonly used alias
    or
    // The `RawSQL` attribute of a reference to `django.db.models.expressions`.
    t.startInAttr("RawSQL") and
    result = expressions()
    or
    // Continue tracking from an earlier tracker state.
    exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
  }

  /**
   * Gets a reference to the `django.db.models.expressions.RawSQL` class.
   */
  DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }

  /**
   * Gets an instance of the `django.db.models.expressions.RawSQL` class, tracked with
   * type tracker `t`, where `sql` is the SQL argument given to the constructor call.
   */
  private DataFlow::Node instance(DataFlow::TypeTracker t, ControlFlowNode sql) {
    t.start() and
    exists(CallNode c | result.asCfgNode() = c |
      c.getFunction() = classRef().asCfgNode() and
      // The SQL text is the first parameter of `RawSQL`; accept it both positionally
      // and as the `sql` keyword argument.
      sql in [c.getArg(0), c.getArgByName("sql")]
    )
    or
    // Continue tracking from an earlier tracker state.
    exists(DataFlow::TypeTracker t2 | result = instance(t2, sql).track(t2, t))
  }

  /**
   * Gets an instance of the `django.db.models.expressions.RawSQL` class, where `sql`
   * is the SQL argument given to the constructor call.
   */
  DataFlow::Node instance(ControlFlowNode sql) {
    result = instance(DataFlow::TypeTracker::end(), sql)
  }
}
}
}
}
/**
 * A call to the `django.db.connection.cursor.execute` function.
 *
 * The SQL is taken from the first positional argument or from the `sql` keyword
 * argument.
 *
 * See
 * - https://docs.djangoproject.com/en/3.1/topics/db/sql/#executing-custom-sql-directly
 * - https://docs.djangoproject.com/en/3.1/topics/db/sql/#connections-and-cursors
 */
private class DbConnectionExecute extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  DbConnectionExecute() { django::db::execute().asCfgNode() = node.getFunction() }

  override DataFlow::Node getSql() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("sql")
  }
}
/**
 * A call to the `annotate` function on a model, where at least one positional or
 * keyword argument is a `RawSQL` instance.
 *
 * TODO: Consider reworking this to use taint tracking.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#annotate
 */
private class ObjectsAnnotate extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;
  /** The SQL argument that was given to the `RawSQL` constructor. */
  ControlFlowNode sql;

  ObjectsAnnotate() {
    node.getFunction() = django::db::models::objects_attr("annotate").asCfgNode() and
    exists(ControlFlowNode rawSqlArg |
      rawSqlArg = node.getArg(_)
      or
      rawSqlArg = node.getArgByName(_)
    |
      rawSqlArg = django::db::models::expressions::RawSQL::instance(sql).asCfgNode()
    )
  }

  override DataFlow::Node getSql() { sql = result.asCfgNode() }
}
/**
 * A call to the `raw` function on a model.
 *
 * The SQL is taken from the first positional argument or from the `raw_query` keyword
 * argument.
 *
 * See
 * - https://docs.djangoproject.com/en/3.1/topics/db/sql/#django.db.models.Manager.raw
 * - https://docs.djangoproject.com/en/3.1/ref/models/querysets/#raw
 */
private class ObjectsRaw extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ObjectsRaw() { node.getFunction() = django::db::models::objects_attr("raw").asCfgNode() }

  override DataFlow::Node getSql() {
    // `raw(raw_query, params=(), translations=None)`: the query may also be passed by
    // keyword, which would otherwise be missed.
    result.asCfgNode() in [node.getArg(0), node.getArgByName("raw_query")]
  }
}
/**
 * A call to the `extra` function on a model.
 *
 * The SQL-carrying arguments are those at positions 0, 1, 3 and 4, or the keyword
 * arguments `select`, `where`, `tables` and `order_by`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#extra
 */
private class ObjectsExtra extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ObjectsExtra() { django::db::models::objects_attr("extra").asCfgNode() = node.getFunction() }

  override DataFlow::Node getSql() {
    exists(ControlFlowNode arg | arg = result.asCfgNode() |
      arg = node.getArg([0, 1, 3, 4])
      or
      arg = node.getArgByName(["select", "where", "tables", "order_by"])
    )
  }
}
// -------------------------------------------------------------------------
// django.urls
// -------------------------------------------------------------------------

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,29 @@
from django.db import connection, models
from django.db.models.expressions import RawSQL
# Cursor obtained by calling `connection.cursor()` directly.
def test_plain():
cursor = connection.cursor()
cursor.execute("some sql") # $getSql="some sql"
# Cursor bound by a `with` statement; SQL passed positionally and via the `sql` keyword.
def test_context():
with connection.cursor() as cursor:
cursor.execute("some sql") # $getSql="some sql"
cursor.execute(sql="some sql") # $getSql="some sql"
# Minimal `Model` subclass, so that `User.objects` can be used in the tests below.
class User(models.Model):
pass
# Calls to `raw`, `annotate` (with `RawSQL` arguments) and `extra` on `User.objects`.
def test_model():
User.objects.raw("some sql") # $getSql="some sql"
User.objects.annotate(RawSQL("some sql")) # $getSql="some sql"
User.objects.annotate(RawSQL("foo"), RawSQL("bar")) # $getSql="foo" $getSql="bar"
User.objects.annotate(val=RawSQL("some sql")) # $getSql="some sql"
User.objects.extra("some sql") # $getSql="some sql"
User.objects.extra(select="select", where="where", tables="tables", order_by="order_by") # $getSql="select" $getSql="where" $getSql="tables" $getSql="order_by"
# `RawSQL` instance stored in a variable before being passed to `annotate`.
raw = RawSQL("so raw")
User.objects.annotate(val=raw) # $getSql="so raw"

View File

@@ -90,6 +90,23 @@ class CodeExecutionTest extends InlineExpectationsTest {
}
}
/**
 * An inline-expectations test for `SqlExecution`: for every SQL argument it reports a
 * `getSql` result located at the `SqlExecution` node.
 */
class SqlExecutionTest extends InlineExpectationsTest {
  SqlExecutionTest() { this = "SqlExecutionTest" }

  override string getARelevantTag() { result = "getSql" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "getSql" and
    // Only consider locations in files that have a relative path.
    exists(location.getFile().getRelativePath()) and
    exists(SqlExecution execution, DataFlow::Node sqlArg | sqlArg = execution.getSql() |
      location = execution.getLocation() and
      element = sqlArg.toString() and
      value = value_from_expr(sqlArg.asExpr())
    )
  }
}
class HttpServerRouteSetupTest extends InlineExpectationsTest {
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }

View File

@@ -0,0 +1,16 @@
edges
| sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr |
| sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr |
| sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr |
| sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr |
nodes
| sql_injection.py:14:15:14:22 | SSA variable username | semmle.label | SSA variable username |
| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
#select
| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | SSA variable username | a user-provided value |
| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | SSA variable username | a user-provided value |
| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | SSA variable username | a user-provided value |
| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | SSA variable username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | SSA variable username | a user-provided value |

View File

@@ -0,0 +1 @@
experimental/Security-new-dataflow/CWE-089/SqlInjection.ql

View File

@@ -0,0 +1,40 @@
"""This is adapted from ql/python/ql/test/query-tests/Security/CWE-089
we now prefer to set up routing with flask
"""
from django.db import connection, models
from django.db.models.expressions import RawSQL
from flask import Flask, request
app = Flask(__name__)
class User(models.Model):
pass
@app.route("/users/<username>")
def show_user(username):
with connection.cursor() as cursor:
# GOOD -- Using parameters
cursor.execute("SELECT * FROM users WHERE username = %s", username)
User.objects.raw("SELECT * FROM users WHERE username = %s", (username,))
# BAD -- Using string formatting
cursor.execute("SELECT * FROM users WHERE username = '%s'" % username)
# BAD -- other ways of executing raw SQL code with string interpolation
User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username))
User.objects.raw("insert into names_file ('name') values ('%s')" % username)
User.objects.extra("insert into names_file ('name') values ('%s')" % username)
# BAD (but currently no custom query to find this)
#
# It is exposed to SQL injection (https://docs.djangoproject.com/en/2.2/ref/models/querysets/#extra)
# For example, using name = "; DROP ALL TABLES -- "
# will result in SQL: SELECT * FROM name WHERE name = ''; DROP ALL TABLES -- ''
#
# This shouldn't be very widespread, since using a normal string will result in invalid SQL
# Using name = "example", will result in SQL: SELECT * FROM name WHERE name = ''example''
# which in MySQL will give a syntax error
#
# When testing this out locally, none of the queries worked against SQLite3, but I could use
# the SQL injection against MySQL.
User.objects.raw("SELECT * FROM users WHERE username = '%s'", (username,))