Merge pull request #6776 from yoff/python/model-asyncpg

Python: Model `asyncpg`
This commit is contained in:
Rasmus Wriedt Larsen
2021-10-29 13:54:44 +02:00
committed by GitHub
24 changed files with 534 additions and 32 deletions

View File

@@ -304,6 +304,8 @@ module API {
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
*/
private import semmle.python.internal.Awaited
cached
newtype TApiNode =
/** The root of the API graph. */
@@ -518,10 +520,9 @@ module API {
)
or
// awaiting
exists(Await await, DataFlow::Node awaitedValue |
exists(DataFlow::Node awaitedValue |
lbl = Label::await() and
ref.asExpr() = await and
await.getValue() = awaitedValue.asExpr() and
ref = awaited(awaitedValue) and
pred.flowsTo(awaitedValue)
)
)

View File

@@ -326,9 +326,47 @@ module CodeExecution {
}
}
/**
* A data-flow node that constructs an SQL statement.
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlConstruction::Range` instead.
*/
class SqlConstruction extends DataFlow::Node {
SqlConstruction::Range range;
SqlConstruction() { this = range }
/** Gets the argument that specifies the SQL statements to be constructed. */
DataFlow::Node getSql() { result = range.getSql() }
}
/** Provides a class for modeling new SQL execution APIs. */
module SqlConstruction {
/**
* A data-flow node that constructs an SQL statement.
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the SQL statements to be constructed. */
abstract DataFlow::Node getSql();
}
}
/**
* A data-flow node that executes SQL statements.
*
* If the context of interest is such that merely constructing an SQL statement
* would be valuabe to report, then consider using `SqlConstruction`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlExecution::Range` instead.
*/
@@ -346,6 +384,9 @@ module SqlExecution {
/**
* A data-flow node that executes SQL statements.
*
* If the context of interest is such that merely constructing an SQL statement
* would be valuabe to report, then consider using `SqlConstruction`.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlExecution` instead.
*/

View File

@@ -6,6 +6,7 @@
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography

View File

@@ -178,9 +178,23 @@ module EssaFlow {
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and
not with.isAsync() and
contextManager.strictlyDominates(var)
)
or
// Async with var definition
// `async with f(42) as x:`
// nodeFrom is `x`, cfg node
// nodeTo is `x`, essa var
//
// This makes the cfg node the local source of the awaited value.
exists(With with, ControlFlowNode var |
nodeFrom.(CfgNode).getNode() = var and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
with.getOptionalVars() = var.getNode() and
with.isAsync()
)
or
// Parameter definition
// `def foo(x):`
// nodeFrom is `x`, cfgNode

View File

@@ -0,0 +1,162 @@
/**
* Provides classes modeling security-relevant aspects of the `asyncpg` PyPI package.
* See https://magicstack.github.io/asyncpg/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/** Provides models for the `asyncpg` PyPI package. */
private module Asyncpg {
private import semmle.python.internal.Awaited
/** A `ConectionPool` is created when the result of `asyncpg.create_pool()` is awaited. */
API::Node connectionPool() {
result = API::moduleImport("asyncpg").getMember("create_pool").getReturn().getAwaited()
}
/**
* A `Connection` is created when
* - the result of `asyncpg.connect()` is awaited.
* - the result of calling `aquire` on a `ConnectionPool` is awaited.
*/
API::Node connection() {
result = API::moduleImport("asyncpg").getMember("connect").getReturn().getAwaited()
or
result = connectionPool().getMember("acquire").getReturn().getAwaited()
}
/** `Connection`s and `ConnectionPool`s provide some methods that execute SQL. */
class SqlExecutionOnConnection extends SqlExecution::Range, DataFlow::MethodCallNode {
string methodName;
SqlExecutionOnConnection() {
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"] and
this.calls([connectionPool().getAUse(), connection().getAUse()], methodName)
}
override DataFlow::Node getSql() {
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval"] and
result in [this.getArg(0), this.getArgByName("query")]
or
methodName = "executemany" and
result in [this.getArg(0), this.getArgByName("command")]
}
}
/** `Connection`s and `ConnectionPool`s provide some methods that access the file system. */
class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode {
string methodName;
FileAccessOnConnection() {
methodName in ["copy_from_query", "copy_from_table", "copy_to_table"] and
this.calls([connectionPool().getAUse(), connection().getAUse()], methodName)
}
// The path argument is keyword only.
override DataFlow::Node getAPathArgument() {
methodName in ["copy_from_query", "copy_from_table"] and
result = this.getArgByName("output")
or
methodName = "copy_to_table" and
result = this.getArgByName("source")
}
}
/**
* Provides models of the `PreparedStatement` class in `asyncpg`.
* `PreparedStatement`s are created when the result of calling `prepare(query)` on a connection is awaited.
* The result of calling `prepare(query)` is a `PreparedStatementFactory` and the argument, `query` needs to
* be tracked to the place where a `PreparedStatement` is created and then futher to any executing methods.
* Hence the two type trackers.
*
* TODO: Rewrite this, once we have `API::CallNode` available.
*/
module PreparedStatement {
class PreparedStatementConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
}
private DataFlow::TypeTrackingNode preparedStatementFactory(
DataFlow::TypeTracker t, DataFlow::Node sql
) {
t.start() and
sql = result.(PreparedStatementConstruction).getSql()
or
exists(DataFlow::TypeTracker t2 | result = preparedStatementFactory(t2, sql).track(t2, t))
}
DataFlow::Node preparedStatementFactory(DataFlow::Node sql) {
preparedStatementFactory(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
private DataFlow::TypeTrackingNode preparedStatement(DataFlow::TypeTracker t, DataFlow::Node sql) {
t.start() and
result = awaited(preparedStatementFactory(sql))
or
exists(DataFlow::TypeTracker t2 | result = preparedStatement(t2, sql).track(t2, t))
}
DataFlow::Node preparedStatement(DataFlow::Node sql) {
preparedStatement(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
class PreparedStatementExecution extends SqlExecution::Range, DataFlow::MethodCallNode {
DataFlow::Node sql;
PreparedStatementExecution() {
this.calls(preparedStatement(sql), ["executemany", "fetch", "fetchrow", "fetchval"])
}
override DataFlow::Node getSql() { result = sql }
}
}
/**
* Provides models of the `Cursor` class in `asyncpg`.
* `Cursor`s are created
* - when the result of calling `cursor(query)` on a connection is awaited.
* - when the result of calling `cursor()` on a prepared statement is awaited.
* The result of calling `cursor` in either case is a `CursorFactory` and the argument, `query` needs to
* be tracked to the place where a `Cursor` is created, hence the type tracker.
* The creation of the `Cursor` executes the query.
*
* TODO: Rewrite this, once we have `API::CallNode` available.
*/
module Cursor {
class CursorConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
CursorConstruction() { this = connection().getMember("cursor").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
}
private DataFlow::TypeTrackingNode cursorFactory(DataFlow::TypeTracker t, DataFlow::Node sql) {
// cursor created from connection
t.start() and
sql = result.(CursorConstruction).getSql()
or
// cursor created from prepared statement
t.start() and
result.(DataFlow::MethodCallNode).calls(PreparedStatement::preparedStatement(sql), "cursor")
or
exists(DataFlow::TypeTracker t2 | result = cursorFactory(t2, sql).track(t2, t))
}
DataFlow::Node cursorFactory(DataFlow::Node sql) {
cursorFactory(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/** The creation of a `Cursor` executes the associated query. */
class CursorCreation extends SqlExecution::Range {
DataFlow::Node sql;
CursorCreation() { this = awaited(cursorFactory(sql)) }
override DataFlow::Node getSql() { result = sql }
}
}
}

View File

@@ -313,9 +313,9 @@ module SqlAlchemy {
* A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
* textual SQL string directly.
*/
abstract class TextClauseConstruction extends DataFlow::CallCfgNode {
abstract class TextClauseConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
/** Gets the argument that specifies the SQL text. */
DataFlow::Node getTextArg() { result in [this.getArg(0), this.getArgByName("text")] }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("text")] }
}
/** `TextClause` constructions from the `sqlalchemy` package. */

View File

@@ -0,0 +1,47 @@
/**
* INTERNAL: Do not use.
*
* Provides helper class for defining additional API graph edges.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Holds if `result` is the result of awaiting `awaitedValue`.
*/
cached
DataFlow::Node awaited(DataFlow::Node awaitedValue) {
// `await` x
// - `awaitedValue` is `x`
// - `result` is `await x`
exists(Await await |
await.getValue() = awaitedValue.asExpr() and
result.asExpr() = await
)
or
// `async for x in l`
// - `awaitedValue` is `l`
// - `result` is `l` (`x` is behind a read step)
exists(AsyncFor asyncFor |
// To consider `x` the result of awaiting, we would use asyncFor.getTarget() = awaitedValue.asExpr(),
// but that is behind a read step rather than a flow step.
asyncFor.getIter() = awaitedValue.asExpr() and
result.asExpr() = asyncFor.getIter()
)
or
// `async with x as y`
// - `awaitedValue` is `x`
// - `result` is `x` and `y` if it exists
exists(AsyncWith asyncWith |
awaitedValue.asExpr() = asyncWith.getContextExpr() and
result.asExpr() in [
// `x`
asyncWith.getContextExpr(),
// `y`, if it exists
asyncWith.getOptionalVars()
]
)
}

View File

@@ -42,6 +42,13 @@ module SqlInjection {
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A SQL statement of a SQL construction, considered as a flow sink.
*/
class SqlConstructionAsSink extends Sink {
SqlConstructionAsSink() { this = any(SqlConstruction c).getSql() }
}
/**
* A SQL statement of a SQL execution, considered as a flow sink.
*/
@@ -49,13 +56,6 @@ module SqlInjection {
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
}
/**
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
*/
class TextArgAsSink extends Sink {
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/