Merge branch 'main' into python/model-aiomysql

This commit is contained in:
yoff
2021-11-10 14:32:36 +01:00
committed by GitHub
100 changed files with 8056 additions and 2002 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of `aiopg` for sinks executing SQL.

View File

@@ -7,6 +7,7 @@
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.Aiomysql
private import semmle.python.frameworks.Aiopg
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome

View File

@@ -53,6 +53,8 @@ private module Cached {
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
}
}
@@ -211,3 +213,24 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` inside an `async with` statement.
*
* For example in
* ```python
* async with open("foo") as f:
* ```
* the variable `f` is tainted if the result of `open("foo")` is tainted.
*/
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and
with.isAsync() and
contextManager.strictlyDominates(var)
)
}

View File

@@ -0,0 +1,141 @@
/**
* Provides classes modeling security-relevant aspects of the `aiopg` PyPI package.
* See
* - https://aiopg.readthedocs.io/en/stable/index.html
* - https://pypi.org/project/aiopg/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/** Provides models for the `aiopg` PyPI package. */
private module Aiopg {
private import semmle.python.internal.Awaited
/**
* A `ConectionPool` is created when the result of `aiopg.create_pool()` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#pool
*/
API::Node connectionPool() {
result = API::moduleImport("aiopg").getMember("create_pool").getReturn().getAwaited()
}
/**
* A `Connection` is created when
* - the result of `aiopg.connect()` is awaited.
* - the result of calling `aquire` on a `ConnectionPool` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#connection
*/
API::Node connection() {
result = API::moduleImport("aiopg").getMember("connect").getReturn().getAwaited()
or
result = connectionPool().getMember("acquire").getReturn().getAwaited()
}
/**
* A `Cursor` is created when
* - the result of calling `cursor` on a `ConnectionPool` is awaited.
* - the result of calling `cursor` on a `Connection` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#cursor
*/
API::Node cursor() {
result = connectionPool().getMember("cursor").getReturn().getAwaited()
or
result = connection().getMember("cursor").getReturn().getAwaited()
}
/**
* Calling `execute` on a `Cursor` constructs a query.
* See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
*/
class CursorExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
CursorExecuteCall() { this = cursor().getMember("execute").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("operation")] }
}
/**
* This is only needed to connect the argument to the execute call with the subsequnt awaiting.
* It should be obsolete once we have `API::CallNode` available.
*/
private DataFlow::TypeTrackingNode cursorExecuteCall(DataFlow::TypeTracker t, DataFlow::Node sql) {
// cursor created from connection
t.start() and
sql = result.(CursorExecuteCall).getSql()
or
exists(DataFlow::TypeTracker t2 | result = cursorExecuteCall(t2, sql).track(t2, t))
}
DataFlow::Node cursorExecuteCall(DataFlow::Node sql) {
cursorExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/**
* Awaiting the result of calling `execute` executes the query.
* See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
*/
class AwaitedCursorExecuteCall extends SqlExecution::Range {
DataFlow::Node sql;
AwaitedCursorExecuteCall() { this = awaited(cursorExecuteCall(sql)) }
override DataFlow::Node getSql() { result = sql }
}
/**
* An `Engine` is created when the result of calling `aiopg.sa.create_engine` is awaited.
* See https://aiopg.readthedocs.io/en/stable/sa.html#engine
*/
API::Node engine() {
result =
API::moduleImport("aiopg").getMember("sa").getMember("create_engine").getReturn().getAwaited()
}
/**
* A `SAConnection` is created when the result of calling `aquire` on an `Engine` is awaited.
* See https://aiopg.readthedocs.io/en/stable/sa.html#connection
*/
API::Node saConnection() { result = engine().getMember("acquire").getReturn().getAwaited() }
/**
* Calling `execute` on a `SAConnection` constructs a query.
* See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
*/
class SAConnectionExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
}
/**
* This is only needed to connect the argument to the execute call with the subsequnt awaiting.
* It should be obsolete once we have `API::CallNode` available.
*/
private DataFlow::TypeTrackingNode saConnectionExecuteCall(
DataFlow::TypeTracker t, DataFlow::Node sql
) {
// saConnection created from engine
t.start() and
sql = result.(SAConnectionExecuteCall).getSql()
or
exists(DataFlow::TypeTracker t2 | result = saConnectionExecuteCall(t2, sql).track(t2, t))
}
DataFlow::Node saConnectionExecuteCall(DataFlow::Node sql) {
saConnectionExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/**
* Awaiting the result of calling `execute` executes the query.
* See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
*/
class AwaitedSAConnectionExecuteCall extends SqlExecution::Range {
DataFlow::Node sql;
AwaitedSAConnectionExecuteCall() { this = awaited(saConnectionExecuteCall(sql)) }
override DataFlow::Node getSql() { result = sql }
}
}

View File

@@ -31,7 +31,7 @@ async def test_async_with():
ctx = AsyncContext()
taint(ctx)
async with ctx as tainted:
ensure_tainted(tainted) # $ MISSING: tainted
ensure_tainted(tainted) # $ tainted
class AsyncIter:
@@ -45,7 +45,7 @@ async def test_async_for():
iter = AsyncIter()
taint(iter)
async for tainted in iter:
ensure_tainted(tainted) # $ MISSING: tainted
ensure_tainted(tainted) # $ tainted

View File

@@ -0,0 +1,30 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
class Iter:
def __iter__(self):
return self
def __next__(self):
raise StopIteration
def test_for():
iter = Iter()
taint(iter)
for tainted in iter:
ensure_tainted(tainted) # $ tainted
# Make tests runable
test_for()

View File

@@ -0,0 +1,60 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
class Context:
def __enter__(self):
return ""
def __exit__(self, exc_type, exc, tb):
pass
def test_with():
ctx = Context()
taint(ctx)
with ctx as tainted:
ensure_tainted(tainted) # $ tainted
class Context_taint:
def __enter__(self):
return TAINTED_STRING
def __exit__(self, exc_type, exc, tb):
pass
def test_with_taint():
ctx = Context_taint()
with ctx as tainted:
ensure_tainted(tainted) # $ MISSING: tainted
class Context_arg:
def __init__(self, arg):
self.arg = arg
def __enter__(self):
return self.arg
def __exit__(self, exc_type, exc, tb):
pass
def test_with_arg():
ctx = Context_arg(TAINTED_STRING)
with ctx as tainted:
ensure_tainted(tainted) # $ tainted
# Make tests runable
test_with()
test_with_taint()
test_with_arg()

View File

@@ -30,24 +30,36 @@ DataFlow::Node shouldNotBeTainted() {
)
}
class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
// this module allows the configuration to be imported in other `.ql` files without the
// top level query predicates of this file coming into scope.
module Conf {
class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
override predicate isSource(DataFlow::Node source) {
source.asCfgNode().(NameNode).getId() in [
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
]
or
source instanceof RemoteFlowSource
}
override predicate isSource(DataFlow::Node source) {
source.asCfgNode().(NameNode).getId() in [
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
]
or
// User defined sources
exists(CallNode call |
call.getFunction().(NameNode).getId() = "taint" and
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
or
source instanceof RemoteFlowSource
}
override predicate isSink(DataFlow::Node sink) {
sink = shouldBeTainted()
or
sink = shouldNotBeTainted()
override predicate isSink(DataFlow::Node sink) {
sink = shouldBeTainted()
or
sink = shouldNotBeTainted()
}
}
}
import Conf
class InlineTaintTest extends InlineExpectationsTest {
InlineTaintTest() { this = "InlineTaintTest" }

View File

@@ -0,0 +1,4 @@
edges
nodes
subpaths
#select

View File

@@ -0,0 +1,25 @@
/**
* @kind path-problem
*/
// This query is for debugging InlineTaintTestFailures.
// The intended usage is
// 1. load the database of the failing test
// 2. run this query to see actual paths
// 3. if necessary, look at partial paths by (un)commenting appropriate lines
import python
import semmle.python.dataflow.new.DataFlow
import experimental.meta.InlineTaintTest::Conf
// import DataFlow::PartialPathGraph
import DataFlow::PathGraph
class Conf extends TestTaintTrackingConfiguration {
override int explorationLimit() { result = 5 }
}
// from Conf config, DataFlow::PartialPathNode source, DataFlow::PartialPathNode sink
// where config.hasPartialFlow(source, sink, _)
from Conf config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This node receives taint from $@.", source.getNode(),
"this source"

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,33 @@
import aiopg
# Only a cursor can execute sql.
async def test_cursor():
# Create connection directly
conn = await aiopg.connect()
cur = await conn.cursor()
await cur.execute("sql") # $ getSql="sql" constructedSql="sql"
# Create connection via pool
async with aiopg.create_pool() as pool:
# Create Cursor via Connection
async with pool.acquire() as conn:
async with conn.cursor() as cur:
await cur.execute("sql") # $ getSql="sql" constructedSql="sql"
# Create Cursor directly
async with pool.cursor() as cur:
await cur.execute("sql") # $ getSql="sql" constructedSql="sql"
# variants using as few `async with` as possible
pool = await aiopg.create_pool()
conn = await pool.acquire()
cur = await conn.cursor()
await cur.execute("sql") # $ getSql="sql" constructedSql="sql"
# Test SQLAlchemy integration
from aiopg.sa import create_engine
async def test_engine():
engine = await create_engine()
conn = await engine.acquire()
await conn.execute("sql") # $ getSql="sql" constructedSql="sql"