Merge branch 'main' into python/model-aiomysql

2026-04-30 03:05:15 +02:00 · 2021-11-10 14:32:36 +01:00
parent 57e7bfbdba de926dc2a1
commit d23a920ed4
100 changed files with 8056 additions and 2002 deletions
--- a/python/change-notes/2021-11-09-model-aiopg.md
+++ b/python/change-notes/2021-11-09-model-aiopg.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of `aiopg` for sinks executing SQL.
--- a/python/ql/lib/semmle/python/Frameworks.qll
+++ b/python/ql/lib/semmle/python/Frameworks.qll
@@ -7,6 +7,7 @@
 private import semmle.python.frameworks.Aioch
 private import semmle.python.frameworks.Aiohttp
 private import semmle.python.frameworks.Aiomysql
+private import semmle.python.frameworks.Aiopg
 private import semmle.python.frameworks.Asyncpg
 private import semmle.python.frameworks.ClickhouseDriver
 private import semmle.python.frameworks.Cryptodome
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
@@ -53,6 +53,8 @@ private module Cached {
    DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
    or
    awaitStep(nodeFrom, nodeTo)
+    or
+    asyncWithStep(nodeFrom, nodeTo)
  }
 }

@@ -211,3 +213,24 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
 predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
 }
+
+/**
+ * Holds if taint can flow from `nodeFrom` to `nodeTo` inside an `async with` statement.
+ *
+ * For example in
+ * ```python
+ * async with open("foo") as f:
+ * ```
+ * the variable `f` is tainted if the result of `open("foo")` is tainted.
+ */
+predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
+    nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
+    nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
+    // see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
+    with.getContextExpr() = contextManager.getNode() and
+    with.getOptionalVars() = var.getNode() and
+    with.isAsync() and
+    contextManager.strictlyDominates(var)
+  )
+}
--- a/python/ql/lib/semmle/python/frameworks/Aiopg.qll
+++ b/python/ql/lib/semmle/python/frameworks/Aiopg.qll
@@ -0,0 +1,141 @@
+/**
+ * Provides classes modeling security-relevant aspects of the `aiopg` PyPI package.
+ * See
+ * - https://aiopg.readthedocs.io/en/stable/index.html
+ * - https://pypi.org/project/aiopg/
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+private import semmle.python.Concepts
+private import semmle.python.ApiGraphs
+
+/** Provides models for the `aiopg` PyPI package. */
+private module Aiopg {
+  private import semmle.python.internal.Awaited
+
+  /**
+   * A `ConnectionPool` is created when the result of `aiopg.create_pool()` is awaited.
+   * See https://aiopg.readthedocs.io/en/stable/core.html#pool
+   */
+  API::Node connectionPool() {
+    result = API::moduleImport("aiopg").getMember("create_pool").getReturn().getAwaited()
+  }
+
+  /**
+   * A `Connection` is created when
+   * - the result of `aiopg.connect()` is awaited.
+   * - the result of calling `acquire` on a `ConnectionPool` is awaited.
+   * See https://aiopg.readthedocs.io/en/stable/core.html#connection
+   */
+  API::Node connection() {
+    result = API::moduleImport("aiopg").getMember("connect").getReturn().getAwaited()
+    or
+    result = connectionPool().getMember("acquire").getReturn().getAwaited()
+  }
+
+  /**
+   * A `Cursor` is created when
+   * - the result of calling `cursor` on a `ConnectionPool` is awaited.
+   * - the result of calling `cursor` on a `Connection` is awaited.
+   * See https://aiopg.readthedocs.io/en/stable/core.html#cursor
+   */
+  API::Node cursor() {
+    result = connectionPool().getMember("cursor").getReturn().getAwaited()
+    or
+    result = connection().getMember("cursor").getReturn().getAwaited()
+  }
+
+  /**
+   * Calling `execute` on a `Cursor` constructs a query.
+   * See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
+   */
+  class CursorExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
+    CursorExecuteCall() { this = cursor().getMember("execute").getACall() }
+
+    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("operation")] }
+  }
+
+  /**
+   * This is only needed to connect the argument to the execute call with the subsequent awaiting.
+   * It should be obsolete once we have `API::CallNode` available.
+   */
+  private DataFlow::TypeTrackingNode cursorExecuteCall(DataFlow::TypeTracker t, DataFlow::Node sql) {
+    // base case: start tracking at a call to `execute` on a cursor
+    t.start() and
+    sql = result.(CursorExecuteCall).getSql()
+    or
+    exists(DataFlow::TypeTracker t2 | result = cursorExecuteCall(t2, sql).track(t2, t))
+  }
+
+  DataFlow::Node cursorExecuteCall(DataFlow::Node sql) {
+    cursorExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
+  }
+
+  /**
+   * Awaiting the result of calling `execute` executes the query.
+   * See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
+   */
+  class AwaitedCursorExecuteCall extends SqlExecution::Range {
+    DataFlow::Node sql;
+
+    AwaitedCursorExecuteCall() { this = awaited(cursorExecuteCall(sql)) }
+
+    override DataFlow::Node getSql() { result = sql }
+  }
+
+  /**
+   * An `Engine` is created when the result of calling `aiopg.sa.create_engine` is awaited.
+   * See https://aiopg.readthedocs.io/en/stable/sa.html#engine
+   */
+  API::Node engine() {
+    result =
+      API::moduleImport("aiopg").getMember("sa").getMember("create_engine").getReturn().getAwaited()
+  }
+
+  /**
+   * A `SAConnection` is created when the result of calling `acquire` on an `Engine` is awaited.
+   * See https://aiopg.readthedocs.io/en/stable/sa.html#connection
+   */
+  API::Node saConnection() { result = engine().getMember("acquire").getReturn().getAwaited() }
+
+  /**
+   * Calling `execute` on a `SAConnection` constructs a query.
+   * See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
+   */
+  class SAConnectionExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
+    SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() }
+
+    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
+  }
+
+  /**
+   * This is only needed to connect the argument to the execute call with the subsequent awaiting.
+   * It should be obsolete once we have `API::CallNode` available.
+   */
+  private DataFlow::TypeTrackingNode saConnectionExecuteCall(
+    DataFlow::TypeTracker t, DataFlow::Node sql
+  ) {
+    // base case: start tracking at a call to `execute` on an `SAConnection`
+    t.start() and
+    sql = result.(SAConnectionExecuteCall).getSql()
+    or
+    exists(DataFlow::TypeTracker t2 | result = saConnectionExecuteCall(t2, sql).track(t2, t))
+  }
+
+  DataFlow::Node saConnectionExecuteCall(DataFlow::Node sql) {
+    saConnectionExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
+  }
+
+  /**
+   * Awaiting the result of calling `execute` executes the query.
+   * See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
+   */
+  class AwaitedSAConnectionExecuteCall extends SqlExecution::Range {
+    DataFlow::Node sql;
+
+    AwaitedSAConnectionExecuteCall() { this = awaited(saConnectionExecuteCall(sql)) }
+
+    override DataFlow::Node getSql() { result = sql }
+  }
+}
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_async.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_async.py
@@ -31,7 +31,7 @@ async def test_async_with():
    ctx = AsyncContext()
    taint(ctx)
    async with ctx as tainted:
-        ensure_tainted(tainted) # $ MISSING: tainted
+        ensure_tainted(tainted) # $ tainted


 class AsyncIter:
@@ -45,7 +45,7 @@ async def test_async_for():
    iter = AsyncIter()
    taint(iter)
    async for tainted in iter:
-        ensure_tainted(tainted) # $ MISSING: tainted
+        ensure_tainted(tainted) # $ tainted



--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_for.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_for.py
@@ -0,0 +1,30 @@
+# Add taintlib to PATH so it can be imported during runtime without any hassle
+import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
+from taintlib import *
+
+# This has no runtime impact, but allows autocomplete to work
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from ..taintlib import *
+
+
+# Actual tests
+
+class Iter:
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise StopIteration
+
+def test_for():
+    iter = Iter()
+    taint(iter)
+    for tainted in iter:
+        ensure_tainted(tainted) # $ tainted
+
+
+
+# Make tests runnable
+
+test_for()
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_with.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_with.py
@@ -0,0 +1,60 @@
+# Add taintlib to PATH so it can be imported during runtime without any hassle
+import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
+from taintlib import *
+
+# This has no runtime impact, but allows autocomplete to work
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from ..taintlib import *
+
+
+# Actual tests
+
+class Context:
+    def __enter__(self):
+        return ""
+
+    def __exit__(self, exc_type, exc, tb):
+        pass
+
+def test_with():
+    ctx = Context()
+    taint(ctx)
+    with ctx as tainted:
+        ensure_tainted(tainted) # $ tainted
+
+class Context_taint:
+    def __enter__(self):
+        return TAINTED_STRING
+
+    def __exit__(self, exc_type, exc, tb):
+        pass
+
+def test_with_taint():
+    ctx = Context_taint()
+    with ctx as tainted:
+        ensure_tainted(tainted) # $ MISSING: tainted
+
+
+class Context_arg:
+    def __init__(self, arg):
+        self.arg = arg
+
+    def __enter__(self):
+        return self.arg
+
+    def __exit__(self, exc_type, exc, tb):
+        pass
+
+def test_with_arg():
+    ctx = Context_arg(TAINTED_STRING)
+    with ctx as tainted:
+        ensure_tainted(tainted) # $ tainted
+
+
+
+# Make tests runnable
+
+test_with()
+test_with_taint()
+test_with_arg()
--- a/python/ql/test/experimental/meta/InlineTaintTest.qll
+++ b/python/ql/test/experimental/meta/InlineTaintTest.qll
@@ -30,24 +30,36 @@ DataFlow::Node shouldNotBeTainted() {
  )
 }

-class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
-  TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
+// this module allows the configuration to be imported in other `.ql` files without the
+// top level query predicates of this file coming into scope.
+module Conf {
+  class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
+    TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }

-  override predicate isSource(DataFlow::Node source) {
-    source.asCfgNode().(NameNode).getId() in [
-        "TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
-      ]
-    or
-    source instanceof RemoteFlowSource
-  }
+    override predicate isSource(DataFlow::Node source) {
+      source.asCfgNode().(NameNode).getId() in [
+          "TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
+        ]
+      or
+      // User defined sources
+      exists(CallNode call |
+        call.getFunction().(NameNode).getId() = "taint" and
+        source.(DataFlow::CfgNode).getNode() = call.getAnArg()
+      )
+      or
+      source instanceof RemoteFlowSource
+    }

-  override predicate isSink(DataFlow::Node sink) {
-    sink = shouldBeTainted()
-    or
-    sink = shouldNotBeTainted()
+    override predicate isSink(DataFlow::Node sink) {
+      sink = shouldBeTainted()
+      or
+      sink = shouldNotBeTainted()
+    }
  }
 }

+import Conf
+
 class InlineTaintTest extends InlineExpectationsTest {
  InlineTaintTest() { this = "InlineTaintTest" }

--- a/python/ql/test/experimental/meta/debug/InlineTaintTestPaths.expected
+++ b/python/ql/test/experimental/meta/debug/InlineTaintTestPaths.expected
@@ -0,0 +1,4 @@
+edges
+nodes
+subpaths
+#select
--- a/python/ql/test/experimental/meta/debug/InlineTaintTestPaths.ql
+++ b/python/ql/test/experimental/meta/debug/InlineTaintTestPaths.ql
@@ -0,0 +1,25 @@
+/**
+ * @kind path-problem
+ */
+
+// This query is for debugging InlineTaintTestFailures.
+// The intended usage is
+// 1. load the database of the failing test
+// 2. run this query to see actual paths
+// 3. if necessary, look at partial paths by (un)commenting appropriate lines
+import python
+import semmle.python.dataflow.new.DataFlow
+import experimental.meta.InlineTaintTest::Conf
+// import DataFlow::PartialPathGraph
+import DataFlow::PathGraph
+
+class Conf extends TestTaintTrackingConfiguration {
+  override int explorationLimit() { result = 5 }
+}
+
+// from Conf config, DataFlow::PartialPathNode source, DataFlow::PartialPathNode sink
+// where config.hasPartialFlow(source, sink, _)
+from Conf config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "This node receives taint from $@.", source.getNode(),
+  "this source"
--- a/python/ql/test/library-tests/frameworks/aiopg/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/aiopg/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/aiopg/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/aiopg/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/library-tests/frameworks/aiopg/test.py
+++ b/python/ql/test/library-tests/frameworks/aiopg/test.py
@@ -0,0 +1,33 @@
+import aiopg
+
+# Only a cursor can execute sql.
+async def test_cursor():
+    # Create connection directly
+    conn = await aiopg.connect()
+    cur = await conn.cursor()
+    await cur.execute("sql")  # $ getSql="sql" constructedSql="sql"
+
+    # Create connection via pool
+    async with aiopg.create_pool() as pool:
+        # Create Cursor via Connection
+        async with pool.acquire() as conn:
+            async with conn.cursor() as cur:
+                await cur.execute("sql")  # $ getSql="sql" constructedSql="sql"
+
+        # Create Cursor directly
+        async with pool.cursor() as cur:
+            await cur.execute("sql")  # $ getSql="sql" constructedSql="sql"
+
+    # variants using as few `async with` as possible
+    pool = await aiopg.create_pool()
+    conn = await pool.acquire()
+    cur = await conn.cursor()
+    await cur.execute("sql")  # $ getSql="sql" constructedSql="sql"
+
+# Test SQLAlchemy integration
+from aiopg.sa import create_engine
+
+async def test_engine():
+    engine = await create_engine()
+    conn = await engine.acquire()
+    await conn.execute("sql")  # $ getSql="sql" constructedSql="sql"