Add Streamlit SQLAlchemy models

2026-04-26 17:25:19 +02:00 · 2024-07-30 17:20:52 +02:00
parent 9741ddb926
commit f796efe470
1 changed files with 59 additions and 15 deletions
--- a/python/ql/lib/semmle/python/frameworks/Streamlit.qll
+++ b/python/ql/lib/semmle/python/frameworks/Streamlit.qll
@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.RemoteFlowSources
 import semmle.python.dataflow.new.TaintTracking
 import semmle.python.ApiGraphs
 import semmle.python.Concepts
+import semmle.python.frameworks.SqlAlchemy

 /**
 * Provides models for the `gradio` PyPI package.
@@ -27,31 +28,74 @@ module Streamlit {

    override string getSourceType() { result = "Streamlit user input" }
  }
+/**
+ * The Streamlit SQLConnection class, which is used to create a connection to a SQL Database.
+ * Streamlit wraps around SQL Alchemy for most database functionality, and adds some on top of it, such as the `query` method.
+ * Streamlit can also connect to Snowflake and Snowpark databases, but the modeling is not the same, so we need to limit the scope to SQL databases.
+ * https://docs.streamlit.io/develop/api-reference/connections/st.connections.sqlconnection#:~:text=to%20data.-,st.connections.SQLConnection,-Streamlit%20Version
+ * We can connect to SQL databases for example with `import streamlit as st; conn = st.connection('pets_db', type='sql')`
+ */
+  private class StreamlitSQLConnection extends API::CallNode {
+    StreamlitSQLConnection() {
+      exists(StringLiteral str, API::CallNode n |
+        str.getText().matches("sql")
+        and
+        n = API::moduleImport("streamlit").getMember("connection").getACall()
+        and
+        DataFlow::exprNode(str).(DataFlow::LocalSourceNode)
+        .flowsTo([n.getArg(1), n.getArgByName("type")])
+        and this = n
+      )

+    }
+  }
  /**
-   * The `query` call that can execute raw queries on a connection to a SQL/Sonwflake/Snowpark database.
+   * The `query` call that can execute raw queries on a connection to a SQL database.
   * https://docs.streamlit.io/develop/api-reference/connections/st.connection
   */
  private class QueryMethodCall extends DataFlow::CallCfgNode, SqlExecution::Range {
+
    QueryMethodCall() {
-      this =
-        API::moduleImport("streamlit")
-            .getMember("connection")
-            .getReturn()
-            .getMember("query")
-            .getACall()
+      exists(StreamlitSQLConnection s |
+        this = s.getReturn().getMember("query").getACall())
    }

    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
  }
-private class StreamlitConnection extends SqlAlchemy::Connection::InstanceSource {
-    StreamlitConnection() {
-      this =
-        API::moduleImport("streamlit")
-            .getMember("connection")
-            .getReturn()
-            .getMember("connect")
-            .getACall()
+
+
+/**
+ * The Streamlit SQLConnection.connect() call, which returns a a new sqlalchemy.engine.Connection object.
+ * Streamlit creates a connection to a SQL database basing off SQL Alchemy, so we can reuse the models that we already have.
+ */
+private class StreamlitSQLAlchemyConnection extends SqlAlchemy::Connection::InstanceSource {
+    StreamlitSQLAlchemyConnection() {
+      exists(StreamlitSQLConnection s |
+        this = s.getReturn().getMember("connect").getACall())
+    }
+  }
+
+/**
+ * The underlying SQLAlchemy Engine, accessed via `st.connection().engine`.
+ * Streamlit creates an engine to a SQL database basing off SQL Alchemy, so we can reuse the models that we already have.
+ */
+private class StreamlitSQLAlchemyEngine extends SqlAlchemy::Engine::InstanceSource {
+    StreamlitSQLAlchemyEngine() {
+      exists(StreamlitSQLConnection s |
+        this = s.getReturn().getMember("engine").asSource())
+    }
+  }
+
+/**
+ * The SQLAlchemy Session, accessed via `st.connection().session`.
+ * Streamlit can create a session to a SQL database basing off SQL Alchemy, so we can reuse the models that we already have.
+ * For example, the modeling for `session` includes an `execute` method, which is used to execute raw SQL queries.
+ * https://docs.streamlit.io/develop/api-reference/connections/st.connections.sqlconnection#:~:text=SQLConnection.engine-,SQLConnection.session,-Streamlit%20Version
+ */
+  private class StreamlitSession extends SqlAlchemy::Session::InstanceSource {
+    StreamlitSession() {
+      exists(StreamlitSQLConnection s |
+        this = s.getReturn().getMember("session").asSource())
    }
  }
 }