Merge branch 'main' into azure_python_sdk_url_summary_upstream

This commit is contained in:
Ben Rodes
2026-02-02 09:00:35 -05:00
committed by GitHub
5287 changed files with 495117 additions and 207887 deletions

View File

@@ -0,0 +1,6 @@
# Barrier-guard model for the `url-redirection` kind.
# Row columns: type, access path, guarding branch, kind.
# The guarded value is the first argument (positional index 0 or keyword `url`) of
# `django.utils.http.url_has_allowed_host_and_scheme`; the guard applies on the "true" branch.
extensions:
- addsTo:
pack: codeql/python-all
extensible: barrierGuardModel
data:
- ['django', 'Member[utils].Member[http].Member[url_has_allowed_host_and_scheme].Argument[0,url:]', "true", 'url-redirection']

View File

@@ -2965,38 +2965,6 @@ module PrivateDjango {
override predicate csrfEnabled() { decoratorName in ["csrf_protect", "requires_csrf_token"] }
}
private predicate djangoUrlHasAllowedHostAndScheme(
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
) {
exists(API::CallNode call |
call =
API::moduleImport("django")
.getMember("utils")
.getMember("http")
.getMember("url_has_allowed_host_and_scheme")
.getACall() and
g = call.asCfgNode() and
node = call.getParameter(0, "url").asSink().asCfgNode() and
branch = true
)
}
/**
* A call to `django.utils.http.url_has_allowed_host_and_scheme`, considered as a sanitizer-guard for URL redirection.
*
* See https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/
*/
private class DjangoAllowedUrl extends UrlRedirect::Sanitizer {
DjangoAllowedUrl() {
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
}
override predicate sanitizes(UrlRedirect::FlowState state) {
// sanitize all flow states
any()
}
}
// ---------------------------------------------------------------------------
// Templates
// ---------------------------------------------------------------------------

View File

@@ -621,24 +621,15 @@ module Flask {
}
override DataFlow::Node getAPathArgument() {
result in [
this.getArg(0), this.getArgByName("directory"),
// as described in the docs, the `filename` argument is restrained to be within
// the provided directory, so is not exposed to path-injection. (but is still a
// path-argument).
this.getArg(1), this.getArgByName("filename")
]
result = this.getArg([0, 1]) or
result = this.getArgByName(["directory", "filename"])
}
}
/**
* To exclude `filename` argument to `flask.send_from_directory` as a path-injection sink.
*/
private class FlaskSendFromDirectoryCallFilenameSanitizer extends PathInjection::Sanitizer {
FlaskSendFromDirectoryCallFilenameSanitizer() {
this = any(FlaskSendFromDirectoryCall c).getArg(1)
or
this = any(FlaskSendFromDirectoryCall c).getArgByName("filename")
override DataFlow::Node getAVulnerablePathArgument() {
result = this.getAPathArgument() and
// as described in the docs, the `filename` argument is restricted to be within
// the provided directory, so is not exposed to path-injection.
not result in [this.getArg(1), this.getArgByName("filename")]
}
}
@@ -674,7 +665,7 @@ module Flask {
*
* see https://flask.palletsprojects.com/en/2.3.x/api/#flask.render_template_string
*/
private class RenderTemplateStringSummary extends SummarizedCallable {
private class RenderTemplateStringSummary extends SummarizedCallable::Range {
RenderTemplateStringSummary() { this = "flask.render_template_string" }
override DataFlow::CallCfgNode getACall() {
@@ -700,7 +691,7 @@ module Flask {
*
* see https://flask.palletsprojects.com/en/2.3.x/api/#flask.stream_template_string
*/
private class StreamTemplateStringSummary extends SummarizedCallable {
private class StreamTemplateStringSummary extends SummarizedCallable::Range {
StreamTemplateStringSummary() { this = "flask.stream_template_string" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -0,0 +1,119 @@
/**
* Provides definitions and modeling for the `python-socketio` PyPI package.
* See https://python-socketio.readthedocs.io/en/stable/.
*/
private import python
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
/**
 * Provides models for the `python-socketio` PyPI package.
 * See https://python-socketio.readthedocs.io/en/stable/.
 */
module SocketIO {
  /** Provides models for socketio `Server` and `AsyncServer` classes. */
  module Server {
    /** Gets an instance of a socketio `Server` or `AsyncServer`. */
    API::Node server() {
      result = API::moduleImport("socketio").getMember(["Server", "AsyncServer"]).getAnInstance()
    }

    /**
     * Gets a decorator that indicates a socketio event handler: either the `event`
     * member of a server, or the value returned by calling its `on` member.
     */
    private API::Node serverEventAnnotation() {
      result = server().getMember("event")
      or
      result = server().getMember("on").getReturn()
    }

    /**
     * A function registered as an event handler on a socketio server, either by
     * being decorated with a server event annotation, or by being passed to `on`
     * as the second positional argument or as the `handler` keyword argument.
     */
    private class EventHandler extends Http::Server::RequestHandler::Range {
      EventHandler() {
        serverEventAnnotation().getAValueReachableFromSource().asExpr() = this.getADecorator()
        or
        exists(DataFlow::CallCfgNode c, DataFlow::Node arg |
          c = server().getMember("on").getACall()
        |
          (
            arg = c.getArg(1)
            or
            arg = c.getArgByName("handler")
          ) and
          poorMansFunctionTracker(this) = arg
        )
      }

      override Parameter getARoutedParameter() {
        result = this.getAnArg() and
        // The first parameter is `sid`, which is not a remote flow source as it
        // cannot be controlled by the client.
        not result = this.getArg(0)
      }

      override string getFramework() { result = "socketio" }
    }

    /**
     * A value passed as the `callback` keyword argument of an `emit` or `send`
     * call on a server or namespace instance.
     */
    private class CallbackArgument extends DataFlow::Node {
      CallbackArgument() {
        exists(DataFlow::CallCfgNode c |
          c = [server(), Namespace::instance()].getMember(["emit", "send"]).getACall()
        |
          this = c.getArgByName("callback")
        )
      }
    }

    /**
     * A function that is passed as a `CallbackArgument`. All of its positional
     * parameters are treated as routed parameters.
     */
    private class CallbackHandler extends Http::Server::RequestHandler::Range {
      CallbackHandler() { any(CallbackArgument ca) = poorMansFunctionTracker(this) }

      override Parameter getARoutedParameter() { result = this.getAnArg() }

      override string getFramework() { result = "socketio" }
    }

    /**
     * A call to the `call` member of a server or namespace instance, whose
     * result is modeled as a remote flow source.
     */
    private class SocketIOCall extends RemoteFlowSource::Range {
      SocketIOCall() { this = [server(), Namespace::instance()].getMember("call").getACall() }

      override string getSourceType() { result = "socketio call" }
    }
  }

  /** Provides modeling for socketio server Namespace/AsyncNamespace classes. */
  module Namespace {
    /** Gets a reference to the `socketio.Namespace` or `socketio.AsyncNamespace` classes or any subclass. */
    API::Node subclassRef() {
      result =
        API::moduleImport("socketio").getMember(["Namespace", "AsyncNamespace"]).getASubclass*()
    }

    /** Gets a reference to an instance of a subclass of `socketio.Namespace` or `socketio.AsyncNamespace`. */
    API::Node instance() {
      result = subclassRef().getAnInstance()
      or
      result = subclassRef().getAMember().getSelfParameter()
    }

    /** A socketio Namespace class. */
    class NamespaceClass extends Class {
      NamespaceClass() { this.getABase() = subclassRef().asSource().asExpr() }

      /** Gets a handler for socketio events, that is, a method whose name starts with `on_`. */
      Function getAnEventHandler() {
        result = this.getAMethod() and
        // In a `matches` pattern an unescaped `_` is a single-character wildcard, so
        // `"on_%"` would also accept names such as `once` or `online`. Escape it to
        // require a literal underscore after `on`.
        result.getName().matches("on\\_%")
      }
    }

    /** An event-handler method (`on_...`) defined on a socketio namespace class. */
    private class NamespaceEventHandler extends Http::Server::RequestHandler::Range {
      NamespaceEventHandler() { this = any(NamespaceClass nc).getAnEventHandler() }

      override Parameter getARoutedParameter() {
        result = this.getAnArg() and
        // The first two parameters are `self` and `sid`.
        not result = this.getArg(0) and
        not result = this.getArg(1)
      }

      override string getFramework() { result = "socketio" }
    }
  }
}

View File

@@ -142,6 +142,8 @@ extensions:
- ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs
- ["urllib", "Member[parse].Member[parse_qs]", "Argument[0,qs:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
- ["urllib", "Member[parse].Member[urlparse]", "Argument[0,urlstring:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
- ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus
@@ -181,7 +183,9 @@ extensions:
- addsTo:
pack: codeql/python-all
extensible: typeModel
data: []
data:
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
- ["urllib.parse.ParseResult~Subclass", 'urllib', 'Member[parse].Member[urlparse]']
- addsTo:
pack: codeql/python-all

View File

@@ -245,6 +245,67 @@ module Stdlib {
}
}
/**
 * Provides models for the `urllib.parse.ParseResult` class.
 *
 * See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.ParseResult.
 */
module ParseResult {
  /**
   * Gets a reference to the `urllib.parse.ParseResult` class, found either
   * directly in `urllib.parse` or through the
   * `urllib.parse.ParseResult~Subclass` type model.
   */
  API::Node classRef() {
    result = ModelOutput::getATypeNode("urllib.parse.ParseResult~Subclass").getASubclass*()
    or
    result = API::moduleImport("urllib").getMember("parse").getMember("ParseResult")
  }

  /**
   * A source of instances of `urllib.parse.ParseResult`, extend this class to model new instances.
   *
   * Such a source may be an instantiation of the class, the return value of a
   * function call, or a special parameter that an external library sets when
   * it calls a function.
   *
   * Use the predicate `ParseResult::instance()` to get references to instances of `urllib.parse.ParseResult`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call to a node returned by `classRef()`, that is, a direct instantiation of `urllib.parse.ParseResult`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** Gets a reference to an instance of `urllib.parse.ParseResult`, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `urllib.parse.ParseResult`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode origin |
      origin = instance(DataFlow::TypeTracker::end()) and
      origin.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `urllib.parse.ParseResult`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "urllib.parse.ParseResult" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      result = ["netloc", "path", "params", "query", "fragment"]
      or
      result = ["username", "password", "hostname", "port"]
    }

    override string getMethodName() { none() }

    override string getAsyncMethodName() { none() }
  }
}
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
@@ -3126,7 +3187,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.11/library/re.html#re-objects
*/
class RePatternSummary extends SummarizedCallable {
class RePatternSummary extends SummarizedCallable::Range {
RePatternSummary() { this = "re.Pattern" }
override DataFlow::CallCfgNode getACall() {
@@ -3166,7 +3227,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchSummary extends SummarizedCallable {
class ReMatchSummary extends SummarizedCallable::Range {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
override DataFlow::CallCfgNode getACall() {
@@ -3230,7 +3291,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchMethodsSummary extends SummarizedCallable {
class ReMatchMethodsSummary extends SummarizedCallable::Range {
string methodName;
ReMatchMethodsSummary() {
@@ -3274,7 +3335,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/re.html#functions
*/
class ReFunctionsSummary extends SummarizedCallable {
class ReFunctionsSummary extends SummarizedCallable::Range {
string methodName;
ReFunctionsSummary() {
@@ -4122,7 +4183,7 @@ module StdlibPrivate {
*
* see https://docs.python.org/3/library/stdtypes.html#dict
*/
class DictSummary extends SummarizedCallable {
class DictSummary extends SummarizedCallable::Range {
DictSummary() { this = "builtins.dict" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }
@@ -4161,7 +4222,7 @@ module StdlibPrivate {
}
/** A flow summary for `list`. */
class ListSummary extends SummarizedCallable {
class ListSummary extends SummarizedCallable::Range {
ListSummary() { this = "builtins.list" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }
@@ -4191,7 +4252,7 @@ module StdlibPrivate {
}
/** A flow summary for tuple */
class TupleSummary extends SummarizedCallable {
class TupleSummary extends SummarizedCallable::Range {
TupleSummary() { this = "builtins.tuple" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }
@@ -4216,7 +4277,7 @@ module StdlibPrivate {
}
/** A flow summary for set */
class SetSummary extends SummarizedCallable {
class SetSummary extends SummarizedCallable::Range {
SetSummary() { this = "builtins.set" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }
@@ -4246,7 +4307,7 @@ module StdlibPrivate {
}
/** A flow summary for frozenset */
class FrozensetSummary extends SummarizedCallable {
class FrozensetSummary extends SummarizedCallable::Range {
FrozensetSummary() { this = "builtins.frozenset" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }
@@ -4264,7 +4325,7 @@ module StdlibPrivate {
// Flow summaries for functions operating on containers
// ---------------------------------------------------------------------------
/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
class ReversedSummary extends SummarizedCallable::Range {
ReversedSummary() { this = "builtins.reversed" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("reversed").getACall() }
@@ -4294,7 +4355,7 @@ module StdlibPrivate {
}
/** A flow summary for `sorted`. */
class SortedSummary extends SummarizedCallable {
class SortedSummary extends SummarizedCallable::Range {
SortedSummary() { this = "builtins.sorted" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() }
@@ -4326,7 +4387,7 @@ module StdlibPrivate {
}
/** A flow summary for `iter`. */
class IterSummary extends SummarizedCallable {
class IterSummary extends SummarizedCallable::Range {
IterSummary() { this = "builtins.iter" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() }
@@ -4356,7 +4417,7 @@ module StdlibPrivate {
}
/** A flow summary for `next`. */
class NextSummary extends SummarizedCallable {
class NextSummary extends SummarizedCallable::Range {
NextSummary() { this = "builtins.next" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() }
@@ -4386,7 +4447,7 @@ module StdlibPrivate {
}
/** A flow summary for `map`. */
class MapSummary extends SummarizedCallable {
class MapSummary extends SummarizedCallable::Range {
MapSummary() { this = "builtins.map" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("map").getACall() }
@@ -4421,7 +4482,7 @@ module StdlibPrivate {
}
/** A flow summary for `filter`. */
class FilterSummary extends SummarizedCallable {
class FilterSummary extends SummarizedCallable::Range {
FilterSummary() { this = "builtins.filter" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("filter").getACall() }
@@ -4447,7 +4508,7 @@ module StdlibPrivate {
}
/** A flow summary for `enumerate`. */
class EnumerateSummary extends SummarizedCallable {
class EnumerateSummary extends SummarizedCallable::Range {
EnumerateSummary() { this = "builtins.enumerate" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("enumerate").getACall() }
@@ -4473,7 +4534,7 @@ module StdlibPrivate {
}
/** A flow summary for `zip`. */
class ZipSummary extends SummarizedCallable {
class ZipSummary extends SummarizedCallable::Range {
ZipSummary() { this = "builtins.zip" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("zip").getACall() }
@@ -4507,7 +4568,7 @@ module StdlibPrivate {
// Flow summaries for container methods
// ---------------------------------------------------------------------------
/** A flow summary for `copy`. */
class CopySummary extends SummarizedCallable {
class CopySummary extends SummarizedCallable::Range {
CopySummary() { this = "collection.copy" }
override DataFlow::CallCfgNode getACall() {
@@ -4530,7 +4591,7 @@ module StdlibPrivate {
}
/** A flow summary for `copy.replace`. */
class ReplaceSummary extends SummarizedCallable {
class ReplaceSummary extends SummarizedCallable::Range {
ReplaceSummary() { this = "copy.replace" }
override DataFlow::CallCfgNode getACall() {
@@ -4563,7 +4624,7 @@ module StdlibPrivate {
* It also handles the default value when `pop` is called
* on a dictionary, since that also does not depend on the key.
*/
class PopSummary extends SummarizedCallable {
class PopSummary extends SummarizedCallable::Range {
PopSummary() { this = "collection.pop" }
override DataFlow::CallCfgNode getACall() {
@@ -4594,7 +4655,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.pop` */
class DictPopSummary extends SummarizedCallable {
class DictPopSummary extends SummarizedCallable::Range {
string key;
DictPopSummary() {
@@ -4617,7 +4678,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.get` at specific content. */
class DictGetSummary extends SummarizedCallable {
class DictGetSummary extends SummarizedCallable::Range {
string key;
DictGetSummary() {
@@ -4645,7 +4706,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.get` disregarding content. */
class DictGetAnySummary extends SummarizedCallable {
class DictGetAnySummary extends SummarizedCallable::Range {
DictGetAnySummary() { this = "dict.get" }
override DataFlow::CallCfgNode getACall() {
@@ -4668,7 +4729,7 @@ module StdlibPrivate {
}
/** A flow summary for `dict.popitem` */
class DictPopitemSummary extends SummarizedCallable {
class DictPopitemSummary extends SummarizedCallable::Range {
DictPopitemSummary() { this = "dict.popitem" }
override DataFlow::CallCfgNode getACall() {
@@ -4692,7 +4753,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
*/
class DictSetdefaultSummary extends SummarizedCallable {
class DictSetdefaultSummary extends SummarizedCallable::Range {
DictSetdefaultSummary() { this = "dict.setdefault" }
override DataFlow::CallCfgNode getACall() {
@@ -4717,7 +4778,7 @@ module StdlibPrivate {
* This summary handles read and store steps. See `DictSetdefaultSummary`
* for the dataflow steps.
*/
class DictSetdefaultKeySummary extends SummarizedCallable {
class DictSetdefaultKeySummary extends SummarizedCallable::Range {
string key;
DictSetdefaultKeySummary() {
@@ -4750,7 +4811,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.values
*/
class DictValues extends SummarizedCallable {
class DictValues extends SummarizedCallable::Range {
DictValues() { this = "dict.values" }
override DataFlow::CallCfgNode getACall() {
@@ -4779,7 +4840,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.keys
*/
class DictKeys extends SummarizedCallable {
class DictKeys extends SummarizedCallable::Range {
DictKeys() { this = "dict.keys" }
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "keys") }
@@ -4801,7 +4862,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.items
*/
class DictItems extends SummarizedCallable {
class DictItems extends SummarizedCallable::Range {
DictItems() { this = "dict.items" }
override DataFlow::CallCfgNode getACall() {
@@ -4831,7 +4892,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable
*/
class ListAppend extends SummarizedCallable {
class ListAppend extends SummarizedCallable::Range {
ListAppend() { this = "list.append" }
override DataFlow::CallCfgNode getACall() {
@@ -4860,7 +4921,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add
*/
class SetAdd extends SummarizedCallable {
class SetAdd extends SummarizedCallable::Range {
SetAdd() { this = "set.add" }
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "add") }
@@ -4887,7 +4948,7 @@ module StdlibPrivate {
*
* See https://devdocs.io/python~3.11/library/os#os.getenv
*/
class OsGetEnv extends SummarizedCallable {
class OsGetEnv extends SummarizedCallable::Range {
OsGetEnv() { this = "os.getenv" }
override DataFlow::CallCfgNode getACall() {

View File

@@ -135,6 +135,8 @@ module Tornado {
API::Node subclassRef() {
// The `RequestHandler` member of `web()` and any of its subclasses.
result = web().getMember("RequestHandler").getASubclass*()
or
// Classes found by `WebSocket::WebSocketHandler::subclassRef()` are included as well.
result = WebSocket::WebSocketHandler::subclassRef()
or
// Classes contributed through the `tornado.web.RequestHandler~Subclass` type model.
result = ModelOutput::getATypeNode("tornado.web.RequestHandler~Subclass").getASubclass*()
}
@@ -428,6 +430,49 @@ module Tornado {
}
}
}
// ---------------------------------------------------------------------------
// tornado.websocket
// ---------------------------------------------------------------------------
/** Gets a reference to the `tornado.websocket` module. */
API::Node websocket() { Tornado::tornado().getMember("websocket") = result }

/** Provides models for the `tornado.websocket` module. */
module WebSocket {
  /**
   * Provides models for the `tornado.websocket.WebSocketHandler` class and subclasses.
   *
   * See https://www.tornadoweb.org/en/stable/websocket.html#tornado.websocket.WebSocketHandler.
   */
  module WebSocketHandler {
    /** Gets a reference to the `tornado.websocket.WebSocketHandler` class or any subclass. */
    API::Node subclassRef() {
      exists(API::Node base |
        base = websocket().getMember("WebSocketHandler")
        or
        base = ModelOutput::getATypeNode("tornado.websocket.WebSocketHandler~Subclass")
      |
        result = base.getASubclass*()
      )
    }

    /** A subclass of `tornado.websocket.WebSocketHandler`. */
    class WebSocketHandlerClass extends Web::RequestHandler::RequestHandlerClass {
      WebSocketHandlerClass() { subclassRef().asSource().asExpr() = this.getParent() }

      /** Gets a request handler of this class: an inherited one, or the method named `open`. */
      override Function getARequestHandler() {
        result.getName() = "open" and
        result = this.getAMethod()
        or
        result = super.getARequestHandler()
      }

      /** Gets a function that could handle incoming WebSocket events, if any. */
      Function getAWebSocketEventHandler() {
        exists(string eventMethodName |
          eventMethodName =
            ["on_message", "on_close", "on_ping", "on_pong", "select_subprotocol", "check_origin"]
        |
          result = this.getAMethod() and
          result.getName() = eventMethodName
        )
      }
    }
  }
}
}
// ---------------------------------------------------------------------------
@@ -542,6 +587,27 @@ module Tornado {
override string getFramework() { result = "Tornado" }
}
/** A request handler for WebSocket events. */
private class TornadoWebSocketEventHandler extends Http::Server::RequestHandler::Range {
  TornadoWebSocketEventHandler() {
    this =
      any(TornadoModule::WebSocket::WebSocketHandler::WebSocketHandlerClass handlerClass)
          .getAWebSocketEventHandler()
  }

  override Parameter getARoutedParameter() {
    // The `open` method is handled as a normal request handler in `TornadoRouteSetup` or `TornadoRequestHandlerWithoutKnownRoute`.
    // Every other event handler (such as `on_message`) is not affected by routing, so
    // all of its parameters except the first positional one are remote flow sources.
    not result = this.getArg(0) and
    (
      result = this.getArg(_)
      or
      result = this.getArgByName(_)
      or
      result = this.getVararg().(Parameter)
      or
      result = this.getKwarg().(Parameter)
    )
  }

  override string getFramework() { result = "Tornado" }
}
// ---------------------------------------------------------------------------
// Response modeling
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,84 @@
/**
* Provides definitions and modeling for the `websockets` PyPI package.
*
* See https://websockets.readthedocs.io/en/stable/
*/
private import python
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
 * Provides models for the `websockets` PyPI package.
 * See https://websockets.readthedocs.io/en/stable/
 */
module Websockets {
  /**
   * The handler passed to `serve` or `unix_serve` from `websockets.asyncio.server`
   * or `websockets.sync.server`, as the first positional argument or as the
   * `handler` keyword argument.
   */
  private class HandlerArg extends DataFlow::Node {
    HandlerArg() {
      exists(DataFlow::CallCfgNode serveCall |
        serveCall =
          API::moduleImport("websockets")
              .getMember(["asyncio", "sync"])
              .getMember("server")
              .getMember(["serve", "unix_serve"])
              .getACall() and
        this = [serveCall.getArg(0), serveCall.getArgByName("handler")]
      )
    }
  }

  /** A websocket handler that is passed to `serve`. */
  // TODO: handlers defined via route maps, e.g. through `websockets.asyncio.router.route`, are more complex to handle.
  class WebSocketHandler extends Http::Server::RequestHandler::Range {
    WebSocketHandler() { poorMansFunctionTracker(this) instanceof HandlerArg }

    override Parameter getARoutedParameter() { this.getAnArg() = result }

    override string getFramework() { result = "websockets" }
  }

  /** Provides taint models for instances of `ServerConnection` objects passed to websocket handlers. */
  module ServerConnection {
    /**
     * A source of instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`, extend this class to model new instances.
     *
     * Such a source may be an instantiation of the class, the return value of a
     * function call, or a special parameter that an external library sets when
     * it calls a function.
     *
     * Use the predicate `ServerConnection::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.sync.ServerConnection`, tracked with type tracker `t`. */
    private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
      exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
      or
      result instanceof InstanceSource and
      t.start()
    }

    /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.sync.ServerConnection`. */
    DataFlow::Node instance() {
      exists(DataFlow::TypeTrackingNode origin |
        origin = instance(DataFlow::TypeTracker::end()) and
        origin.flowsTo(result)
      )
    }

    /** The first positional parameter of a `WebSocketHandler`, modeled as a connection instance. */
    private class HandlerParam extends DataFlow::Node, InstanceSource {
      HandlerParam() { this = DataFlow::parameterNode(any(WebSocketHandler h).getArg(0)) }
    }

    /** Taint propagation through the `recv` and `recv_streaming` methods of a connection instance. */
    private class InstanceTaintSteps extends InstanceTaintStepsHelper {
      InstanceTaintSteps() { this = "websockets.asyncio.ServerConnection" }

      override DataFlow::Node getInstance() { result = instance() }

      override string getAttributeName() { none() }

      override string getMethodName() { result in ["recv", "recv_streaming"] }

      override string getAsyncMethodName() { result in ["recv", "recv_streaming"] }
    }
  }
}

View File

@@ -0,0 +1,6 @@
# Sink model: the `instructions` keyword argument of a call to the `Agent` member of
# `agents` is a sink of kind `prompt-injection`.
# Row columns: type, access path, kind.
extensions:
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
- ['agents', 'Member[Agent].Argument[instructions:]', 'prompt-injection']

View File

@@ -24,21 +24,25 @@ private import semmle.python.Concepts
* A threat-model flow source originating from a data extension.
*/
private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Range {
ThreatModelSourceFromDataExtension() { this = ModelOutput::getASourceNode(_).asSource() }
ThreatModelSourceFromDataExtension() { ModelOutput::sourceNode(this, _) }
override string getThreatModel() { this = ModelOutput::getASourceNode(result).asSource() }
override string getThreatModel() { ModelOutput::sourceNode(this, result) }
override string getSourceType() {
result = "Source node (" + this.getThreatModel() + ") [from data-extension]"
}
}
private class SummarizedCallableFromModel extends SummarizedCallable {
private class SummarizedCallableFromModel extends SummarizedCallable::Range {
string type;
string path;
string input_;
string output_;
string kind;
string model_;
SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
ModelOutput::relevantSummaryModel(type, path, input_, output_, kind, model_) and
this = type + ";" + path
}
@@ -52,14 +56,13 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
}
override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
string input, string output, boolean preservesValue, Provenance p, boolean isExact, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or
kind = "taint" and
preservesValue = false
)
input = input_ and
output = output_ and
(if kind = "value" then preservesValue = true else preservesValue = false) and
p = "manual" and
isExact = true and
model = model_
}
}

View File

@@ -62,6 +62,8 @@
* should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that
* the type is not intended to match a static type.
*/
overlay[local?]
module;
private import codeql.util.Unit
private import ApiGraphModelsSpecific as Specific
@@ -342,6 +344,26 @@ private predicate sinkModel(string type, string path, string kind, string model)
)
}
/**
 * Holds if a barrier model exists for the given parameters.
 *
 * `model` is the string `MaD:` followed by the identifier of the contributing extension row.
 */
private predicate barrierModel(string type, string path, string kind, string model) {
  // Barrier models get no deprecation adapter: they were not around back then.
  exists(QlBuiltins::ExtensionId extensionId |
    model = "MaD:" + extensionId.toString() and
    Extensions::barrierModel(type, path, kind, extensionId)
  )
}
/**
 * Holds if a barrier guard model exists for the given parameters.
 *
 * `model` is the string `MaD:` followed by the identifier of the contributing extension row.
 */
private predicate barrierGuardModel(
  string type, string path, string branch, string kind, string model
) {
  // Barrier guard models get no deprecation adapter: they were not around back then.
  exists(QlBuiltins::ExtensionId extensionId |
    model = "MaD:" + extensionId.toString() and
    Extensions::barrierGuardModel(type, path, branch, kind, extensionId)
  )
}
/** Holds if a summary model `row` exists for the given parameters. */
private predicate summaryModel(
string type, string path, string input, string output, string kind, string model
@@ -398,6 +420,8 @@ predicate isRelevantType(string type) {
(
sourceModel(type, _, _, _) or
sinkModel(type, _, _, _) or
barrierModel(type, _, _, _) or
barrierGuardModel(type, _, _, _, _) or
summaryModel(type, _, _, _, _, _) or
typeModel(_, type, _)
) and
@@ -425,6 +449,8 @@ predicate isRelevantFullPath(string type, string path) {
(
sourceModel(type, path, _, _) or
sinkModel(type, path, _, _) or
barrierModel(type, path, _, _) or
barrierGuardModel(type, path, _, _, _) or
summaryModel(type, path, _, _, _, _) or
typeModel(_, type, path)
)
@@ -490,6 +516,7 @@ private predicate invocationMatchesCallSiteFilter(
Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
}
overlay[local?]
private class TypeModelUseEntry extends API::EntryPoint {
private string type;
@@ -503,6 +530,7 @@ private class TypeModelUseEntry extends API::EntryPoint {
API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() }
}
overlay[local?]
private class TypeModelDefEntry extends API::EntryPoint {
private string type;
@@ -743,6 +771,32 @@ module ModelOutput {
)
}
/**
 * Gets a node that the barrier model `model` contributed as a barrier with the given `kind`.
 */
cached
API::Node getABarrierNode(string kind, string model) {
  exists(string type, string path |
    result = getNodeFromPath(type, path) and
    barrierModel(type, path, kind, model)
  )
}
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `branch`.
*/
cached
API::Node getABarrierGuardNode(string kind, boolean branch, string model) {
exists(string type, string path, string branch_str |
branch = true and branch_str = "true"
or
branch = false and branch_str = "false"
|
barrierGuardModel(type, path, branch_str, kind, model) and
result = getNodeFromPath(type, path)
)
}
/**
* Holds if a relevant summary exists for these parameters.
*/
@@ -785,15 +839,50 @@ module ModelOutput {
private import codeql.mad.ModelValidation as SharedModelVal
/**
 * Gets an API node that an external model contributed as a source of the
 * given `kind`, regardless of which model row contributed it.
 */
API::Node getASourceNode(string kind) {
  exists(string model | result = getASourceNode(kind, model))
}
/**
 * Gets an API node that an external model contributed as a sink of the
 * given `kind`, regardless of which model row contributed it.
 */
API::Node getASinkNode(string kind) {
  exists(string model | result = getASinkNode(kind, model))
}
/**
 * Gets an API node that an external model contributed as a barrier of the
 * given `kind`, regardless of which model row contributed it.
 *
 * INTERNAL: Do not use.
 */
API::Node getABarrierNode(string kind) {
  exists(string model | result = getABarrierNode(kind, model))
}
/**
 * Gets an API node that an external model contributed as a barrier guard of
 * the given `kind` for the given `branch`, regardless of which model row
 * contributed it.
 *
 * INTERNAL: Do not use.
 */
API::Node getABarrierGuardNode(string kind, boolean branch) {
  exists(string model | result = getABarrierGuardNode(kind, branch, model))
}
/**
 * Holds if an external model specifies `node` as a source of the given `kind`.
 */
predicate sourceNode(DataFlow::Node node, string kind) {
  exists(API::Node modelled |
    modelled = getASourceNode(kind) and
    node = modelled.asSource()
  )
}
/**
 * Holds if an external model specifies `node` as a sink of the given `kind`.
 */
predicate sinkNode(DataFlow::Node node, string kind) {
  exists(API::Node modelled |
    modelled = getASinkNode(kind) and
    node = modelled.asSink()
  )
}
/**
 * Holds if an external model specifies `node` as a barrier of the given `kind`.
 *
 * A node qualifies in either of two ways: it is reported by
 * `DataFlow::ExternalBarrierGuard::getAnExternalBarrierNode` for this `kind`,
 * or it is the `asSource()` data-flow node of an API node returned by
 * `getABarrierNode`.
 */
predicate barrierNode(DataFlow::Node node, string kind) {
  node = DataFlow::ExternalBarrierGuard::getAnExternalBarrierNode(kind)
  or
  exists(API::Node modelled |
    modelled = getABarrierNode(kind) and
    node = modelled.asSource()
  )
}
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) }

View File

@@ -1,6 +1,8 @@
/**
* Defines extensible predicates for contributing library models from data extensions.
*/
overlay[local]
module;
/**
* Holds if the value at `(type, path)` should be seen as a flow
@@ -18,6 +20,26 @@ extensible predicate sourceModel(
*/
extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId);
/**
 * Holds if the value at `(type, path)` should be seen as a barrier
 * of the given `kind` and `madId` is the data extension row number.
 *
 * Each row supplies three columns, `type`, `path` and `kind`; `madId`
 * is not written in the data file.
 */
extensible predicate barrierModel(
string type, string path, string kind, QlBuiltins::ExtensionId madId
);
/**
 * Holds if the value at `(type, path)` should be seen as a barrier guard
 * of the given `kind` and `madId` is the data extension row number.
 * `path` is assumed to lead to a parameter of a call (possibly `self`), and
 * the call is guarding the parameter.
 * `branch` is the string `"true"` or `"false"`, indicating which branch of
 * the guard is protecting the parameter.
 *
 * Example row (`type`, `path`, `branch`, `kind`):
 * `['django', 'Member[utils].Member[http].Member[url_has_allowed_host_and_scheme].Argument[0,url:]', "true", 'url-redirection']`
 */
extensible predicate barrierGuardModel(
string type, string path, string branch, string kind, QlBuiltins::ExtensionId madId
);
/**
* Holds if in calls to `(type, path)`, the value referred to by `input`
* can flow to the value referred to by `output` and `madId` is the data

View File

@@ -142,15 +142,13 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
// `DataFlow::DictionaryElementContent` just from seeing a subscript read, so we would
// need to add that. (also need to handle things like `DictionaryElementAny` which
// doesn't have any value for .getAnArgument())
(
token.getName() = "DictionaryElement" and
result = node.getSubscript(token.getAnArgument())
or
token.getName() = "DictionaryElementAny" and
result = node.getASubscript() and
not exists(token.getAnArgument())
// TODO: ListElement/SetElement/TupleElement
)
token.getName() = "DictionaryElement" and
result = node.getSubscript(token.getAnArgument())
or
token.getName() in ["DictionaryElementAny", "ListElement"] and
result = node.getASubscript() and
not exists(token.getAnArgument())
// TODO: SetElement/TupleElement
// Some features don't have MaD tokens yet, they would need to be added to API-graphs first.
// - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter")
}
@@ -261,7 +259,7 @@ predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
name =
[
"Member", "Instance", "Awaited", "Call", "Method", "Subclass", "DictionaryElement",
"DictionaryElementAny"
"DictionaryElementAny", "ListElement"
]
}
@@ -270,7 +268,7 @@ predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
* in an identifying access path.
*/
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny"]
name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny", "ListElement"]
}
/**

View File

@@ -11,6 +11,16 @@ extensions:
extensible: sinkModel
data: []
# Contributes no rows to the `barrierModel` extensible predicate of `codeql/python-all`.
# NOTE(review): presumably an empty default so the predicate always has a data source; confirm.
- addsTo:
pack: codeql/python-all
extensible: barrierModel
data: []
# Contributes no rows to the `barrierGuardModel` extensible predicate of `codeql/python-all`.
# NOTE(review): presumably an empty default so the predicate always has a data source; confirm.
- addsTo:
pack: codeql/python-all
extensible: barrierGuardModel
data: []
- addsTo:
pack: codeql/python-all
extensible: summaryModel

View File

@@ -0,0 +1,12 @@
# Data-extension rows for `codeql/python-all` describing the `openai` package.
extensions:
# Sink rows, columns (type, path, kind).
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
# The `instructions` keyword argument of `beta.assistants.create` on an
# `OpenAI` value is a sink of kind `prompt-injection`.
- ['OpenAI', 'Member[beta].Member[assistants].Member[create].Argument[instructions:]', 'prompt-injection']
# Type rows that define what the `OpenAI` type name used above refers to.
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
# Associates the type name `OpenAI` with the return value of the members
# `OpenAI`, `AsyncOpenAI` and `AzureOpenAI` of `openai`.
# NOTE(review): presumably these are the client constructors; confirm.
- ['OpenAI', 'openai', 'Member[OpenAI,AsyncOpenAI,AzureOpenAI].ReturnValue']