From bb18bb084c063287d27f90a4855dabb1fdb68e63 Mon Sep 17 00:00:00 2001 From: Josef Svenningsson Date: Thu, 2 Apr 2026 17:42:37 +0100 Subject: [PATCH] Improve prompt injection for Python --- .../python/frameworks/anthropic.model.yml | 17 +++++++++++ .../semmle/python/frameworks/openai.model.yml | 3 ++ .../semmle/python/frameworks/OpenAI.qll | 30 +++++++++++++++++++ .../PromptInjectionCustomizations.qll | 3 ++ 4 files changed, 53 insertions(+) create mode 100644 python/ql/lib/semmle/python/frameworks/anthropic.model.yml diff --git a/python/ql/lib/semmle/python/frameworks/anthropic.model.yml b/python/ql/lib/semmle/python/frameworks/anthropic.model.yml new file mode 100644 index 00000000000..b7ef32218ad --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/anthropic.model.yml @@ -0,0 +1,17 @@ +extensions: + - addsTo: + pack: codeql/python-all + extensible: sinkModel + data: + - ['Anthropic', 'Member[messages].Member[create].Argument[system:]', 'prompt-injection'] + - ['Anthropic', 'Member[messages].Member[stream].Argument[system:]', 'prompt-injection'] + - ['Anthropic', 'Member[beta].Member[messages].Member[create].Argument[system:]', 'prompt-injection'] + - ['Anthropic', 'Member[messages].Member[create].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection'] + - ['Anthropic', 'Member[messages].Member[stream].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection'] + - ['Anthropic', 'Member[beta].Member[messages].Member[create].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection'] + + - addsTo: + pack: codeql/python-all + extensible: typeModel + data: + - ['Anthropic', 'anthropic', 'Member[Anthropic,AsyncAnthropic].ReturnValue'] diff --git a/python/ql/lib/semmle/python/frameworks/openai.model.yml b/python/ql/lib/semmle/python/frameworks/openai.model.yml index 245d390ab8e..358039595e9 100644 --- a/python/ql/lib/semmle/python/frameworks/openai.model.yml +++ 
b/python/ql/lib/semmle/python/frameworks/openai.model.yml @@ -4,6 +4,9 @@ extensions: extensible: sinkModel data: - ['OpenAI', 'Member[beta].Member[assistants].Member[create].Argument[instructions:]', 'prompt-injection'] + - ['OpenAI', 'Member[chat].Member[completions].Member[create].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection'] + - ['OpenAI', 'Member[responses].Member[create].Argument[instructions:]', 'prompt-injection'] + - ['OpenAI', 'Member[responses].Member[create].Argument[input:]', 'prompt-injection'] - addsTo: pack: codeql/python-all diff --git a/python/ql/src/experimental/semmle/python/frameworks/OpenAI.qll b/python/ql/src/experimental/semmle/python/frameworks/OpenAI.qll index 74614a739aa..e5649716c8a 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/OpenAI.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/OpenAI.qll @@ -7,6 +7,7 @@ private import python private import semmle.python.ApiGraphs +private import semmle.python.dataflow.new.DataFlow /** * Provides models for agents SDK (instances of the `agents.Runner` class etc). */ @@ -86,3 +87,32 @@ module OpenAI { ) } } + +/** + * Provides attribute-name-based sink detection for `chat.completions.create` calls. + * This does not rely on API graph type resolution and thus works even when + * the receiver cannot be traced back to a known constructor (e.g. due to `or` expressions). + */ +module ChatCompletionsCreate { + /** + * Gets a `DataFlow::Node` that is the `messages` list argument + * passed to a `*.chat.completions.create(messages=...)` call, + * matched purely by attribute names in the call chain. + */ + DataFlow::Node getAMessageContentSink() { + exists( + DataFlow::MethodCallNode createCall, DataFlow::AttrRead completionsAttr, + DataFlow::AttrRead chatAttr + | + // Match *.chat.completions.create(...) 
+ createCall.getMethodName() = "create" and + completionsAttr = createCall.getObject().getALocalSource() and + completionsAttr.getAttributeName() = "completions" and + chatAttr = completionsAttr.getObject().getALocalSource() and + chatAttr.getAttributeName() = "chat" + | + // The sink is the `messages` keyword argument value (the list as a whole) + result = createCall.getArgByName("messages") + ) + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/PromptInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/PromptInjectionCustomizations.qll index 181be639395..fd2cfe4478f 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/PromptInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/PromptInjectionCustomizations.qll @@ -8,6 +8,7 @@ import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import experimental.semmle.python.Concepts +private import semmle.python.Frameworks private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.BarrierGuards private import semmle.python.frameworks.data.ModelsAsData @@ -55,6 +56,8 @@ module PromptInjection { this = OpenAI::getContentNode().asSink() or this = AgentSDK::getContentNode().asSink() + or + this = ChatCompletionsCreate::getAMessageContentSink() } }