Improve prompt injection detection for Python

This commit is contained in:
Josef Svenningsson
2026-04-02 17:42:37 +01:00
parent c9fa7fa283
commit bb18bb084c
4 changed files with 53 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
# Models-as-data extension for the `anthropic` Python package.
# Declares prompt-injection sinks on Anthropic client objects and the type
# model that says where such client objects come from.
extensions:
  - addsTo:
      pack: codeql/python-all
      extensible: sinkModel
    # Row format: [type, access path, sink kind].
    data:
      # The `system` keyword argument (system prompt) of message-creating calls.
      - ['Anthropic', 'Member[messages].Member[create].Argument[system:]', 'prompt-injection']
      - ['Anthropic', 'Member[messages].Member[stream].Argument[system:]', 'prompt-injection']
      - ['Anthropic', 'Member[beta].Member[messages].Member[create].Argument[system:]', 'prompt-injection']
      # `beta.messages.stream` mirrors `messages.stream`; modelled for parity with the non-beta API.
      - ['Anthropic', 'Member[beta].Member[messages].Member[stream].Argument[system:]', 'prompt-injection']
      # The `content` entry of each dict in the list passed as the `messages` keyword argument.
      - ['Anthropic', 'Member[messages].Member[create].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection']
      - ['Anthropic', 'Member[messages].Member[stream].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection']
      - ['Anthropic', 'Member[beta].Member[messages].Member[create].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection']
      - ['Anthropic', 'Member[beta].Member[messages].Member[stream].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection']
  - addsTo:
      pack: codeql/python-all
      extensible: typeModel
    # Row format: [type being defined, package, access path within the package].
    data:
      # An `Anthropic` value is whatever calling `anthropic.Anthropic` or
      # `anthropic.AsyncAnthropic` returns.
      - ['Anthropic', 'anthropic', 'Member[Anthropic,AsyncAnthropic].ReturnValue']

View File

@@ -4,6 +4,9 @@ extensions:
extensible: sinkModel
data:
- ['OpenAI', 'Member[beta].Member[assistants].Member[create].Argument[instructions:]', 'prompt-injection']
- ['OpenAI', 'Member[chat].Member[completions].Member[create].Argument[messages:].ListElement.DictionaryElement[content]', 'prompt-injection']
- ['OpenAI', 'Member[responses].Member[create].Argument[instructions:]', 'prompt-injection']
- ['OpenAI', 'Member[responses].Member[create].Argument[input:]', 'prompt-injection']
- addsTo:
pack: codeql/python-all

View File

@@ -7,6 +7,7 @@
private import python
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
/**
* Provides models for agents SDK (instances of the `agents.Runner` class etc).
@@ -86,3 +87,32 @@ module OpenAI {
)
}
}
/**
 * Name-based sink detection for calls shaped like `<expr>.chat.completions.create(...)`.
 *
 * Matching looks only at the attribute names in the receiver chain (`chat`, then
 * `completions`) and at the method name (`create`); no API graph type resolution
 * is involved. A call is therefore still recognised when its receiver cannot be
 * traced back to a known constructor (e.g. due to `or` expressions).
 */
module ChatCompletionsCreate {
  /**
   * Gets the data-flow node for the `messages` keyword argument of a
   * `*.chat.completions.create(messages=...)` call, where the call is matched
   * purely by attribute names in its receiver chain.
   *
   * Note that, despite the predicate's name, the result is the `messages`
   * argument node itself rather than the individual `content` values inside
   * the message dicts.
   */
  DataFlow::Node getAMessageContentSink() {
    exists(
      DataFlow::MethodCallNode call, DataFlow::AttrRead completionsRead,
      DataFlow::AttrRead chatRead
    |
      // Receiver chain, innermost first: an attribute read named `chat` ...
      chatRead.getAttributeName() = "chat" and
      // ... that is a local source of the object on which `completions` is read ...
      chatRead = completionsRead.getObject().getALocalSource() and
      completionsRead.getAttributeName() = "completions" and
      // ... which in turn is a local source of the object the method is called on.
      completionsRead = call.getObject().getALocalSource() and
      // The method invoked on that chain must be `create`.
      call.getMethodName() = "create" and
      // The sink is the value passed as the `messages` keyword argument.
      result = call.getArgByName("messages")
    )
  }
}

View File

@@ -8,6 +8,7 @@ import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import experimental.semmle.python.Concepts
private import semmle.python.Frameworks
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
@@ -55,6 +56,8 @@ module PromptInjection {
this = OpenAI::getContentNode().asSink()
or
this = AgentSDK::getContentNode().asSink()
or
this = ChatCompletionsCreate::getAMessageContentSink()
}
}