From fe7eabd56fba168d7edef3a375b33eaef397f692 Mon Sep 17 00:00:00 2001 From: BazookaMusic Date: Fri, 15 May 2026 12:39:54 +0200 Subject: [PATCH] Add run from agents into the user prompt and fix an issue with classifying it as a system prompt injection --- .../semmle/javascript/frameworks/OpenAI.qll | 17 ++++ .../UserPromptInjectionCustomizations.qll | 2 + .../SystemPromptInjection/agents_test.js | 4 +- .../UserPromptInjection.expected | 90 +++++++++++-------- .../UserPromptInjection/openai_user_test.js | 35 ++++++++ 5 files changed, 107 insertions(+), 41 deletions(-) diff --git a/javascript/ql/src/experimental/semmle/javascript/frameworks/OpenAI.qll b/javascript/ql/src/experimental/semmle/javascript/frameworks/OpenAI.qll index 17bd260a776..157702c3b64 100644 --- a/javascript/ql/src/experimental/semmle/javascript/frameworks/OpenAI.qll +++ b/javascript/ql/src/experimental/semmle/javascript/frameworks/OpenAI.qll @@ -242,6 +242,23 @@ module AgentSDK { ) } + /** + * Gets user prompt sinks for run(agent, input). + * Covers string input and user-role array messages. + */ + API::Node getUserPromptNode() { + // run(agent, "string") — string input is the user prompt + result = run().getParameter(1) + or + // run(agent, [{ role: "user", content: ... }]) + exists(API::Node msg | + msg = run().getParameter(1).getArrayElement() and + not isSystemOrDevMessage(msg) + | + result = msg.getMember("content") + ) + } + /** * Gets an agent constructor config that visibly lacks input guardrails. * Covers both native Agent({ inputGuardrails: [...] }) and diff --git a/javascript/ql/src/experimental/semmle/javascript/security/PromptInjection/UserPromptInjectionCustomizations.qll b/javascript/ql/src/experimental/semmle/javascript/security/PromptInjection/UserPromptInjectionCustomizations.qll index c777f59242a..0de238a41c1 100644 --- a/javascript/ql/src/experimental/semmle/javascript/security/PromptInjection/UserPromptInjectionCustomizations.qll +++ b/javascript/ql/src/experimental/semmle/javascript/security/PromptInjection/UserPromptInjectionCustomizations.qll @@ -63,6 +63,8 @@ module UserPromptInjection { this = Anthropic::getUserPromptNode().asSink() or this = GoogleGenAI::getUserPromptNode().asSink() + or + this = AgentSDK::getUserPromptNode().asSink() } } diff --git a/javascript/ql/test/experimental/Security/CWE-1427/SystemPromptInjection/agents_test.js b/javascript/ql/test/experimental/Security/CWE-1427/SystemPromptInjection/agents_test.js index ce988bcfa11..26f10ce02a5 100644 --- a/javascript/ql/test/experimental/Security/CWE-1427/SystemPromptInjection/agents_test.js +++ b/javascript/ql/test/experimental/Security/CWE-1427/SystemPromptInjection/agents_test.js @@ -63,8 +63,8 @@ app.get("/agents", async (req, res) => { // === run() with string input === - // SHOULD ALERT - string input to run() is used as a prompt - const r1 = await run(agent1, query); // $ Alert[js/prompt-injection] + // SHOULD NOT ALERT - string input to run() is a user prompt, not system prompt + const r1 = await run(agent1, query); // OK - user prompt sink // === run() with array input: system role === diff --git a/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/UserPromptInjection.expected b/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/UserPromptInjection.expected index 91f8df25fd8..c460f0eba06 100644 --- a/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/UserPromptInjection.expected +++ b/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/UserPromptInjection.expected @@ -9,19 +9,23 @@ edges | gemini_user_test.js:8:9:8:17 | userInput | gemini_user_test.js:51:13:51:21 | userInput | provenance | | | gemini_user_test.js:8:9:8:17 | userInput | gemini_user_test.js:58:13:58:21 | userInput | provenance | | | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:8:9:8:17 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:22:12:22:20 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:31:18:31:26 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:42:18:42:26 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:56:19:56:27 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:66:13:66:21 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:71:13:71:21 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:75:13:75:21 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:82:13:82:21 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:88:13:88:21 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:94:14:94:22 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:100:12:100:20 | userInput | provenance | | -| openai_user_test.js:14:9:14:17 | userInput | openai_user_test.js:147:12:147:20 | userInput | provenance | | -| openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:14:9:14:17 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:23:12:23:20 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:32:18:32:26 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:43:18:43:26 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:57:19:57:27 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:67:13:67:21 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:72:13:72:21 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:76:13:76:21 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:83:13:83:21 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:89:13:89:21 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:95:14:95:22 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:101:12:101:20 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:148:12:148:20 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:192:20:192:28 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:196:30:196:38 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:201:27:201:35 | userInput | provenance | | +| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:205:30:205:38 | userInput | provenance | | +| openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:15:9:15:17 | userInput | provenance | | nodes | anthropic_user_test.js:8:9:8:17 | userInput | semmle.label | userInput | | anthropic_user_test.js:8:21:8:39 | req.query.userInput | semmle.label | req.query.userInput | @@ -35,20 +39,24 @@ nodes | gemini_user_test.js:44:13:44:21 | userInput | semmle.label | userInput | | gemini_user_test.js:51:13:51:21 | userInput | semmle.label | userInput | | gemini_user_test.js:58:13:58:21 | userInput | semmle.label | userInput | -| openai_user_test.js:14:9:14:17 | userInput | semmle.label | userInput | -| openai_user_test.js:14:21:14:39 | req.query.userInput | semmle.label | req.query.userInput | -| openai_user_test.js:22:12:22:20 | userInput | semmle.label | userInput | -| openai_user_test.js:31:18:31:26 | userInput | semmle.label | userInput | -| openai_user_test.js:42:18:42:26 | userInput | semmle.label | userInput | -| openai_user_test.js:56:19:56:27 | userInput | semmle.label | userInput | -| openai_user_test.js:66:13:66:21 | userInput | semmle.label | userInput | -| openai_user_test.js:71:13:71:21 | userInput | semmle.label | userInput | -| openai_user_test.js:75:13:75:21 | userInput | semmle.label | userInput | -| openai_user_test.js:82:13:82:21 | userInput | semmle.label | userInput | -| openai_user_test.js:88:13:88:21 | userInput | semmle.label | userInput | -| openai_user_test.js:94:14:94:22 | userInput | semmle.label | userInput | -| openai_user_test.js:100:12:100:20 | userInput | semmle.label | userInput | -| openai_user_test.js:147:12:147:20 | userInput | semmle.label | userInput | +| openai_user_test.js:15:9:15:17 | userInput | semmle.label | userInput | +| openai_user_test.js:15:21:15:39 | req.query.userInput | semmle.label | req.query.userInput | +| openai_user_test.js:23:12:23:20 | userInput | semmle.label | userInput | +| openai_user_test.js:32:18:32:26 | userInput | semmle.label | userInput | +| openai_user_test.js:43:18:43:26 | userInput | semmle.label | userInput | +| openai_user_test.js:57:19:57:27 | userInput | semmle.label | userInput | +| openai_user_test.js:67:13:67:21 | userInput | semmle.label | userInput | +| openai_user_test.js:72:13:72:21 | userInput | semmle.label | userInput | +| openai_user_test.js:76:13:76:21 | userInput | semmle.label | userInput | +| openai_user_test.js:83:13:83:21 | userInput | semmle.label | userInput | +| openai_user_test.js:89:13:89:21 | userInput | semmle.label | userInput | +| openai_user_test.js:95:14:95:22 | userInput | semmle.label | userInput | +| openai_user_test.js:101:12:101:20 | userInput | semmle.label | userInput | +| openai_user_test.js:148:12:148:20 | userInput | semmle.label | userInput | +| openai_user_test.js:192:20:192:28 | userInput | semmle.label | userInput | +| openai_user_test.js:196:30:196:38 | userInput | semmle.label | userInput | +| openai_user_test.js:201:27:201:35 | userInput | semmle.label | userInput | +| openai_user_test.js:205:30:205:38 | userInput | semmle.label | userInput | subpaths #select | anthropic_user_test.js:18:18:18:26 | userInput | anthropic_user_test.js:8:21:8:39 | req.query.userInput | anthropic_user_test.js:18:18:18:26 | userInput | This prompt construction depends on a $@. | anthropic_user_test.js:8:21:8:39 | req.query.userInput | user-provided value | @@ -59,15 +67,19 @@ subpaths | gemini_user_test.js:44:13:44:21 | userInput | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:44:13:44:21 | userInput | This prompt construction depends on a $@. | gemini_user_test.js:8:21:8:39 | req.query.userInput | user-provided value | | gemini_user_test.js:51:13:51:21 | userInput | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:51:13:51:21 | userInput | This prompt construction depends on a $@. | gemini_user_test.js:8:21:8:39 | req.query.userInput | user-provided value | | gemini_user_test.js:58:13:58:21 | userInput | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:58:13:58:21 | userInput | This prompt construction depends on a $@. | gemini_user_test.js:8:21:8:39 | req.query.userInput | user-provided value | -| openai_user_test.js:22:12:22:20 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:22:12:22:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:31:18:31:26 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:31:18:31:26 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:42:18:42:26 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:42:18:42:26 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:56:19:56:27 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:56:19:56:27 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:66:13:66:21 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:66:13:66:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:71:13:71:21 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:71:13:71:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:75:13:75:21 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:75:13:75:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:82:13:82:21 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:82:13:82:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:88:13:88:21 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:88:13:88:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:94:14:94:22 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:94:14:94:22 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:100:12:100:20 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:100:12:100:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | -| openai_user_test.js:147:12:147:20 | userInput | openai_user_test.js:14:21:14:39 | req.query.userInput | openai_user_test.js:147:12:147:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:14:21:14:39 | req.query.userInput | user-provided value | +| openai_user_test.js:23:12:23:20 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:23:12:23:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:32:18:32:26 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:32:18:32:26 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:43:18:43:26 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:43:18:43:26 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:57:19:57:27 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:57:19:57:27 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:67:13:67:21 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:67:13:67:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:72:13:72:21 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:72:13:72:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:76:13:76:21 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:76:13:76:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:83:13:83:21 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:83:13:83:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:89:13:89:21 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:89:13:89:21 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:95:14:95:22 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:95:14:95:22 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:101:12:101:20 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:101:12:101:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:148:12:148:20 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:148:12:148:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:192:20:192:28 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:192:20:192:28 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:196:30:196:38 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:196:30:196:38 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:201:27:201:35 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:201:27:201:35 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | +| openai_user_test.js:205:30:205:38 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:205:30:205:38 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value | diff --git a/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/openai_user_test.js b/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/openai_user_test.js index 9a28b74f361..94b7409033b 100644 --- a/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/openai_user_test.js +++ b/javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/openai_user_test.js @@ -5,6 +5,7 @@ const { GuardrailsOpenAI, GuardrailsAzureOpenAI, } = require("@openai/guardrails"); +const { Agent, run, Runner } = require("@openai/agents"); const app = express(); const client = new OpenAI(); @@ -180,5 +181,39 @@ app.get("/test", async (req, res) => { ], }); + // === Agent SDK: run() user prompt sinks (SHOULD ALERT) === + + const agent = new Agent({ + name: "Assistant", + instructions: "You are a helpful assistant", + }); + + // run() with string input (user prompt) + await run(agent, userInput); // $ Alert[js/user-prompt-injection] + + // run() with user-role array message + await run(agent, [ + { role: "user", content: userInput }, // $ Alert[js/user-prompt-injection] + ]); + + // Runner instance with string input + const runner = new Runner(); + await runner.run(agent, userInput); // $ Alert[js/user-prompt-injection] + + // Runner instance with user-role array message + await runner.run(agent, [ + { role: "user", content: userInput }, // $ Alert[js/user-prompt-injection] + ]); + + // === Agent SDK: system/developer role in run() (SHOULD NOT ALERT for user-prompt) === + + await run(agent, [ + { role: "system", content: userInput }, // OK for user-prompt-injection (system prompt sink) + ]); + + await run(agent, [ + { role: "developer", content: userInput }, // OK for user-prompt-injection (system prompt sink) + ]); + res.send("done"); });