From 1919206e1ef41ef60381209584389e72cd39718d Mon Sep 17 00:00:00 2001 From: tiferet Date: Mon, 28 Nov 2022 16:52:24 -0800 Subject: [PATCH] Start adding the codex prompt feature --- .../EndpointFeatures.qll | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll index a75bba0e370..950a1ae1bc2 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll @@ -7,6 +7,15 @@ import javascript private import FeaturizationConfig private import FunctionBodyFeatures as FunctionBodyFeatures +private import EndpointTypes as EndpointTypes +private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations as NosqlInjectionCustomizations +private import semmle.javascript.security.dataflow.SqlInjectionCustomizations as SqlInjectionCustomizations +private import semmle.javascript.security.dataflow.TaintedPathCustomizations as TaintedPathCustomizations +private import semmle.javascript.security.dataflow.DomBasedXssCustomizations as DomBasedXssCustomizations +private import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm +private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm +private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm +private import experimental.adaptivethreatmodeling.XssATM as XssAtm /** * Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`. @@ -90,6 +99,7 @@ predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string feat * See EndpointFeature */ private newtype TEndpointFeature = + TCodexPrompt() or TEnclosingFunctionName() or TReceiverName() or TEnclosingFunctionBody() or @@ -122,6 +132,98 @@ abstract class EndpointFeature extends TEndpointFeature { string toString() { result = this.getName() } } +/** + * The codex promt for this endpoint. + */ +class CodexPrompt extends EndpointFeature, TCodexPrompt { + override string getName() { result = "codexPrompt" } + + override string getValue(DataFlow::Node endpoint) { + result = getTrainingSetPrompt() + getCurrentEndpointPrompt(endpoint) + } + + /** + * Gets the beginning of the prompt, which contains the training examples, shuffled in random order. + * TODO + */ + private string getTrainingSetPrompt() { + result = hardFPExamplesForCodexPrompt() + //+ hardTPExamplesForCodexPrompt(2, ) + } + + /** + * Gets the last line of the prompt, containing the current endpoint. + * TODO + */ + private string getCurrentEndpointPrompt(DataFlow::Node endpoint) { + result = "Endpoint: " + endpoint.asExpr().toString() + } + + /** + * We can find hard TP examples for the codex prompt by extracting sinks that are found by the classical queries but + * filtered by the endpoint filters. + */ + predicate hardTPExamples( + DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason + ) { + sinkType instanceof EndpointTypes::NosqlInjectionSinkType and + endpoint instanceof NosqlInjectionCustomizations::NosqlInjection::Sink and + reason = NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + or + sinkType instanceof EndpointTypes::SqlInjectionSinkType and + endpoint instanceof SqlInjectionCustomizations::SqlInjection::Sink and + reason = SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + or + sinkType instanceof EndpointTypes::TaintedPathSinkType and + endpoint instanceof TaintedPathCustomizations::TaintedPath::Sink and + reason = TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + or + sinkType instanceof EndpointTypes::XssSinkType and + endpoint instanceof DomBasedXssCustomizations::DomBasedXss::Sink and + reason = XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + } + + /** + * Hardcode some hard FP examples for each query from the manual triage of the model shipped in 0.4.0. + */ + predicate hardFPExamples(DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType) { + sinkType instanceof EndpointTypes::NosqlInjectionSinkType + or + // and + // TODO: How do I hardcode a dataflow node? + sinkType instanceof EndpointTypes::SqlInjectionSinkType + or + // and + sinkType instanceof EndpointTypes::TaintedPathSinkType + or + // and + sinkType instanceof EndpointTypes::XssSinkType + // and + } + + /** + * Select the specified number of hard TP examples for the codex prompt for each query, using only one example per + * reason. + */ + bindingset[numExamples] + predicate hardTPExamplesForCodexPrompt( + int numExamples, DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason + ) { + hardTPExamples(endpoint, sinkType, reason) + // and + // TODO + } + + /** + * Select the specified number of hard FP examples for the codex prompt for each query. + * TODO + */ + private string hardFPExamplesForCodexPrompt() { + result = + "# Examples of security vulnerability sinks and non-sinks\n|Dataflow node|Neighborhood|Classification|\n|---|---|---|\n|`m[9] ? m[10] : null`|` this.authority = m[5] ? m[6] : null; this.path = m[7]; this.query = m[9] ? m[10] : null; this.fragment = m[12] ? m[13] : null; return this;`|non-sink|\n|`this.flowRunId`|` variables: { input: { flow_run_id: this.flowRunId, name: e }`|non-sink|\n|`req.body.firstName`|` res.json({ firstName: req.body.firstName, lastName: req.body.lastName, email: req.body.email`|non-sink|\n|`lang[1]`|` if (lang) { document.getElementsByTagName('html')[0].setAttribute('lang', lang[1]); }`|non-sink|\n|`token`|` }, }); tokenProvider.saveNewToken(token).then(ok => { insights.trackEvent({ name: 'ReposCreateTokenFinish',`|non-sink|\n|`filename`|`function sendFile(filename, response) { response.setHeader('Content-Type', mime.lookup(filename)); response.writeHead(200); const fileStream = createReadStream(filename);`|non-sink|\n|`year`|` postsData = await getPostsDateArchive( postType, !isNaN(parseInt(year, 10)) ? parseInt(year, 10) : null, !isNaN(parseInt(month, 10)) ? parseInt(month, 10) : null, !isNaN(parseInt(day, 10)) ? parseInt(day, 10) : null,`|non-sink|\n|`redirectTo === 'login' ? {redirectTo: to.path,} : to.query`|` return next({ name: redirectTo, query: redirectTo === 'login' ? { redirectTo: to.path, } : to.query, }); }`|non-sink|\n" + } +} + /** * The feature for the name of the function that encloses the endpoint. */