Start adding the codex prompt feature

This commit is contained in:
tiferet
2022-11-28 16:52:24 -08:00
parent ad13f5585d
commit 1919206e1e

View File

@@ -7,6 +7,15 @@
import javascript
private import FeaturizationConfig
private import FunctionBodyFeatures as FunctionBodyFeatures
private import EndpointTypes as EndpointTypes
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations as NosqlInjectionCustomizations
private import semmle.javascript.security.dataflow.SqlInjectionCustomizations as SqlInjectionCustomizations
private import semmle.javascript.security.dataflow.TaintedPathCustomizations as TaintedPathCustomizations
private import semmle.javascript.security.dataflow.DomBasedXssCustomizations as DomBasedXssCustomizations
private import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
private import experimental.adaptivethreatmodeling.XssATM as XssAtm
/**
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
@@ -90,6 +99,7 @@ predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string feat
* See EndpointFeature
*/
private newtype TEndpointFeature =
TCodexPrompt() or
TEnclosingFunctionName() or
TReceiverName() or
TEnclosingFunctionBody() or
@@ -122,6 +132,98 @@ abstract class EndpointFeature extends TEndpointFeature {
string toString() { result = this.getName() }
}
/**
* The codex promt for this endpoint.
*/
class CodexPrompt extends EndpointFeature, TCodexPrompt {
override string getName() { result = "codexPrompt" }
override string getValue(DataFlow::Node endpoint) {
result = getTrainingSetPrompt() + getCurrentEndpointPrompt(endpoint)
}
/**
* Gets the beginning of the prompt, which contains the training examples, shuffled in random order.
* TODO
*/
private string getTrainingSetPrompt() {
result = hardFPExamplesForCodexPrompt()
//+ hardTPExamplesForCodexPrompt(2, )
}
/**
* Gets the last line of the prompt, containing the current endpoint.
* TODO
*/
private string getCurrentEndpointPrompt(DataFlow::Node endpoint) {
result = "Endpoint: " + endpoint.asExpr().toString()
}
/**
* We can find hard TP examples for the codex prompt by extracting sinks that are found by the classical queries but
* filtered by the endpoint filters.
*/
predicate hardTPExamples(
DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason
) {
sinkType instanceof EndpointTypes::NosqlInjectionSinkType and
endpoint instanceof NosqlInjectionCustomizations::NosqlInjection::Sink and
reason = NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
or
sinkType instanceof EndpointTypes::SqlInjectionSinkType and
endpoint instanceof SqlInjectionCustomizations::SqlInjection::Sink and
reason = SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
or
sinkType instanceof EndpointTypes::TaintedPathSinkType and
endpoint instanceof TaintedPathCustomizations::TaintedPath::Sink and
reason = TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
or
sinkType instanceof EndpointTypes::XssSinkType and
endpoint instanceof DomBasedXssCustomizations::DomBasedXss::Sink and
reason = XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
}
/**
* Hardcode some hard FP examples for each query from the manual triage of the model shipped in 0.4.0.
*/
predicate hardFPExamples(DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType) {
sinkType instanceof EndpointTypes::NosqlInjectionSinkType
or
// and
// TODO: How do I hardcode a dataflow node?
sinkType instanceof EndpointTypes::SqlInjectionSinkType
or
// and
sinkType instanceof EndpointTypes::TaintedPathSinkType
or
// and
sinkType instanceof EndpointTypes::XssSinkType
// and
}
/**
* Select the specified number of hard TP examples for the codex prompt for each query, using only one example per
* reason.
*/
bindingset[numExamples]
predicate hardTPExamplesForCodexPrompt(
int numExamples, DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason
) {
hardTPExamples(endpoint, sinkType, reason)
// and
// TODO
}
/**
* Select the specified number of hard FP examples for the codex prompt for each query.
* TODO
*/
private string hardFPExamplesForCodexPrompt() {
result =
"# Examples of security vulnerability sinks and non-sinks\n|Dataflow node|Neighborhood|Classification|\n|---|---|---|\n|`m[9] ? m[10] : null`|` this.authority = m[5] ? m[6] : null; this.path = m[7]; this.query = m[9] ? m[10] : null; this.fragment = m[12] ? m[13] : null; return this;`|non-sink|\n|`this.flowRunId`|` variables: { input: { flow_run_id: this.flowRunId, name: e }`|non-sink|\n|`req.body.firstName`|` res.json({ firstName: req.body.firstName, lastName: req.body.lastName, email: req.body.email`|non-sink|\n|`lang[1]`|` if (lang) { document.getElementsByTagName('html')[0].setAttribute('lang', lang[1]); }`|non-sink|\n|`token`|` }, }); tokenProvider.saveNewToken(token).then(ok => { insights.trackEvent({ name: 'ReposCreateTokenFinish',`|non-sink|\n|`filename`|`function sendFile(filename, response) { response.setHeader('Content-Type', mime.lookup(filename)); response.writeHead(200); const fileStream = createReadStream(filename);`|non-sink|\n|`year`|` postsData = await getPostsDateArchive( postType, !isNaN(parseInt(year, 10)) ? parseInt(year, 10) : null, !isNaN(parseInt(month, 10)) ? parseInt(month, 10) : null, !isNaN(parseInt(day, 10)) ? parseInt(day, 10) : null,`|non-sink|\n|`redirectTo === 'login' ? {redirectTo: to.path,} : to.query`|` return next({ name: redirectTo, query: redirectTo === 'login' ? { redirectTo: to.path, } : to.query, }); }`|non-sink|\n"
}
}
/**
* The feature for the name of the function that encloses the endpoint.
*/