mirror of
https://github.com/github/codeql.git
synced 2026-06-03 12:50:16 +02:00
Start adding the codex prompt feature
This commit is contained in:
@@ -7,6 +7,15 @@
|
||||
import javascript
|
||||
private import FeaturizationConfig
|
||||
private import FunctionBodyFeatures as FunctionBodyFeatures
|
||||
private import EndpointTypes as EndpointTypes
|
||||
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations as NosqlInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.SqlInjectionCustomizations as SqlInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.TaintedPathCustomizations as TaintedPathCustomizations
|
||||
private import semmle.javascript.security.dataflow.DomBasedXssCustomizations as DomBasedXssCustomizations
|
||||
private import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
|
||||
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.XssATM as XssAtm
|
||||
|
||||
/**
|
||||
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
|
||||
@@ -90,6 +99,7 @@ predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string feat
|
||||
* See EndpointFeature
|
||||
*/
|
||||
private newtype TEndpointFeature =
|
||||
TCodexPrompt() or
|
||||
TEnclosingFunctionName() or
|
||||
TReceiverName() or
|
||||
TEnclosingFunctionBody() or
|
||||
@@ -122,6 +132,98 @@ abstract class EndpointFeature extends TEndpointFeature {
|
||||
string toString() { result = this.getName() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The codex promt for this endpoint.
|
||||
*/
|
||||
class CodexPrompt extends EndpointFeature, TCodexPrompt {
|
||||
override string getName() { result = "codexPrompt" }
|
||||
|
||||
override string getValue(DataFlow::Node endpoint) {
|
||||
result = getTrainingSetPrompt() + getCurrentEndpointPrompt(endpoint)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the beginning of the prompt, which contains the training examples, shuffled in random order.
|
||||
* TODO
|
||||
*/
|
||||
private string getTrainingSetPrompt() {
|
||||
result = hardFPExamplesForCodexPrompt()
|
||||
//+ hardTPExamplesForCodexPrompt(2, )
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last line of the prompt, containing the current endpoint.
|
||||
* TODO
|
||||
*/
|
||||
private string getCurrentEndpointPrompt(DataFlow::Node endpoint) {
|
||||
result = "Endpoint: " + endpoint.asExpr().toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* We can find hard TP examples for the codex prompt by extracting sinks that are found by the classical queries but
|
||||
* filtered by the endpoint filters.
|
||||
*/
|
||||
predicate hardTPExamples(
|
||||
DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason
|
||||
) {
|
||||
sinkType instanceof EndpointTypes::NosqlInjectionSinkType and
|
||||
endpoint instanceof NosqlInjectionCustomizations::NosqlInjection::Sink and
|
||||
reason = NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
|
||||
or
|
||||
sinkType instanceof EndpointTypes::SqlInjectionSinkType and
|
||||
endpoint instanceof SqlInjectionCustomizations::SqlInjection::Sink and
|
||||
reason = SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
|
||||
or
|
||||
sinkType instanceof EndpointTypes::TaintedPathSinkType and
|
||||
endpoint instanceof TaintedPathCustomizations::TaintedPath::Sink and
|
||||
reason = TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
|
||||
or
|
||||
sinkType instanceof EndpointTypes::XssSinkType and
|
||||
endpoint instanceof DomBasedXssCustomizations::DomBasedXss::Sink and
|
||||
reason = XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint)
|
||||
}
|
||||
|
||||
/**
|
||||
* Hardcode some hard FP examples for each query from the manual triage of the model shipped in 0.4.0.
|
||||
*/
|
||||
predicate hardFPExamples(DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType) {
|
||||
sinkType instanceof EndpointTypes::NosqlInjectionSinkType
|
||||
or
|
||||
// and
|
||||
// TODO: How do I hardcode a dataflow node?
|
||||
sinkType instanceof EndpointTypes::SqlInjectionSinkType
|
||||
or
|
||||
// and
|
||||
sinkType instanceof EndpointTypes::TaintedPathSinkType
|
||||
or
|
||||
// and
|
||||
sinkType instanceof EndpointTypes::XssSinkType
|
||||
// and
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the specified number of hard TP examples for the codex prompt for each query, using only one example per
|
||||
* reason.
|
||||
*/
|
||||
bindingset[numExamples]
|
||||
predicate hardTPExamplesForCodexPrompt(
|
||||
int numExamples, DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason
|
||||
) {
|
||||
hardTPExamples(endpoint, sinkType, reason)
|
||||
// and
|
||||
// TODO
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the specified number of hard FP examples for the codex prompt for each query.
|
||||
* TODO
|
||||
*/
|
||||
private string hardFPExamplesForCodexPrompt() {
|
||||
result =
|
||||
"# Examples of security vulnerability sinks and non-sinks\n|Dataflow node|Neighborhood|Classification|\n|---|---|---|\n|`m[9] ? m[10] : null`|` this.authority = m[5] ? m[6] : null; this.path = m[7]; this.query = m[9] ? m[10] : null; this.fragment = m[12] ? m[13] : null; return this;`|non-sink|\n|`this.flowRunId`|` variables: { input: { flow_run_id: this.flowRunId, name: e }`|non-sink|\n|`req.body.firstName`|` res.json({ firstName: req.body.firstName, lastName: req.body.lastName, email: req.body.email`|non-sink|\n|`lang[1]`|` if (lang) { document.getElementsByTagName('html')[0].setAttribute('lang', lang[1]); }`|non-sink|\n|`token`|` }, }); tokenProvider.saveNewToken(token).then(ok => { insights.trackEvent({ name: 'ReposCreateTokenFinish',`|non-sink|\n|`filename`|`function sendFile(filename, response) { response.setHeader('Content-Type', mime.lookup(filename)); response.writeHead(200); const fileStream = createReadStream(filename);`|non-sink|\n|`year`|` postsData = await getPostsDateArchive( postType, !isNaN(parseInt(year, 10)) ? parseInt(year, 10) : null, !isNaN(parseInt(month, 10)) ? parseInt(month, 10) : null, !isNaN(parseInt(day, 10)) ? parseInt(day, 10) : null,`|non-sink|\n|`redirectTo === 'login' ? {redirectTo: to.path,} : to.query`|` return next({ name: redirectTo, query: redirectTo === 'login' ? { redirectTo: to.path, } : to.query, }); }`|non-sink|\n"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The feature for the name of the function that encloses the endpoint.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user