From 2bbd010db1d6831919652d90b3b2b32aa36cf974 Mon Sep 17 00:00:00 2001 From: tiferet Date: Thu, 1 Dec 2022 17:10:34 -0800 Subject: [PATCH] Start work on extracting the "training" part of the prompt from positive and negative examples in the current repo. --- .../EndpointFeatures.qll | 104 ++++++++++++------ 1 file changed, 73 insertions(+), 31 deletions(-) diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll index dbf79c60505..a0beaef6460 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll @@ -16,6 +16,7 @@ private import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInj private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm private import experimental.adaptivethreatmodeling.XssATM as XssAtm +private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics /** * Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`. @@ -149,7 +150,7 @@ class CodexPrompt extends EndpointFeature, TCodexPrompt { private string getTrainingSetPrompt() { result = "# Examples of security vulnerability sinks and non-sinks\n|Dataflow node|Neighborhood|Classification|\n|---|---|---|\n|`refStr`|` const refStr = pageRef.gen === 0 ? 
${pageRef.num}R : ${pageRef.num}R${pageRef.gen}; this.#pagesRefCache.set(refStr, pageNum); }`|non-sink|\n|`name`|` return res.redirect(back); } const ndb = req.mainClient.client.db(name); ndb.createCollection(delete_me, function (err) {`|sql injection sink|\n|`It took + (t1 - t0) + msec to create + count + +className+ instances programmatically.`|` } var t1 = new Date().getTime(); dojo.byId(results).innerHTML = It took + (t1 - t0) + msec to create + count + +className+ instances programmatically.; } dojo.ready(makeEm);`|non-sink|\n|`contents`|` const contents = fileData.buffer.toString(); res.json({message: contents});});`|non-sink|\n|`{ _id }`|` // If an ObjectID was correctly created from passed id param, try getting the ObjID first else falling back to try getting the string id // If not valid ObjectID created, try getting string id req.collection.findOne({ _id }, function (err, doc) { if (err) { req.session.error = Error: + err;`|nosql injection sink|\n|`bid`|` const body = `|xss sink|\n|`nick`|` irc.me = nick; irc.nick(nick); irc.user(username, realname);`|non-sink|\n|`{where: {name: req.body.type}}`|` if(req.is(json)) { models.VisualizationType.find({ where: { name: req.body.type } }).then(function(vizType) { if(!vizType) { throw new Error(Unknown Viztype);`|nosql injection sink|\n|`sql`|` var callback = cb; var dbService = this.getService(connectionName); dbService.execute(sql, params, function(err, result) { if (err) { return callback(err);`|sql injection sink|\n|`filename`|` const writeStream = gfs.createWriteStream({ _id: newFileID, filename, mode: w, content_type: mimetype,`|path injection sink|\n|`req.url.substr(7)`|` `|xss sink|\n|`assets/images/ + req.files.upload_file.name`|` res.send({ success: true, file_path: assets/images/ + req.files.upload_file.name }); });`|path injection sink|\n|`path`|`async function handleListingRequest({query: {path, search}}, res) { try { assert(path); const fileSet = await getListing(root, path);`|non-sink|\n|`{ 
lastLoginIp: lastLoginIp }`|` } models.User.findByPk(loggedInUser.data.id).then(user => { user.update({ lastLoginIp: lastLoginIp }).then(user => { res.json(user) }).catch(error => {`|non-sink|\n|`uploadId`|` Bucket: config.bucket, Key: key, UploadId: uploadId, MultipartUpload: { Parts: parts,`|non-sink|\n|`hash`|` componentDidMount() { const [, hash] = location.href.split(#) this.setState({ hash }) }`|non-sink|\n" - //hardFPExamplesForCodexPrompt() + hardTPExamplesForCodexPrompt(2, ) + //hardNegativeExamplesForCodexPrompt() + hardPositiveExamplesForCodexPrompt(2, ) } /** @@ -162,65 +163,106 @@ class CodexPrompt extends EndpointFeature, TCodexPrompt { } /** - * We can find hard TP examples for the codex prompt by extracting sinks that are found by the classical queries but - * filtered by the endpoint filters. + * Holds if `endpoint` is a sink for a security vulnerability of type `sinkType`, where the string used to label this + * sink type is `sinkName`. */ - private predicate hardTPExamples( - DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason + private predicate isPositiveExampleFromCurrentRepo( + DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string sinkName ) { sinkType instanceof EndpointTypes::NosqlInjectionSinkType and endpoint instanceof NosqlInjectionCustomizations::NosqlInjection::Sink and - reason = NosqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + sinkName = "nosql injection sink" or sinkType instanceof EndpointTypes::SqlInjectionSinkType and endpoint instanceof SqlInjectionCustomizations::SqlInjection::Sink and - reason = SqlInjectionAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + sinkName = "sql injection sink" or sinkType instanceof EndpointTypes::TaintedPathSinkType and endpoint instanceof TaintedPathCustomizations::TaintedPath::Sink and - reason = TaintedPathAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + sinkName = "path injection sink" or sinkType instanceof 
EndpointTypes::XssSinkType and endpoint instanceof DomBasedXssCustomizations::DomBasedXss::Sink and - reason = XssAtm::SinkEndpointFilter::getAReasonSinkExcluded(endpoint) + sinkName = "xss sink" } /** - * Hardcode some hard FP examples for each query from the manual triage of the model shipped in 0.4.0. + * Holds if `endpoint` is not a sink for any type of security vulnerability for the reason specified by + * `characteristic`. */ - private predicate hardFPExamples(DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType) { - sinkType instanceof EndpointTypes::NosqlInjectionSinkType - or - // and - // TODO: How do I hardcode a dataflow node? - sinkType instanceof EndpointTypes::SqlInjectionSinkType - or - // and - sinkType instanceof EndpointTypes::TaintedPathSinkType - or - // and - sinkType instanceof EndpointTypes::XssSinkType - // and + private predicate isNegativeExampleFromCurrentRepo( + DataFlow::Node endpoint, EndpointCharacteristics::EndpointCharacteristic characteristic + ) { + characteristic.appliesToEndpoint(endpoint) and + exists(float confidence | + characteristic + .hasImplications(any(EndpointTypes::NegativeType negativeClass), true, confidence) and + confidence >= characteristic.highConfidence() + ) + } + + private predicate selectTwoPositiveExamples( + DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string sinkName, File file + ) { + this.positiveExamplesForCodexPrompt(2, endpoint, sinkType, sinkName) and + file = endpoint.getFile() } /** - * Select the specified number of hard TP examples for the codex prompt for each query, using only one example per - * reason. + * Select `numExamples` positive examples for the codex prompt for each query, selecting from a diverse set + * of files. 
*/ bindingset[numExamples] - private predicate hardTPExamplesForCodexPrompt( - int numExamples, DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string reason + private predicate positiveExamplesForCodexPrompt( + int numExamples, DataFlow::Node endpoint, EndpointTypes::EndpointType sinkType, string sinkName ) { - this.hardTPExamples(endpoint, sinkType, reason) + this.isPositiveExampleFromCurrentRepo(endpoint, sinkType, sinkName) and + // There is no previous positive example of the same type in the same file. + not exists(DataFlow::Node endpoint2 | + this.isPositiveExampleFromCurrentRepo(endpoint2, sinkType, sinkName) and + endpoint.getFile() = endpoint2.getFile() and + ( + endpoint.getStartLine() > endpoint2.getStartLine() + or + endpoint.getStartLine() = endpoint2.getStartLine() and + endpoint.getStartColumn() > endpoint2.getStartColumn() + ) + ) // and - // TODO + // exists(int rankIndex, File file | + // rankIndex <= numExamples and rankIndex > 0 and + // file = endpoint.asExpr().getLocation().getFile() and not exists(int lowerRankIndex | | ) + // endpoint = rank[rankIndex](string file, int a, int b, int c, int d | + // | + // endpoint order by sinkName, file + // ) + // // and + // // r % (1 / rate).ceil() = 0 + // ) + // exists(int r | r <= numExamples and endpoint = rank[r](string file, int a, int b, int c, int d | + // endpoint.asExpr().getLocation().hasLocationInfo(file, a, b, c, d) + // | + // endpoint order by sinkName, file, a, b, c, d + // ) + // // and + // // r % (1 / rate).ceil() = 0 + // ) + // // select `numExamples` examples for each query + // count( | | 1) <= numExamples and + // // select examples from a diverse set of files + // not exists(string file | + // file = endpoint.getLocation().getFile().getRelativePath() and + // file = any(EndpointTypes::EndpointType t).getAFileWithPositiveExample(t) + // ) + // // and + // // TODO } /** - * Select the specified number of hard FP examples for the codex prompt for each query. 
+ * Select `numExamples` negative examples for the codex prompt, selecting from a diverse set of characteristics. * TODO */ - private string hardFPExamplesForCodexPrompt() { + private string hardNegativeExamplesForCodexPrompt() { result = "# Examples of security vulnerability sinks and non-sinks\n|Dataflow node|Neighborhood|Classification|\n|---|---|---|\n|`m[9] ? m[10] : null`|` this.authority = m[5] ? m[6] : null; this.path = m[7]; this.query = m[9] ? m[10] : null; this.fragment = m[12] ? m[13] : null; return this;`|non-sink|\n|`this.flowRunId`|` variables: { input: { flow_run_id: this.flowRunId, name: e }`|non-sink|\n|`req.body.firstName`|` res.json({ firstName: req.body.firstName, lastName: req.body.lastName, email: req.body.email`|non-sink|\n|`lang[1]`|` if (lang) { document.getElementsByTagName('html')[0].setAttribute('lang', lang[1]); }`|non-sink|\n|`token`|` }, }); tokenProvider.saveNewToken(token).then(ok => { insights.trackEvent({ name: 'ReposCreateTokenFinish',`|non-sink|\n|`filename`|`function sendFile(filename, response) { response.setHeader('Content-Type', mime.lookup(filename)); response.writeHead(200); const fileStream = createReadStream(filename);`|non-sink|\n|`year`|` postsData = await getPostsDateArchive( postType, !isNaN(parseInt(year, 10)) ? parseInt(year, 10) : null, !isNaN(parseInt(month, 10)) ? parseInt(month, 10) : null, !isNaN(parseInt(day, 10)) ? parseInt(day, 10) : null,`|non-sink|\n|`redirectTo === 'login' ? {redirectTo: to.path,} : to.query`|` return next({ name: redirectTo, query: redirectTo === 'login' ? { redirectTo: to.path, } : to.query, }); }`|non-sink|\n" }