From 09bf2218d4f26f5fdf45118ea4c87893cc1eba7f Mon Sep 17 00:00:00 2001 From: tiferet Date: Tue, 6 Dec 2022 11:22:36 -0800 Subject: [PATCH] Merge in `aeisenberg/atm-codex` --- .../EndpointScoring.qll | 85 ++++++++++++------- .../PromptConfiguration.qll | 22 +++++ .../src/SqlInjectionATM.ql | 1 + 3 files changed, 75 insertions(+), 33 deletions(-) create mode 100644 javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/PromptConfiguration.qll diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll index 6746c06db7b..9913a2fb21a 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll @@ -7,38 +7,60 @@ private import javascript private import BaseScoring private import EndpointFeatures as EndpointFeatures -private import FeaturizationConfig +private import PromptConfiguration private import EndpointTypes private string getACompatibleModelChecksum() { availableMlModels(result, "javascript", _, "atm-endpoint-scoring") } +// class RelevantFeaturizationConfig extends FeaturizationConfig { +// RelevantFeaturizationConfig() { this = "RelevantFeaturization" } +// override DataFlow::Node getAnEndpointToFeaturize() { +// getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _) +// or +// getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result) +// } +// } module ModelScoring { - /** - * A featurization config that only featurizes new candidate endpoints that are part of a flow - * path. - */ - class RelevantFeaturizationConfig extends FeaturizationConfig { - RelevantFeaturizationConfig() { this = "RelevantFeaturization" } - - override DataFlow::Node getAnEndpointToFeaturize() { - getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _) - or - getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result) - } + predicate getARequestedEndpoint(DataFlow::Node node, string prompt) { + exists(PromptConfiguration cfg | + cfg.getPrompt(node) = prompt and cfg.getAnEndpointToFeaturize() = node + ) } - DataFlow::Node getARequestedEndpoint() { - result = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() + predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) { + internalEnpointScores(endpoint, mapEndpointType(encodedEndpointType)) and + mapScore(score, mapEndpointType(encodedEndpointType)) } - private int getARequestedEndpointType() { result = any(EndpointType type).getEncoding() } + predicate internalEnpointScores(DataFlow::Node endpoint, string endpointType) = + remoteScoreEndpoints(getARequestedEndpoint/2)(endpoint, endpointType) - predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) = - scoreEndpoints(getARequestedEndpoint/0, EndpointFeatures::tokenFeatures/3, - EndpointFeatures::getASupportedFeatureName/0, getARequestedEndpointType/0, - getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score) + private string mapEndpointType(int encodedEndpointType) { + result = "no sink" and encodedEndpointType = 0 + or + result = "xss sink" and encodedEndpointType = 1 + or + result = "nosql sink" and encodedEndpointType = 2 + or + result = "sql sink" and encodedEndpointType = 3 + or + result = "tainted path sink" and encodedEndpointType = 4 + } + + private predicate mapScore(float score, string endpointType) { + ( + ( + endpointType = "xss sink" or + endpointType = "nosql sink" or + endpointType = "sql sink" or + endpointType = "tainted path sink" + ) and + score = 1.0 + ) + /*or (score = 0.0 and any(endpointType)) */ + } } /** @@ -138,16 +160,13 @@ class EndpointScoringResults extends ScoringResults { ) } } - -module Debugging { - query predicate hopInputEndpoints(DataFlow::Node endpoint) { - endpoint = ModelScoring::getARequestedEndpoint() - } - - query predicate endpointScores = ModelScoring::endpointScores/3; - - query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) { - any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and - any(DataFlow::Configuration cfg).hasFlow(source, sink) - } -} +// module Debugging { +// query predicate hopInputEndpoints(DataFlow::Node endpoint) { +// endpoint = ModelScoring::getARequestedEndpoint() +// } +// query predicate endpointScores = ModelScoring::endpointScores/3; +// query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) { +// any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and +// any(DataFlow::Configuration cfg).hasFlow(source, sink) +// } +// } diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/PromptConfiguration.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/PromptConfiguration.qll new file mode 100644 index 00000000000..29159f4a58b --- /dev/null +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/PromptConfiguration.qll @@ -0,0 +1,22 @@ +import javascript +private import BaseScoring +import FeaturizationConfig + +class PromptConfiguration extends FeaturizationConfig { + PromptConfiguration() { this = "PromptConfiguration" } + + // abstract predicate getANodeAndPrompt(DataFlow::Node node, string prompt); + string getPrompt(DataFlow::Node node) { + result = + "# Examples of security vulnerability sinks and non-sinks\n|Dataflow node|Neighborhood|Classification|\n|---|---|---|\n| `bid` | `const body = ` | xss sink |\n| `nick` | `irc.me = nick; irc.nick(nick); irc.user(username, realname);` | non-sink || `hash` | `componentDidMount() { const [, hash] = location.href.split(#) this.setState({ hash }) }` | `" + + extractString(node) + "` | " + } + + string extractString(DataFlow::Node node) { result = node.getStringValue() } + + override DataFlow::Node getAnEndpointToFeaturize() { + getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _) + or + getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result) + } +} diff --git a/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql b/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql index fdeb79de145..f3ab8531743 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql +++ b/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql @@ -16,6 +16,7 @@ import experimental.adaptivethreatmodeling.SqlInjectionATM import ATM::ResultsInfo import DataFlow::PathGraph +import experimental.adaptivethreatmodeling.PromptConfiguration from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score where cfg.hasBoostedFlowPath(source, sink, score)