From 4b6d1f7b78564d6b419d1451931e92edf0201bbb Mon Sep 17 00:00:00 2001 From: tiferet Date: Mon, 6 Feb 2023 13:31:57 -0800 Subject: [PATCH] Create a new class `other sink`: See https://github.com/github/atm-codex/pull/3 - Add a sink type `OtherMaDSinkType`, and corresponding characteristic `OtherMaDSinkCharacteristic`, for other sinks modeled by a MaD `kind` but not belonging to any of the existing sink types. - Extract positive prompt examples for the new sink type, together with the corresponding MaD `kind`. --- .../EndpointCharacteristics.qll | 39 ++++++++++++------- .../adaptivethreatmodeling/EndpointTypes.qll | 20 +++++++--- .../src/ExtractPositiveExamples.ql | 31 ++++++++++----- 3 files changed, 60 insertions(+), 30 deletions(-) diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll index d4426ef5bdb..70f829313c8 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll @@ -410,21 +410,30 @@ private class RequestForgeryOtherSinkCharacteristic extends EndpointCharacterist } } -// /** -// * Endpoints identified as "NosqlInjectionSink" by the standard Java libraries are NoSQL injection sinks with -// * maximal confidence. 
-// */
-// private class NosqlInjectionSinkCharacteristic extends EndpointCharacteristic {
-//   NosqlInjectionSinkCharacteristic() { this = any(NosqlInjectionSinkType type).getDescription() }
-//   override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof NosqlInjection::Sink }
-//   override predicate hasImplications(
-//     EndpointType endpointClass, boolean isPositiveIndicator, float confidence
-//   ) {
-//     endpointClass instanceof NosqlInjectionSinkType and
-//     isPositiveIndicator = true and
-//     confidence = maximalConfidence()
-//   }
-// }
+/**
+ * Endpoints identified by one of the MaD `kind`s that don't belong to any of the existing endpoint types.
+ */
+class OtherMaDSinkCharacteristic extends EndpointCharacteristic {
+  OtherMaDSinkCharacteristic() { this = any(OtherMaDSinkType type).getDescription() }
+
+  // Delegates to the two-argument overload so that the `kind` filter is written only once.
+  override predicate appliesToEndpoint(DataFlow::Node n) { this.appliesToEndpoint(n, _) }
+
+  override predicate hasImplications(
+    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
+  ) {
+    endpointClass instanceof OtherMaDSinkType and
+    isPositiveIndicator = true and
+    confidence = maximalConfidence()
+  }
+
+  /** Holds if `n` is a sink node of MaD `kind`, and no endpoint type has `kind` as its `getKind()`. */
+  predicate appliesToEndpoint(DataFlow::Node n, string kind) {
+    sinkNode(n, kind) and
+    not kind = any(EndpointType type).getKind()
+  }
+}
+
 /*
  * Characteristics that are indicative of not being a sink of any type, and have historically been used to select
  * negative samples for training.
diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll index 264cf5e0a9a..6632c5a69c5 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll @@ -15,7 +15,8 @@ newtype TEndpointType = TUrlOpenSinkType() or TJdbcUrlSinkType() or TCreateFileSinkType() or - TSqlSinkType() + TSqlSinkType() or + TOtherMaDSinkType() /** A class that can be predicted by endpoint scoring models. */ abstract class EndpointType extends TEndpointType { @@ -56,7 +57,7 @@ class SqlSinkType extends EndpointType, TSqlSinkType { override string getKind() { result = "sql" } } -/** Other SQL injection sinks that are not yet included in the MaD sink kinds. */ +/** Other SQL injection sinks that are not yet included in the MaD sink `kind`s. */ class SqlInjectionOtherSinkType extends EndpointType, TSqlInjectionOtherSinkType { override string getDescription() { result = "java persistence or mongodb or other query injection sink" @@ -76,7 +77,7 @@ class CreateFileSinkType extends EndpointType, TCreateFileSinkType { override string getKind() { result = "create-file" } } -/** Other tainted path injection sinks that are not yet included in the MaD sink kinds. */ +/** Other tainted path injection sinks that are not yet included in the MaD sink `kind`s. */ class TaintedPathOtherSinkType extends EndpointType, TTaintedPathOtherSinkType { override string getDescription() { result = "other path injection sink" } @@ -103,9 +104,7 @@ class JdbcUrlSinkType extends EndpointType, TJdbcUrlSinkType { override string getKind() { result = "jdbc-url" } } -/** - * Other SSRF sinks that are not yet included in the MaD sink kinds. 
- */ +/** Other SSRF sinks that are not yet included in the MaD sink `kind`s. */ class RequestForgeryOtherSinkType extends EndpointType, TRequestForgeryOtherSinkType { override string getDescription() { result = "other server-side request forgery sink" } @@ -113,3 +112,12 @@ class RequestForgeryOtherSinkType extends EndpointType, TRequestForgeryOtherSink override string getKind() { result = "ssrf-other" } } + +/** Other sinks modeled by a MaD `kind` but not belonging to any of the existing sink types. The description of this type is also the value of `OtherMaDSinkCharacteristic`. NOTE(review): "other-sink" looks like a placeholder rather than an actual MaD `kind` - confirm. */ +class OtherMaDSinkType extends EndpointType, TOtherMaDSinkType { + override string getDescription() { result = "other sink" } + + override int getEncoding() { result = 8 } + + override string getKind() { result = "other-sink" } +} diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql index 494b674b348..39cd9ec4d39 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql @@ -24,16 +24,29 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestF from DataFlow::Node sink, AtmConfig::AtmConfig config, EndpointType sinkType, string message where - config.isKnownSink(sink, sinkType) and // If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query // when there's a codex-generated data extension file in `java/ql/lib/ext`. not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _) and - // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be - // treated by the actual query as a sanitizer, since the final logic is something like - // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt. 
-  not config.isSanitizer(sink) and
-  message =
-    sinkType.getDescription() + "\n" +
-      // Extract the needed metadata for this endpoint.
-      any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata))
+  // `and` binds tighter than `or`, so both branches are parenthesized together to keep the guard above in force.
+  (
+    // Extract positive examples of sinks belonging to the existing ATM query configurations.
+    config.isKnownSink(sink, sinkType) and
+    // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
+    // treated by the actual query as a sanitizer, since the final logic is something like
+    // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt.
+    not config.isSanitizer(sink) and
+    message =
+      sinkType.getDescription() + "\n" +
+        // Extract the needed metadata for this endpoint.
+        any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata))
+    or
+    // Extract positive examples of sinks with other MaD `kind`s, not yet modeled in an ATM query configuration.
+    exists(string kind, EndpointCharacteristics::OtherMaDSinkCharacteristic characteristic |
+      characteristic.appliesToEndpoint(sink, kind) and
+      message =
+        characteristic + "\n" + kind + "\n" +
+          // Extract the needed metadata for this endpoint.
+          any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata))
+    )
+  )
 select sink, message