From 8f6db6b244f7b2a377897bf74dc387efc66181c9 Mon Sep 17 00:00:00 2001 From: tiferet Date: Mon, 13 Feb 2023 14:35:14 -0800 Subject: [PATCH] Switch back to one sink type per supported query, rather than existing MaD `kind`s. --- .../EndpointCharacteristics.qll | 102 +++++++----------- .../adaptivethreatmodeling/EndpointTypes.qll | 68 +++--------- .../RequestForgeryATM.qll | 6 +- .../SqlInjectionATM.qll | 2 +- .../adaptivethreatmodeling/TaintedPathATM.qll | 4 +- .../src/ExtractPositiveExamples.ql | 10 -- .../src/ExtractSinkCandidatesWithFlow.ql | 15 +-- 7 files changed, 61 insertions(+), 146 deletions(-) diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll index 887a31494fc..0b13c7bac95 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll @@ -43,9 +43,10 @@ predicate erroneousEndpoints( not ( characteristic instanceof IsSanitizerCharacteristic or characteristic2 instanceof IsSanitizerCharacteristic - ) and - // We currently know of some sink types that overlap with one another. - not knownOverlappingCharacteristics(characteristic, characteristic2) + ) + // and + // // We currently know of some sink types that overlap with one another. + // not knownOverlappingCharacteristics(characteristic, characteristic2) ) and errorMessage = "Endpoint has high-confidence positive indicators for multiple classes" or @@ -70,20 +71,19 @@ predicate erroneousConfidences( errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]" } -/** - * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that - * have some overlap in their known sinks (always for the same query type). This is not necessarily a problem, because - * both characteristics belong to the same query. - */ -private predicate knownOverlappingCharacteristics( - EndpointCharacteristics::EndpointCharacteristic characteristic1, - EndpointCharacteristics::EndpointCharacteristic characteristic2 -) { - characteristic1 != characteristic2 and - characteristic1 = ["file creation sink", "other path injection sink"] and - characteristic2 = ["file creation sink", "other path injection sink"] -} - +// /** +// * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that +// * have some overlap in their known sinks (always for the same query type). This is not necessarily a problem, because +// * both characteristics belong to the same query. +// */ +// private predicate knownOverlappingCharacteristics( +// EndpointCharacteristics::EndpointCharacteristic characteristic1, +// EndpointCharacteristics::EndpointCharacteristic characteristic2 +// ) { +// characteristic1 != characteristic2 and +// characteristic1 = ["file creation sink", "other path injection sink"] and +// characteristic2 = ["file creation sink", "other path injection sink"] +// } predicate isTypeAccess(DataFlow::Node n) { n.asExpr() instanceof TypeAccess } /** @@ -282,28 +282,28 @@ abstract class EndpointCharacteristic extends string { // } // } /** - * Endpoints identified as "create-file" sinks by the MaD modeling are file creation sinks with maximal confidence. + * Endpoints identified as "create-file" sinks by the MaD modeling are tainted path sinks with maximal confidence. */ private class CreateFileSinkCharacteristic extends EndpointCharacteristic { - CreateFileSinkCharacteristic() { this = any(CreateFileSinkType type).getDescription() } + CreateFileSinkCharacteristic() { this = "create file" } override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "create-file") } override predicate hasImplications( EndpointType endpointClass, boolean isPositiveIndicator, float confidence ) { - endpointClass instanceof CreateFileSinkType and + endpointClass instanceof TaintedPathSinkType and isPositiveIndicator = true and confidence = maximalConfidence() } } /** - * Endpoints identified as "TaintedPathSink" by the standard Java libraries are path injection sinks with maximal + * Endpoints identified as "PathCreation" by the standard Java libraries are path injection sinks with maximal * confidence. */ -private class TaintedPathSinkCharacteristic extends EndpointCharacteristic { - TaintedPathSinkCharacteristic() { this = any(TaintedPathOtherSinkType type).getDescription() } +private class CreatePathSinkCharacteristic extends EndpointCharacteristic { + CreatePathSinkCharacteristic() { this = "create path" } override predicate appliesToEndpoint(DataFlow::Node n) { n.asExpr() = any(PathCreation p).getAnInput() @@ -312,7 +312,7 @@ private class TaintedPathSinkCharacteristic extends EndpointCharacteristic { override predicate hasImplications( EndpointType endpointClass, boolean isPositiveIndicator, float confidence ) { - endpointClass instanceof TaintedPathOtherSinkType and + endpointClass instanceof TaintedPathSinkType and isPositiveIndicator = true and confidence = maximalConfidence() } @@ -321,8 +321,8 @@ private class TaintedPathSinkCharacteristic extends EndpointCharacteristic { /** * Endpoints identified as "sql" sinks by the MaD modeling are SQL sinks with maximal confidence. */ -private class SqlSinkCharacteristic extends EndpointCharacteristic { - SqlSinkCharacteristic() { this = any(SqlSinkType type).getDescription() } +private class SqlMaDSinkCharacteristic extends EndpointCharacteristic { + SqlMaDSinkCharacteristic() { this = "mad modeled sql" } override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "sql") } @@ -340,7 +340,7 @@ private class SqlSinkCharacteristic extends EndpointCharacteristic { * confidence. */ private class SqlInjectionSinkCharacteristic extends EndpointCharacteristic { - SqlInjectionSinkCharacteristic() { this = any(SqlInjectionOtherSinkType type).getDescription() } + SqlInjectionSinkCharacteristic() { this = "other modeled sql" } override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof QueryInjectionSink and @@ -350,54 +350,52 @@ private class SqlInjectionSinkCharacteristic extends EndpointCharacteristic { override predicate hasImplications( EndpointType endpointClass, boolean isPositiveIndicator, float confidence ) { - endpointClass instanceof SqlInjectionOtherSinkType and + endpointClass instanceof SqlSinkType and isPositiveIndicator = true and confidence = maximalConfidence() } } /** - * Endpoints identified as "open-url" sinks by the MaD modeling are URL opening sinks with maximal confidence. + * Endpoints identified as "open-url" sinks by the MaD modeling are SSRF sinks with maximal confidence. */ private class UrlOpenSinkCharacteristic extends EndpointCharacteristic { - UrlOpenSinkCharacteristic() { this = any(UrlOpenSinkType type).getDescription() } + UrlOpenSinkCharacteristic() { this = "open url" } override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "open-url") } override predicate hasImplications( EndpointType endpointClass, boolean isPositiveIndicator, float confidence ) { - endpointClass instanceof UrlOpenSinkType and + endpointClass instanceof RequestForgerySinkType and isPositiveIndicator = true and confidence = maximalConfidence() } } /** - * Endpoints identified as "jdbc-url" sinks by the MaD modeling are JDBC URL opening sinks with maximal confidence. + * Endpoints identified as "jdbc-url" sinks by the MaD modeling are SSRF sinks with maximal confidence. */ private class JdbcUrlSinkCharacteristic extends EndpointCharacteristic { - JdbcUrlSinkCharacteristic() { this = any(JdbcUrlSinkType type).getDescription() } + JdbcUrlSinkCharacteristic() { this = "jdbc url" } override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "jdbc-url") } override predicate hasImplications( EndpointType endpointClass, boolean isPositiveIndicator, float confidence ) { - endpointClass instanceof JdbcUrlSinkType and + endpointClass instanceof RequestForgerySinkType and isPositiveIndicator = true and confidence = maximalConfidence() } } /** - * Endpoints identified as "RequestForgerySink" by the standard Java libraries but not by MaD models are server-side - * request forgery sinks with maximal confidence. + * Endpoints identified as "RequestForgerySink" by the standard Java libraries but not by MaD models are SSRF sinks with + * maximal confidence. */ private class RequestForgeryOtherSinkCharacteristic extends EndpointCharacteristic { - RequestForgeryOtherSinkCharacteristic() { - this = any(RequestForgeryOtherSinkType type).getDescription() - } + RequestForgeryOtherSinkCharacteristic() { this = "request forgery" } override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof RequestForgerySink and @@ -408,36 +406,12 @@ private class RequestForgeryOtherSinkCharacteristic extends EndpointCharacterist override predicate hasImplications( EndpointType endpointClass, boolean isPositiveIndicator, float confidence ) { - endpointClass instanceof RequestForgeryOtherSinkType and + endpointClass instanceof RequestForgerySinkType and isPositiveIndicator = true and confidence = maximalConfidence() } } -/** - * Endpoints identified by one of the MaD `kind`s that don't belong to any of the existing endpoint types. - */ -class OtherMaDSinkCharacteristic extends EndpointCharacteristic { - OtherMaDSinkCharacteristic() { this = any(OtherMaDSinkType type).getDescription() } - - override predicate appliesToEndpoint(DataFlow::Node n) { - exists(string kind | sinkNode(n, kind) | not kind = any(EndpointType type).getKind()) - } - - override predicate hasImplications( - EndpointType endpointClass, boolean isPositiveIndicator, float confidence - ) { - endpointClass instanceof OtherMaDSinkType and - isPositiveIndicator = true and - confidence = maximalConfidence() - } - - predicate appliesToEndpoint(DataFlow::Node n, string kind) { - sinkNode(n, kind) and - not kind = any(EndpointType type).getKind() - } -} - /* * Characteristics that are indicative of not being a sink of any type, and have historically been used to select * negative samples for training. diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll index 6632c5a69c5..acb63761bf8 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll @@ -7,15 +7,9 @@ */ newtype TEndpointType = TNegativeType() or - TXssSinkType() or - TNosqlInjectionSinkType() or - TSqlInjectionOtherSinkType() or - TTaintedPathOtherSinkType() or - TRequestForgeryOtherSinkType() or - TUrlOpenSinkType() or - TJdbcUrlSinkType() or - TCreateFileSinkType() or TSqlSinkType() or + TTaintedPathSinkType() or + TRequestForgerySinkType() or TOtherMaDSinkType() /** A class that can be predicted by endpoint scoring models. */ @@ -48,7 +42,7 @@ class NegativeType extends EndpointType, TNegativeType { override string getKind() { result = "" } } -/** Sinks of MaD kind `sql` */ +/** All sinks relevant to the SQL injection query */ class SqlSinkType extends EndpointType, TSqlSinkType { override string getDescription() { result = "sql injection sink" } @@ -57,67 +51,29 @@ class SqlSinkType extends EndpointType, TSqlSinkType { override string getKind() { result = "sql" } } -/** Other SQL injection sinks that are not yet included in the MaD sink `kind`s. */ -class SqlInjectionOtherSinkType extends EndpointType, TSqlInjectionOtherSinkType { - override string getDescription() { - result = "java persistence or mongodb or other query injection sink" - } +/** All sinks relevant to the tainted path injection query. */ +class TaintedPathSinkType extends EndpointType, TTaintedPathSinkType { + override string getDescription() { result = "path injection sink" } override int getEncoding() { result = 2 } - override string getKind() { result = "sql-other" } + override string getKind() { result = "tainted-path" } } -/** Sinks of MaD kind `create-file` */ -class CreateFileSinkType extends EndpointType, TCreateFileSinkType { - override string getDescription() { result = "file creation sink" } +/** All sinks relevant to the SSRF query. */ +class RequestForgerySinkType extends EndpointType, TRequestForgerySinkType { + override string getDescription() { result = "ssrf sink" } override int getEncoding() { result = 3 } - override string getKind() { result = "create-file" } -} - -/** Other tainted path injection sinks that are not yet included in the MaD sink `kind`s. */ -class TaintedPathOtherSinkType extends EndpointType, TTaintedPathOtherSinkType { - override string getDescription() { result = "other path injection sink" } - - override int getEncoding() { result = 4 } - - override string getKind() { result = "tainted-path-other" } -} - -/** Sinks of MaD kind `open-url`. */ -class UrlOpenSinkType extends EndpointType, TUrlOpenSinkType { - override string getDescription() { result = "url opening sink" } - - override int getEncoding() { result = 5 } - - override string getKind() { result = "open-url" } -} - -/** Sinks of MaD kind `jdbc-url`. */ -class JdbcUrlSinkType extends EndpointType, TJdbcUrlSinkType { - override string getDescription() { result = "jdbc url sink" } // TODO: What's a good description of this sink type? - - override int getEncoding() { result = 6 } - - override string getKind() { result = "jdbc-url" } -} - -/** Other SSRF sinks that are not yet included in the MaD sink `kind`s. */ -class RequestForgeryOtherSinkType extends EndpointType, TRequestForgeryOtherSinkType { - override string getDescription() { result = "other server-side request forgery sink" } - - override int getEncoding() { result = 7 } - - override string getKind() { result = "ssrf-other" } + override string getKind() { result = "ssrf" } } /** Other sinks modeled by a MaD `kind` but not belonging to any of the existing sink types. */ class OtherMaDSinkType extends EndpointType, TOtherMaDSinkType { override string getDescription() { result = "other sink" } - override int getEncoding() { result = 8 } + override int getEncoding() { result = 4 } override string getKind() { result = "other-sink" } } diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll index fce3612491e..2ce83096433 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll @@ -23,11 +23,7 @@ class RequestForgeryAtmConfig extends AtmConfig { not source.asExpr().(MethodAccess).getCallee() instanceof UrlConnectionGetInputStreamMethod } - override EndpointType getASinkEndpointType() { - result instanceof RequestForgeryOtherSinkType or - result instanceof UrlOpenSinkType or - result instanceof JdbcUrlSinkType - } + override EndpointType getASinkEndpointType() { result instanceof RequestForgerySinkType } /* * This is largely a copy of the taint tracking configuration for the standard SSRF diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll index 0cc43f3370a..628cd9d0f50 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll @@ -16,7 +16,7 @@ class SqlInjectionAtmConfig extends AtmConfig { override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource } override EndpointType getASinkEndpointType() { - result instanceof SqlInjectionOtherSinkType or result instanceof SqlSinkType + result instanceof SqlSinkType or result instanceof SqlSinkType } /* diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll index 479bbecdbe9..d216c0c9567 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll @@ -16,9 +16,7 @@ class TaintedPathAtmConfig extends AtmConfig { override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - override EndpointType getASinkEndpointType() { - result instanceof TaintedPathOtherSinkType or result instanceof CreateFileSinkType - } + override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType } /* * This is largely a copy of the taint tracking configuration for the standard path injection diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql index 39cd9ec4d39..c35d997f920 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql @@ -39,14 +39,4 @@ where // Extract the needed metadata for this endpoint. any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata)) ) - or - // Extract positive examples of sinks belonging to other MaD `kind`s for sink types / queries we have not yet modeled - // in an ATM query configuration. - exists(string kind, EndpointCharacteristics::OtherMaDSinkCharacteristic charecteristic | - charecteristic.appliesToEndpoint(sink, kind) and - message = - charecteristic + "\n" + kind + "\n" + - // Extract the needed metadata for this endpoint. - any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata)) - ) select sink, message diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql index 43ff479e1c3..b9fe965623b 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractSinkCandidatesWithFlow.ql @@ -23,13 +23,14 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestF from DataFlow::Node sink, string message where - // If a node is already a know MaD sink for any of our queries, we don't include it as a candidate. Otherwise, we might - // include it as a candidate for query A, but the model will label it as a sink for one of the sink types of query B, - // for which it's already a known sink. This would result in overlap between our detected sinks and the pre-existing - // modeling. We assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any - // additional sink types. - not exists(AtmConfig::AtmConfig config, EndpointType sinkType | - sinkType = config.getASinkEndpointType() and sinkNode(sink, sinkType.getKind()) + // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we + // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will + // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in + // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been + // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. + not exists(AtmConfig::AtmConfig config, string kind | + config.isKnownSink(sink) and + sinkNode(sink, kind) ) and // The message is the concatenation of all relevant configs, and we surface only sinks that have at least one relevant // config.