From a9cbc1229bb759a5e215b882f318600b76e20e7a Mon Sep 17 00:00:00 2001 From: Stephan Brandauer Date: Mon, 14 Aug 2023 11:46:40 +0200 Subject: [PATCH] Java: Automodel application mode: include candidates that are useful for regression testing --- ...AutomodelApplicationModeExtractCandidates.ql | 16 +++++++++++----- ...delApplicationModeExtractPositiveExamples.ql | 2 +- .../AutomodelSharedCharacteristics.qll | 17 ++++++++++------- ...delApplicationModeExtractCandidates.expected | 7 ++++--- .../Test.java | 3 +-- 5 files changed, 27 insertions(+), 18 deletions(-) diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql index d58af008d87..b2f6c0f9e3f 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql +++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql @@ -55,7 +55,7 @@ private Endpoint getSampleForSignature( from Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature, - DollarAtString input, DollarAtString isVarargsArray + DollarAtString input, DollarAtString isVarargsArray, DollarAtString alreadyAiModeled where not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u | u.appliesToEndpoint(endpoint) @@ -67,20 +67,25 @@ where // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. - not CharacteristicsImpl::isSink(endpoint, _, _) and + ( + not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = "false" + or + CharacteristicsImpl::isSink(endpoint, _, any(string s | s.matches("%ai-%"))) and + alreadyAiModeled = "true" + ) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and includeAutomodelCandidate(package, type, name, signature) and // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be // a non-sink, and we surface only endpoints that have at least one such sink type. message = strictconcat(AutomodelEndpointTypes::SinkType sinkType | - not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and + not CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and CharacteristicsImpl::isSinkCandidate(endpoint, sinkType) | sinkType, ", " ) select endpoint.asNode(), - message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", // + message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", // CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", // package, "package", // type, "type", // @@ -88,4 +93,5 @@ select endpoint.asNode(), name, "name", // method name signature, "signature", // input, "input", // - isVarargsArray, "isVarargsArray" + isVarargsArray, "isVarargsArray", // + alreadyAiModeled, "alreadyAiModeled" diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql index dac9bef0728..7341f512702 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql +++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql @@ -22,7 +22,7 @@ where not erroneousEndpoints(endpoint, _, _, _, _, false) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and // Extract positive examples of sinks belonging to the existing ATM query configurations. - CharacteristicsImpl::isKnownSink(endpoint, sinkType) and + CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext())) select endpoint.asNode(), sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", // diff --git a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll index c1a9a14a10a..845f8385e5b 100644 --- a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll @@ -94,14 +94,15 @@ module SharedCharacteristics { /** * Holds if `sink` is a known sink of type `endpointType`. */ - predicate isKnownSink(Candidate::Endpoint sink, Candidate::EndpointType endpointType) { + predicate isKnownSink( + Candidate::Endpoint sink, Candidate::EndpointType endpointType, + EndpointCharacteristic characteristic + ) { // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a // known sink for the class. not endpointType instanceof Candidate::NegativeEndpointType and - exists(EndpointCharacteristic characteristic | - characteristic.appliesToEndpoint(sink) and - characteristic.hasImplications(endpointType, true, maximalConfidence()) - ) + characteristic.appliesToEndpoint(sink) and + characteristic.hasImplications(endpointType, true, maximalConfidence()) } /** @@ -275,15 +276,17 @@ module SharedCharacteristics { private class KnownSinkCharacteristic extends SinkCharacteristic { string madKind; Candidate::EndpointType endpointType; + string provenance; KnownSinkCharacteristic() { Candidate::isKnownKind(madKind, endpointType) and // bind "this" to a unique string differing from that of the SinkType classes - this = madKind + "-characteristic" + this = madKind + "_" + provenance + "_characteristic" and + Candidate::isSink(_, madKind, provenance) } override predicate appliesToEndpoint(Candidate::Endpoint e) { - Candidate::isSink(e, madKind, _) + Candidate::isSink(e, madKind, provenance) } override Candidate::EndpointType getSinkType() { result = endpointType } diff --git a/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected b/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected index 5f946f49d19..7903223bee6 100644 --- a/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected +++ b/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected @@ -1,3 +1,4 @@ -| Test.java:16:3:16:11 | reference | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@. | Test.java:16:3:16:24 | set(...) | CallContext | file://java.util.concurrent.atomic:1:1:1:1 | java.util.concurrent.atomic | package | file://AtomicReference:1:1:1:1 | AtomicReference | type | file://false:1:1:1:1 | false | subtypes | file://set:1:1:1:1 | set | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray | -| Test.java:21:3:21:10 | supplier | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@. | Test.java:21:3:21:16 | get(...) | CallContext | file://java.util.function:1:1:1:1 | java.util.function | package | file://Supplier:1:1:1:1 | Supplier | type | file://true:1:1:1:1 | true | subtypes | file://get:1:1:1:1 | get | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray | -| Test.java:53:4:53:4 | o | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@. | Test.java:51:3:56:3 | walk(...) | CallContext | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://true:1:1:1:1 | true | isVarargsArray | +| Test.java:16:3:16:11 | reference | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:16:3:16:24 | set(...) | CallContext | file://java.util.concurrent.atomic:1:1:1:1 | java.util.concurrent.atomic | package | file://AtomicReference:1:1:1:1 | AtomicReference | type | file://false:1:1:1:1 | false | subtypes | file://set:1:1:1:1 | set | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray | file://false:1:1:1:1 | false | alreadyAiModeled | +| Test.java:21:3:21:10 | supplier | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:21:3:21:16 | get(...) | CallContext | file://java.util.function:1:1:1:1 | java.util.function | package | file://Supplier:1:1:1:1 | Supplier | type | file://true:1:1:1:1 | true | subtypes | file://get:1:1:1:1 | get | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray | file://false:1:1:1:1 | false | alreadyAiModeled | +| Test.java:34:4:34:11 | openPath | command-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:33:10:35:3 | newInputStream(...) | CallContext | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://false:1:1:1:1 | false | isVarargsArray | file://true:1:1:1:1 | true | alreadyAiModeled | +| Test.java:53:4:53:4 | o | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:51:3:56:3 | walk(...) | CallContext | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://true:1:1:1:1 | true | isVarargsArray | file://false:1:1:1:1 | false | alreadyAiModeled | diff --git a/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/Test.java b/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/Test.java index 1b5cea4b907..1a660f7752f 100644 --- a/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/Test.java +++ b/java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/Test.java @@ -31,7 +31,7 @@ class Test { public static InputStream getInputStream(Path openPath) throws Exception { return Files.newInputStream( - openPath // positive example (known sink) + openPath // positive example (known sink), candidate ("only" ai-modeled, and useful as a candidate in regression testing) ); } @@ -56,4 +56,3 @@ class Test { ); } } -