From 8e429bd3996ec917443a96acefdf37cb792705cf Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 11 Jan 2024 11:05:31 +0000 Subject: [PATCH 01/18] Rename `isSinkCandidate` (and a related predicate) to `isCandidate`. This reflects the fact that these predicates also deal with source candidates. --- ...tomodelApplicationModeExtractCandidates.ql | 2 +- ...AutomodelFrameworkModeExtractCandidates.ql | 2 +- .../src/AutomodelSharedCharacteristics.qll | 24 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql index 0d6814dbd01..391e3e48e7c 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql @@ -63,7 +63,7 @@ where not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u | u.appliesToEndpoint(endpoint) ) and - CharacteristicsImpl::isSinkCandidate(endpoint, _) and + CharacteristicsImpl::isCandidate(endpoint, _) and endpoint = getSampleForSignature(9, package, type, subtypes, name, signature, input, output, isVarargsArray, extensibleType) and diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql index c2ab6f33ee7..fa4dc6f0189 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql @@ -25,7 +25,7 @@ where not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u | u.appliesToEndpoint(endpoint) ) and - CharacteristicsImpl::isSinkCandidate(endpoint, _) and + CharacteristicsImpl::isCandidate(endpoint, _) and // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index 3704845fe17..5442f713a30 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -117,13 +117,14 @@ module SharedCharacteristics { } /** - * Holds if the candidate sink `candidateSink` should be considered as a possible sink of type `sinkType`, and - * classified by the ML model. A candidate sink is a node that cannot be excluded from `sinkType` based on its - * characteristics. + * Holds if the given `endpoint` should be considered as a candidate for type `endpointType`, + * and classified by the ML model. + * + * A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics. */ - predicate isSinkCandidate(Candidate::Endpoint candidateSink, Candidate::EndpointType sinkType) { + predicate isCandidate(Candidate::Endpoint candidateSink, Candidate::EndpointType sinkType) { not sinkType instanceof Candidate::NegativeEndpointType and - not exists(getAReasonSinkExcluded(candidateSink, sinkType)) + not exists(getAnExcludingCharacteristic(candidateSink, sinkType)) } /** @@ -139,15 +140,14 @@ module SharedCharacteristics { } /** - * Gets the list of characteristics that cause `candidateSink` to be excluded as an effective sink for a given sink - * type. + * Gets a characteristics that disbar `endpoint` from being a candidate for `endpointType`. */ - EndpointCharacteristic getAReasonSinkExcluded( - Candidate::Endpoint candidateSink, Candidate::EndpointType sinkType + EndpointCharacteristic getAnExcludingCharacteristic( + Candidate::Endpoint endpoint, Candidate::EndpointType endpointType ) { // An endpoint is a sink candidate if none of its characteristics give much indication whether or not it is a sink. - not sinkType instanceof Candidate::NegativeEndpointType and - result.appliesToEndpoint(candidateSink) and + not endpointType instanceof Candidate::NegativeEndpointType and + result.appliesToEndpoint(endpoint) and ( // Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type. exists(float confidence | @@ -158,7 +158,7 @@ module SharedCharacteristics { // Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type. exists(float confidence | confidence >= mediumConfidence() and - result.hasImplications(sinkType, false, confidence) + result.hasImplications(endpointType, false, confidence) ) ) } From a6d996b4781246c33d376cc9215139b4b0d7fe0e Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 11 Jan 2024 11:27:34 +0000 Subject: [PATCH 02/18] Add an example of a missed source candidate. `Files.list` has a taint step from its first argument to its result, so that first argument should not be considered a sink candidate (and it is not). However, due to a bug in `IsMaDTaintStepCharacteristic` it is also not considered a source candidate, which is wrong: as the example shows, if that argument is a call we do very much want to consider it as a source candidate. --- .../AutomodelApplicationModeExtractCandidates.expected | 1 + ...odelApplicationModeExtractPositiveExamples.expected | 1 + .../test/AutomodelApplicationModeExtraction/Test.java | 10 +++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected index a7be708f9da..908052075da 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected @@ -11,3 +11,4 @@ | Test.java:56:4:56:4 | o | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:54:3:59:3 | walk(...) | MethodDoc | Test.java:54:3:59:3 | walk(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://:1:1:1:1 | | output | file://true:1:1:1:1 | true | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:63:3:63:3 | c | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:63:3:63:20 | getInputStream(...) | CallContext | Test.java:63:3:63:3 | c | MethodDoc | Test.java:63:3:63:3 | c | ClassDoc | file://java.net:1:1:1:1 | java.net | package | file://URLConnection:1:1:1:1 | URLConnection | type | file://true:1:1:1:1 | true | subtypes | file://getInputStream:1:1:1:1 | getInputStream | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:68:30:68:47 | writer | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:68:30:68:47 | writer | CallContext | Test.java:68:30:68:47 | writer | MethodDoc | Test.java:68:30:68:47 | writer | ClassDoc | file://java.lang:1:1:1:1 | java.lang | package | file://Throwable:1:1:1:1 | Throwable | type | file://true:1:1:1:1 | true | subtypes | file://printStackTrace:1:1:1:1 | printStackTrace | name | file://(PrintWriter):1:1:1:1 | (PrintWriter) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| Test.java:86:3:88:3 | list(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:86:3:88:3 | list(...) | CallContext | Test.java:86:3:88:3 | list(...) | MethodDoc | Test.java:86:3:88:3 | list(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://list:1:1:1:1 | list | name | file://(Path):1:1:1:1 | (Path) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractPositiveExamples.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractPositiveExamples.expected index 60db0852024..bdc5667935b 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractPositiveExamples.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractPositiveExamples.expected @@ -2,3 +2,4 @@ | Test.java:30:4:30:9 | target | path-injection\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:28:3:32:3 | copy(...) | CallContext | Test.java:30:4:30:9 | target | MethodDoc | Test.java:30:4:30:9 | target | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://copy:1:1:1:1 | copy | name | file://(Path,Path,CopyOption[]):1:1:1:1 | (Path,Path,CopyOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:37:4:37:11 | openPath | path-injection\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:36:10:38:3 | newInputStream(...) | CallContext | Test.java:37:4:37:11 | openPath | MethodDoc | Test.java:37:4:37:11 | openPath | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:63:3:63:20 | getInputStream(...) | remote\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:63:3:63:20 | getInputStream(...) | CallContext | Test.java:63:3:63:20 | getInputStream(...) | MethodDoc | Test.java:63:3:63:20 | getInputStream(...) | ClassDoc | file://java.net:1:1:1:1 | java.net | package | file://URLConnection:1:1:1:1 | URLConnection | type | file://true:1:1:1:1 | true | subtypes | file://getInputStream:1:1:1:1 | getInputStream | name | file://():1:1:1:1 | () | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| Test.java:87:28:87:28 | p | path-injection\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:87:4:87:29 | createDirectories(...) | CallContext | Test.java:87:28:87:28 | p | MethodDoc | Test.java:87:28:87:28 | p | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://createDirectories:1:1:1:1 | createDirectories | name | file://(Path,FileAttribute[]):1:1:1:1 | (Path,FileAttribute[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java b/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java index 3851a689969..e915d452ad8 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java @@ -52,7 +52,7 @@ class Test { public static void FilesWalkExample(Path p, FileVisitOption o) throws Exception { Files.walk( // the call is a source candidate - p, // negative example (modeled as a taint step) + p, // negative sink example (modeled as a taint step) o, // the implicit varargs array is a candidate o // not a candidate (only the first arg corresponding to a varargs array // is extracted) @@ -80,3 +80,11 @@ class TaskUtils { return ft; } } + +class MoreTests { + public static void FilesListExample(Path p) throws Exception { + Files.list( // the call is a source candidate + Files.createDirectories(p) // the call is a source candidate, but not a sink candidate (modeled as a taint step) + ); + } +} \ No newline at end of file From 03ca244df27e825f307dda08bc3fb1ce19ead20e Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 11 Jan 2024 11:34:40 +0000 Subject: [PATCH 03/18] Associate endpoints with their potential endpoint types and check these when determining candidates. This prevents us from associating a sink candidate with a source type and vice versa. However, this does not fix the problem of negative characteristics for sink types excluding source candidates. --- .../src/AutomodelApplicationModeCharacteristics.qll | 12 ++++++++++++ .../src/AutomodelFrameworkModeCharacteristics.qll | 12 ++++++++++++ .../automodel/src/AutomodelSharedCharacteristics.qll | 11 +++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index b124e453fbc..3f8250629b8 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -96,6 +96,18 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint else none() // if both exist, it would be a summaryModel (not yet supported) } + /** + * Gets a potential type of this endpoint to make sure that sources are + * associated with source types and sinks with sink types. + */ + AutomodelEndpointTypes::EndpointType getAPotentialType() { + this.getExtensibleType() = "sourceModel" and + result instanceof AutomodelEndpointTypes::SourceType + or + this.getExtensibleType() = "sinkModel" and + result instanceof AutomodelEndpointTypes::SinkType + } + abstract string toString(); } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index c6030874def..8b275b5282c 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -90,6 +90,18 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint { abstract string getExtensibleType(); + /** + * Gets a potential type of this endpoint to make sure that sources are + * associated with source types and sinks with sink types. + */ + AutomodelEndpointTypes::EndpointType getAPotentialType() { + this.getExtensibleType() = "sourceModel" and + result instanceof AutomodelEndpointTypes::SourceType + or + this.getExtensibleType() = "sinkModel" and + result instanceof AutomodelEndpointTypes::SinkType + } + string toString() { result = this.asTop().toString() } Location getLocation() { result = this.asTop().getLocation() } diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index 5442f713a30..d27426f9539 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -16,7 +16,9 @@ signature module CandidateSig { * An endpoint is a potential candidate for modeling. This will typically be bound to the language's * DataFlow node class, or a subtype thereof. */ - class Endpoint; + class Endpoint { + EndpointType getAPotentialType(); + } /** * A related location for an endpoint. This will typically be bound to the supertype of all AST nodes (eg., `Top`). @@ -122,9 +124,10 @@ module SharedCharacteristics { * * A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics. */ - predicate isCandidate(Candidate::Endpoint candidateSink, Candidate::EndpointType sinkType) { - not sinkType instanceof Candidate::NegativeEndpointType and - not exists(getAnExcludingCharacteristic(candidateSink, sinkType)) + predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) { + not endpointType instanceof Candidate::NegativeEndpointType and + endpointType = endpoint.getAPotentialType() and + not exists(getAnExcludingCharacteristic(endpoint, endpointType)) } /** From bcf4f4febd7c0272778eec30e88ba77d451ae648 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 11 Jan 2024 11:56:59 +0000 Subject: [PATCH 04/18] Drop a conjunct which is now spurious. --- .../ql/automodel/src/AutomodelApplicationModeCharacteristics.qll | 1 - 1 file changed, 1 deletion(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index 3f8250629b8..e6c42c0e8a9 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -474,7 +474,6 @@ private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASink IsMaDTaintStepCharacteristic() { this = "taint step" } override predicate appliesToEndpoint(Endpoint e) { - e.getExtensibleType() = "sinkModel" and FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _) or FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _) From ff4555ac5b6019bc18db60cbf1f488c2e6d7ac14 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 11 Jan 2024 12:04:56 +0000 Subject: [PATCH 05/18] Get rid of negative sink types. Instead of positively implying the negative sink type, negative sink characteristics now negatively imply all sink types (but not source types). This is simpler and sice we will never have a huge number of sink types it doesn't impact performance either. Changes to test results: - The call to `createDirectories` at `Test.java:87` is now correctly classified as a source candidate, having previously been erroneously excluded by a negative _sink_ characteristic. - The call to `compareTo` at `Test.java:48` is now erroneously classified as a source candidate; it should be suppressed by `IsSanitizerCharacteristic`, which is a negative sink characteristic, but should really be a negative source characteristic. - In framework mode, several endpoints are now erroneously classified as source candidates even though they have neutral models, because `NeutralModelCharacteristic` is currently only a negative sink characteristic and not a negative source characteristic. --- ...utomodelApplicationModeCharacteristics.qll | 4 +- ...lApplicationModeExtractNegativeExamples.ql | 18 +++---- .../automodel/src/AutomodelEndpointTypes.qll | 5 -- .../AutomodelFrameworkModeCharacteristics.qll | 4 +- ...delFrameworkModeExtractNegativeExamples.ql | 18 +++---- .../src/AutomodelSharedCharacteristics.qll | 47 ++++++++----------- ...lApplicationModeExtractCandidates.expected | 2 + ...cationModeExtractNegativeExamples.expected | 1 - ...delFrameworkModeExtractCandidates.expected | 2 + ...meworkModeExtractNegativeExamples.expected | 2 - 10 files changed, 50 insertions(+), 53 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index e6c42c0e8a9..d00a95d564e 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -259,7 +259,9 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig class EndpointType = AutomodelEndpointTypes::EndpointType; - class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType; + class SinkType = AutomodelEndpointTypes::SinkType; + + class SourceType = AutomodelEndpointTypes::SourceType; class RelatedLocation = Location::Top; diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql index 6097e2e22f9..2b324dd0848 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql @@ -48,18 +48,20 @@ from where endpoint = getSampleForCharacteristic(characteristic, 100) and extensibleType = endpoint.getExtensibleType() and + // the node is know not to be an endpoint of any appropriate type + forall(EndpointType tp | tp = endpoint.getAPotentialType() | + characteristic.hasImplications(tp, false, _) + ) and + // the lowest confidence across all endpoint types should be at least highConfidence + confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and confidence >= SharedCharacteristics::highConfidence() and - characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and - // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be - // treated by the actual query as a sanitizer, since the final logic is something like - // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because - // they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples. - not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType | - not positiveType instanceof NegativeSinkType and + // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes + // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. + not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2.hasImplications(positiveType, true, confidence2) + characteristic2.hasImplications(type2, true, confidence2) ) and message = characteristic select endpoint.asNode(), diff --git a/java/ql/automodel/src/AutomodelEndpointTypes.qll b/java/ql/automodel/src/AutomodelEndpointTypes.qll index 8b08722abe2..e37cc80099f 100644 --- a/java/ql/automodel/src/AutomodelEndpointTypes.qll +++ b/java/ql/automodel/src/AutomodelEndpointTypes.qll @@ -30,11 +30,6 @@ abstract class SinkType extends EndpointType { SinkType() { any() } } -/** The `Negative` class for non-sinks. */ -class NegativeSinkType extends SinkType { - NegativeSinkType() { this = "non-sink" } -} - /** A sink relevant to the SQL injection query */ class SqlInjectionSinkType extends SinkType { SqlInjectionSinkType() { this = "sql-injection" } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index 8b275b5282c..3fa71b192bf 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -214,7 +214,9 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { class EndpointType = AutomodelEndpointTypes::EndpointType; - class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType; + class SinkType = AutomodelEndpointTypes::SinkType; + + class SourceType = AutomodelEndpointTypes::SourceType; class RelatedLocation = Location::Top; diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql index 3cb23096015..ac043142f10 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql @@ -21,18 +21,20 @@ from where endpoint.getExtensibleType() = extensibleType and characteristic.appliesToEndpoint(endpoint) and + // the node is known not to be an endpoint of any appropriate type + forall(EndpointType tp | tp = endpoint.getAPotentialType() | + characteristic.hasImplications(tp, false, _) + ) and + // the lowest confidence across all endpoint types should be at least highConfidence + confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and confidence >= SharedCharacteristics::highConfidence() and - characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and - // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be - // treated by the actual query as a sanitizer, since the final logic is something like - // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because - // they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples. - not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType | - not positiveType instanceof NegativeSinkType and + // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes + // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. + not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2.hasImplications(positiveType, true, confidence2) + characteristic2.hasImplications(type2, true, confidence2) ) and message = characteristic select endpoint, diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index d27426f9539..5a6876a347e 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -33,14 +33,19 @@ signature module CandidateSig { class RelatedLocationType; /** - * A class kind for an endpoint. + * An endpoint type considered by this specification. */ class EndpointType extends string; /** - * An EndpointType that denotes the absence of any sink. + * A sink endpoint type considered by this specification. */ - class NegativeEndpointType extends EndpointType; + class SinkType extends EndpointType; + + /** + * A source endpoint type considered by this specification. + */ + class SourceType extends EndpointType; /** * Gets the endpoint as a location. @@ -105,7 +110,7 @@ module SharedCharacteristics { } /** - * Holds if `endpoint` is modeled as `endpointType` (endpoint type must not be negative). + * Holds if `endpoint` is modeled as `endpointType`. */ predicate isKnownAs( Candidate::Endpoint endpoint, Candidate::EndpointType endpointType, @@ -113,7 +118,6 @@ module SharedCharacteristics { ) { // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a // known sink for the class. - not endpointType instanceof Candidate::NegativeEndpointType and characteristic.appliesToEndpoint(endpoint) and characteristic.hasImplications(endpointType, true, maximalConfidence()) } @@ -125,7 +129,6 @@ module SharedCharacteristics { * A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics. */ predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) { - not endpointType instanceof Candidate::NegativeEndpointType and endpointType = endpoint.getAPotentialType() and not exists(getAnExcludingCharacteristic(endpoint, endpointType)) } @@ -143,26 +146,16 @@ module SharedCharacteristics { } /** - * Gets a characteristics that disbar `endpoint` from being a candidate for `endpointType`. + * Gets a characteristics that disbar `endpoint` from being a candidate for `endpointType` + * with at least medium confidence. */ EndpointCharacteristic getAnExcludingCharacteristic( Candidate::Endpoint endpoint, Candidate::EndpointType endpointType ) { - // An endpoint is a sink candidate if none of its characteristics give much indication whether or not it is a sink. - not endpointType instanceof Candidate::NegativeEndpointType and result.appliesToEndpoint(endpoint) and - ( - // Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type. - exists(float confidence | - confidence >= mediumConfidence() and - result.hasImplications(any(Candidate::NegativeEndpointType t), true, confidence) - ) - or - // Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type. - exists(float confidence | - confidence >= mediumConfidence() and - result.hasImplications(endpointType, false, confidence) - ) + exists(float confidence | + confidence >= mediumConfidence() and + result.hasImplications(endpointType, false, confidence) ) } @@ -253,8 +246,8 @@ module SharedCharacteristics { override predicate hasImplications( Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence ) { - endpointType instanceof Candidate::NegativeEndpointType and - isPositiveIndicator = true and + endpointType instanceof Candidate::SinkType and + isPositiveIndicator = false and confidence = highConfidence() } } @@ -272,8 +265,8 @@ module SharedCharacteristics { override predicate hasImplications( Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence ) { - endpointType instanceof Candidate::NegativeEndpointType and - isPositiveIndicator = true and + endpointType instanceof Candidate::SinkType and + isPositiveIndicator = false and confidence = mediumConfidence() } } @@ -293,8 +286,8 @@ module SharedCharacteristics { override predicate hasImplications( Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence ) { - endpointType instanceof Candidate::NegativeEndpointType and - isPositiveIndicator = true and + endpointType instanceof Candidate::SinkType and + isPositiveIndicator = false and confidence = mediumConfidence() } } diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected index 908052075da..f55f9be7976 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected @@ -7,8 +7,10 @@ | Test.java:36:10:38:3 | newInputStream(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:36:10:38:3 | newInputStream(...) | CallContext | Test.java:36:10:38:3 | newInputStream(...) | MethodDoc | Test.java:36:10:38:3 | newInputStream(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:37:4:37:11 | openPath | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:36:10:38:3 | newInputStream(...) | CallContext | Test.java:37:4:37:11 | openPath | MethodDoc | Test.java:37:4:37:11 | openPath | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://ai-manual:1:1:1:1 | ai-manual | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:43:4:43:22 | get(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:43:4:43:22 | get(...) | CallContext | Test.java:43:4:43:22 | get(...) | MethodDoc | Test.java:43:4:43:22 | get(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Paths:1:1:1:1 | Paths | type | file://false:1:1:1:1 | false | subtypes | file://get:1:1:1:1 | get | name | file://(String,String[]):1:1:1:1 | (String,String[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| Test.java:48:10:50:3 | compareTo(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:48:10:50:3 | compareTo(...) | MethodDoc | Test.java:48:10:50:3 | compareTo(...) | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:54:3:59:3 | walk(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:54:3:59:3 | walk(...) | MethodDoc | Test.java:54:3:59:3 | walk(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:56:4:56:4 | o | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:54:3:59:3 | walk(...) | MethodDoc | Test.java:54:3:59:3 | walk(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://:1:1:1:1 | | output | file://true:1:1:1:1 | true | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:63:3:63:3 | c | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:63:3:63:20 | getInputStream(...) | CallContext | Test.java:63:3:63:3 | c | MethodDoc | Test.java:63:3:63:3 | c | ClassDoc | file://java.net:1:1:1:1 | java.net | package | file://URLConnection:1:1:1:1 | URLConnection | type | file://true:1:1:1:1 | true | subtypes | file://getInputStream:1:1:1:1 | getInputStream | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:68:30:68:47 | writer | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:68:30:68:47 | writer | CallContext | Test.java:68:30:68:47 | writer | MethodDoc | Test.java:68:30:68:47 | writer | ClassDoc | file://java.lang:1:1:1:1 | java.lang | package | file://Throwable:1:1:1:1 | Throwable | type | file://true:1:1:1:1 | true | subtypes | file://printStackTrace:1:1:1:1 | printStackTrace | name | file://(PrintWriter):1:1:1:1 | (PrintWriter) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:86:3:88:3 | list(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:86:3:88:3 | list(...) | CallContext | Test.java:86:3:88:3 | list(...) | MethodDoc | Test.java:86:3:88:3 | list(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://list:1:1:1:1 | list | name | file://(Path):1:1:1:1 | (Path) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| Test.java:87:4:87:29 | createDirectories(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:87:4:87:29 | createDirectories(...) | CallContext | Test.java:87:4:87:29 | createDirectories(...) | MethodDoc | Test.java:87:4:87:29 | createDirectories(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://createDirectories:1:1:1:1 | createDirectories | name | file://(Path,FileAttribute[]):1:1:1:1 | (Path,FileAttribute[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected index 91f33b6fb05..97f5a39a283 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected @@ -1,3 +1,2 @@ -| Test.java:48:10:50:3 | compareTo(...) | known sanitizer\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:48:10:50:3 | compareTo(...) | MethodDoc | Test.java:48:10:50:3 | compareTo(...) | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:49:4:49:5 | f2 | known non-sink\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:49:4:49:5 | f2 | MethodDoc | Test.java:49:4:49:5 | f2 | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:55:4:55:4 | p | taint step\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:55:4:55:4 | p | MethodDoc | Test.java:55:4:55:4 | p | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected index 17880ae9e4e..80d354cd4f9 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected @@ -14,6 +14,8 @@ | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | com/github/codeql/test/PublicInterface.java:6:36:6:45 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:6:36:6:45 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:6:36:6:45 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://false:1:1:1:1 | false | subtypes | file://staticStuff:1:1:1:1 | staticStuff | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| java/io/File.java:4:16:4:24 | compareTo | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| java/io/File.java:5:9:5:21 | pathname | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:10:20:10:34 | setLastModified | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:10:20:10:34 | setLastModified | MethodDoc | java/io/File.java:10:20:10:34 | setLastModified | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://setLastModified:1:1:1:1 | setLastModified | name | file://(long):1:1:1:1 | (long) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:10:20:10:34 | setLastModified | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:10:20:10:34 | setLastModified | MethodDoc | java/io/File.java:10:20:10:34 | setLastModified | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://setLastModified:1:1:1:1 | setLastModified | name | file://(long):1:1:1:1 | (long) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | java/io/File.java:10:36:10:44 | time | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:10:36:10:44 | time | MethodDoc | java/io/File.java:10:36:10:44 | time | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://setLastModified:1:1:1:1 | setLastModified | name | file://(long):1:1:1:1 | (long) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://time:1:1:1:1 | time | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected index 5e01f052425..7c7fe623d09 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected @@ -1,4 +1,2 @@ -| java/io/File.java:4:16:4:24 | compareTo | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:4:16:4:24 | compareTo | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | -| java/io/File.java:5:9:5:21 | pathname | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:5:9:5:21 | pathname | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | From 6e9c90a6bb60d24c0be885c8b5c222aa28b8dc8d Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 11 Jan 2024 12:36:48 +0000 Subject: [PATCH 06/18] Properly distinguish negative source and sink characteristics. In particular, `IsSanitizerCharacteristic` is a negative _source_ characteristic (not a negative sink characteristic), while `NeutralModelCharacteristic` is both. This eliminates the erroneous test results. --- .../src/AutomodelSharedCharacteristics.qll | 37 ++++++++++++++++--- ...lApplicationModeExtractCandidates.expected | 1 - ...cationModeExtractNegativeExamples.expected | 3 +- ...delFrameworkModeExtractCandidates.expected | 2 - ...meworkModeExtractNegativeExamples.expected | 6 ++- 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index 5a6876a347e..6712eb6d1b8 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -252,6 +252,23 @@ module SharedCharacteristics { } } + /** + * A high-confidence characteristic that indicates that an endpoint is not a source of any type. These endpoints can be + * used as negative samples for training or for a few-shot prompt. + */ + abstract class NotASourceCharacteristic extends EndpointCharacteristic { + bindingset[this] + NotASourceCharacteristic() { any() } + + override predicate hasImplications( + Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence + ) { + endpointType instanceof Candidate::SourceType and + isPositiveIndicator = false and + confidence = highConfidence() + } + } + /** * A medium-confidence characteristic that indicates that an endpoint is unlikely to be a sink of any type. These * endpoints can be excluded from scoring at inference time, both to save time and to avoid false positives. They should @@ -340,17 +357,27 @@ module SharedCharacteristics { /** * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model. */ - private class NeutralModelCharacteristic extends NotASinkCharacteristic { - NeutralModelCharacteristic() { this = "known non-sink" } + private class NeutralModelCharacteristic extends NotASinkCharacteristic, + NotASourceCharacteristic + { + NeutralModelCharacteristic() { this = "known non-endpoint" } + + // this is a negative characteristic for both sinks and sources + override predicate hasImplications( + Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence + ) { + NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or + NotASourceCharacteristic.super + .hasImplications(endpointType, isPositiveIndicator, confidence) + } override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) } } /** - * A negative characteristic that indicates that an endpoint is not part of the source code for the project being - * analyzed. + * A negative characteristic that indicates that an endpoint is a sanitizer, and thus not a source. */ - private class IsSanitizerCharacteristic extends NotASinkCharacteristic { + private class IsSanitizerCharacteristic extends NotASourceCharacteristic { IsSanitizerCharacteristic() { this = "known sanitizer" } override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSanitizer(e, _) } diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected index f55f9be7976..41598f8858c 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractCandidates.expected @@ -7,7 +7,6 @@ | Test.java:36:10:38:3 | newInputStream(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:36:10:38:3 | newInputStream(...) | CallContext | Test.java:36:10:38:3 | newInputStream(...) | MethodDoc | Test.java:36:10:38:3 | newInputStream(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:37:4:37:11 | openPath | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:36:10:38:3 | newInputStream(...) | CallContext | Test.java:37:4:37:11 | openPath | MethodDoc | Test.java:37:4:37:11 | openPath | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://ai-manual:1:1:1:1 | ai-manual | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:43:4:43:22 | get(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:43:4:43:22 | get(...) | CallContext | Test.java:43:4:43:22 | get(...) | MethodDoc | Test.java:43:4:43:22 | get(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Paths:1:1:1:1 | Paths | type | file://false:1:1:1:1 | false | subtypes | file://get:1:1:1:1 | get | name | file://(String,String[]):1:1:1:1 | (String,String[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | -| Test.java:48:10:50:3 | compareTo(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:48:10:50:3 | compareTo(...) | MethodDoc | Test.java:48:10:50:3 | compareTo(...) | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:54:3:59:3 | walk(...) | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:54:3:59:3 | walk(...) | MethodDoc | Test.java:54:3:59:3 | walk(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | Test.java:56:4:56:4 | o | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:54:3:59:3 | walk(...) | MethodDoc | Test.java:54:3:59:3 | walk(...) | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://:1:1:1:1 | | output | file://true:1:1:1:1 | true | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:63:3:63:3 | c | Related locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:63:3:63:20 | getInputStream(...) | CallContext | Test.java:63:3:63:3 | c | MethodDoc | Test.java:63:3:63:3 | c | ClassDoc | file://java.net:1:1:1:1 | java.net | package | file://URLConnection:1:1:1:1 | URLConnection | type | file://true:1:1:1:1 | true | subtypes | file://getInputStream:1:1:1:1 | getInputStream | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected index 97f5a39a283..0118ef09719 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected @@ -1,2 +1,3 @@ -| Test.java:49:4:49:5 | f2 | known non-sink\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:49:4:49:5 | f2 | MethodDoc | Test.java:49:4:49:5 | f2 | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| Test.java:48:10:50:3 | compareTo(...) | known sanitizer\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:48:10:50:3 | compareTo(...) | MethodDoc | Test.java:48:10:50:3 | compareTo(...) | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| Test.java:49:4:49:5 | f2 | known non-endpoint\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:49:4:49:5 | f2 | MethodDoc | Test.java:49:4:49:5 | f2 | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:55:4:55:4 | p | taint step\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:55:4:55:4 | p | MethodDoc | Test.java:55:4:55:4 | p | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected index 80d354cd4f9..17880ae9e4e 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected @@ -14,8 +14,6 @@ | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | com/github/codeql/test/PublicInterface.java:6:36:6:45 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:6:36:6:45 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:6:36:6:45 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://false:1:1:1:1 | false | subtypes | file://staticStuff:1:1:1:1 | staticStuff | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | -| java/io/File.java:4:16:4:24 | compareTo | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | -| java/io/File.java:5:9:5:21 | pathname | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:10:20:10:34 | setLastModified | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:10:20:10:34 | setLastModified | MethodDoc | java/io/File.java:10:20:10:34 | setLastModified | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://setLastModified:1:1:1:1 | setLastModified | name | file://(long):1:1:1:1 | (long) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:10:20:10:34 | setLastModified | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:10:20:10:34 | setLastModified | MethodDoc | java/io/File.java:10:20:10:34 | setLastModified | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://setLastModified:1:1:1:1 | setLastModified | name | file://(long):1:1:1:1 | (long) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | java/io/File.java:10:36:10:44 | time | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:10:36:10:44 | time | MethodDoc | java/io/File.java:10:36:10:44 | time | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://setLastModified:1:1:1:1 | setLastModified | name | file://(long):1:1:1:1 | (long) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://time:1:1:1:1 | time | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected index 7c7fe623d09..5ede383087c 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected @@ -1,2 +1,4 @@ -| java/io/File.java:4:16:4:24 | compareTo | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | -| java/io/File.java:5:9:5:21 | pathname | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| java/io/File.java:4:16:4:24 | compareTo | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| java/io/File.java:4:16:4:24 | compareTo | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| java/io/File.java:5:9:5:21 | pathname | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| java/io/File.java:5:9:5:21 | pathname | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | From 9f443d4f83f305da0c2b2d7729c1ebe61ce8e57c Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 09:55:37 +0000 Subject: [PATCH 07/18] Make `Unexploitable*Characteristic` more precise. --- ...utomodelApplicationModeCharacteristics.qll | 33 ++++++++++++++----- .../AutomodelFrameworkModeCharacteristics.qll | 30 ++++++++++++----- .../src/AutomodelSharedCharacteristics.qll | 28 +++++++++------- ...delFrameworkModeExtractCandidates.expected | 2 ++ ...meworkModeExtractNegativeExamples.expected | 3 ++ .../com/github/codeql/test/PublicClass.java | 6 +++- 6 files changed, 73 insertions(+), 29 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index d00a95d564e..c91ea4a4e23 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -416,7 +416,8 @@ class ApplicationModeMetadataExtractor extends string { */ /** - * A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink. + * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks, + * and its return value should not be considered a source. * * A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return * type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does @@ -424,33 +425,47 @@ class ApplicationModeMetadataExtractor extends string { * * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks */ -private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { +private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic +{ UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _, _) and e.getCallable().getName().matches("is%") and - e.getCallable().getReturnType() instanceof BooleanType + e.getCallable().getReturnType() instanceof BooleanType and + ( + e.getExtensibleType() = "sinkModel" and + not ApplicationCandidatesImpl::isSink(e, _, _) + or + e.getExtensibleType() = "sourceModel" and + not ApplicationCandidatesImpl::isSource(e, _, _) and + e.getMaDOutput() = "ReturnValue" + ) } } /** - * A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a - * sink. + * A negative characteristic that indicates that parameters of an existence-checking boolean method should not be + * considered sinks, and its return value should not be considered a source. * * A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the * dangerous/interesting thing, so we want the latter to be modeled as the sink. */ -private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { +private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _, _) and exists(Callable callable | - callable = ApplicationModeGetCallable::getCallable(e) and + callable = e.getCallable() and callable.getName().toLowerCase() = ["exists", "notexists"] and callable.getReturnType() instanceof BooleanType + | + e.getExtensibleType() = "sinkModel" and + not ApplicationCandidatesImpl::isSink(e, _, _) + or + e.getExtensibleType() = "sourceModel" and + not ApplicationCandidatesImpl::isSource(e, _, _) and + e.getMaDOutput() = "ReturnValue" ) } } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index 3fa71b192bf..c7f4752c843 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -324,7 +324,8 @@ class FrameworkModeMetadataExtractor extends string { */ /** - * A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink. + * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks, + * and its return value should not be considered a source. * * A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return * type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does @@ -332,33 +333,46 @@ class FrameworkModeMetadataExtractor extends string { * * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks */ -private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { +private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not FrameworkCandidatesImpl::isSink(e, _, _) and e.getEnclosingCallable().getName().matches("is%") and - e.getEnclosingCallable().getReturnType() instanceof BooleanType + e.getEnclosingCallable().getReturnType() instanceof BooleanType and + ( + e.getExtensibleType() = "sinkModel" and + not FrameworkCandidatesImpl::isSink(e, _, _) + or + e.getExtensibleType() = "sourceModel" and + not FrameworkCandidatesImpl::isSource(e, _, _) and + e.getMaDOutput() = "ReturnValue" + ) } } /** - * A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a - * sink. + * A negative characteristic that indicates that parameters of an existence-checking boolean method should not be + * considered sinks, and its return value should not be considered a source. * * A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the * dangerous/interesting thing, so we want the latter to be modeled as the sink. */ -private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { +private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not FrameworkCandidatesImpl::isSink(e, _, _) and exists(Callable callable | callable = e.getEnclosingCallable() and callable.getName().toLowerCase() = ["exists", "notexists"] and callable.getReturnType() instanceof BooleanType + | + e.getExtensibleType() = "sourceModel" and + not FrameworkCandidatesImpl::isSink(e, _, _) + or + e.getExtensibleType() = "sourceModel" and + not FrameworkCandidatesImpl::isSource(e, _, _) and + e.getMaDOutput() = "ReturnValue" ) } } diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index 6712eb6d1b8..d2a23520369 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -269,6 +269,22 @@ module SharedCharacteristics { } } + /** + * A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type. + */ + abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, NotASourceCharacteristic { + bindingset[this] + NeitherSourceNorSinkCharacteristic() { any() } + + final override predicate hasImplications( + Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence + ) { + NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or + NotASourceCharacteristic.super + .hasImplications(endpointType, isPositiveIndicator, confidence) + } + } + /** * A medium-confidence characteristic that indicates that an endpoint is unlikely to be a sink of any type. These * endpoints can be excluded from scoring at inference time, both to save time and to avoid false positives. They should @@ -357,20 +373,10 @@ module SharedCharacteristics { /** * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model. */ - private class NeutralModelCharacteristic extends NotASinkCharacteristic, - NotASourceCharacteristic + private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic { NeutralModelCharacteristic() { this = "known non-endpoint" } - // this is a negative characteristic for both sinks and sources - override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or - NotASourceCharacteristic.super - .hasImplications(endpointType, isPositiveIndicator, confidence) - } - override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) } } diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected index 17880ae9e4e..411f1c57b2c 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractCandidates.expected @@ -9,6 +9,8 @@ | com/github/codeql/test/PublicClass.java:13:33:13:42 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:13:33:13:42 | arg | MethodDoc | com/github/codeql/test/PublicClass.java:13:33:13:42 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://nonPublicStuff:1:1:1:1 | nonPublicStuff | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | com/github/codeql/test/PublicClass.java:22:10:22:20 | PublicClass | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:22:10:22:20 | PublicClass | MethodDoc | com/github/codeql/test/PublicClass.java:22:10:22:20 | PublicClass | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://PublicClass:1:1:1:1 | PublicClass | name | file://(Object):1:1:1:1 | (Object) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://:1:1:1:1 | | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | com/github/codeql/test/PublicClass.java:22:22:22:33 | input | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:22:22:22:33 | input | MethodDoc | com/github/codeql/test/PublicClass.java:22:22:22:33 | input | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://PublicClass:1:1:1:1 | PublicClass | name | file://(Object):1:1:1:1 | (Object) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://input:1:1:1:1 | input | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | MethodDoc | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| com/github/codeql/test/PublicClass.java:26:28:26:39 | input | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | MethodDoc | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://input:1:1:1:1 | input | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | com/github/codeql/test/PublicInterface.java:4:16:4:20 | stuff | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:16:4:20 | stuff | MethodDoc | com/github/codeql/test/PublicInterface.java:4:16:4:20 | stuff | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | com/github/codeql/test/PublicInterface.java:4:16:4:20 | stuff | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:16:4:20 | stuff | MethodDoc | com/github/codeql/test/PublicInterface.java:4:16:4:20 | stuff | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | Related locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | MethodDoc | com/github/codeql/test/PublicInterface.java:4:22:4:31 | arg | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicInterface:1:1:1:1 | PublicInterface | type | file://true:1:1:1:1 | true | subtypes | file://stuff:1:1:1:1 | stuff | name | file://(String):1:1:1:1 | (String) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://arg:1:1:1:1 | arg | parameterName | file://:1:1:1:1 | | alreadyAiModeled | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected index 5ede383087c..8a9b4d3d2e3 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected @@ -1,3 +1,6 @@ +| com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | unexploitable (is-style boolean method)\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | MethodDoc | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://:1:1:1:1 | | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | unexploitable (is-style boolean method)\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | MethodDoc | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| com/github/codeql/test/PublicClass.java:26:28:26:39 | input | unexploitable (is-style boolean method)\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | MethodDoc | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://input:1:1:1:1 | input | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | java/io/File.java:4:16:4:24 | compareTo | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | java/io/File.java:4:16:4:24 | compareTo | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | java/io/File.java:5:9:5:21 | pathname | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java index 33eb443368c..e0915ac628f 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java @@ -5,7 +5,7 @@ public class PublicClass { System.out.println(arg); } - public static void staticStuff(String arg) { // `arg` is a candidate, `this` is not a candidate (static method), `arg` is not a source candidate (static methods can not be overloaded) + public static void staticStuff(String arg) { // `arg` is a sink candidate, but not a source candidate (not overrideabe); `this` is not a candidate (static method) System.out.println(arg); } @@ -22,4 +22,8 @@ public class PublicClass { public PublicClass(Object input) { // the `this` qualifier is not a candidate } + + public Boolean isIgnored(Object input) { // `input` is a source candidate, but not a sink candidate (is-style method); `this` is not a candidate + return false; + } } From 76b84301e3bb0971a6512b873d062708795474dd Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 11:15:30 +0000 Subject: [PATCH 08/18] Share some code. --- ...utomodelApplicationModeCharacteristics.qll | 74 +++++++------------ 1 file changed, 28 insertions(+), 46 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index c91ea4a4e23..ff01969df7a 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -64,8 +64,6 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint */ abstract Callable getCallable(); - abstract Call getCall(); - /** * Gets the input (if any) for this endpoint, eg.: `Argument[0]`. * @@ -111,50 +109,50 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint abstract string toString(); } +class TCallArgument = TExplicitArgument or TInstanceArgument or TImplicitVarargsArray; + /** - * A class representing nodes that are arguments to calls. + * An endpoint that represents an "argument" to a call in a broad sense, including + * both explicit arguments and the instance argument. */ -class ExplicitArgument extends ApplicationModeEndpoint, TExplicitArgument { +abstract class CallArgument extends ApplicationModeEndpoint, TCallArgument { Call call; DataFlow::Node arg; - ExplicitArgument() { this = TExplicitArgument(call, arg) } - override Callable getCallable() { result = call.getCallee().getSourceDeclaration() } - override Call getCall() { result = call } + override string getMaDOutput() { none() } + + override DataFlow::Node asNode() { result = arg } + + Call getCall() { result = call } + + override string toString() { result = arg.toString() } +} + +/** + * An endpoint that represents an explicit argument to a call. + */ +class ExplicitArgument extends CallArgument, TExplicitArgument { + ExplicitArgument() { this = TExplicitArgument(call, arg) } private int getArgIndex() { this.asTop() = call.getArgument(result) } override string getMaDInput() { result = "Argument[" + this.getArgIndex() + "]" } - override string getMaDOutput() { none() } - override Top asTop() { result = arg.asExpr() } - - override DataFlow::Node asNode() { result = arg } - - override string toString() { result = arg.toString() } } -class InstanceArgument extends ApplicationModeEndpoint, TInstanceArgument { - Call call; - DataFlow::Node arg; - +/** + * An endpoint that represents the instance argument to a call. + */ +class InstanceArgument extends CallArgument, TInstanceArgument { InstanceArgument() { this = TInstanceArgument(call, arg) } - override Callable getCallable() { result = call.getCallee().getSourceDeclaration() } - - override Call getCall() { result = call } - override string getMaDInput() { result = "Argument[this]" } - override string getMaDOutput() { none() } - override Top asTop() { if exists(arg.asExpr()) then result = arg.asExpr() else result = call } - override DataFlow::Node asNode() { result = arg } - override string toString() { result = arg.toString() } } @@ -167,26 +165,14 @@ class InstanceArgument extends ApplicationModeEndpoint, TInstanceArgument { * In order to be able to distinguish between varargs endpoints and regular endpoints, we export the `isVarargsArray` * meta data field in the extraction queries. */ -class ImplicitVarargsArray extends ApplicationModeEndpoint, TImplicitVarargsArray { - Call call; - DataFlow::Node vararg; +class ImplicitVarargsArray extends CallArgument, TImplicitVarargsArray { int idx; - ImplicitVarargsArray() { this = TImplicitVarargsArray(call, vararg, idx) } - - override Callable getCallable() { result = call.getCallee().getSourceDeclaration() } - - override Call getCall() { result = call } + ImplicitVarargsArray() { this = TImplicitVarargsArray(call, arg, idx) } override string getMaDInput() { result = "Argument[" + idx + "]" } - override string getMaDOutput() { none() } - override Top asTop() { result = call } - - override DataFlow::Node asNode() { result = vararg } - - override string toString() { result = vararg.toString() } } /** @@ -200,8 +186,6 @@ class MethodReturnValue extends ApplicationModeEndpoint, TMethodReturnValue { override Callable getCallable() { result = call.getCallee().getSourceDeclaration() } - override Call getCall() { result = call } - override string getMaDInput() { none() } override string getMaDOutput() { result = "ReturnValue" } @@ -231,8 +215,6 @@ class OverriddenParameter extends ApplicationModeEndpoint, TOverriddenParameter result = overriddenMethod.getSourceDeclaration() } - override Call getCall() { none() } - private int getArgIndex() { p.getCallable().getParameter(result) = p } override string getMaDInput() { none() } @@ -338,7 +320,7 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig */ RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) { type = CallContext() and - result = e.getCall() + result = e.(CallArgument).getCall() or type = MethodDoc() and result = e.getCallable().(Documentable).getJavadoc() @@ -560,9 +542,9 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics override predicate appliesToEndpoint(Endpoint e) { e.getExtensibleType() = "sinkModel" and not ApplicationCandidatesImpl::isSink(e, _, _) and - exists(Endpoint otherSink | + exists(CallArgument otherSink | ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and - e.getCall() = otherSink.getCall() and + e.(CallArgument).getCall() = otherSink.getCall() and e != otherSink ) } From 06ba5ea9f8989b95f525cab564f8232746dda573 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 11:14:36 +0000 Subject: [PATCH 09/18] Eliminate `GetCallable` modules and use `getCallable` instead. --- ...utomodelApplicationModeCharacteristics.qll | 30 +++++-------------- .../AutomodelFrameworkModeCharacteristics.qll | 1 - .../src/AutomodelSharedGetCallable.qll | 21 ------------- 3 files changed, 7 insertions(+), 45 deletions(-) delete mode 100644 java/ql/automodel/src/AutomodelSharedGetCallable.qll diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index ff01969df7a..5903496603a 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -15,7 +15,6 @@ private import semmle.code.java.security.QueryInjection private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions private import AutomodelJavaUtil as AutomodelJavaUtil private import semmle.code.java.security.PathSanitizer as PathSanitizer -private import AutomodelSharedGetCallable as AutomodelSharedGetCallable import AutomodelSharedCharacteristics as SharedCharacteristics import AutomodelEndpointTypes as AutomodelEndpointTypes @@ -330,22 +329,6 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig } } -private class JavaCallable = Callable; - -private module ApplicationModeGetCallable implements AutomodelSharedGetCallable::GetCallableSig { - class Callable = JavaCallable; - - class Endpoint = ApplicationCandidatesImpl::Endpoint; - - /** - * Returns the API callable being modeled. - * - * We usually want to use `.getSourceDeclaration()` instead of just 'the' callable, - * because the source declaration callable has erased generic type parameters. - */ - Callable getCallable(Endpoint e) { result = e.getCall().getCallee() } -} - /** * Contains endpoints that are defined in QL code rather than as a MaD model. Ideally this predicate * should be empty. @@ -459,8 +442,7 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkChara ExceptionCharacteristic() { this = "exception" } override predicate appliesToEndpoint(Endpoint e) { - ApplicationModeGetCallable::getCallable(e).getDeclaringType().getASupertype*() instanceof - TypeThrowable + e.(CallArgument).getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable } } @@ -493,18 +475,20 @@ private class LocalCall extends CharacteristicsImpl::UninterestingToModelCharact LocalCall() { this = "local call" } override predicate appliesToEndpoint(Endpoint e) { - ApplicationModeGetCallable::getCallable(e).fromSource() + e.(CallArgument).getCallable().fromSource() + or + e.(MethodReturnValue).getCallable().fromSource() } } /** - * A Characteristic that marks endpoints as uninteresting to model, according to the Java ModelExclusions module. + * A characteristic that marks endpoints as uninteresting to model, according to the Java ModelExclusions module. */ private class ExcludedFromModeling extends CharacteristicsImpl::UninterestingToModelCharacteristic { ExcludedFromModeling() { this = "excluded from modeling" } override predicate appliesToEndpoint(Endpoint e) { - ModelExclusions::isUninterestingForModels(ApplicationModeGetCallable::getCallable(e)) + ModelExclusions::isUninterestingForModels(e.getCallable()) } } @@ -518,7 +502,7 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter override predicate appliesToEndpoint(Endpoint e) { e.getExtensibleType() = "sinkModel" and - not ApplicationModeGetCallable::getCallable(e).isPublic() + not e.getCallable().isPublic() } } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index c7f4752c843..0af1a272c88 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -15,7 +15,6 @@ private import semmle.code.java.security.QueryInjection private import semmle.code.java.security.RequestForgery private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions private import AutomodelJavaUtil as AutomodelJavaUtil -private import AutomodelSharedGetCallable as AutomodelSharedGetCallable import AutomodelSharedCharacteristics as SharedCharacteristics import AutomodelEndpointTypes as AutomodelEndpointTypes diff --git a/java/ql/automodel/src/AutomodelSharedGetCallable.qll b/java/ql/automodel/src/AutomodelSharedGetCallable.qll deleted file mode 100644 index 87e20969381..00000000000 --- a/java/ql/automodel/src/AutomodelSharedGetCallable.qll +++ /dev/null @@ -1,21 +0,0 @@ -/** - * An automodel extraction mode instantiates this interface to define how to access - * the callable that's associated with an endpoint. - */ -signature module GetCallableSig { - /** - * A callable is the definition of a method, function, etc. - something that can be called. - */ - class Callable; - - /** - * An endpoint is a potential candidate for modeling. This will typically be bound to the language's - * DataFlow node class, or a subtype thereof. - */ - class Endpoint; - - /** - * Gets the callable that's associated with the given endpoint. - */ - Callable getCallable(Endpoint endpoint); -} From ea26e2145441f54f3aab3e4666f2a30c69d51e3c Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 12:20:22 +0000 Subject: [PATCH 10/18] Extend negative characteristics for exceptions to source models. --- .../AutomodelApplicationModeCharacteristics.qll | 15 ++++++++++++--- .../AutomodelFrameworkModeCharacteristics.qll | 16 +++++++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index 5903496603a..fd91434ec34 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -436,13 +436,22 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei } /** - * A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink. + * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks, + * and its return value should not be considered a source. */ -private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { +private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { ExceptionCharacteristic() { this = "exception" } override predicate appliesToEndpoint(Endpoint e) { - e.(CallArgument).getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable + e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and + ( + e.getExtensibleType() = "sinkModel" and + not ApplicationCandidatesImpl::isSink(e, _, _) + or + e.getExtensibleType() = "sourceModel" and + not ApplicationCandidatesImpl::isSource(e, _, _) and + e.getMaDOutput() = "ReturnValue" + ) } } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index 0af1a272c88..387fb3aaad6 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -377,16 +377,26 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei } /** - * A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink. + * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks, + * and its return value should not be considered a source. */ -private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { +private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { ExceptionCharacteristic() { this = "exception" } override predicate appliesToEndpoint(Endpoint e) { - e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable + e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and + ( + e.getExtensibleType() = "sinkModel" and + not FrameworkCandidatesImpl::isSink(e, _, _) + or + e.getExtensibleType() = "sourceModel" and + not FrameworkCandidatesImpl::isSource(e, _, _) and + e.getMaDOutput() = "ReturnValue" + ) } } + /** * A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that * are considered worth modeling. From 45ca301593d511fc6d67dc85b5e6863644cace19 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 13:18:05 +0000 Subject: [PATCH 11/18] Rename a predicate. --- .../AutomodelFrameworkModeCharacteristics.qll | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index 387fb3aaad6..e41b8b1ea78 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -83,7 +83,7 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint { /** * Returns the callable that contains the endpoint. */ - abstract Callable getEnclosingCallable(); + abstract Callable getCallable(); abstract Top asTop(); @@ -117,7 +117,7 @@ class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParamete override string getParamName() { result = param.getName() } - override Callable getEnclosingCallable() { result = param.getCallable() } + override Callable getCallable() { result = param.getCallable() } override Top asTop() { result = param } @@ -137,7 +137,7 @@ class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier { override string getParamName() { result = "this" } - override Callable getEnclosingCallable() { result = callable } + override Callable getCallable() { result = callable } override Top asTop() { result = callable } @@ -155,7 +155,7 @@ class ReturnValue extends FrameworkModeEndpoint, TReturnValue { override string getParamName() { none() } - override Callable getEnclosingCallable() { result = callable } + override Callable getCallable() { result = callable } override Top asTop() { result = callable } @@ -174,7 +174,7 @@ class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter override string getParamName() { result = param.getName() } - override Callable getEnclosingCallable() { result = method } + override Callable getCallable() { result = method } override Top asTop() { result = param } @@ -192,7 +192,7 @@ class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier override string getParamName() { result = "this" } - override Callable getEnclosingCallable() { result = m } + override Callable getCallable() { result = m } override Top asTop() { result = m } @@ -257,8 +257,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { additional predicate sinkSpec( Endpoint e, string package, string type, string name, string signature, string ext, string input ) { - e.getEnclosingCallable().hasQualifiedName(package, type, name) and - signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and + e.getCallable().hasQualifiedName(package, type, name) and + signature = ExternalFlow::paramsString(e.getCallable()) and ext = "" and input = e.getMaDInput() } @@ -267,8 +267,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { Endpoint e, string package, string type, string name, string signature, string ext, string output ) { - e.getEnclosingCallable().hasQualifiedName(package, type, name) and - signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and + e.getCallable().hasQualifiedName(package, type, name) and + signature = ExternalFlow::paramsString(e.getCallable()) and ext = "" and output = e.getMaDOutput() } @@ -280,10 +280,10 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { */ RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) { type = MethodDoc() and - result = e.getEnclosingCallable().(Documentable).getJavadoc() + result = e.getCallable().(Documentable).getJavadoc() or type = ClassDoc() and - result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc() + result = e.getCallable().getDeclaringType().(Documentable).getJavadoc() } } @@ -308,13 +308,13 @@ class FrameworkModeMetadataExtractor extends string { string input, string output, string parameterName ) { (if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and - name = e.getEnclosingCallable().getName() and + name = e.getCallable().getName() and (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and - package = e.getEnclosingCallable().getDeclaringType().getPackage().getName() and - type = e.getEnclosingCallable().getDeclaringType().getErasure().(RefType).nestedName() and - subtypes = AutomodelJavaUtil::considerSubtypes(e.getEnclosingCallable()).toString() and - signature = ExternalFlow::paramsString(e.getEnclosingCallable()) + package = e.getCallable().getDeclaringType().getPackage().getName() and + type = e.getCallable().getDeclaringType().getErasure().(RefType).nestedName() and + subtypes = AutomodelJavaUtil::considerSubtypes(e.getCallable()).toString() and + signature = ExternalFlow::paramsString(e.getCallable()) } } @@ -336,8 +336,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - e.getEnclosingCallable().getName().matches("is%") and - e.getEnclosingCallable().getReturnType() instanceof BooleanType and + e.getCallable().getName().matches("is%") and + e.getCallable().getReturnType() instanceof BooleanType and ( e.getExtensibleType() = "sinkModel" and not FrameworkCandidatesImpl::isSink(e, _, _) @@ -362,11 +362,11 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei override predicate appliesToEndpoint(Endpoint e) { exists(Callable callable | - callable = e.getEnclosingCallable() and + callable = e.getCallable() and callable.getName().toLowerCase() = ["exists", "notexists"] and callable.getReturnType() instanceof BooleanType | - e.getExtensibleType() = "sourceModel" and + e.getExtensibleType() = "sinkModel" and not FrameworkCandidatesImpl::isSink(e, _, _) or e.getExtensibleType() = "sourceModel" and @@ -384,7 +384,7 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSource ExceptionCharacteristic() { this = "exception" } override predicate appliesToEndpoint(Endpoint e) { - e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and + e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and ( e.getExtensibleType() = "sinkModel" and not FrameworkCandidatesImpl::isSink(e, _, _) @@ -405,6 +405,6 @@ private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelChar NotAModelApi() { this = "not a model API" } override predicate appliesToEndpoint(Endpoint e) { - not e.getEnclosingCallable() instanceof ModelExclusions::ModelApi + not e.getCallable() instanceof ModelExclusions::ModelApi } } From bb63fcde4338eeabc34f381cec32a1d2d9aad788 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 14:57:05 +0000 Subject: [PATCH 12/18] Refactor to avoid bad join order. --- ...utomodelApplicationModeCharacteristics.qll | 26 +++++++++---- ...tomodelApplicationModeExtractCandidates.ql | 22 +++++------ ...lApplicationModeExtractNegativeExamples.ql | 4 +- ...lApplicationModeExtractPositiveExamples.ql | 5 +-- .../AutomodelFrameworkModeCharacteristics.qll | 39 ++++++++++++------- ...AutomodelFrameworkModeExtractCandidates.ql | 21 ++++------ ...delFrameworkModeExtractNegativeExamples.ql | 4 +- ...delFrameworkModeExtractPositiveExamples.ql | 5 +-- .../src/AutomodelSharedCharacteristics.qll | 10 ++--- 9 files changed, 74 insertions(+), 62 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index fd91434ec34..88e837f5fc1 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -356,10 +356,10 @@ class ApplicationModeMetadataExtractor extends string { predicate hasMetadata( Endpoint e, string package, string type, string subtypes, string name, string signature, - string input, string output, string isVarargsArray + string input, string output, string isVarargsArray, string alreadyAiModeled, + string extensibleType ) { - exists(Callable callable | - e.getCallable() = callable and + exists(Callable callable | e.getCallable() = callable | (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and package = callable.getDeclaringType().getPackage().getName() and @@ -369,9 +369,17 @@ class ApplicationModeMetadataExtractor extends string { subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and name = callable.getName() and signature = ExternalFlow::paramsString(callable) and - if e instanceof ImplicitVarargsArray - then isVarargsArray = "true" - else isVarargsArray = "false" + ( + if e instanceof ImplicitVarargsArray + then isVarargsArray = "true" + else isVarargsArray = "false" + ) and + extensibleType = e.getExtensibleType() + ) and + ( + not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = "" + or + CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled) ) } } @@ -416,7 +424,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the * dangerous/interesting thing, so we want the latter to be modeled as the sink. */ -private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { +private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic +{ UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { @@ -439,7 +448,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks, * and its return value should not be considered a source. */ -private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { +private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic +{ ExceptionCharacteristic() { this = "exception" } override predicate appliesToEndpoint(Endpoint e) { diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql index 391e3e48e7c..e3384a52bb8 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql @@ -25,20 +25,20 @@ private import AutomodelJavaUtil bindingset[limit] private Endpoint getSampleForSignature( int limit, string package, string type, string subtypes, string name, string signature, - string input, string output, string isVarargs, string extensibleType + string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled ) { exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta | num_endpoints = count(Endpoint e | - e.getExtensibleType() = extensibleType and - meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs) + meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs, + alreadyAiModeled, extensibleType) ) | result = rank[n](Endpoint e, Location loc | loc = e.asTop().getLocation() and - e.getExtensibleType() = extensibleType and - meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs) + meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs, + alreadyAiModeled, extensibleType) | e order by @@ -66,19 +66,15 @@ where CharacteristicsImpl::isCandidate(endpoint, _) and endpoint = getSampleForSignature(9, package, type, subtypes, name, signature, input, output, - isVarargsArray, extensibleType) and + isVarargsArray, extensibleType, alreadyAiModeled) and + meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, + isVarargsArray, alreadyAiModeled, extensibleType) and // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink // types, and we don't need to reexamine it. - ( - not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = "" - or - alreadyAiModeled.matches("%ai-%") and - CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled) - ) and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and + alreadyAiModeled.matches(["", "%ai-%"]) and includeAutomodelCandidate(package, type, name, signature) select endpoint.asNode(), "Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", // diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql index 2b324dd0848..5afa2cc2f2f 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql @@ -47,7 +47,6 @@ from DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType where endpoint = getSampleForCharacteristic(characteristic, 100) and - extensibleType = endpoint.getExtensibleType() and // the node is know not to be an endpoint of any appropriate type forall(EndpointType tp | tp = endpoint.getAPotentialType() | characteristic.hasImplications(tp, false, _) @@ -55,7 +54,8 @@ where // the lowest confidence across all endpoint types should be at least highConfidence confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and confidence >= SharedCharacteristics::highConfidence() and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and + meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, + isVarargsArray, _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql index e933900aecc..8702e6808a4 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql @@ -18,9 +18,8 @@ from DollarAtString signature, DollarAtString input, DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType where - extensibleType = endpoint.getExtensibleType() and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and - // Extract positive examples of sinks belonging to the existing ATM query configurations. + meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, + isVarargsArray, _, extensibleType) and CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext())) select endpoint.asNode(), diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index e41b8b1ea78..ac6a5a836de 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -305,16 +305,27 @@ class FrameworkModeMetadataExtractor extends string { predicate hasMetadata( Endpoint e, string package, string type, string subtypes, string name, string signature, - string input, string output, string parameterName + string input, string output, string parameterName, string alreadyAiModeled, + string extensibleType ) { - (if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and - name = e.getCallable().getName() and - (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and - (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and - package = e.getCallable().getDeclaringType().getPackage().getName() and - type = e.getCallable().getDeclaringType().getErasure().(RefType).nestedName() and - subtypes = AutomodelJavaUtil::considerSubtypes(e.getCallable()).toString() and - signature = ExternalFlow::paramsString(e.getCallable()) + exists(Callable callable | e.getCallable() = callable | + (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and + (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and + package = callable.getDeclaringType().getPackage().getName() and + // we're using the erased types because the MaD convention is to not specify type parameters. + // Whether something is or isn't a sink doesn't usually depend on the type parameters. + type = callable.getDeclaringType().getErasure().(RefType).nestedName() and + subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and + name = callable.getName() and + signature = ExternalFlow::paramsString(callable) and + (if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and + e.getExtensibleType() = extensibleType + ) and + ( + not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = "" + or + CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled) + ) } } @@ -332,7 +343,8 @@ class FrameworkModeMetadataExtractor extends string { * * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks */ -private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { +private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic +{ UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { @@ -357,7 +369,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the * dangerous/interesting thing, so we want the latter to be modeled as the sink. */ -private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { +private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic +{ UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { @@ -380,7 +393,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks, * and its return value should not be considered a source. */ -private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic { +private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic +{ ExceptionCharacteristic() { this = "exception" } override predicate appliesToEndpoint(Endpoint e) { @@ -396,7 +410,6 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSource } } - /** * A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that * are considered worth modeling. diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql index fa4dc6f0189..80f3d3089c0 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql @@ -21,23 +21,18 @@ from DollarAtString input, DollarAtString output, DollarAtString parameterName, DollarAtString alreadyAiModeled, DollarAtString extensibleType where - endpoint.getExtensibleType() = extensibleType and not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u | u.appliesToEndpoint(endpoint) ) and CharacteristicsImpl::isCandidate(endpoint, _) and - // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we - // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will - // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in - // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been - // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. - ( - not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = "" - or - alreadyAiModeled.matches("%ai-%") and - CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled) - ) and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and + meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, + alreadyAiModeled, extensibleType) and + // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a + // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's + // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We + // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink + // types, and we don't need to reexamine it. + alreadyAiModeled.matches(["", "%ai-%"]) and includeAutomodelCandidate(package, type, name, signature) select endpoint, "Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", // diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql index ac043142f10..912d8da9219 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql @@ -19,7 +19,6 @@ from DollarAtString input, DollarAtString output, DollarAtString parameterName, DollarAtString extensibleType where - endpoint.getExtensibleType() = extensibleType and characteristic.appliesToEndpoint(endpoint) and // the node is known not to be an endpoint of any appropriate type forall(EndpointType tp | tp = endpoint.getAPotentialType() | @@ -28,7 +27,8 @@ where // the lowest confidence across all endpoint types should be at least highConfidence confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and confidence >= SharedCharacteristics::highConfidence() and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and + meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, + _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql index dbc3d760d9a..d83ac04c582 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql @@ -18,9 +18,8 @@ from DollarAtString signature, DollarAtString input, DollarAtString output, DollarAtString parameterName, DollarAtString extensibleType where - endpoint.getExtensibleType() = extensibleType and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and - // Extract positive examples of sinks belonging to the existing ATM query configurations. + meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, + _, extensibleType) and CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) select endpoint, endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", // diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index d2a23520369..b71daa92512 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -272,7 +272,9 @@ module SharedCharacteristics { /** * A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type. */ - abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, NotASourceCharacteristic { + abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, + NotASourceCharacteristic + { bindingset[this] NeitherSourceNorSinkCharacteristic() { any() } @@ -280,8 +282,7 @@ module SharedCharacteristics { Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence ) { NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or - NotASourceCharacteristic.super - .hasImplications(endpointType, isPositiveIndicator, confidence) + NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) } } @@ -373,8 +374,7 @@ module SharedCharacteristics { /** * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model. */ - private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic - { + private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic { NeutralModelCharacteristic() { this = "known non-endpoint" } override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) } From 919330fb533ebc5ffbf82dc61bb9dc2db6bce4a7 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 12 Jan 2024 17:38:58 +0000 Subject: [PATCH 13/18] Some more performance refactoring. --- ...lApplicationModeExtractNegativeExamples.ql | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql index 5afa2cc2f2f..77f652ada64 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql @@ -40,13 +40,11 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) { ) } -from - Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message, - ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type, - DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input, - DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType -where - endpoint = getSampleForCharacteristic(characteristic, 100) and +predicate candidate( + Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package, + string type, string subtypes, string name, string signature, string input, string output, + string isVarargsArray, string extensibleType +) { // the node is know not to be an endpoint of any appropriate type forall(EndpointType tp | tp = endpoint.getAPotentialType() | characteristic.hasImplications(tp, false, _) @@ -54,15 +52,27 @@ where // the lowest confidence across all endpoint types should be at least highConfidence confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and confidence >= SharedCharacteristics::highConfidence() and - meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, - isVarargsArray, _, extensibleType) and + any(ApplicationModeMetadataExtractor meta) + .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, + isVarargsArray, _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and characteristic2.hasImplications(type2, true, confidence2) - ) and + ) +} + +from + Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message, + DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name, + DollarAtString signature, DollarAtString input, DollarAtString output, + DollarAtString isVarargsArray, DollarAtString extensibleType +where + endpoint = getSampleForCharacteristic(characteristic, 100) and + candidate(endpoint, characteristic, confidence, package, type, subtypes, name, signature, input, + output, isVarargsArray, extensibleType) and message = characteristic select endpoint.asNode(), message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", // From 68cf9aca12772352665582b46147dc6087cddc92 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 15 Jan 2024 11:50:59 +0000 Subject: [PATCH 14/18] Remove a few `getExtensibleType` checks which are now unnecessary. --- .../src/AutomodelApplicationModeCharacteristics.qll | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index 88e837f5fc1..2442c60fc0a 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -520,8 +520,7 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter NonPublicMethodCharacteristic() { this = "non-public method" } override predicate appliesToEndpoint(Endpoint e) { - e.getExtensibleType() = "sinkModel" and - not e.getCallable().isPublic() + exists(Callable c | c = e.getCallable() | not c.isPublic()) } } @@ -543,7 +542,6 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics } override predicate appliesToEndpoint(Endpoint e) { - e.getExtensibleType() = "sinkModel" and not ApplicationCandidatesImpl::isSink(e, _, _) and exists(CallArgument otherSink | ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and @@ -562,7 +560,6 @@ private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNot FunctionValueCharacteristic() { this = "function value" } override predicate appliesToEndpoint(Endpoint e) { - e.getExtensibleType() = "sinkModel" and e.asNode().asExpr() instanceof FunctionalExpr } } @@ -579,7 +576,6 @@ private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyN CannotBeTaintedCharacteristic() { this = "cannot be tainted" } override predicate appliesToEndpoint(Endpoint e) { - e.getExtensibleType() = "sinkModel" and not this.isKnownOutNodeForStep(e) } From 3befce98b3ebf9b21cec4853a700f02b98df7a99 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 15 Jan 2024 12:09:39 +0000 Subject: [PATCH 15/18] When checking whether an endpoint has already been modelled, make sure to take the extensibleType into account. --- .../automodel/src/AutomodelApplicationModeCharacteristics.qll | 4 ++-- .../automodel/src/AutomodelFrameworkModeCharacteristics.qll | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index 2442c60fc0a..1b972422300 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -377,9 +377,9 @@ class ApplicationModeMetadataExtractor extends string { extensibleType = e.getExtensibleType() ) and ( - not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = "" + not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = "" or - CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled) + CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled) ) } } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index ac6a5a836de..99e59268403 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -322,9 +322,9 @@ class FrameworkModeMetadataExtractor extends string { e.getExtensibleType() = extensibleType ) and ( - not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = "" + not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = "" or - CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled) + CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled) ) } } From fee44074f790b1ba905f416f3dc005d74f16a51e Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 15 Jan 2024 13:44:45 +0000 Subject: [PATCH 16/18] Autoformat. --- .../src/AutomodelApplicationModeCharacteristics.qll | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index 1b972422300..1ae0253d0b3 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -559,9 +559,7 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic { FunctionValueCharacteristic() { this = "function value" } - override predicate appliesToEndpoint(Endpoint e) { - e.asNode().asExpr() instanceof FunctionalExpr - } + override predicate appliesToEndpoint(Endpoint e) { e.asNode().asExpr() instanceof FunctionalExpr } } /** @@ -575,9 +573,7 @@ private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyN { CannotBeTaintedCharacteristic() { this = "cannot be tainted" } - override predicate appliesToEndpoint(Endpoint e) { - not this.isKnownOutNodeForStep(e) - } + override predicate appliesToEndpoint(Endpoint e) { not this.isKnownOutNodeForStep(e) } /** * Holds if the node `n` is known as the predecessor in a modeled flow step. From 90a4552c4fad9ddcd4f16ec85c245a5644e968db Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 15 Jan 2024 13:45:03 +0000 Subject: [PATCH 17/18] Fix omittable exists. --- .../src/AutomodelApplicationModeExtractNegativeExamples.ql | 4 ++-- .../src/AutomodelFrameworkModeExtractNegativeExamples.ql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql index 77f652ada64..3f9a697e491 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql @@ -57,10 +57,10 @@ predicate candidate( isVarargsArray, _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. - not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | + not exists(EndpointCharacteristic characteristic2, float confidence2 | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2.hasImplications(type2, true, confidence2) + characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2) ) } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql index 912d8da9219..eb7109276b3 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql @@ -31,10 +31,10 @@ where _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. - not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 | + not exists(EndpointCharacteristic characteristic2, float confidence2 | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2.hasImplications(type2, true, confidence2) + characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2) ) and message = characteristic select endpoint, From 8614d7bddb663f76a197fef05a8a8f7f0eb21903 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Wed, 17 Jan 2024 14:27:24 +0000 Subject: [PATCH 18/18] Address review feedback. --- ...utomodelApplicationModeCharacteristics.qll | 12 ----------- ...lApplicationModeExtractNegativeExamples.ql | 16 +++++++++----- .../AutomodelFrameworkModeCharacteristics.qll | 12 ----------- ...delFrameworkModeExtractNegativeExamples.ql | 14 +++++++++---- .../src/AutomodelSharedCharacteristics.qll | 21 ++++++++++++++++--- ...cationModeExtractNegativeExamples.expected | 2 +- ...meworkModeExtractNegativeExamples.expected | 8 +++---- 7 files changed, 44 insertions(+), 41 deletions(-) diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll index 1ae0253d0b3..44d74a5565f 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll @@ -93,18 +93,6 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint else none() // if both exist, it would be a summaryModel (not yet supported) } - /** - * Gets a potential type of this endpoint to make sure that sources are - * associated with source types and sinks with sink types. - */ - AutomodelEndpointTypes::EndpointType getAPotentialType() { - this.getExtensibleType() = "sourceModel" and - result instanceof AutomodelEndpointTypes::SourceType - or - this.getExtensibleType() = "sinkModel" and - result instanceof AutomodelEndpointTypes::SinkType - } - abstract string toString(); } diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql index 3f9a697e491..f4ee90331f6 100644 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql @@ -45,22 +45,28 @@ predicate candidate( string type, string subtypes, string name, string signature, string input, string output, string isVarargsArray, string extensibleType ) { - // the node is know not to be an endpoint of any appropriate type - forall(EndpointType tp | tp = endpoint.getAPotentialType() | + // the node is known not to be an endpoint of any appropriate type + forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) | characteristic.hasImplications(tp, false, _) ) and // the lowest confidence across all endpoint types should be at least highConfidence - confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and + confidence = + min(float c | + characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c) + ) and confidence >= SharedCharacteristics::highConfidence() and any(ApplicationModeMetadataExtractor meta) .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray, _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes - // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. + // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here. not exists(EndpointCharacteristic characteristic2, float confidence2 | + characteristic2 != characteristic + | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2) + characteristic2 + .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2) ) } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll index 99e59268403..a47e470b4a1 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll @@ -89,18 +89,6 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint { abstract string getExtensibleType(); - /** - * Gets a potential type of this endpoint to make sure that sources are - * associated with source types and sinks with sink types. - */ - AutomodelEndpointTypes::EndpointType getAPotentialType() { - this.getExtensibleType() = "sourceModel" and - result instanceof AutomodelEndpointTypes::SourceType - or - this.getExtensibleType() = "sinkModel" and - result instanceof AutomodelEndpointTypes::SinkType - } - string toString() { result = this.asTop().toString() } Location getLocation() { result = this.asTop().getLocation() } diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql index eb7109276b3..dc9f61ab49c 100644 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql +++ b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql @@ -21,20 +21,26 @@ from where characteristic.appliesToEndpoint(endpoint) and // the node is known not to be an endpoint of any appropriate type - forall(EndpointType tp | tp = endpoint.getAPotentialType() | + forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) | characteristic.hasImplications(tp, false, _) ) and // the lowest confidence across all endpoint types should be at least highConfidence - confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and + confidence = + min(float c | + characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c) + ) and confidence >= SharedCharacteristics::highConfidence() and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, _, extensibleType) and // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes - // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here. + // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here. not exists(EndpointCharacteristic characteristic2, float confidence2 | + characteristic2 != characteristic + | characteristic2.appliesToEndpoint(endpoint) and confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2) + characteristic2 + .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2) ) and message = characteristic select endpoint, diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll index b71daa92512..5641a534660 100644 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll @@ -17,7 +17,10 @@ signature module CandidateSig { * DataFlow node class, or a subtype thereof. */ class Endpoint { - EndpointType getAPotentialType(); + /** + * Gets the kind of this endpoint, either "sourceModel" or "sinkModel". + */ + string getExtensibleType(); } /** @@ -122,6 +125,18 @@ module SharedCharacteristics { characteristic.hasImplications(endpointType, true, maximalConfidence()) } + /** + * Gets a potential type of this endpoint to make sure that sources are + * associated with source types and sinks with sink types. + */ + Candidate::EndpointType getAPotentialType(Candidate::Endpoint endpoint) { + endpoint.getExtensibleType() = "sourceModel" and + result instanceof Candidate::SourceType + or + endpoint.getExtensibleType() = "sinkModel" and + result instanceof Candidate::SinkType + } + /** * Holds if the given `endpoint` should be considered as a candidate for type `endpointType`, * and classified by the ML model. @@ -129,7 +144,7 @@ module SharedCharacteristics { * A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics. */ predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) { - endpointType = endpoint.getAPotentialType() and + endpointType = getAPotentialType(endpoint) and not exists(getAnExcludingCharacteristic(endpoint, endpointType)) } @@ -375,7 +390,7 @@ module SharedCharacteristics { * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model. */ private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic { - NeutralModelCharacteristic() { this = "known non-endpoint" } + NeutralModelCharacteristic() { this = "known non-sink" } override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) } } diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected index 0118ef09719..91f33b6fb05 100644 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractNegativeExamples.expected @@ -1,3 +1,3 @@ | Test.java:48:10:50:3 | compareTo(...) | known sanitizer\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:48:10:50:3 | compareTo(...) | MethodDoc | Test.java:48:10:50:3 | compareTo(...) | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://false:1:1:1:1 | false | isVarargsArray | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | -| Test.java:49:4:49:5 | f2 | known non-endpoint\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:49:4:49:5 | f2 | MethodDoc | Test.java:49:4:49:5 | f2 | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| Test.java:49:4:49:5 | f2 | known non-sink\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:48:10:50:3 | compareTo(...) | CallContext | Test.java:49:4:49:5 | f2 | MethodDoc | Test.java:49:4:49:5 | f2 | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | Test.java:55:4:55:4 | p | taint step\nrelated locations: $@, $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:54:3:59:3 | walk(...) | CallContext | Test.java:55:4:55:4 | p | MethodDoc | Test.java:55:4:55:4 | p | ClassDoc | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://false:1:1:1:1 | false | isVarargsArray | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected index 8a9b4d3d2e3..e1bcaca7ddd 100644 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected +++ b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractNegativeExamples.expected @@ -1,7 +1,7 @@ | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | unexploitable (is-style boolean method)\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | MethodDoc | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://:1:1:1:1 | | input | file://ReturnValue:1:1:1:1 | ReturnValue | output | file://:1:1:1:1 | | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | unexploitable (is-style boolean method)\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | MethodDoc | com/github/codeql/test/PublicClass.java:26:18:26:26 | isIgnored | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | unexploitable (is-style boolean method)\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | MethodDoc | com/github/codeql/test/PublicClass.java:26:28:26:39 | input | ClassDoc | file://com.github.codeql.test:1:1:1:1 | com.github.codeql.test | package | file://PublicClass:1:1:1:1 | PublicClass | type | file://true:1:1:1:1 | true | subtypes | file://isIgnored:1:1:1:1 | isIgnored | name | file://(Object):1:1:1:1 | (Object) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://input:1:1:1:1 | input | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | -| java/io/File.java:4:16:4:24 | compareTo | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | -| java/io/File.java:4:16:4:24 | compareTo | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | -| java/io/File.java:5:9:5:21 | pathname | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | -| java/io/File.java:5:9:5:21 | pathname | known non-endpoint\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| java/io/File.java:4:16:4:24 | compareTo | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[this]:1:1:1:1 | Parameter[this] | output | file://this:1:1:1:1 | this | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| java/io/File.java:4:16:4:24 | compareTo | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:4:16:4:24 | compareTo | MethodDoc | java/io/File.java:4:16:4:24 | compareTo | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://:1:1:1:1 | | output | file://this:1:1:1:1 | this | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType | +| java/io/File.java:5:9:5:21 | pathname | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://:1:1:1:1 | | input | file://Parameter[0]:1:1:1:1 | Parameter[0] | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sourceModel:1:1:1:1 | sourceModel | extensibleType | +| java/io/File.java:5:9:5:21 | pathname | known non-sink\nrelated locations: $@, $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@. | java/io/File.java:5:9:5:21 | pathname | MethodDoc | java/io/File.java:5:9:5:21 | pathname | ClassDoc | file://java.io:1:1:1:1 | java.io | package | file://File:1:1:1:1 | File | type | file://true:1:1:1:1 | true | subtypes | file://compareTo:1:1:1:1 | compareTo | name | file://(File):1:1:1:1 | (File) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://:1:1:1:1 | | output | file://pathname:1:1:1:1 | pathname | parameterName | file://sinkModel:1:1:1:1 | sinkModel | extensibleType |