diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll index 6e383be43ee..9011e64f526 100644 --- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll +++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll @@ -40,7 +40,7 @@ predicate isKnownSink(DataFlow::Node sink, SinkType sinkType) { */ predicate erroneousEndpoints( DataFlow::Node endpoint, EndpointCharacteristic characteristic, EndpointType endpointClass, - float confidence, string errorMessage + float confidence, string errorMessage, boolean ignoreKnownModelingErrors ) { // An endpoint's characteristics should not include positive indicators with medium/high confidence for more than one // sink/source type. @@ -63,10 +63,13 @@ predicate erroneousEndpoints( not ( characteristic instanceof IsSanitizerCharacteristic or characteristic2 instanceof IsSanitizerCharacteristic + ) and + ( + ignoreKnownModelingErrors = true and + not knownOverlappingCharacteristics(characteristic, characteristic2) + or + ignoreKnownModelingErrors = false ) - // and - // // We currently know of some sink types that overlap with one another. - // not knownOverlappingCharacteristics(characteristic, characteristic2) ) and errorMessage = "Endpoint has high-confidence positive indicators for multiple classes" or @@ -80,6 +83,7 @@ predicate erroneousEndpoints( confidence > characteristic.mediumConfidence() and confidence2 > characteristic2.mediumConfidence() ) and + ignoreKnownModelingErrors = false and errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class" } @@ -104,6 +108,18 @@ predicate erroneousConfidences( // characteristic1 = ["file creation sink", "other path injection sink"] and // characteristic2 = ["file creation sink", "other path injection sink"] // } +/** + * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that + * have some overlap in their results. This indicates a problem with the underlying Java modeling. + */ +private predicate knownOverlappingCharacteristics( + EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2 +) { + characteristic1 != characteristic2 and + characteristic1 = ["mad taint step", "create path"] and + characteristic2 = ["mad taint step", "create path"] +} + predicate isTypeAccess(DataFlow::Node n) { n.asExpr() instanceof TypeAccess } /** @@ -374,21 +390,22 @@ private class IsSanitizerCharacteristic extends NotASinkCharacteristic { } } -// /** -// * A negative characteristic that indicates that an endpoint is a MaD taint step. MaD modeled taint steps are global, -// * so they are not sinks for any query. Non-MaD taint steps might be specific to a particular query, so we don't -// * filter those out. -// */ -// private class IsMaDTaintStepCharacteristic extends NotASinkCharacteristic { -// IsMaDTaintStepCharacteristic() { this = "mad taint step" } -// override predicate appliesToEndpoint(DataFlow::Node n) { -// FlowSummaryImpl::Private::Steps::summaryThroughStepValue(n, _, _) -// or -// // FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(n, _, _) or -// // FlowSummaryImpl::Private::Steps::summaryGetterStep(n, _, _, _) or -// FlowSummaryImpl::Private::Steps::summarySetterStep(n, _, _, _) -// } -// } +/** + * A negative characteristic that indicates that an endpoint is a MaD taint step. MaD modeled taint steps are global, + * so they are not sinks for any query. Non-MaD taint steps might be specific to a particular query, so we don't + * filter those out. + */ +private class IsMaDTaintStepCharacteristic extends NotASinkCharacteristic { + IsMaDTaintStepCharacteristic() { this = "mad taint step" } + + override predicate appliesToEndpoint(DataFlow::Node n) { + FlowSummaryImpl::Private::Steps::summaryThroughStepValue(n, _, _) or + FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(n, _, _) or + FlowSummaryImpl::Private::Steps::summaryGetterStep(n, _, _, _) or + FlowSummaryImpl::Private::Steps::summarySetterStep(n, _, _, _) + } +} + /** * A negative characteristic that indicates that an endpoint is an argument to a safe external API method. * diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql index 0600456b573..9741802db9e 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractNegativeExamples.ql @@ -35,7 +35,7 @@ where characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly // certain about in the prompt. - not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _) and + not EndpointCharacteristics::erroneousEndpoints(endpoint, _, _, _, _, false) and // Exclude type access nodes because they will never be on a flow path so they're not useful negative examples. not EndpointCharacteristics::isTypeAccess(endpoint) and // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be diff --git a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql index d69774cf60d..cc833e5d012 100644 --- a/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql +++ b/java/ql/experimental/adaptivethreatmodeling/src/ExtractPositiveExamples.ql @@ -22,9 +22,9 @@ private import experimental.adaptivethreatmodeling.ATMConfigs // To import the c from DataFlow::Node sink, SinkType sinkType, string message where - // If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query - // when there's a codex-generated data extension file in `java/ql/lib/ext`. - not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _) and + // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly + // certain about in the prompt. + not EndpointCharacteristics::erroneousEndpoints(sink, _, _, _, _, false) and // Extract positive examples of sinks belonging to the existing ATM query configurations. ( EndpointCharacteristics::isKnownSink(sink, sinkType) and @@ -35,9 +35,16 @@ where // Include only sinks that are arguments to an external API call, because these are the sinks we are most interested // in. sink instanceof ExternalAPIs::ExternalApiDataNode and - message = - sinkType + "\n" + - // Extract the needed metadata for this endpoint. - any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata)) + // If there are _any_ erroneous endpoints, return an error message for all rows. This will prevent us from + // accidentally running this query when there's a codex-generated data extension file in `java/ql/lib/ext`. + if not EndpointCharacteristics::erroneousEndpoints(_, _, _, _, _, true) + then + message = + sinkType + "\n" + + // Extract the needed metadata for this endpoint. + any(string metadata | EndpointCharacteristics::hasMetadata(sink, metadata)) + else + message = + "Error: There are erroneous endpoints! Please check whether there's a codex-generated data extension file in `java/ql/lib/ext`." ) select sink, message