Refactor framework-mode queries to make them more easily testable.

This commit is contained in:
Max Schaefer
2024-01-17 12:05:58 +00:00
parent adea805546
commit 6c47a5d5f9
5 changed files with 105 additions and 63 deletions

View File

@@ -372,7 +372,7 @@ class ApplicationModeMetadataExtractor extends string {
}
/**
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType.
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
*
* The other parameters record various other properties of interest.
*/
@@ -380,21 +380,20 @@ predicate isCandidate(
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
exists(ApplicationModeMetadataExtractor meta |
CharacteristicsImpl::isCandidate(endpoint, _) and
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
)
CharacteristicsImpl::isCandidate(endpoint, _) and
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
}
/**

View File

@@ -312,6 +312,85 @@ class FrameworkModeMetadataExtractor extends string {
}
}
/**
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
*
* The other parameters record various other properties of interest.
*/
predicate isCandidate(
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
) {
CharacteristicsImpl::isCandidate(endpoint, _) and
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
any(FrameworkModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
}
/**
* Holds if the given `endpoint` is a negative example for the `extensibleType`
* because of the `characteristic`.
*
* The other parameters record various other properties of interest.
*/
predicate isNegativeExample(
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
string type, string subtypes, string name, string signature, string input, string output,
string parameterName, string extensibleType
) {
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(AutomodelEndpointTypes::EndpointType tp |
tp = CharacteristicsImpl::getAPotentialType(endpoint)
|
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(FrameworkModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}
/**
* Holds if the given `endpoint` is a positive example for the `endpointType`.
*
* The other parameters record various other properties of interest.
*/
predicate isPositiveExample(
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
string signature, string input, string output, string parameterName, string extensibleType
) {
any(FrameworkModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
}
/*
* EndpointCharacteristic classes that are specific to Automodel for Java.
*/

View File

@@ -16,24 +16,12 @@ private import AutomodelFrameworkModeCharacteristics
private import AutomodelJavaUtil
from
Endpoint endpoint, FrameworkModeMetadataExtractor meta, DollarAtString package,
Endpoint endpoint, DollarAtString package,
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
DollarAtString input, DollarAtString output, DollarAtString parameterName,
DollarAtString alreadyAiModeled, DollarAtString extensibleType
where
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
CharacteristicsImpl::isCandidate(endpoint, _) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
includeAutomodelCandidate(package, type, name, signature)
isCandidate(endpoint, package, type, subtypes, name, signature, input, output, parameterName, extensibleType, alreadyAiModeled)
select endpoint,
"Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //

View File

@@ -14,37 +14,15 @@ private import AutomodelJavaUtil
from
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence,
DollarAtString message, FrameworkModeMetadataExtractor meta, DollarAtString package,
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
DollarAtString input, DollarAtString output, DollarAtString parameterName,
DollarAtString extensibleType
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString parameterName, DollarAtString extensibleType
where
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
) and
message = characteristic
isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
input, output, parameterName, extensibleType)
select endpoint,
message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
characteristic + "\nrelated locations: $@, $@." +
"\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
package, "package", //

View File

@@ -13,14 +13,12 @@ private import AutomodelEndpointTypes
private import AutomodelJavaUtil
from
Endpoint endpoint, EndpointType endpointType, FrameworkModeMetadataExtractor meta,
Endpoint endpoint, EndpointType endpointType,
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString parameterName, DollarAtString extensibleType
where
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output, parameterName, extensibleType)
select endpoint,
endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //