Refactor to avoid bad join order.

This commit is contained in:
Max Schaefer
2024-01-12 14:57:05 +00:00
parent 45ca301593
commit bb63fcde43
9 changed files with 74 additions and 62 deletions

View File

@@ -356,10 +356,10 @@ class ApplicationModeMetadataExtractor extends string {
predicate hasMetadata(
Endpoint e, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargsArray
string input, string output, string isVarargsArray, string alreadyAiModeled,
string extensibleType
) {
exists(Callable callable |
e.getCallable() = callable and
exists(Callable callable | e.getCallable() = callable |
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
package = callable.getDeclaringType().getPackage().getName() and
@@ -369,9 +369,17 @@ class ApplicationModeMetadataExtractor extends string {
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
name = callable.getName() and
signature = ExternalFlow::paramsString(callable) and
if e instanceof ImplicitVarargsArray
then isVarargsArray = "true"
else isVarargsArray = "false"
(
if e instanceof ImplicitVarargsArray
then isVarargsArray = "true"
else isVarargsArray = "false"
) and
extensibleType = e.getExtensibleType()
) and
(
not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
or
CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
)
}
}
@@ -416,7 +424,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
*/
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
override predicate appliesToEndpoint(Endpoint e) {
@@ -439,7 +448,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
* and its return value should not be considered a source.
*/
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
ExceptionCharacteristic() { this = "exception" }
override predicate appliesToEndpoint(Endpoint e) {

View File

@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
bindingset[limit]
private Endpoint getSampleForSignature(
int limit, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargs, string extensibleType
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
num_endpoints =
count(Endpoint e |
e.getExtensibleType() = extensibleType and
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType)
)
|
result =
rank[n](Endpoint e, Location loc |
loc = e.asTop().getLocation() and
e.getExtensibleType() = extensibleType and
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType)
|
e
order by
@@ -66,19 +66,15 @@ where
CharacteristicsImpl::isCandidate(endpoint, _) and
endpoint =
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
isVarargsArray, extensibleType) and
isVarargsArray, extensibleType, alreadyAiModeled) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
(
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
or
alreadyAiModeled.matches("%ai-%") and
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
alreadyAiModeled.matches(["", "%ai-%"]) and
includeAutomodelCandidate(package, type, name, signature)
select endpoint.asNode(),
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

View File

@@ -47,7 +47,6 @@ from
DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
where
endpoint = getSampleForCharacteristic(characteristic, 100) and
extensibleType = endpoint.getExtensibleType() and
// the node is know not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
characteristic.hasImplications(tp, false, _)
@@ -55,7 +54,8 @@ where
// the lowest confidence across all endpoint types should be at least highConfidence
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
confidence >= SharedCharacteristics::highConfidence() and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |

View File

@@ -18,9 +18,8 @@ from
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString isVarargsArray, DollarAtString extensibleType
where
extensibleType = endpoint.getExtensibleType() and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
// Extract positive examples of sinks belonging to the existing ATM query configurations.
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
select endpoint.asNode(),

View File

@@ -305,16 +305,27 @@ class FrameworkModeMetadataExtractor extends string {
predicate hasMetadata(
Endpoint e, string package, string type, string subtypes, string name, string signature,
string input, string output, string parameterName
string input, string output, string parameterName, string alreadyAiModeled,
string extensibleType
) {
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
name = e.getCallable().getName() and
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
package = e.getCallable().getDeclaringType().getPackage().getName() and
type = e.getCallable().getDeclaringType().getErasure().(RefType).nestedName() and
subtypes = AutomodelJavaUtil::considerSubtypes(e.getCallable()).toString() and
signature = ExternalFlow::paramsString(e.getCallable())
exists(Callable callable | e.getCallable() = callable |
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
package = callable.getDeclaringType().getPackage().getName() and
// we're using the erased types because the MaD convention is to not specify type parameters.
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
name = callable.getName() and
signature = ExternalFlow::paramsString(callable) and
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
e.getExtensibleType() = extensibleType
) and
(
not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
or
CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
)
}
}
@@ -332,7 +343,8 @@ class FrameworkModeMetadataExtractor extends string {
*
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
*/
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
override predicate appliesToEndpoint(Endpoint e) {
@@ -357,7 +369,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
*/
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
override predicate appliesToEndpoint(Endpoint e) {
@@ -380,7 +393,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
* and its return value should not be considered a source.
*/
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
ExceptionCharacteristic() { this = "exception" }
override predicate appliesToEndpoint(Endpoint e) {
@@ -396,7 +410,6 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSource
}
}
/**
* A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that
* are considered worth modeling.

View File

@@ -21,23 +21,18 @@ from
DollarAtString input, DollarAtString output, DollarAtString parameterName,
DollarAtString alreadyAiModeled, DollarAtString extensibleType
where
endpoint.getExtensibleType() = extensibleType and
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
CharacteristicsImpl::isCandidate(endpoint, _) and
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
(
not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
or
alreadyAiModeled.matches("%ai-%") and
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
includeAutomodelCandidate(package, type, name, signature)
select endpoint,
"Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

View File

@@ -19,7 +19,6 @@ from
DollarAtString input, DollarAtString output, DollarAtString parameterName,
DollarAtString extensibleType
where
endpoint.getExtensibleType() = extensibleType and
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
@@ -28,7 +27,8 @@ where
// the lowest confidence across all endpoint types should be at least highConfidence
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
confidence >= SharedCharacteristics::highConfidence() and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |

View File

@@ -18,9 +18,8 @@ from
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString parameterName, DollarAtString extensibleType
where
endpoint.getExtensibleType() = extensibleType and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
// Extract positive examples of sinks belonging to the existing ATM query configurations.
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
select endpoint,
endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

View File

@@ -272,7 +272,9 @@ module SharedCharacteristics<CandidateSig Candidate> {
/**
* A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type.
*/
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, NotASourceCharacteristic {
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic,
NotASourceCharacteristic
{
bindingset[this]
NeitherSourceNorSinkCharacteristic() { any() }
@@ -280,8 +282,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
) {
NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or
NotASourceCharacteristic.super
.hasImplications(endpointType, isPositiveIndicator, confidence)
NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
}
}
@@ -373,8 +374,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
/**
* A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
*/
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic
{
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
NeutralModelCharacteristic() { this = "known non-endpoint" }
override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }