Address review feedback.

This commit is contained in:
Max Schaefer
2024-01-17 14:27:24 +00:00
parent 90a4552c4f
commit 8614d7bddb
7 changed files with 44 additions and 41 deletions

View File

@@ -93,18 +93,6 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint
else none() // if both exist, it would be a summaryModel (not yet supported)
}
/**
* Gets a potential type of this endpoint to make sure that sources are
* associated with source types and sinks with sink types.
*
* If `getExtensibleType()` is "sourceModel", the result is any
* `AutomodelEndpointTypes::SourceType`; if it is "sinkModel", the result is
* any `AutomodelEndpointTypes::SinkType`. There is no result for any other
* extensible type.
*/
AutomodelEndpointTypes::EndpointType getAPotentialType() {
this.getExtensibleType() = "sourceModel" and
result instanceof AutomodelEndpointTypes::SourceType
or
this.getExtensibleType() = "sinkModel" and
result instanceof AutomodelEndpointTypes::SinkType
}
abstract string toString();
}

View File

@@ -45,22 +45,28 @@ predicate candidate(
string type, string subtypes, string name, string signature, string input, string output,
string isVarargsArray, string extensibleType
) {
// the node is know not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
// the node is known not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2)
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}

View File

@@ -89,18 +89,6 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {
abstract string getExtensibleType();
/**
* Gets a potential type of this endpoint to make sure that sources are
* associated with source types and sinks with sink types.
*
* If `getExtensibleType()` is "sourceModel", the result is any
* `AutomodelEndpointTypes::SourceType`; if it is "sinkModel", the result is
* any `AutomodelEndpointTypes::SinkType`. There is no result for any other
* extensible type.
*/
AutomodelEndpointTypes::EndpointType getAPotentialType() {
this.getExtensibleType() = "sourceModel" and
result instanceof AutomodelEndpointTypes::SourceType
or
this.getExtensibleType() = "sinkModel" and
result instanceof AutomodelEndpointTypes::SinkType
}
string toString() { result = this.asTop().toString() }
Location getLocation() { result = this.asTop().getLocation() }

View File

@@ -21,20 +21,26 @@ from
where
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2.hasImplications(endpoint.getAPotentialType(), true, confidence2)
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
) and
message = characteristic
select endpoint,

View File

@@ -17,7 +17,10 @@ signature module CandidateSig {
* DataFlow node class, or a subtype thereof.
*/
class Endpoint {
EndpointType getAPotentialType();
/**
* Gets the kind of this endpoint, either "sourceModel" or "sinkModel".
*/
string getExtensibleType();
}
/**
@@ -122,6 +125,18 @@ module SharedCharacteristics<CandidateSig Candidate> {
characteristic.hasImplications(endpointType, true, maximalConfidence())
}
/**
* Gets a potential type of `endpoint`, to make sure that sources are
* associated with source types and sinks with sink types.
*
* If `endpoint.getExtensibleType()` is "sourceModel", the result is any
* `Candidate::SourceType`; if it is "sinkModel", the result is any
* `Candidate::SinkType`. There is no result for any other extensible type.
*/
Candidate::EndpointType getAPotentialType(Candidate::Endpoint endpoint) {
endpoint.getExtensibleType() = "sourceModel" and
result instanceof Candidate::SourceType
or
endpoint.getExtensibleType() = "sinkModel" and
result instanceof Candidate::SinkType
}
/**
* Holds if the given `endpoint` should be considered as a candidate for type `endpointType`,
* and classified by the ML model.
@@ -129,7 +144,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
* A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics.
*/
predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) {
endpointType = endpoint.getAPotentialType() and
endpointType = getAPotentialType(endpoint) and
not exists(getAnExcludingCharacteristic(endpoint, endpointType))
}
@@ -375,7 +390,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
* A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
*/
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
NeutralModelCharacteristic() { this = "known non-endpoint" }
NeutralModelCharacteristic() { this = "known non-sink" }
override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }
}