Java: automodel application mode: add isVarargsArray metadata value

This commit is contained in:
Stephan Brandauer
2023-08-07 11:04:44 +02:00
parent 5abf7769a7
commit 0781cb78e8
7 changed files with 33 additions and 23 deletions

View File

@@ -213,7 +213,7 @@ class ApplicationModeMetadataExtractor extends string {
predicate hasMetadata(
Endpoint e, string package, string type, string subtypes, string name, string signature,
string input
string input, string isVarargsArray
) {
exists(Callable callable |
e.getCall().getCallee() = callable and
@@ -224,7 +224,10 @@ class ApplicationModeMetadataExtractor extends string {
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
name = callable.getName() and
signature = ExternalFlow::paramsString(callable)
signature = ExternalFlow::paramsString(callable) and
if e instanceof ImplicitVarargsArray
then isVarargsArray = "true"
else isVarargsArray = "false"
)
}
}

View File

@@ -25,16 +25,18 @@ private import AutomodelJavaUtil
bindingset[limit]
private Endpoint getSampleForSignature(
int limit, string package, string type, string subtypes, string name, string signature,
string input
string input, string isVarargs
) {
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
num_endpoints =
count(Endpoint e | meta.hasMetadata(e, package, type, subtypes, name, signature, input))
count(Endpoint e |
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
)
|
result =
rank[n](Endpoint e, Location loc |
loc = e.asTop().getLocation() and
meta.hasMetadata(e, package, type, subtypes, name, signature, input)
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
|
e
order by
@@ -53,19 +55,20 @@ private Endpoint getSampleForSignature(
from
Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package,
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
DollarAtString input
DollarAtString input, DollarAtString isVarargsArray
where
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
endpoint = getSampleForSignature(9, package, type, subtypes, name, signature, input) and
endpoint =
getSampleForSignature(9, package, type, subtypes, name, signature, input, isVarargsArray) and
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
not CharacteristicsImpl::isSink(endpoint, _, _) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
// a non-sink, and we surface only endpoints that have at least one such sink type.
message =
@@ -83,4 +86,5 @@ select endpoint.asNode(),
subtypes, "subtypes", //
name, "name", // method name
signature, "signature", //
input, "input" //
input, "input", //
isVarargsArray, "isVarargsArray"

View File

@@ -43,7 +43,8 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
from
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type,
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
DollarAtString isVarargsArray
where
endpoint = getSampleForCharacteristic(characteristic, 100) and
confidence >= SharedCharacteristics::highConfidence() and
@@ -51,7 +52,7 @@ where
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
// certain about in the prompt.
not erroneousEndpoints(endpoint, _, _, _, _, false) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
// treated by the actual query as a sanitizer, since the final logic is something like
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
@@ -71,4 +72,5 @@ select endpoint.asNode(),
subtypes, "subtypes", //
name, "name", //
signature, "signature", //
input, "input" //
input, "input", //
isVarargsArray, "isVarargsArray" //

View File

@@ -15,12 +15,12 @@ private import AutomodelJavaUtil
from
Endpoint endpoint, SinkType sinkType, ApplicationModeMetadataExtractor meta,
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
DollarAtString signature, DollarAtString input
DollarAtString signature, DollarAtString input, DollarAtString isVarargsArray
where
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
// certain about in the prompt.
not erroneousEndpoints(endpoint, _, _, _, _, false) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
// Extract positive examples of sinks belonging to the existing ATM query configurations.
CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
@@ -32,4 +32,5 @@ select endpoint.asNode(),
subtypes, "subtypes", //
name, "name", //
signature, "signature", //
input, "input" //
input, "input", //
isVarargsArray, "isVarargsArray"