mirror of
https://github.com/github/codeql.git
synced 2026-04-22 23:35:14 +02:00
Refactor to avoid bad join order.
This commit is contained in:
@@ -356,10 +356,10 @@ class ApplicationModeMetadataExtractor extends string {
|
||||
|
||||
predicate hasMetadata(
|
||||
Endpoint e, string package, string type, string subtypes, string name, string signature,
|
||||
string input, string output, string isVarargsArray
|
||||
string input, string output, string isVarargsArray, string alreadyAiModeled,
|
||||
string extensibleType
|
||||
) {
|
||||
exists(Callable callable |
|
||||
e.getCallable() = callable and
|
||||
exists(Callable callable | e.getCallable() = callable |
|
||||
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
|
||||
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
|
||||
package = callable.getDeclaringType().getPackage().getName() and
|
||||
@@ -369,9 +369,17 @@ class ApplicationModeMetadataExtractor extends string {
|
||||
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
|
||||
name = callable.getName() and
|
||||
signature = ExternalFlow::paramsString(callable) and
|
||||
if e instanceof ImplicitVarargsArray
|
||||
then isVarargsArray = "true"
|
||||
else isVarargsArray = "false"
|
||||
(
|
||||
if e instanceof ImplicitVarargsArray
|
||||
then isVarargsArray = "true"
|
||||
else isVarargsArray = "false"
|
||||
) and
|
||||
extensibleType = e.getExtensibleType()
|
||||
) and
|
||||
(
|
||||
not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
|
||||
or
|
||||
CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -416,7 +424,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
|
||||
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
|
||||
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
|
||||
*/
|
||||
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
|
||||
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
|
||||
{
|
||||
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
|
||||
|
||||
override predicate appliesToEndpoint(Endpoint e) {
|
||||
@@ -439,7 +448,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
|
||||
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
|
||||
* and its return value should not be considered a source.
|
||||
*/
|
||||
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
|
||||
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
|
||||
{
|
||||
ExceptionCharacteristic() { this = "exception" }
|
||||
|
||||
override predicate appliesToEndpoint(Endpoint e) {
|
||||
|
||||
@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
|
||||
bindingset[limit]
|
||||
private Endpoint getSampleForSignature(
|
||||
int limit, string package, string type, string subtypes, string name, string signature,
|
||||
string input, string output, string isVarargs, string extensibleType
|
||||
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
|
||||
) {
|
||||
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
|
||||
num_endpoints =
|
||||
count(Endpoint e |
|
||||
e.getExtensibleType() = extensibleType and
|
||||
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
|
||||
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
|
||||
alreadyAiModeled, extensibleType)
|
||||
)
|
||||
|
|
||||
result =
|
||||
rank[n](Endpoint e, Location loc |
|
||||
loc = e.asTop().getLocation() and
|
||||
e.getExtensibleType() = extensibleType and
|
||||
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
|
||||
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
|
||||
alreadyAiModeled, extensibleType)
|
||||
|
|
||||
e
|
||||
order by
|
||||
@@ -66,19 +66,15 @@ where
|
||||
CharacteristicsImpl::isCandidate(endpoint, _) and
|
||||
endpoint =
|
||||
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
|
||||
isVarargsArray, extensibleType) and
|
||||
isVarargsArray, extensibleType, alreadyAiModeled) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
|
||||
isVarargsArray, alreadyAiModeled, extensibleType) and
|
||||
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
|
||||
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
|
||||
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
|
||||
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
|
||||
// types, and we don't need to reexamine it.
|
||||
(
|
||||
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
|
||||
or
|
||||
alreadyAiModeled.matches("%ai-%") and
|
||||
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
|
||||
) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
|
||||
alreadyAiModeled.matches(["", "%ai-%"]) and
|
||||
includeAutomodelCandidate(package, type, name, signature)
|
||||
select endpoint.asNode(),
|
||||
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
|
||||
|
||||
@@ -47,7 +47,6 @@ from
|
||||
DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
|
||||
where
|
||||
endpoint = getSampleForCharacteristic(characteristic, 100) and
|
||||
extensibleType = endpoint.getExtensibleType() and
|
||||
// the node is know not to be an endpoint of any appropriate type
|
||||
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
|
||||
characteristic.hasImplications(tp, false, _)
|
||||
@@ -55,7 +54,8 @@ where
|
||||
// the lowest confidence across all endpoint types should be at least highConfidence
|
||||
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
|
||||
confidence >= SharedCharacteristics::highConfidence() and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
|
||||
isVarargsArray, _, extensibleType) and
|
||||
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
|
||||
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
|
||||
not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |
|
||||
|
||||
@@ -18,9 +18,8 @@ from
|
||||
DollarAtString signature, DollarAtString input, DollarAtString output,
|
||||
DollarAtString isVarargsArray, DollarAtString extensibleType
|
||||
where
|
||||
extensibleType = endpoint.getExtensibleType() and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
|
||||
// Extract positive examples of sinks belonging to the existing ATM query configurations.
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
|
||||
isVarargsArray, _, extensibleType) and
|
||||
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
|
||||
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
|
||||
select endpoint.asNode(),
|
||||
|
||||
@@ -305,16 +305,27 @@ class FrameworkModeMetadataExtractor extends string {
|
||||
|
||||
predicate hasMetadata(
|
||||
Endpoint e, string package, string type, string subtypes, string name, string signature,
|
||||
string input, string output, string parameterName
|
||||
string input, string output, string parameterName, string alreadyAiModeled,
|
||||
string extensibleType
|
||||
) {
|
||||
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
|
||||
name = e.getCallable().getName() and
|
||||
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
|
||||
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
|
||||
package = e.getCallable().getDeclaringType().getPackage().getName() and
|
||||
type = e.getCallable().getDeclaringType().getErasure().(RefType).nestedName() and
|
||||
subtypes = AutomodelJavaUtil::considerSubtypes(e.getCallable()).toString() and
|
||||
signature = ExternalFlow::paramsString(e.getCallable())
|
||||
exists(Callable callable | e.getCallable() = callable |
|
||||
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
|
||||
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
|
||||
package = callable.getDeclaringType().getPackage().getName() and
|
||||
// we're using the erased types because the MaD convention is to not specify type parameters.
|
||||
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
|
||||
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
|
||||
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
|
||||
name = callable.getName() and
|
||||
signature = ExternalFlow::paramsString(callable) and
|
||||
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
|
||||
e.getExtensibleType() = extensibleType
|
||||
) and
|
||||
(
|
||||
not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
|
||||
or
|
||||
CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -332,7 +343,8 @@ class FrameworkModeMetadataExtractor extends string {
|
||||
*
|
||||
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
|
||||
*/
|
||||
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
|
||||
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
|
||||
{
|
||||
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
|
||||
|
||||
override predicate appliesToEndpoint(Endpoint e) {
|
||||
@@ -357,7 +369,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
|
||||
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
|
||||
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
|
||||
*/
|
||||
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
|
||||
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
|
||||
{
|
||||
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
|
||||
|
||||
override predicate appliesToEndpoint(Endpoint e) {
|
||||
@@ -380,7 +393,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
|
||||
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
|
||||
* and its return value should not be considered a source.
|
||||
*/
|
||||
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
|
||||
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
|
||||
{
|
||||
ExceptionCharacteristic() { this = "exception" }
|
||||
|
||||
override predicate appliesToEndpoint(Endpoint e) {
|
||||
@@ -396,7 +410,6 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSource
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that
|
||||
* are considered worth modeling.
|
||||
|
||||
@@ -21,23 +21,18 @@ from
|
||||
DollarAtString input, DollarAtString output, DollarAtString parameterName,
|
||||
DollarAtString alreadyAiModeled, DollarAtString extensibleType
|
||||
where
|
||||
endpoint.getExtensibleType() = extensibleType and
|
||||
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
|
||||
u.appliesToEndpoint(endpoint)
|
||||
) and
|
||||
CharacteristicsImpl::isCandidate(endpoint, _) and
|
||||
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
|
||||
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
|
||||
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
|
||||
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
|
||||
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
|
||||
(
|
||||
not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
|
||||
or
|
||||
alreadyAiModeled.matches("%ai-%") and
|
||||
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
|
||||
) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
|
||||
alreadyAiModeled, extensibleType) and
|
||||
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
|
||||
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
|
||||
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
|
||||
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
|
||||
// types, and we don't need to reexamine it.
|
||||
alreadyAiModeled.matches(["", "%ai-%"]) and
|
||||
includeAutomodelCandidate(package, type, name, signature)
|
||||
select endpoint,
|
||||
"Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
|
||||
|
||||
@@ -19,7 +19,6 @@ from
|
||||
DollarAtString input, DollarAtString output, DollarAtString parameterName,
|
||||
DollarAtString extensibleType
|
||||
where
|
||||
endpoint.getExtensibleType() = extensibleType and
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
// the node is known not to be an endpoint of any appropriate type
|
||||
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
|
||||
@@ -28,7 +27,8 @@ where
|
||||
// the lowest confidence across all endpoint types should be at least highConfidence
|
||||
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
|
||||
confidence >= SharedCharacteristics::highConfidence() and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
|
||||
_, extensibleType) and
|
||||
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
|
||||
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
|
||||
not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |
|
||||
|
||||
@@ -18,9 +18,8 @@ from
|
||||
DollarAtString signature, DollarAtString input, DollarAtString output,
|
||||
DollarAtString parameterName, DollarAtString extensibleType
|
||||
where
|
||||
endpoint.getExtensibleType() = extensibleType and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
|
||||
// Extract positive examples of sinks belonging to the existing ATM query configurations.
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
|
||||
_, extensibleType) and
|
||||
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
|
||||
select endpoint,
|
||||
endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
|
||||
|
||||
@@ -272,7 +272,9 @@ module SharedCharacteristics<CandidateSig Candidate> {
|
||||
/**
|
||||
* A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type.
|
||||
*/
|
||||
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, NotASourceCharacteristic {
|
||||
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic,
|
||||
NotASourceCharacteristic
|
||||
{
|
||||
bindingset[this]
|
||||
NeitherSourceNorSinkCharacteristic() { any() }
|
||||
|
||||
@@ -280,8 +282,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
|
||||
Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
|
||||
) {
|
||||
NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or
|
||||
NotASourceCharacteristic.super
|
||||
.hasImplications(endpointType, isPositiveIndicator, confidence)
|
||||
NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -373,8 +374,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
|
||||
/**
|
||||
* A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
|
||||
*/
|
||||
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic
|
||||
{
|
||||
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
|
||||
NeutralModelCharacteristic() { this = "known non-endpoint" }
|
||||
|
||||
override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }
|
||||
|
||||
Reference in New Issue
Block a user