export related locations using `$@` placeholder notation

This commit is contained in:
Stephan Brandauer
2023-05-03 10:27:46 +02:00
parent f1644adca9
commit 09f3296134
5 changed files with 62 additions and 30 deletions

View File

@@ -3,6 +3,7 @@
*/
private import java
private import semmle.code.Location as Location
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.TaintTracking
private import semmle.code.java.security.PathCreation
@@ -23,10 +24,12 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
class RelatedLocation = Location::Top;
// Sanitizers are currently not modeled in MaD. TODO: check whether this has a large negative impact.
predicate isSanitizer(Endpoint e, EndpointType t) { none() }
string getLocationString(Endpoint e) { result = e.getLocation().toString() }
RelatedLocation toRelatedLocation(Endpoint e) { result = e.asParameter() }
predicate isKnownLabel(string label, string humanReadableLabel, EndpointType type) {
label = "read-file" and
@@ -87,11 +90,9 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
predicate hasMetadata(Endpoint e, string metadata) {
exists(
string package, string type, boolean subtypes, string name, string signature, string ext,
int input, string provenance, boolean isPublic, boolean isFinal, boolean isStatic,
string callableJavaDoc
int input, boolean isPublic, boolean isFinal, boolean isStatic
|
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic,
callableJavaDoc) and
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic) and
(if isFinal = true or isStatic = true then subtypes = false else subtypes = true) and
ext = "" and
/*
@@ -100,7 +101,6 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
* a certain annotation.
*/
provenance = "ai-generated" and
metadata =
"{" //
+ "'Package': '" + package //
@@ -109,14 +109,18 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
+ ", 'Name': '" + name //
+ ", 'ParamName': '" + e.toString() //
+ "', 'Signature': '" + signature //
+ "', 'Ext': '" + ext //
+ "', 'Argument index': " + input //
+ ", 'Provenance': '" + provenance //
+ "', 'Is public': " + isPublic //
+ "', 'Callable JavaDoc': '" + callableJavaDoc.replaceAll("'", "\"") //
+ "'}" // TODO: Why are the curly braces added twice?
)
}
/**
 * Gets the location related to endpoint `e` that is identified by `name`.
 *
 * Two names are handled here:
 * - "Callable-JavaDoc": the Javadoc of the callable enclosing `e`.
 * - "Class-JavaDoc": the Javadoc of the declaring type of the callable enclosing `e`.
 *
 * The predicate has no result for any other `name`, or when the requested Javadoc does not exist.
 */
RelatedLocation getRelatedLocation(Endpoint e, string name) {
name = "Callable-JavaDoc" and
result = e.getEnclosingCallable().(Documentable).getJavadoc()
or
name = "Class-JavaDoc" and
result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc()
}
}
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<CandidatesImpl>;
@@ -136,7 +140,7 @@ class Endpoint = CandidatesImpl::Endpoint;
*/
predicate hasMetadata(
Endpoint n, string package, string type, string name, string signature, int input,
boolean isFinal, boolean isStatic, boolean isPublic, string callableJavaDoc
boolean isFinal, boolean isStatic, boolean isPublic
) {
exists(Callable callable |
n.asParameter() = callable.getParameter(input) and
@@ -154,10 +158,7 @@ predicate hasMetadata(
) and
name = callable.getSourceDeclaration().getName() and
signature = ExternalFlow::paramsString(callable) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
(if callable.isPublic() then isPublic = true else isPublic = false) and
if exists(callable.(Documentable).getJavadoc())
then callableJavaDoc = callable.(Documentable).getJavadoc().toString()
else callableJavaDoc = ""
(if callable.isPublic() then isPublic = true else isPublic = false)
)
}

View File

@@ -14,26 +14,29 @@
import AutomodelEndpointCharacteristics
from Endpoint sinkCandidate, string message
from Endpoint endpoint, string message
where
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(sinkCandidate)
u.appliesToEndpoint(endpoint)
) and
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
not CharacteristicsImpl::isSink(sinkCandidate, _) and
not CharacteristicsImpl::isSink(endpoint, _) and
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
// a non-sink, and we surface only endpoints that have at least one such sink type.
message =
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
not CharacteristicsImpl::isKnownSink(sinkCandidate, sinkType) and
CharacteristicsImpl::isSinkCandidate(sinkCandidate, sinkType)
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
|
sinkType + ", "
) + "\n" +
// Extract the needed metadata for this endpoint.
any(string metadata | CharacteristicsImpl::hasMetadata(sinkCandidate, metadata))
select sinkCandidate, message
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
select endpoint, message + "\nrelated locations: $@, $@", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
"Callable-JavaDoc", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //

View File

@@ -33,4 +33,7 @@ where
characteristic + "\n" +
// Extract the needed metadata for this endpoint.
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
select endpoint, message
select endpoint, message + "\nrelated locations: $@, $@",
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
"Callable-JavaDoc", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //

View File

@@ -33,4 +33,7 @@ where
message =
"Error: There are erroneous endpoints! Please check whether there's a codex-generated data extension file in `java/ql/lib/ext`."
)
select sink, message
select sink, message + "\nrelated locations: $@, $@",
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Callable-JavaDoc"),
"Callable-JavaDoc", //
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Class-JavaDoc"), "Class-JavaDoc" //

View File

@@ -12,8 +12,20 @@ float mediumConfidence() { result = 0.6 }
* "not any of the other known endpoint types".
*/
signature module CandidateSig {
/**
* An endpoint is a potential candidate for modeling. This will typically be bound to the language's
* DataFlow node class, or a subtype thereof.
*/
class Endpoint;
/**
* A related location for an endpoint. This will typically be bound to the supertype of all AST nodes.
*/
class RelatedLocation;
/**
* A class label for an endpoint.
*/
class EndpointType;
/**
@@ -21,8 +33,7 @@ signature module CandidateSig {
*/
class NegativeEndpointType extends EndpointType;
/** Gets the string representing the file+range of the endpoint. */
string getLocationString(Endpoint e);
RelatedLocation toRelatedLocation(Endpoint e);
/**
* Defines what labels are known, and what endpoint type they correspond to.
@@ -56,6 +67,8 @@ signature module CandidateSig {
* The metadata will be passed on to the machine learning code by the extraction queries.
*/
predicate hasMetadata(Endpoint e, string metadata);
RelatedLocation getRelatedLocation(Endpoint e, string name);
}
/**
@@ -67,9 +80,9 @@ signature module CandidateSig {
* implementations of endpoint characteristics exported by this module.
*/
module SharedCharacteristics<CandidateSig Candidate> {
predicate isSink(Candidate::Endpoint e, string label) { Candidate::isSink(e, label) }
predicate isSink = Candidate::isSink/2;
predicate isNeutral(Candidate::Endpoint e) { Candidate::isNeutral(e) }
predicate isNeutral = Candidate::isNeutral/1;
/**
* Holds if `sink` is a known sink of type `endpointType`.
@@ -94,8 +107,17 @@ module SharedCharacteristics<CandidateSig Candidate> {
not exists(getAReasonSinkExcluded(candidateSink, sinkType))
}
predicate hasMetadata(Candidate::Endpoint n, string metadata) {
Candidate::hasMetadata(n, metadata)
predicate hasMetadata = Candidate::hasMetadata/2;
/**
 * Gets the related location of endpoint `e` that is identified by `name`, falling back to the
 * endpoint itself when there is none.
 *
 * If `Candidate::getRelatedLocation(e, name)` has a result, that result is used. Otherwise the
 * result is `Candidate::toRelatedLocation(e)`, i.e. the endpoint converted to a related location,
 * which stands in as a 'null' value.
 *
 * `name` must be bound by the caller (see the `bindingset` annotation).
 */
bindingset[name]
Candidate::RelatedLocation getRelatedLocationOrCandidate(Candidate::Endpoint e, string name) {
if exists(Candidate::getRelatedLocation(e, name))
then result = Candidate::getRelatedLocation(e, name)
else result = Candidate::toRelatedLocation(e)
}
/**