mirror of
https://github.com/github/codeql.git
synced 2026-04-26 01:05:15 +02:00
export related locations using $@ notation
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.Location as Location
|
||||
private import semmle.code.java.dataflow.DataFlow
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
private import semmle.code.java.security.PathCreation
|
||||
@@ -23,10 +24,12 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
|
||||
class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
|
||||
|
||||
class RelatedLocation = Location::Top;
|
||||
|
||||
// Sanitizers are currently not modeled in MaD, so this predicate never holds for any endpoint.
// TODO: check if this has a large negative impact.
|
||||
predicate isSanitizer(Endpoint e, EndpointType t) { none() }
|
||||
|
||||
string getLocationString(Endpoint e) { result = e.getLocation().toString() }
|
||||
RelatedLocation toRelatedLocation(Endpoint e) { result = e.asParameter() }
|
||||
|
||||
predicate isKnownLabel(string label, string humanReadableLabel, EndpointType type) {
|
||||
label = "read-file" and
|
||||
@@ -87,11 +90,9 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
predicate hasMetadata(Endpoint e, string metadata) {
|
||||
exists(
|
||||
string package, string type, boolean subtypes, string name, string signature, string ext,
|
||||
int input, string provenance, boolean isPublic, boolean isFinal, boolean isStatic,
|
||||
string callableJavaDoc
|
||||
int input, boolean isPublic, boolean isFinal, boolean isStatic
|
||||
|
|
||||
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic,
|
||||
callableJavaDoc) and
|
||||
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic) and
|
||||
(if isFinal = true or isStatic = true then subtypes = false else subtypes = true) and
|
||||
ext = "" and
|
||||
/*
|
||||
@@ -100,7 +101,6 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
* a certain annotation.
|
||||
*/
|
||||
|
||||
provenance = "ai-generated" and
|
||||
metadata =
|
||||
"{" //
|
||||
+ "'Package': '" + package //
|
||||
@@ -109,14 +109,18 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
+ ", 'Name': '" + name //
|
||||
+ ", 'ParamName': '" + e.toString() //
|
||||
+ "', 'Signature': '" + signature //
|
||||
+ "', 'Ext': '" + ext //
|
||||
+ "', 'Argument index': " + input //
|
||||
+ ", 'Provenance': '" + provenance //
|
||||
+ "', 'Is public': " + isPublic //
|
||||
+ "', 'Callable JavaDoc': '" + callableJavaDoc.replaceAll("'", "\"") //
|
||||
+ "'}" // TODO: Why are the curly braces added twice?
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Gets the Javadoc element related to the endpoint `e` that is selected by `name`:
 * - `"Class-JavaDoc"`: the Javadoc of the type declaring the callable that encloses `e`;
 * - `"Callable-JavaDoc"`: the Javadoc of the callable that encloses `e`.
 *
 * Has no result for any other `name`, or when the selected element has no Javadoc.
 */
RelatedLocation getRelatedLocation(Endpoint e, string name) {
  // Javadoc attached to the declaring type of the enclosing callable.
  result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc() and
  name = "Class-JavaDoc"
  or
  // Javadoc attached to the enclosing callable itself.
  result = e.getEnclosingCallable().(Documentable).getJavadoc() and
  name = "Callable-JavaDoc"
}
|
||||
}
|
||||
|
||||
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<CandidatesImpl>;
|
||||
@@ -136,7 +140,7 @@ class Endpoint = CandidatesImpl::Endpoint;
|
||||
*/
|
||||
predicate hasMetadata(
|
||||
Endpoint n, string package, string type, string name, string signature, int input,
|
||||
boolean isFinal, boolean isStatic, boolean isPublic, string callableJavaDoc
|
||||
boolean isFinal, boolean isStatic, boolean isPublic
|
||||
) {
|
||||
exists(Callable callable |
|
||||
n.asParameter() = callable.getParameter(input) and
|
||||
@@ -154,10 +158,7 @@ predicate hasMetadata(
|
||||
) and
|
||||
name = callable.getSourceDeclaration().getName() and
|
||||
signature = ExternalFlow::paramsString(callable) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
|
||||
(if callable.isPublic() then isPublic = true else isPublic = false) and
|
||||
if exists(callable.(Documentable).getJavadoc())
|
||||
then callableJavaDoc = callable.(Documentable).getJavadoc().toString()
|
||||
else callableJavaDoc = ""
|
||||
(if callable.isPublic() then isPublic = true else isPublic = false)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -14,26 +14,29 @@
|
||||
|
||||
import AutomodelEndpointCharacteristics
|
||||
|
||||
from Endpoint sinkCandidate, string message
|
||||
from Endpoint endpoint, string message
|
||||
where
|
||||
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
|
||||
u.appliesToEndpoint(sinkCandidate)
|
||||
u.appliesToEndpoint(endpoint)
|
||||
) and
|
||||
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
|
||||
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
|
||||
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
|
||||
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
|
||||
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
|
||||
not CharacteristicsImpl::isSink(sinkCandidate, _) and
|
||||
not CharacteristicsImpl::isSink(endpoint, _) and
|
||||
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
|
||||
// a non-sink, and we surface only endpoints that have at least one such sink type.
|
||||
message =
|
||||
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
|
||||
not CharacteristicsImpl::isKnownSink(sinkCandidate, sinkType) and
|
||||
CharacteristicsImpl::isSinkCandidate(sinkCandidate, sinkType)
|
||||
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
|
||||
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
|
||||
|
|
||||
sinkType + ", "
|
||||
) + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string metadata | CharacteristicsImpl::hasMetadata(sinkCandidate, metadata))
|
||||
select sinkCandidate, message
|
||||
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
|
||||
select endpoint, message + "\nrelated locations: $@, $@", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //
|
||||
|
||||
@@ -33,4 +33,7 @@ where
|
||||
characteristic + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
|
||||
select endpoint, message
|
||||
select endpoint, message + "\nrelated locations: $@, $@",
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //
|
||||
|
||||
@@ -33,4 +33,7 @@ where
|
||||
message =
|
||||
"Error: There are erroneous endpoints! Please check whether there's a codex-generated data extension file in `java/ql/lib/ext`."
|
||||
)
|
||||
select sink, message
|
||||
select sink, message + "\nrelated locations: $@, $@",
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Class-JavaDoc"), "Class-JavaDoc" //
|
||||
|
||||
@@ -12,8 +12,20 @@ float mediumConfidence() { result = 0.6 }
|
||||
* "not any of the other known endpoint types".
|
||||
*/
|
||||
signature module CandidateSig {
|
||||
/**
|
||||
* An endpoint is a potential candidate for modelling. This will typically be bound to the language's
|
||||
* DataFlow node class, or a subtype thereof.
|
||||
*/
|
||||
class Endpoint;
|
||||
|
||||
/**
|
||||
* A related location for an endpoint. This will typically be bound to the supertype of all AST nodes.
|
||||
*/
|
||||
class RelatedLocation;
|
||||
|
||||
/**
|
||||
* A class label for an endpoint.
|
||||
*/
|
||||
class EndpointType;
|
||||
|
||||
/**
|
||||
@@ -21,8 +33,7 @@ signature module CandidateSig {
|
||||
*/
|
||||
class NegativeEndpointType extends EndpointType;
|
||||
|
||||
/** Gets the string representing the file+range of the endpoint. */
|
||||
string getLocationString(Endpoint e);
|
||||
/**
 * Gets the `RelatedLocation` that stands for the endpoint `e` itself.
 * `SharedCharacteristics::getRelatedLocationOrCandidate` falls back to this value
 * when `getRelatedLocation` has no result for the requested name.
 */
RelatedLocation toRelatedLocation(Endpoint e);
|
||||
|
||||
/**
|
||||
* Defines what labels are known, and what endpoint type they correspond to.
|
||||
@@ -56,6 +67,8 @@ signature module CandidateSig {
|
||||
* The metadata will be passed on to the machine learning code by the extraction queries.
|
||||
*/
|
||||
predicate hasMetadata(Endpoint e, string metadata);
|
||||
|
||||
/**
 * Gets the related location of the endpoint `e` that is identified by `name`, if any.
 * Implementations decide which names they support.
 */
RelatedLocation getRelatedLocation(Endpoint e, string name);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -67,9 +80,9 @@ signature module CandidateSig {
|
||||
* implementations of endpoint characteristics exported by this module.
|
||||
*/
|
||||
module SharedCharacteristics<CandidateSig Candidate> {
|
||||
predicate isSink(Candidate::Endpoint e, string label) { Candidate::isSink(e, label) }
|
||||
predicate isSink = Candidate::isSink/2;
|
||||
|
||||
predicate isNeutral(Candidate::Endpoint e) { Candidate::isNeutral(e) }
|
||||
predicate isNeutral = Candidate::isNeutral/1;
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a known sink of type `endpointType`.
|
||||
@@ -94,8 +107,17 @@ module SharedCharacteristics<CandidateSig Candidate> {
|
||||
not exists(getAReasonSinkExcluded(candidateSink, sinkType))
|
||||
}
|
||||
|
||||
predicate hasMetadata(Candidate::Endpoint n, string metadata) {
|
||||
Candidate::hasMetadata(n, metadata)
|
||||
predicate hasMetadata = Candidate::hasMetadata/2;
|
||||
|
||||
/**
|
||||
* Gets the related location called `name` for the endpoint `e`, if one exists.
|
||||
* Otherwise, gets the endpoint itself (converted with `toRelatedLocation`) as a placeholder 'null' value.
|
||||
*/
|
||||
bindingset[name]
Candidate::RelatedLocation getRelatedLocationOrCandidate(Candidate::Endpoint e, string name) {
  // Preferred case: the candidate implementation knows a related location with this name.
  result = Candidate::getRelatedLocation(e, name)
  or
  // Fallback: no such related location exists, so use the endpoint itself as a placeholder.
  not exists(Candidate::getRelatedLocation(e, name)) and
  result = Candidate::toRelatedLocation(e)
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user