mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
add typecheckable mechanism to enforce minimal set of metadata
This commit is contained in:
@@ -12,9 +12,11 @@
|
||||
* @tags internal automodel extract candidates
|
||||
*/
|
||||
|
||||
import AutomodelEndpointCharacteristics
|
||||
private import AutomodelFrameworkModeCharacteristics
|
||||
|
||||
from Endpoint endpoint, string message
|
||||
from
|
||||
Endpoint endpoint, string message, MetadataExtractor meta, string package, string type,
|
||||
boolean subtypes, string name, string signature, int input
|
||||
where
|
||||
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
|
||||
u.appliesToEndpoint(endpoint)
|
||||
@@ -25,18 +27,20 @@ where
|
||||
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
|
||||
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
|
||||
not CharacteristicsImpl::isSink(endpoint, _) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
|
||||
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
|
||||
// a non-sink, and we surface only endpoints that have at least one such sink type.
|
||||
message =
|
||||
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
|
||||
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
|
||||
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
|
||||
|
|
||||
sinkType + ", "
|
||||
) + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
|
||||
select endpoint, message + "\nrelated locations: $@, $@.", //
|
||||
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
|
||||
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
|
||||
|
|
||||
sinkType + ", "
|
||||
)
|
||||
select endpoint,
|
||||
message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //
|
||||
"Callable-JavaDoc", CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"),
|
||||
"Class-JavaDoc", //
|
||||
package, "package", type, "type", subtypes.toString(), "subtypes", name, "name", signature,
|
||||
"signature", input.toString(), "input" //
|
||||
|
||||
@@ -8,10 +8,13 @@
|
||||
* @tags internal automodel extract examples negative
|
||||
*/
|
||||
|
||||
import AutomodelEndpointCharacteristics
|
||||
import AutomodelEndpointTypes
|
||||
private import AutomodelFrameworkModeCharacteristics
|
||||
private import AutomodelEndpointTypes
|
||||
|
||||
from Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message
|
||||
from
|
||||
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
|
||||
MetadataExtractor meta, string package, string type, boolean subtypes, string name,
|
||||
string signature, int input
|
||||
where
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
confidence >= SharedCharacteristics::highConfidence() and
|
||||
@@ -19,6 +22,7 @@ where
|
||||
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
|
||||
// certain about in the prompt.
|
||||
not erroneousEndpoints(endpoint, _, _, _, _, false) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
|
||||
@@ -29,11 +33,11 @@ where
|
||||
confidence2 >= SharedCharacteristics::maximalConfidence() and
|
||||
characteristic2.hasImplications(positiveType, true, confidence2)
|
||||
) and
|
||||
message =
|
||||
characteristic + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
|
||||
select endpoint, message + "\nrelated locations: $@, $@.",
|
||||
message = characteristic
|
||||
select endpoint,
|
||||
message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //
|
||||
"Callable-JavaDoc", CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"),
|
||||
"Class-JavaDoc", //
|
||||
package, "package", type, "type", subtypes.toString(), "subtypes", name, "name", signature,
|
||||
"signature", input.toString(), "input" //
|
||||
|
||||
@@ -8,25 +8,23 @@
|
||||
* @tags internal automodel extract examples positive
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import AutomodelEndpointCharacteristics
|
||||
private import AutomodelFrameworkModeCharacteristics
|
||||
private import AutomodelEndpointTypes
|
||||
|
||||
from Endpoint sink, SinkType sinkType, string message
|
||||
from
|
||||
Endpoint endpoint, SinkType sinkType, MetadataExtractor meta, string package, string type,
|
||||
boolean subtypes, string name, string signature, int input
|
||||
where
|
||||
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
|
||||
// certain about in the prompt.
|
||||
not erroneousEndpoints(sink, _, _, _, _, false) and
|
||||
not erroneousEndpoints(endpoint, _, _, _, _, false) and
|
||||
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
|
||||
// Extract positive examples of sinks belonging to the existing ATM query configurations.
|
||||
(
|
||||
CharacteristicsImpl::isKnownSink(sink, sinkType) and
|
||||
message =
|
||||
sinkType + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string metadata | CharacteristicsImpl::hasMetadata(sink, metadata))
|
||||
)
|
||||
select sink, message + "\nrelated locations: $@, $@.",
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Class-JavaDoc"), "Class-JavaDoc" //
|
||||
CharacteristicsImpl::isKnownSink(endpoint, sinkType)
|
||||
select endpoint,
|
||||
sinkType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
|
||||
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
|
||||
"Callable-JavaDoc", CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"),
|
||||
"Class-JavaDoc", //
|
||||
package, "package", type, "type", subtypes.toString(), "subtypes", name, "name", signature,
|
||||
"signature", input.toString(), "input" //
|
||||
|
||||
@@ -17,6 +17,22 @@ private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclus
|
||||
import AutomodelSharedCharacteristics as SharedCharacteristics
|
||||
import AutomodelEndpointTypes as AutomodelEndpointTypes
|
||||
|
||||
/** Gets the callable that encloses the parameter node `e`. */
Callable getCallable(DataFlow::ParameterNode e) { e.getEnclosingCallable() = result }
|
||||
|
||||
/**
|
||||
* A metadata extractor. Any Java extraction mode needs to implement exactly
|
||||
* one instance of this class.
|
||||
*/
|
||||
abstract class MetadataExtractor extends string {
|
||||
// The characteristic predicate below places no restriction on `this` (its body is `any()`);
// the `bindingset[this]` annotation is declared on that characteristic predicate.
bindingset[this]
|
||||
MetadataExtractor() { any() }
|
||||
|
||||
// Relates the parameter node `e` to the tuple
// (`package`, `type`, `subtypes`, `name`, `signature`, `input`).
// Abstract: every concrete subclass has to provide the implementation.
abstract predicate hasMetadata(
|
||||
DataFlow::ParameterNode e, string package, string type, boolean subtypes, string name,
|
||||
string signature, int input
|
||||
);
|
||||
}
|
||||
|
||||
module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
class Endpoint = DataFlow::ParameterNode;
|
||||
|
||||
@@ -87,26 +103,6 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
exists(int paramIdx | e.isParameterOf(_, paramIdx) | input = "Argument[" + paramIdx + "]")
|
||||
}
|
||||
|
||||
predicate hasMetadata(Endpoint e, string metadata) {
|
||||
exists(
|
||||
string package, string type, boolean subtypes, string name, string signature, int input,
|
||||
boolean isPublic, boolean isFinal, boolean isStatic
|
||||
|
|
||||
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic) and
|
||||
(if isFinal = true or isStatic = true then subtypes = false else subtypes = true) and
|
||||
metadata =
|
||||
"{" //
|
||||
+ "'Package': '" + package //
|
||||
+ "', 'Type': '" + type //
|
||||
+ "', 'Subtypes': " + subtypes //
|
||||
+ ", 'Name': '" + name //
|
||||
+ ", 'ParamName': '" + e.toString() //
|
||||
+ "', 'Signature': '" + signature //
|
||||
+ "', 'Argument index': " + input //
|
||||
+ "'}" // TODO: Why are the curly braces added twice?
|
||||
)
|
||||
}
|
||||
|
||||
RelatedLocation getRelatedLocation(Endpoint e, string name) {
|
||||
name = "Callable-JavaDoc" and
|
||||
result = getCallable(e).(Documentable).getJavadoc()
|
||||
@@ -116,8 +112,6 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
|
||||
}
|
||||
}
|
||||
|
||||
Callable getCallable(Endpoint e) { result = e.getEnclosingCallable() }
|
||||
|
||||
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<FrameworkCandidatesImpl>;
|
||||
|
||||
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
|
||||
@@ -129,32 +123,32 @@ class Endpoint = FrameworkCandidatesImpl::Endpoint;
|
||||
*/
|
||||
|
||||
/**
|
||||
* Holds if `n` has the given metadata.
|
||||
*
|
||||
* This is a helper function to extract and export needed information about each endpoint.
|
||||
* A MetadataExtractor that extracts metadata for framework mode.
|
||||
*/
|
||||
predicate hasMetadata(
|
||||
Endpoint n, string package, string type, string name, string signature, int input,
|
||||
boolean isFinal, boolean isStatic, boolean isPublic
|
||||
) {
|
||||
exists(Callable callable |
|
||||
n.asParameter() = callable.getParameter(input) and
|
||||
package = callable.getDeclaringType().getPackage().getName() and
|
||||
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
|
||||
(
|
||||
if callable.isStatic() or callable.getDeclaringType().isStatic()
|
||||
then isStatic = true
|
||||
else isStatic = false
|
||||
) and
|
||||
(
|
||||
if callable.isFinal() or callable.getDeclaringType().isFinal()
|
||||
then isFinal = true
|
||||
else isFinal = false
|
||||
) and
|
||||
name = callable.getSourceDeclaration().getName() and
|
||||
signature = ExternalFlow::paramsString(callable) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
|
||||
(if callable.isPublic() then isPublic = true else isPublic = false)
|
||||
)
|
||||
/**
 * The metadata extractor for framework mode. It derives an endpoint's metadata
 * from the callable whose parameter the endpoint represents.
 */
class FrameworkModeMetadataExtractor extends MetadataExtractor {
  FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" }

  /**
   * Holds if `e` is the parameter at index `input` of some callable, and:
   * - `package` is the name of the package of the callable's declaring type,
   * - `type` is the nested name of the erasure of the callable's declaring type,
   * - `subtypes` is `false` if the callable or its declaring type is static or
   *   final, and `true` otherwise,
   * - `name` is the string representation of `e`,
   * - `signature` is the parameter string of the callable as computed by
   *   `ExternalFlow::paramsString`.
   */
  override predicate hasMetadata(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    int input
  ) {
    exists(Callable callable |
      e.asParameter() = callable.getParameter(input) and
      package = callable.getDeclaringType().getPackage().getName() and
      type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
      // A static or final callable (or one declared in a static or final type) yields
      // `subtypes = false`; everything else yields `subtypes = true`. This matches the
      // polarity of the string-based metadata predicate that this class replaces.
      (
        if
          callable.isStatic() or
          callable.getDeclaringType().isStatic() or
          callable.isFinal() or
          callable.getDeclaringType().isFinal()
        then subtypes = false
        else subtypes = true
      ) and
      // NOTE(review): `name` is bound to the endpoint's string representation, whereas the
      // previous metadata used the callable's name for this field -- confirm which is intended.
      name = e.toString() and
      signature = ExternalFlow::paramsString(callable)
    )
  }
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -55,19 +55,6 @@ signature module CandidateSig {
|
||||
*/
|
||||
predicate isNeutral(Endpoint e);
|
||||
|
||||
/**
|
||||
* Holds if `e` has the given metadata.
|
||||
*
|
||||
* This is a helper function to extract and export needed information about each endpoint in the sink candidate query
|
||||
* as well as the queries that extract positive and negative examples for the prompt / training set. The metadata is
|
||||
* extracted as a string in the format of a Python dictionary, eg.:
|
||||
*
|
||||
* `{'Package': 'com.foo.util', 'Type': 'HelperClass', ... }`.
|
||||
*
|
||||
* The meta data will be passed on to the machine learning code by the extraction queries.
|
||||
*/
|
||||
predicate hasMetadata(Endpoint e, string metadata);
|
||||
|
||||
RelatedLocation getRelatedLocation(Endpoint e, string name);
|
||||
}
|
||||
|
||||
@@ -107,8 +94,6 @@ module SharedCharacteristics<CandidateSig Candidate> {
|
||||
not exists(getAReasonSinkExcluded(candidateSink, sinkType))
|
||||
}
|
||||
|
||||
predicate hasMetadata = Candidate::hasMetadata/2;
|
||||
|
||||
/**
|
||||
* If it exists, gets a related location for a given endpoint or candidate.
|
||||
* If it doesn't exist, returns the candidate itself as a 'null' value.
|
||||
|
||||
Reference in New Issue
Block a user