mirror of
https://github.com/github/codeql.git
synced 2026-05-21 14:47:10 +02:00
When extracting positive and negative examples for the Java prompt, extract the data used in the MaD extensible predicate.
This will enable the codex prompt to optionally use this data in additional columns.
This commit is contained in:
@@ -27,7 +27,8 @@ DataFlow::Node getSampleFromSampleRate(float rate) {
|
||||
|
||||
from
|
||||
DataFlow::Node endpoint, EndpointCharacteristics::EndpointCharacteristic characteristic,
|
||||
float confidence
|
||||
float confidence, string message, string package, string type, boolean subtypes, string name,
|
||||
string signature, string ext, string input, string provenance
|
||||
where
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
confidence >= characteristic.highConfidence() and
|
||||
@@ -50,5 +51,22 @@ where
|
||||
not positiveType instanceof NegativeType and
|
||||
characteristic2.hasImplications(positiveType, true, confidence2)
|
||||
) and
|
||||
endpoint = getSampleFromSampleRate(0.01)
|
||||
select endpoint, "Non-sink of type " + characteristic + " with confidence " + confidence.toString()
|
||||
endpoint = getSampleFromSampleRate(0.01) and
|
||||
exists(Callable callee, Call call, int index |
|
||||
endpoint.asExpr() = call.getArgument(index) and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
subtypes = true and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
name = callee.getName() and // TODO: Will this work for constructors?
|
||||
signature = callee.paramsString() and
|
||||
ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
input = "Argument[" + index + "]" and // TODO: why are slashes added?
|
||||
provenance = "manual" // TODO
|
||||
) and
|
||||
message =
|
||||
"Non-sink of type " + characteristic + " with confidence " + confidence.toString() +
|
||||
"\n{'Package': '" + package + "', 'Type': '" + type + "', 'Subtypes': " + subtypes +
|
||||
", 'Name': '" + name + "', 'Signature': '" + signature + "', 'Ext': '" + ext +
|
||||
"', 'Argument index': '" + input + "', 'Provenance': '" + provenance + "'}" // TODO: Why are the curly braces added twice?
|
||||
select endpoint, message
|
||||
|
||||
@@ -21,7 +21,9 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestF
|
||||
* the ML-generated, noisy sinks will end up polluting the positive examples used in the prompt!
|
||||
*/
|
||||
|
||||
from DataFlow::Node sink, AtmConfig::AtmConfig config
|
||||
from
|
||||
DataFlow::Node sink, AtmConfig::AtmConfig config, string message, string package, string type,
|
||||
boolean subtypes, string name, string signature, string ext, string input, string provenance
|
||||
where
|
||||
config.isKnownSink(sink) and
|
||||
// If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query
|
||||
@@ -30,5 +32,22 @@ where
|
||||
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt.
|
||||
not config.isSanitizer(sink)
|
||||
select sink, config.getASinkEndpointType().getDescription()
|
||||
not config.isSanitizer(sink) and
|
||||
exists(Callable callee, Call call, int index |
|
||||
sink.asExpr() = call.getArgument(index) and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
subtypes = true and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
name = callee.getName() and // TODO: Will this work for constructors?
|
||||
signature = callee.paramsString() and
|
||||
ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
input = "Argument[" + index + "]" and // TODO: why are slashes added?
|
||||
provenance = "manual" // TODO
|
||||
) and
|
||||
message =
|
||||
config.getASinkEndpointType().getDescription() + "\n{'Package': '" + package + "', 'Type': '" +
|
||||
type + "', 'Subtypes': " + subtypes + ", 'Name': '" + name + "', 'Signature': '" + signature +
|
||||
"', 'Ext': '" + ext + "', 'Argument index': '" + input + "', 'Provenance': '" + provenance +
|
||||
"'}" // TODO: Why are the curly braces added twice?
|
||||
select sink, message
|
||||
|
||||
@@ -42,7 +42,7 @@ where
|
||||
sinkPathNode.getNode() = sink
|
||||
|
|
||||
config.getASinkEndpointType().getDescription(), ", "
|
||||
) + "\n{'package': '" + package + "', 'type': '" + type + "', 'subtypes': " + subtypes +
|
||||
", 'name': '" + name + "', 'signature': '" + signature + "', 'ext': '" + ext + "', 'input': '"
|
||||
+ input + "', 'provenance': '" + provenance + "'}" // TODO: Why are the curly braces added twice?
|
||||
) + "\n{'Package': '" + package + "', 'Type': '" + type + "', 'Subtypes': " + subtypes +
|
||||
", 'Name': '" + name + "', 'Signature': '" + signature + "', 'Ext': '" + ext +
|
||||
"', 'Argument index': '" + input + "', 'Provenance': '" + provenance + "'}" // TODO: Why are the curly braces added twice?
|
||||
select sink, message
|
||||
|
||||
Reference in New Issue
Block a user