mirror of
https://github.com/github/codeql.git
synced 2026-05-24 16:17:07 +02:00
Refactor the CodeQL code that extracts metadata for methods presented to Codex, to make it easy to add another field
This commit is contained in:
@@ -27,8 +27,7 @@ DataFlow::Node getSampleFromSampleRate(float rate) {
|
||||
|
||||
from
|
||||
DataFlow::Node endpoint, EndpointCharacteristics::EndpointCharacteristic characteristic,
|
||||
float confidence, string message, string package, string type, boolean subtypes, string name,
|
||||
string signature, string ext, string input, string provenance
|
||||
float confidence, string message
|
||||
where
|
||||
characteristic.appliesToEndpoint(endpoint) and
|
||||
confidence >= characteristic.highConfidence() and
|
||||
@@ -52,21 +51,10 @@ where
|
||||
characteristic2.hasImplications(positiveType, true, confidence2)
|
||||
) and
|
||||
endpoint = getSampleFromSampleRate(0.01) and
|
||||
exists(Callable callee, Call call, int index |
|
||||
endpoint.asExpr() = call.getArgument(index) and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
subtypes = true and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
name = callee.getName() and // TODO: Will this work for constructors?
|
||||
signature = callee.paramsString() and
|
||||
ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
input = "Argument[" + index + "]" and // TODO: why are slashes added?
|
||||
provenance = "manual" // TODO
|
||||
) and
|
||||
message =
|
||||
"Non-sink of type " + characteristic + " with confidence " + confidence.toString() +
|
||||
"\n{'Package': '" + package + "', 'Type': '" + type + "', 'Subtypes': " + subtypes +
|
||||
", 'Name': '" + name + "', 'Signature': '" + signature + "', 'Ext': '" + ext +
|
||||
"', 'Argument index': '" + input + "', 'Provenance': '" + provenance + "'}" // TODO: Why are the curly braces added twice?
|
||||
"Non-sink of type " + characteristic + " with confidence " + confidence.toString() + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string concatenatedMetadata |
|
||||
EndpointCharacteristics::hasMetaData(endpoint, concatenatedMetadata)
|
||||
)
|
||||
select endpoint, message
|
||||
|
||||
@@ -21,9 +21,7 @@ private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestF
|
||||
* the ML-generated, noisy sinks will end up polluting the positive examples used in the prompt!
|
||||
*/
|
||||
|
||||
from
|
||||
DataFlow::Node sink, AtmConfig::AtmConfig config, string message, string package, string type,
|
||||
boolean subtypes, string name, string signature, string ext, string input, string provenance
|
||||
from DataFlow::Node sink, AtmConfig::AtmConfig config, string message
|
||||
where
|
||||
config.isKnownSink(sink) and
|
||||
// If there are _any_ erroneous endpoints, return nothing. This will prevent us from accidentally running this query
|
||||
@@ -33,21 +31,10 @@ where
|
||||
// treated by the actual query as a sanitizer, since the final logic is something like
|
||||
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as positive examples in the prompt.
|
||||
not config.isSanitizer(sink) and
|
||||
exists(Callable callee, Call call, int index |
|
||||
sink.asExpr() = call.getArgument(index) and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
subtypes = true and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
name = callee.getName() and // TODO: Will this work for constructors?
|
||||
signature = callee.paramsString() and
|
||||
ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
input = "Argument[" + index + "]" and // TODO: why are slashes added?
|
||||
provenance = "manual" // TODO
|
||||
) and
|
||||
message =
|
||||
config.getASinkEndpointType().getDescription() + "\n{'Package': '" + package + "', 'Type': '" +
|
||||
type + "', 'Subtypes': " + subtypes + ", 'Name': '" + name + "', 'Signature': '" + signature +
|
||||
"', 'Ext': '" + ext + "', 'Argument index': '" + input + "', 'Provenance': '" + provenance +
|
||||
"'}" // TODO: Why are the curly braces added twice?
|
||||
config.getASinkEndpointType().getDescription() + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string concatenatedMetadata |
|
||||
EndpointCharacteristics::hasMetaData(sink, concatenatedMetadata)
|
||||
)
|
||||
select sink, message
|
||||
|
||||
@@ -13,27 +13,14 @@
|
||||
|
||||
private import java
|
||||
import semmle.code.java.dataflow.TaintTracking
|
||||
private import experimental.adaptivethreatmodeling.EndpointCharacteristics as EndpointCharacteristics
|
||||
private import experimental.adaptivethreatmodeling.ATMConfig as AtmConfig
|
||||
private import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
|
||||
private import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
private import experimental.adaptivethreatmodeling.RequestForgeryATM as RequestForgeryAtm
|
||||
|
||||
from
|
||||
DataFlow::Node sink, string message, string package, string type, boolean subtypes, string name,
|
||||
string signature, string ext, string input, string provenance
|
||||
from DataFlow::Node sink, string message
|
||||
where
|
||||
exists(Callable callee, Call call, int index |
|
||||
sink.asExpr() = call.getArgument(index) and
|
||||
callee = call.getCallee() and
|
||||
package = callee.getDeclaringType().getPackage().getName() and
|
||||
type = callee.getDeclaringType().getName() and //TODO: Will this work for inner classes? Will it produce X$Y? What about lambdas? What about enums? What about interfaces? What about annotations?
|
||||
subtypes = true and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
name = callee.getName() and // TODO: Will this work for constructors?
|
||||
signature = callee.paramsString() and
|
||||
ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
|
||||
input = "Argument[" + index + "]" and // TODO: why are slashes added?
|
||||
provenance = "manual" // TODO
|
||||
) and
|
||||
// The message is the concatenation of all relevant configs, and we surface only sinks that have at least one relevant
|
||||
// config.
|
||||
message =
|
||||
@@ -42,7 +29,9 @@ where
|
||||
sinkPathNode.getNode() = sink
|
||||
|
|
||||
config.getASinkEndpointType().getDescription(), ", "
|
||||
) + "\n{'Package': '" + package + "', 'Type': '" + type + "', 'Subtypes': " + subtypes +
|
||||
", 'Name': '" + name + "', 'Signature': '" + signature + "', 'Ext': '" + ext +
|
||||
"', 'Argument index': '" + input + "', 'Provenance': '" + provenance + "'}" // TODO: Why are the curly braces added twice?
|
||||
) + "\n" +
|
||||
// Extract the needed metadata for this endpoint.
|
||||
any(string concatenatedMetadata |
|
||||
EndpointCharacteristics::hasMetaData(sink, concatenatedMetadata)
|
||||
)
|
||||
select sink, message
|
||||
|
||||
Reference in New Issue
Block a user