mirror of
https://github.com/github/codeql.git
synced 2026-04-28 02:05:14 +02:00
Merge pull request #13636 from github/tausbn/add-sink-alert-metrics-query
Java: Add metric queries for counting sinks coming from models
This commit is contained in:
187
java/ql/src/Telemetry/AutomodelAlertSinkUtil.qll
Normal file
187
java/ql/src/Telemetry/AutomodelAlertSinkUtil.qll
Normal file
@@ -0,0 +1,187 @@
|
||||
private import java
|
||||
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
|
||||
private import semmle.code.java.dataflow.internal.DataFlow
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
private import semmle.code.java.security.RequestForgeryConfig
|
||||
private import semmle.code.java.security.CommandLineQuery
|
||||
private import semmle.code.java.security.SqlConcatenatedQuery
|
||||
private import semmle.code.java.security.SqlInjectionQuery
|
||||
private import semmle.code.java.security.UrlRedirectQuery
|
||||
private import semmle.code.java.security.TaintedPathQuery
|
||||
private import semmle.code.java.security.SqlInjectionQuery
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
/**
 * The algebraic type underlying `SinkModel`: one value for every tuple for which
 * `ExternalFlow::sinkModel` holds.
 */
private newtype TSinkModel =
  MkSinkModel(
    string pkg, string typeName, boolean withSubtypes, string member, string sig, string extension,
    string inputSpec, string sinkKind, string origin
  ) {
    ExternalFlow::sinkModel(pkg, typeName, withSubtypes, member, sig, extension, inputSpec,
      sinkKind, origin)
  }
|
||||
|
||||
/**
 * A sink model, that is, one tuple of the `ExternalFlow::sinkModel` relation, with an accessor
 * for each of its columns.
 */
class SinkModel extends TSinkModel {
  string package;
  string type;
  boolean subtypes;
  string name;
  string signature;
  string ext;
  string input;
  string kind;
  string provenance;

  SinkModel() {
    this = MkSinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance)
  }

  /** Gets the package column of this sink model. */
  string getPackage() { result = package }

  /** Gets the type column of this sink model. */
  string getType() { result = type }

  /** Gets the boolean stating whether this sink model considers subtypes. */
  boolean getSubtypes() { result = subtypes }

  /** Gets the name column of this sink model. */
  string getName() { result = name }

  /** Gets the signature column of this sink model. */
  string getSignature() { result = signature }

  /** Gets the input column of this sink model. */
  string getInput() { result = input }

  /** Gets the extension column of this sink model. */
  string getExt() { result = ext }

  /** Gets the kind column of this sink model. */
  string getKind() { result = kind }

  /** Gets the provenance column of this sink model. */
  string getProvenance() { result = provenance }

  /** Gets the number of `PotentialSinkModelExpr`s whose sink model is this one. */
  int getInstanceCount() {
    result = count(PotentialSinkModelExpr candidate | this = candidate.getSinkModel() | candidate)
  }

  /** Gets a textual rendering of this sink model, listing all of its columns. */
  string toString() {
    exists(string columns |
      columns =
        package + ", " + type + ", " + subtypes + ", " + name + ", " + signature + ", " + ext + ", "
          + input + ", " + kind + ", " + provenance
    |
      result = "SinkModel(" + columns + ")"
    )
  }

  /**
   * Gets the Models-as-Data row for this sink model: every string column in double quotes,
   * the `subtypes` column rendered by `pyBool`, all separated by `, `.
   */
  string getRepr() {
    exists(string beforeSubtypes, string afterSubtypes |
      beforeSubtypes = "\"" + package + "\", \"" + type + "\"" and
      afterSubtypes =
        "\"" + name + "\", \"" + signature + "\", \"" + ext + "\", \"" + input + "\", \"" + kind +
          "\", \"" + provenance + "\""
    |
      result = beforeSubtypes + ", " + pyBool(subtypes) + ", " + afterSubtypes
    )
  }
}
|
||||
|
||||
/**
 * An expression that may be covered by a sink model. Only expressions that are an argument or
 * the qualifier of a call can actually be matched to one.
 */
class PotentialSinkModelExpr extends Expr {
  /**
   * Holds if this expression is an argument (or the qualifier, encoded as index `-1`) of a call
   * whose callee is described by `package`, `type`, `subtypes`, `name` and `signature`, and
   * `input` is the result of `getArgumentForIndex` for that index. These columns are what
   * `getSinkModel` uses to look up a corresponding sink model.
   */
  pragma[nomagic]
  predicate hasSignature(
    string package, string type, boolean subtypes, string name, string signature, string input
  ) {
    exists(Call invocation, Callable target, RefType declaring, int position |
      target = invocation.getCallee() and
      declaring = target.getDeclaringType() and
      (
        position = -1 and this = invocation.getQualifier()
        or
        this = invocation.getArgument(position)
      ) and
      package = declaring.getPackage().getName() and
      type = declaring.getErasure().(RefType).nestedName() and
      subtypes = considerSubtypes(target) and
      name = target.getName() and
      signature = ExternalFlow::paramsString(target) and
      input = getArgumentForIndex(position)
    )
  }

  /**
   * Gets a sink model whose package, type, subtypes, name, signature and input columns agree
   * with a signature of this expression.
   */
  SinkModel getSinkModel() {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string input
    |
      this.hasSignature(package, type, subtypes, name, signature, input) and
      result.getPackage() = package and
      result.getType() = type and
      result.getSubtypes() = subtypes and
      result.getName() = name and
      result.getSignature() = signature and
      result.getInput() = input
    )
  }
}
|
||||
|
||||
/** Gets the capitalised spelling (`True` or `False`) of the boolean `b`. */
private string pyBool(boolean b) {
  b = [true, false] and
  (if b = true then result = "True" else result = "False")
}
|
||||
|
||||
/**
 * Gets a string representation of an existing `ai-generated` sink model at the expression `e`,
 * in the format in which it would appear in a Models-as-Data file.
 *
 * The provenance check and the representation refer to the same sink model, so an expression
 * that matches both an `ai-generated` model and a model with another provenance only yields
 * the representation of the `ai-generated` one.
 */
string getSinkModelRepr(PotentialSinkModelExpr e) {
  exists(SinkModel model |
    model = e.getSinkModel() and
    model.getProvenance() = "ai-generated" and
    result = model.getRepr()
  )
}
|
||||
|
||||
/**
 * Gets the text to append to an alert message for the expression `e`: a newline, the label
 * `sinkModel: `, and a result of `getSinkModelRepr(e)`.
 */
string getSinkModelQueryRepr(PotentialSinkModelExpr e) {
  exists(string repr | repr = getSinkModelRepr(e) | result = "\nsinkModel: " + repr)
}
|
||||
|
||||
/**
 * A parameterised module that takes a dataflow configuration and exposes a predicate relating
 * each sink model to the number of flow-path sinks of that configuration that correspond to it.
 */
private module SinkTallier<DataFlow::ConfigSig Config> {
  module ConfigFlow = TaintTracking::Global<Config>;

  /** Gets a sink model corresponding to the expression underlying the path node `n`. */
  private SinkModel modelAt(ConfigFlow::PathNode n) {
    result = n.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel()
  }

  /**
   * Holds if `c` is the (non-zero) number of path nodes that are the sink of some flow path
   * of `Config` and that correspond to the sink model `s`.
   */
  predicate getSinkModelCount(int c, SinkModel s) {
    s = modelAt(_) and
    c =
      strictcount(ConfigFlow::PathNode sinkNode |
        s = modelAt(sinkNode) and
        ConfigFlow::flowPath(_, sinkNode)
      )
  }
}
|
||||
|
||||
/**
 * Holds if `alertCount` is the number of flow-path sinks, for the query with ID `queryName`,
 * that correspond to the sink model `sinkModel`. Only holds for non-zero counts.
 *
 * Each dataflow configuration is tallied under exactly one query ID, so that summing over
 * query IDs (as `sinkModelTally` does) counts every configuration once.
 */
predicate sinkModelTallyPerQuery(string queryName, int alertCount, SinkModel sinkModel) {
  // `RequestForgeryConfig` is tallied only under `java/ssrf`; listing it under a second
  // ID would double count it in `sinkModelTally`.
  queryName = "java/ssrf" and
  SinkTallier<RequestForgeryConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/command-line-injection" and
  // The sink model must show up for at least one of the two configurations ...
  (
    SinkTallier<RemoteUserInputToArgumentToExecFlowConfig>::getSinkModelCount(_, sinkModel)
    or
    SinkTallier<LocalUserInputToArgumentToExecFlowConfig>::getSinkModelCount(_, sinkModel)
  ) and
  // ... and the count is the total over whichever configurations have results. The `remote`
  // flag keeps the two counts apart in the aggregate even when they are numerically equal.
  alertCount =
    sum(boolean remote, int c |
      remote = true and
      SinkTallier<RemoteUserInputToArgumentToExecFlowConfig>::getSinkModelCount(c, sinkModel)
      or
      remote = false and
      SinkTallier<LocalUserInputToArgumentToExecFlowConfig>::getSinkModelCount(c, sinkModel)
    |
      c
    )
  or
  queryName = "java/concatenated-sql-query" and
  SinkTallier<UncontrolledStringBuilderSourceFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/path-injection" and
  SinkTallier<TaintedPathConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/unvalidated-url-redirection" and
  SinkTallier<UrlRedirectConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/sql-injection" and
  SinkTallier<QueryInjectionFlowConfig>::getSinkModelCount(alertCount, sinkModel)
}

/**
 * Holds if `alertCount` is the total, over all queries covered by `sinkModelTallyPerQuery`,
 * of the per-query counts for the sink model `sinkModel`.
 */
predicate sinkModelTally(int alertCount, SinkModel sinkModel) {
  sinkModelTallyPerQuery(_, _, sinkModel) and
  // Aggregate over (query, count) pairs: aggregating over the count alone would collapse
  // equal counts coming from different queries into a single summand.
  alertCount =
    sum(string queryName, int c | sinkModelTallyPerQuery(queryName, c, sinkModel) | c)
}
|
||||
16
java/ql/src/Telemetry/AutomodelAlertSinks.ql
Normal file
16
java/ql/src/Telemetry/AutomodelAlertSinks.ql
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* @name Number of alerts per sink model
|
||||
* @description Counts the number of alerts using `ai-generated` sink models.
|
||||
* @kind table
|
||||
* @id java/ml/metrics-count-alerts-per-sink-model
|
||||
* @tags internal automodel metrics
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
// One row per `ai-generated` sink model that has a tally, largest alert count first.
from SinkModel model, int alertCount
where
  model.getProvenance() = "ai-generated" and
  sinkModelTally(alertCount, model)
select alertCount, model.getPackage() as package, model.getType() as type,
  model.getSubtypes() as subtypes, model.getName() as name, model.getSignature() as signature,
  model.getInput() as input, model.getExt() as ext, model.getKind() as kind,
  model.getProvenance() as provenance order by alertCount desc
|
||||
19
java/ql/src/Telemetry/AutomodelAlertSinksPerQuery.ql
Normal file
19
java/ql/src/Telemetry/AutomodelAlertSinksPerQuery.ql
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @name Number of alerts per sink model and query
|
||||
* @description Counts the number of alerts per query using `ai-generated` sink models.
|
||||
* @kind table
|
||||
* @id java/ml/metrics-count-alerts-per-sink-model-and-query
|
||||
* @tags internal automodel metrics
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
// One row per query ID and `ai-generated` sink model, grouped by query ID and, within a query,
// ordered by descending alert count.
from SinkModel model, string queryId, int alertCount
where
  model.getProvenance() = "ai-generated" and
  sinkModelTallyPerQuery(queryId, alertCount, model)
select queryId, alertCount, model.getPackage() as package, model.getType() as type,
  model.getSubtypes() as subtypes, model.getName() as name, model.getSignature() as signature,
  model.getInput() as input, model.getExt() as ext, model.getKind() as kind,
  model.getProvenance() as provenance order by queryId, alertCount desc
|
||||
19
java/ql/src/Telemetry/AutomodelCountGeneratedSinks.ql
Normal file
19
java/ql/src/Telemetry/AutomodelCountGeneratedSinks.ql
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @name Number of instances of each sink model
|
||||
* @description Counts the number of instances of `ai-generated` sink models.
|
||||
* @kind table
|
||||
* @id java/ml/metrics-count-instances-per-sink-model
|
||||
* @tags internal automodel metrics
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
// One row per `ai-generated` sink model with at least one matching expression, largest
// instance count first.
from SinkModel model, int instanceCount
where
  model.getProvenance() = "ai-generated" and
  instanceCount = model.getInstanceCount() and
  instanceCount > 0
select instanceCount, model.getPackage() as package, model.getType() as type,
  model.getSubtypes() as subtypes, model.getName() as name, model.getSignature() as signature,
  model.getInput() as input, model.getExt() as ext, model.getKind() as kind,
  model.getProvenance() as provenance order by instanceCount desc
|
||||
@@ -56,6 +56,7 @@ string getArgumentForIndex(int index) {
|
||||
* It would technically be ok to always use the value 'true', but this would
|
||||
* break convention.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
boolean considerSubtypes(Callable callable) {
|
||||
if
|
||||
callable.isStatic() or
|
||||
|
||||
62
java/ql/src/Telemetry/AutomodelSinkModelMrvaQueries.ql
Normal file
62
java/ql/src/Telemetry/AutomodelSinkModelMrvaQueries.ql
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* This file contains query predicates for use when gathering metrics at scale using Multi Repo
|
||||
* Variant Analysis.
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
/**
 * Holds if `alertCount` is the tally reported by `sinkModelTallyPerQuery` for the query with ID
 * `queryId` and the `ai-generated` sink model whose columns are `package`, `type`, `subtypes`,
 * `name`, `signature`, `input`, `ext`, `kind` and `provenance`.
 */
query predicate sinkModelCountPerQuery(
  string queryId, int alertCount, string package, string type, boolean subtypes, string name,
  string signature, string input, string ext, string kind, string provenance
) {
  provenance = "ai-generated" and
  exists(SinkModel model |
    provenance = model.getProvenance() and
    sinkModelTallyPerQuery(queryId, alertCount, model) and
    package = model.getPackage() and
    type = model.getType() and
    subtypes = model.getSubtypes() and
    name = model.getName() and
    signature = model.getSignature() and
    input = model.getInput() and
    ext = model.getExt() and
    kind = model.getKind()
  )
}
|
||||
|
||||
/**
 * Holds if `instanceCount` is the (non-zero) result of `getInstanceCount()` for the
 * `ai-generated` sink model whose columns are `package`, `type`, `subtypes`, `name`,
 * `signature`, `input`, `ext`, `kind` and `provenance`.
 */
query predicate instanceCount(
  int instanceCount, string package, string type, boolean subtypes, string name, string signature,
  string input, string ext, string kind, string provenance
) {
  provenance = "ai-generated" and
  exists(SinkModel model |
    provenance = model.getProvenance() and
    instanceCount = model.getInstanceCount() and
    instanceCount > 0 and
    package = model.getPackage() and
    type = model.getType() and
    subtypes = model.getSubtypes() and
    name = model.getName() and
    signature = model.getSignature() and
    input = model.getInput() and
    ext = model.getExt() and
    kind = model.getKind()
  )
}
|
||||
|
||||
// MRVA requires a select clause, so it is used to report the name of each query predicate in
// this file that produced at least one result.
from string hadResults
where
  hadResults = "sinkModelCountPerQuery" and
  sinkModelCountPerQuery(_, _, _, _, _, _, _, _, _, _, _)
  or
  hadResults = "instanceCount" and
  instanceCount(_, _, _, _, _, _, _, _, _, _)
select hadResults
|
||||
Reference in New Issue
Block a user