Compare commits

...

1 Commits

Author SHA1 Message Date
tiferet
7162f28641 Subsample negative training samples down to 10% 2022-10-03 16:34:48 -07:00

View File

@@ -7,12 +7,30 @@
import javascript
import ExtractEndpointData as ExtractEndpointData
/**
 * Gets a data-flow node selected by deterministic subsampling at roughly the
 * given `rate`.
 *
 * Nodes that have an underlying expression are ranked by that expression's
 * location (file path first, then the four position components reported by
 * `hasLocationInfo`). A node is returned when its rank is a multiple of
 * `(1 / rate).ceil()`; for instance, a `rate` of `0.1` yields a stride of 10.
 *
 * Nodes for which `asExpr()` has no result are never ranked and are therefore
 * never returned.
 */
bindingset[rate]
DataFlow::Node getSampleFromSampleRate(float rate) {
  exists(int position, int stride |
    // Keep one rank out of every `stride` ranks.
    stride = (1 / rate).ceil() and
    position % stride = 0 and
    result =
      rank[position](DataFlow::Node node, string file, int startLine, int startColumn, int endLine,
        int endColumn |
        node.asExpr()
            .getLocation()
            .hasLocationInfo(file, startLine, startColumn, endLine, endColumn)
      |
        node order by file, startLine, startColumn, endLine, endColumn
      )
  )
}
query predicate endpoints(
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
) {
ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
// only select endpoints that are either Sink or NotASink
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink"], "string") and
(
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", "Sink", "string")
or
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", "NotASink", "string") and
endpoint = getSampleFromSampleRate(0.1)
) and
// do not select endpoints filtered out by end-to-end evaluation
ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
"boolean") and