mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #7800 from github/henrymercer/js-atm-add-model-building-pack
JS: Add model building pack for ML-powered queries
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
"*/ql/examples/qlpack.yml",
|
||||
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
|
||||
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
|
||||
"javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml",
|
||||
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
|
||||
"csharp/ql/campaigns/Solorigate/lib/qlpack.yml",
|
||||
"csharp/ql/campaigns/Solorigate/src/qlpack.yml",
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* @name Debug result inclusion
|
||||
* @description Use this query to understand why some alerts are included or excluded from the
|
||||
* results of boosted queries. The results for this query are the union of the alerts
|
||||
* generated by each boosted query. Each alert includes an explanation why it was
|
||||
* included or excluded for each of the four security queries.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @id adaptive-threat-modeling/js/debug-result-inclusion
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import extraction.ExtractEndpointData
|
||||
|
||||
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
|
||||
query instanceof NosqlInjectionQuery and
|
||||
result = NosqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
query instanceof SqlInjectionQuery and
|
||||
result = SqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
query instanceof TaintedPathQuery and
|
||||
result = TaintedPathATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
|
||||
or
|
||||
query instanceof XssQuery and
|
||||
result = XssATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
string getDescriptionForAlertCandidate(
|
||||
DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate, Query query
|
||||
) {
|
||||
result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
|
||||
or
|
||||
getATMCfg(query).isKnownSink(sinkCandidate) and
|
||||
result = "excluded[reason=known-sink]"
|
||||
or
|
||||
not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
|
||||
not getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
|
||||
(
|
||||
if
|
||||
getDataFlowCfg(query).isSource(sourceCandidate) or
|
||||
getDataFlowCfg(query).isSource(sourceCandidate, _)
|
||||
then result = "no flow"
|
||||
else result = "not a known source"
|
||||
)
|
||||
or
|
||||
getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
|
||||
result = "included"
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
string getDescriptionForAlert(DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate) {
|
||||
result =
|
||||
concat(Query query |
|
||||
|
|
||||
query.getName() + ": " +
|
||||
getDescriptionForAlertCandidate(sourceCandidate, sinkCandidate, query), ", "
|
||||
)
|
||||
}
|
||||
|
||||
from DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink
|
||||
where cfg.hasFlow(source, sink)
|
||||
select sink,
|
||||
"This is an ATM result that may depend on $@ [" + getDescriptionForAlert(source, sink) + "]",
|
||||
source, "a user-provided value"
|
||||
@@ -0,0 +1,11 @@
|
||||
private import javascript
|
||||
private import extraction.Exclusions as Exclusions
|
||||
|
||||
/**
|
||||
* Holds if the flow from `source` to `sink` should be excluded from the results of an end-to-end
|
||||
* evaluation query.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isFlowExcluded(DataFlow::Node source, DataFlow::Node sink) {
|
||||
Exclusions::isFileExcluded([source.getFile(), sink.getFile()])
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* EndpointScoresIntegrationTest.ql
|
||||
*
|
||||
* Extract scores for each test endpoint that is an argument to a function call in the database.
|
||||
* This is used by integration tests to verify that QL and the modeling codebase agree on the scores
|
||||
* of a set of test endpoints.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import experimental.adaptivethreatmodeling.FeaturizationConfig
|
||||
import experimental.adaptivethreatmodeling.EndpointScoring::ModelScoring as ModelScoring
|
||||
|
||||
/**
|
||||
* A featurization config that featurizes endpoints that are arguments to function calls.
|
||||
*
|
||||
* This should only be used in extraction queries and tests.
|
||||
*/
|
||||
class FunctionArgumentFeaturizationConfig extends FeaturizationConfig {
|
||||
FunctionArgumentFeaturizationConfig() { this = "FunctionArgumentFeaturization" }
|
||||
|
||||
override DataFlow::Node getAnEndpointToFeaturize() {
|
||||
exists(DataFlow::CallNode call | result = call.getAnArgument())
|
||||
}
|
||||
}
|
||||
|
||||
query predicate endpointScores = ModelScoring::endpointScores/3;
|
||||
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* ModelCheck.ql
|
||||
*
|
||||
* Returns checksums of ATM models.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The `availableMlModels` template predicate.
|
||||
*
|
||||
* This is populated by the evaluator with metadata for the available machine learning models.
|
||||
*/
|
||||
external predicate availableMlModels(
|
||||
string modelChecksum, string modelLanguage, string modelName, string modelType
|
||||
);
|
||||
|
||||
select any(string checksum | availableMlModels(checksum, "javascript", _, _))
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* NosqlInjection.ql
|
||||
*
|
||||
* Version of the standard NoSQL injection query with an output relation ready to plug into the
|
||||
* evaluation pipeline.
|
||||
*/
|
||||
|
||||
import semmle.javascript.security.dataflow.NosqlInjection
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource
|
||||
where
|
||||
cfg instanceof NosqlInjection::Configuration and
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource)
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* NosqlInjectionATM.ql
|
||||
*
|
||||
* Version of the boosted NoSQL injection query with an output relation ready to plug into the
|
||||
* evaluation pipeline.
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.NosqlInjectionATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
getScoreForFlow(source, sink) = score
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* NosqlInjectionATMLite.ql
|
||||
*
|
||||
* Arbitrarily ranked version of the boosted NoSQL injection query with an output relation ready to
|
||||
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
|
||||
* filters, and (b) as a baseline to compare the model against.
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.NosqlInjectionATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
score = 0
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* SqlInjection.ql
|
||||
*
|
||||
* Version of the standard SQL injection query with an output relation ready to plug into the
|
||||
* evaluation pipeline.
|
||||
*/
|
||||
|
||||
import semmle.javascript.security.dataflow.SqlInjection
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource
|
||||
where
|
||||
cfg instanceof SqlInjection::Configuration and
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource)
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* SqlInjectionATM.ql
|
||||
*
|
||||
* Version of the boosted SQL injection query with an output relation ready to plug into the
|
||||
* evaluation pipeline.
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.SqlInjectionATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
getScoreForFlow(source, sink) = score
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* SqlInjectionATMLite.ql
|
||||
*
|
||||
* Arbitrarily ranked version of the boosted SQL injection query with an output relation ready to
|
||||
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
|
||||
* filters, and (b) as a baseline to compare the model against.
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.SqlInjectionATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
score = 0
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* TaintedPath.ql
|
||||
*
|
||||
* Version of the standard path injection query with an output relation ready to plug into the
|
||||
* evaluation pipeline.
|
||||
*/
|
||||
|
||||
import semmle.javascript.security.dataflow.TaintedPath
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource
|
||||
where
|
||||
cfg instanceof TaintedPath::Configuration and
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource)
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* TaintedPathATM.ql
|
||||
*
|
||||
* Version of the boosted path injection query with an output relation ready to plug into the
|
||||
* evaluation pipeline.
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
getScoreForFlow(source, sink) = score
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* TaintedPathATMLite.ql
|
||||
*
|
||||
* Arbitrarily ranked version of the boosted path injection query with an output relation ready to
|
||||
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
|
||||
* filters, and (b) as a baseline to compare the model against.
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
score = 0
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Xss.ql
|
||||
*
|
||||
* Version of the standard XSS query with an output relation ready to plug into the evaluation
|
||||
* pipeline.
|
||||
*/
|
||||
|
||||
import semmle.javascript.security.dataflow.DomBasedXss
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource
|
||||
where
|
||||
cfg instanceof DomBasedXss::Configuration and
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource)
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* XssATM.ql
|
||||
*
|
||||
* Version of the boosted XSS query with an output relation ready to plug into the evaluation
|
||||
* pipeline.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.XssATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
getScoreForFlow(source, sink) = score
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
* XssATMLite.ql
|
||||
*
|
||||
* Arbitrarily ranked version of the boosted XSS query with an output relation ready to plug into
|
||||
* the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint filters,
|
||||
* and (b) as a baseline to compare the model against.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import ATM::ResultsInfo
|
||||
import EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.XssATM
|
||||
|
||||
from
|
||||
DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSink,
|
||||
int startLineSink, int endLineSink, int startColumnSink, int endColumnSink, string filePathSource,
|
||||
int startLineSource, int endLineSource, int startColumnSource, int endColumnSource, float score
|
||||
where
|
||||
cfg.hasFlow(source, sink) and
|
||||
not EndToEndEvaluation::isFlowExcluded(source, sink) and
|
||||
not isFlowLikelyInBaseQuery(source, sink) and
|
||||
sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
|
||||
source
|
||||
.hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
|
||||
endColumnSource) and
|
||||
score = 0
|
||||
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
|
||||
score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
|
||||
startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* [DEPRECATED] Counts alerts and sinks for JavaScript security queries.
|
||||
*
|
||||
* This query is deprecated due to the performance implications of bringing in data flow
|
||||
* configurations from multiple queries. Instead use `CountSourcesAndSinks.ql` to count sinks for
|
||||
* JavaScript security queries, and count alerts by running the standard or evaluation queries for
|
||||
* each security vulnerability.
|
||||
*/
|
||||
|
||||
import semmle.javascript.security.dataflow.NosqlInjection
|
||||
import semmle.javascript.security.dataflow.SqlInjection
|
||||
import semmle.javascript.security.dataflow.TaintedPath
|
||||
import semmle.javascript.security.dataflow.DomBasedXss
|
||||
|
||||
int numAlerts(DataFlow::Configuration cfg) {
|
||||
result = count(DataFlow::Node source, DataFlow::Node sink | cfg.hasFlow(source, sink))
|
||||
}
|
||||
|
||||
select numAlerts(any(NosqlInjection::Configuration cfg)) as numNosqlAlerts,
|
||||
numAlerts(any(SqlInjection::Configuration cfg)) as numSqlAlerts,
|
||||
numAlerts(any(TaintedPath::Configuration cfg)) as numTaintedPathAlerts,
|
||||
numAlerts(any(DomBasedXss::Configuration cfg)) as numXssAlerts,
|
||||
count(NosqlInjection::Sink sink) as numNosqlSinks, count(SqlInjection::Sink sink) as numSqlSinks,
|
||||
count(TaintedPath::Sink sink) as numTaintedPathSinks, count(DomBasedXss::Sink sink) as numXssSinks
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Counts sources and sinks for JavaScript security queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import semmle.javascript.dataflow.Configuration
|
||||
// javascript/ql/lib/semmle/javascript/security/dataflow$ ls *Query.qll | sed -e 's/\(.*\)Query.qll/import semmle.javascript.security.dataflow.\1Query as \1/'
|
||||
import semmle.javascript.security.dataflow.BrokenCryptoAlgorithmQuery as BrokenCryptoAlgorithm
|
||||
import semmle.javascript.security.dataflow.BuildArtifactLeakQuery as BuildArtifactLeak
|
||||
import semmle.javascript.security.dataflow.CleartextLoggingQuery as CleartextLogging
|
||||
import semmle.javascript.security.dataflow.CleartextStorageQuery as CleartextStorage
|
||||
import semmle.javascript.security.dataflow.ClientSideUrlRedirectQuery as ClientSideUrlRedirect
|
||||
import semmle.javascript.security.dataflow.CodeInjectionQuery as CodeInjection
|
||||
import semmle.javascript.security.dataflow.CommandInjectionQuery as CommandInjection
|
||||
import semmle.javascript.security.dataflow.ConditionalBypassQuery as ConditionalBypass
|
||||
import semmle.javascript.security.dataflow.CorsMisconfigurationForCredentialsQuery as CorsMisconfigurationForCredentials
|
||||
import semmle.javascript.security.dataflow.DeepObjectResourceExhaustionQuery as DeepObjectResourceExhaustion
|
||||
import semmle.javascript.security.dataflow.DifferentKindsComparisonBypassQuery as DifferentKindsComparisonBypass
|
||||
import semmle.javascript.security.dataflow.DomBasedXssQuery as DomBasedXss
|
||||
import semmle.javascript.security.dataflow.ExceptionXssQuery as ExceptionXss
|
||||
import semmle.javascript.security.dataflow.ExternalAPIUsedWithUntrustedDataQuery as ExternalAPIUsedWithUntrustedData
|
||||
import semmle.javascript.security.dataflow.FileAccessToHttpQuery as FileAccessToHttp
|
||||
import semmle.javascript.security.dataflow.HardcodedCredentialsQuery as HardcodedCredentials
|
||||
import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCodeQuery as HardcodedDataInterpretedAsCode
|
||||
import semmle.javascript.security.dataflow.HostHeaderPoisoningInEmailGenerationQuery as HostHeaderPoisoningInEmailGeneration
|
||||
import semmle.javascript.security.dataflow.HttpToFileAccessQuery as HttpToFileAccess
|
||||
import semmle.javascript.security.dataflow.ImproperCodeSanitizationQuery as ImproperCodeSanitization
|
||||
import semmle.javascript.security.dataflow.IncompleteHtmlAttributeSanitizationQuery as IncompleteHtmlAttributeSanitization
|
||||
import semmle.javascript.security.dataflow.IndirectCommandInjectionQuery as IndirectCommandInjection
|
||||
import semmle.javascript.security.dataflow.InsecureDownloadQuery as InsecureDownload
|
||||
import semmle.javascript.security.dataflow.InsecureRandomnessQuery as InsecureRandomness
|
||||
import semmle.javascript.security.dataflow.InsufficientPasswordHashQuery as InsufficientPasswordHash
|
||||
import semmle.javascript.security.dataflow.LogInjectionQuery as LogInjection
|
||||
import semmle.javascript.security.dataflow.LoopBoundInjectionQuery as LoopBoundInjection
|
||||
import semmle.javascript.security.dataflow.NosqlInjectionQuery as NosqlInjection
|
||||
import semmle.javascript.security.dataflow.PostMessageStarQuery as PostMessageStar
|
||||
import semmle.javascript.security.dataflow.PrototypePollutingAssignmentQuery as PrototypePollutingAssignment
|
||||
import semmle.javascript.security.dataflow.PrototypePollutionQuery as PrototypePollution
|
||||
import semmle.javascript.security.dataflow.ReflectedXssQuery as ReflectedXss
|
||||
import semmle.javascript.security.dataflow.RegExpInjectionQuery as RegExpInjection
|
||||
import semmle.javascript.security.dataflow.RemotePropertyInjectionQuery as RemotePropertyInjection
|
||||
import semmle.javascript.security.dataflow.RequestForgeryQuery as RequestForgery
|
||||
import semmle.javascript.security.dataflow.ServerSideUrlRedirectQuery as ServerSideUrlRedirect
|
||||
import semmle.javascript.security.dataflow.ShellCommandInjectionFromEnvironmentQuery as ShellCommandInjectionFromEnvironment
|
||||
import semmle.javascript.security.dataflow.SqlInjectionQuery as SqlInjection
|
||||
import semmle.javascript.security.dataflow.StackTraceExposureQuery as StackTraceExposure
|
||||
import semmle.javascript.security.dataflow.StoredXssQuery as StoredXss
|
||||
import semmle.javascript.security.dataflow.TaintedFormatStringQuery as TaintedFormatString
|
||||
import semmle.javascript.security.dataflow.TaintedPathQuery as TaintedPath
|
||||
import semmle.javascript.security.dataflow.TemplateObjectInjectionQuery as TemplateObjectInjection
|
||||
import semmle.javascript.security.dataflow.TypeConfusionThroughParameterTamperingQuery as TypeConfusionThroughParameterTampering
|
||||
import semmle.javascript.security.dataflow.UnsafeDeserializationQuery as UnsafeDeserialization
|
||||
import semmle.javascript.security.dataflow.UnsafeDynamicMethodAccessQuery as UnsafeDynamicMethodAccess
|
||||
import semmle.javascript.security.dataflow.UnsafeHtmlConstructionQuery as UnsafeHtmlConstruction
|
||||
import semmle.javascript.security.dataflow.UnsafeJQueryPluginQuery as UnsafeJQueryPlugin
|
||||
import semmle.javascript.security.dataflow.UnsafeShellCommandConstructionQuery as UnsafeShellCommandConstruction
|
||||
import semmle.javascript.security.dataflow.UnvalidatedDynamicMethodCallQuery as UnvalidatedDynamicMethodCall
|
||||
import semmle.javascript.security.dataflow.XmlBombQuery as XmlBomb
|
||||
import semmle.javascript.security.dataflow.XpathInjectionQuery as XpathInjection
|
||||
import semmle.javascript.security.dataflow.XssThroughDomQuery as XssThroughDom
|
||||
import semmle.javascript.security.dataflow.XxeQuery as Xxe
|
||||
import semmle.javascript.security.dataflow.ZipSlipQuery as ZipSlip
|
||||
|
||||
DataFlow::Node getASink(Configuration cfg) { cfg.isSink(result) or cfg.isSink(result, _) }
|
||||
|
||||
DataFlow::Node getASource(Configuration cfg) { cfg.isSource(result) or cfg.isSource(result, _) }
|
||||
|
||||
from Configuration cfg, int sources, int sinks
|
||||
where count(getASource(cfg)) = sources and count(getASink(cfg)) = sinks
|
||||
select cfg, sources, sinks
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines files that should be excluded from the evaluation of ML models.
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
|
||||
|
||||
/** Holds if the file should be excluded from end-to-end evaluation. */
|
||||
predicate isFileExcluded(File file) {
|
||||
// Ignore files that are outside the root folder of the analyzed source location.
|
||||
//
|
||||
// If the file doesn't have a relative path, then the source file is located outside the root
|
||||
// folder of the analyzed source location, meaning that the files are additional files added to
|
||||
// the database like standard library files that we would like to ignore.
|
||||
not exists(file.getRelativePath())
|
||||
or
|
||||
// Ignore files based on their path.
|
||||
exists(string ignorePattern, string separator |
|
||||
ignorePattern =
|
||||
// Exclude test files
|
||||
"(tests?|test[_-]?case|" +
|
||||
// Exclude library files
|
||||
//
|
||||
// - The Bower and npm package managers store packages in bower_components and node_modules
|
||||
// folders respectively.
|
||||
// - Specific exclusion for end-to-end: `applications/examples/static/epydoc` contains
|
||||
// library code from Epydoc.
|
||||
"3rd[_-]?party|bower_components|extern(s|al)?|node_modules|resources|third[_-]?party|_?vendor|"
|
||||
+ "applications" + separator + "examples" + separator + "static" + separator + "epydoc|" +
|
||||
// Exclude generated code
|
||||
"gen|\\.?generated|" +
|
||||
// Exclude benchmarks
|
||||
"benchmarks?|" +
|
||||
// Exclude documentation
|
||||
"docs?|documentation)" and
|
||||
separator = "(\\/|\\.)" and
|
||||
exists(
|
||||
file.getRelativePath()
|
||||
.toLowerCase()
|
||||
.regexpFind(separator + ignorePattern + separator + "|" + "^" + ignorePattern + separator +
|
||||
"|" + separator + ignorePattern + "$", _, _)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Ignore externs, generated, library, and test files.
|
||||
ClassifyFiles::classify(file, ["externs", "generated", "library", "test"])
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts training and evaluation data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import ExtractEndpointData as ExtractEndpointData
|
||||
|
||||
query predicate endpoints = ExtractEndpointData::endpoints/5;
|
||||
|
||||
query predicate tokenFeatures = ExtractEndpointData::tokenFeatures/3;
|
||||
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Library code for training and evaluation data we can use to train ML models for ML-powered
|
||||
* queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import Exclusions as Exclusions
|
||||
import evaluation.EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import experimental.adaptivethreatmodeling.CoreKnowledge as CoreKnowledge
|
||||
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
|
||||
import experimental.adaptivethreatmodeling.EndpointScoring as EndpointScoring
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
import experimental.adaptivethreatmodeling.FilteringReasons
|
||||
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionATM
|
||||
import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionATM
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathATM
|
||||
import experimental.adaptivethreatmodeling.XssATM as XssATM
|
||||
import Labels
|
||||
import NoFeaturizationRestrictionsConfig
|
||||
import Queries
|
||||
|
||||
/** Gets the ATM configuration object for the specified query. */
|
||||
ATMConfig getATMCfg(Query query) {
|
||||
query instanceof NosqlInjectionQuery and
|
||||
result instanceof NosqlInjectionATM::NosqlInjectionATMConfig
|
||||
or
|
||||
query instanceof SqlInjectionQuery and result instanceof SqlInjectionATM::SqlInjectionATMConfig
|
||||
or
|
||||
query instanceof TaintedPathQuery and result instanceof TaintedPathATM::TaintedPathATMConfig
|
||||
or
|
||||
query instanceof XssQuery and result instanceof XssATM::DomBasedXssATMConfig
|
||||
}
|
||||
|
||||
/** Gets the ATM data flow configuration for the specified query. */
|
||||
DataFlow::Configuration getDataFlowCfg(Query query) {
|
||||
query instanceof NosqlInjectionQuery and result instanceof NosqlInjectionATM::Configuration
|
||||
or
|
||||
query instanceof SqlInjectionQuery and result instanceof SqlInjectionATM::Configuration
|
||||
or
|
||||
query instanceof TaintedPathQuery and result instanceof TaintedPathATM::Configuration
|
||||
or
|
||||
query instanceof XssQuery and result instanceof XssATM::Configuration
|
||||
}
|
||||
|
||||
/** Gets a known sink for the specified query. */
|
||||
private DataFlow::Node getASink(Query query) {
|
||||
getATMCfg(query).isKnownSink(result) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(result.getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/** Gets a data flow node that is known not to be a sink for the specified query. */
|
||||
private DataFlow::Node getANotASink(NotASinkReason reason) {
|
||||
CoreKnowledge::isOtherModeledArgument(result, reason) and
|
||||
// Some endpoints can be assigned both a `NotASinkReason` and a `LikelyNotASinkReason`. We
|
||||
// consider these endpoints to be `LikelyNotASink`, therefore this line excludes them from the
|
||||
// definition of `NotASink`.
|
||||
not CoreKnowledge::isOtherModeledArgument(result, any(LikelyNotASinkReason t)) and
|
||||
not result = getASink(_) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(result.getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow node whose label is unknown for the specified query.
|
||||
*
|
||||
* In other words, this is an endpoint that is not `Sink`, `NotASink`, or `LikelyNotASink` for the
|
||||
* specified query.
|
||||
*/
|
||||
private DataFlow::Node getAnUnknown(Query query) {
|
||||
(
|
||||
getATMCfg(query).isEffectiveSink(result) or
|
||||
getATMCfg(query).isEffectiveSinkWithOverridingScore(result, _, _)
|
||||
) and
|
||||
not result = getASink(query) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(result.getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/** Gets the query-specific sink label for the given endpoint, if such a label exists. */
|
||||
private EndpointLabel getSinkLabelForEndpoint(DataFlow::Node endpoint, Query query) {
|
||||
endpoint = getASink(query) and result instanceof SinkLabel
|
||||
or
|
||||
endpoint = getANotASink(_) and result instanceof NotASinkLabel
|
||||
or
|
||||
endpoint = getAnUnknown(query) and result instanceof UnknownLabel
|
||||
}
|
||||
|
||||
/** Gets an endpoint that should be extracted. */
|
||||
DataFlow::Node getAnEndpoint(Query query) { exists(getSinkLabelForEndpoint(result, query)) }
|
||||
|
||||
/**
|
||||
* Endpoints and associated metadata.
|
||||
*
|
||||
* Note that we draw a distinction between _features_, that are provided to the model at training
|
||||
* and query time, and _metadata_, that is only provided to the model at training time.
|
||||
*
|
||||
* Internal: See the design document for
|
||||
* [extensible extraction queries](https://docs.google.com/document/d/1g3ci2Nf1hGMG6ZUP0Y4PqCy_8elcoC_dhBvgTxdAWpg)
|
||||
* for technical information about the design of this predicate.
|
||||
*/
|
||||
predicate endpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
exists(Query query |
|
||||
// Only provide metadata for labelled endpoints, since we do not extract all endpoints.
|
||||
endpoint = getAnEndpoint(query) and
|
||||
queryName = query.getName() and
|
||||
(
|
||||
// Holds if there is a taint flow path from a known source to the endpoint
|
||||
key = "hasFlowFromSource" and
|
||||
(
|
||||
if FlowFromSource::hasFlowFromSource(endpoint, query)
|
||||
then value = "true"
|
||||
else value = "false"
|
||||
) and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
|
||||
// appear in an alert, making them less interesting training examples.
|
||||
key = "isConstantExpression" and
|
||||
(if endpoint.asExpr() instanceof ConstantExpr then value = "true" else value = "false") and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
|
||||
key = "isExcludedFromEndToEndEvaluation" and
|
||||
(if Exclusions::isFileExcluded(endpoint.getFile()) then value = "true" else value = "false") and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// The label for this query, considering the endpoint as a sink.
|
||||
key = "sinkLabel" and
|
||||
value = getSinkLabelForEndpoint(endpoint, query).getEncoding() and
|
||||
valueType = "string"
|
||||
or
|
||||
// The reason, or reasons, why the endpoint was labeled NotASink for this query.
|
||||
key = "notASinkReason" and
|
||||
exists(FilteringReason reason |
|
||||
endpoint = getANotASink(reason) and
|
||||
value = reason.getDescription()
|
||||
) and
|
||||
valueType = "string"
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
|
||||
* `endpoint`. To preserve compatibility with the data pipeline, this relation will instead set
|
||||
* `featureValue` to the empty string in this case.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
endpoints(endpoint, _, _, _, _) and
|
||||
(
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
or
|
||||
// Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
|
||||
featureName = EndpointFeatures::getASupportedFeatureName() and
|
||||
not exists(string value | EndpointFeatures::tokenFeatures(endpoint, featureName, value)) and
|
||||
featureValue = ""
|
||||
)
|
||||
}
|
||||
|
||||
module FlowFromSource {
|
||||
predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
|
||||
exists(Configuration cfg | cfg.getQuery() = q | cfg.hasFlow(_, endpoint))
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow configuration that replicates the data flow configuration for a specific query, but
|
||||
* replaces the set of sinks with the set of endpoints we're extracting.
|
||||
*
|
||||
* We use this to find out when there is flow to a particular endpoint from a known source.
|
||||
*
|
||||
* This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
|
||||
* from the CodeQL standard libraries for JavaScript.
|
||||
*/
|
||||
private class Configuration extends DataFlow::Configuration {
|
||||
Query q;
|
||||
|
||||
Configuration() { this = getDataFlowCfg(q) }
|
||||
|
||||
Query getQuery() { result = q }
|
||||
|
||||
/** The sinks are the endpoints we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink) { sink = getAnEndpoint(q) }
|
||||
|
||||
/** The sinks are the endpoints we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) {
|
||||
sink = getAnEndpoint(q)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts evaluation data we can use to evaluate ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import ExtractEndpointData as ExtractEndpointData
|
||||
|
||||
query predicate endpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
|
||||
// only select endpoints that are either Sink, NotASink or Unknown
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink", "Unknown"],
|
||||
"string") and
|
||||
// do not select endpoints filtered out by end-to-end evaluation
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
|
||||
"boolean")
|
||||
}
|
||||
|
||||
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
endpoints(endpoint, _, _, _, _) and
|
||||
ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue)
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts training data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import ExtractEndpointData as ExtractEndpointData
|
||||
|
||||
query predicate endpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
|
||||
// only select endpoints that are either Sink or NotASink
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink"], "string") and
|
||||
// do not select endpoints filtered out by end-to-end evaluation
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
|
||||
"boolean") and
|
||||
// only select endpoints that can be part of a tainted flow
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean")
|
||||
}
|
||||
|
||||
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
endpoints(endpoint, _, _, _, _) and
|
||||
ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue)
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* @name Endpoint types
|
||||
* @description Maps endpoint type encodings to human-readable descriptions.
|
||||
* @kind table
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
|
||||
from EndpointType type
|
||||
select type.getEncoding() as encoding, type.getDescription() as description order by encoding
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Query for finding misclassified endpoints which we can use to debug ML-powered queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import experimental.adaptivethreatmodeling.AdaptiveThreatModeling
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import experimental.adaptivethreatmodeling.BaseScoring
|
||||
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
|
||||
|
||||
/** The positive endpoint type for which you wish to find misclassified examples. */
|
||||
EndpointType getEndpointType() { result instanceof NosqlInjectionSinkType }
|
||||
|
||||
/** Get a positive endpoint. This will be run through the classifier to determine whether it is misclassified. */
|
||||
DataFlow::Node getAPositiveEndpoint() { result instanceof NosqlInjection::Sink }
|
||||
|
||||
/** An ATM configuration to find misclassified endpoints of type `getEndpointType()`. */
|
||||
class ExtractMisclassifiedEndpointsATMConfig extends ATMConfig {
|
||||
ExtractMisclassifiedEndpointsATMConfig() { this = "ExtractMisclassifiedEndpointsATMConfig" }
|
||||
|
||||
override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
|
||||
sinkCandidate = getAPositiveEndpoint()
|
||||
}
|
||||
|
||||
override EndpointType getASinkEndpointType() { result = getEndpointType() }
|
||||
}
|
||||
|
||||
/** Get an endpoint from `getAPositiveEndpoint()` that is incorrectly excluded from the results. */
|
||||
DataFlow::Node getAMisclassifedEndpoint() {
|
||||
any(ExtractMisclassifiedEndpointsATMConfig config).isEffectiveSink(result) and
|
||||
not any(ScoringResults results).shouldResultBeIncluded(_, result)
|
||||
}
|
||||
|
||||
/** The token features for each misclassified endpoint. */
|
||||
query predicate tokenFeaturesForMisclassifiedEndpoints(
|
||||
DataFlow::Node endpoint, string featureName, string featureValue
|
||||
) {
|
||||
endpoint = getAMisclassifedEndpoint() and
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Labels used in training and evaluation data to indicate knowledge about whether an endpoint is a
|
||||
* sink for a particular security query.
|
||||
*/
|
||||
|
||||
newtype TEndpointLabel =
|
||||
TSinkLabel() or
|
||||
TNotASinkLabel() or
|
||||
TUnknownLabel()
|
||||
|
||||
abstract class EndpointLabel extends TEndpointLabel {
|
||||
abstract string getEncoding();
|
||||
|
||||
string toString() { result = getEncoding() }
|
||||
}
|
||||
|
||||
class SinkLabel extends EndpointLabel, TSinkLabel {
|
||||
override string getEncoding() { result = "Sink" }
|
||||
}
|
||||
|
||||
class NotASinkLabel extends EndpointLabel, TNotASinkLabel {
|
||||
override string getEncoding() { result = "NotASink" }
|
||||
}
|
||||
|
||||
class UnknownLabel extends EndpointLabel, TUnknownLabel {
|
||||
override string getEncoding() { result = "Unknown" }
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*/
|
||||
|
||||
private import experimental.adaptivethreatmodeling.FeaturizationConfig
|
||||
|
||||
/**
|
||||
* A featurization config that featurizes all endpoints.
|
||||
*
|
||||
* This should only be used in extraction queries and tests.
|
||||
*/
|
||||
class NoRestrictionsFeaturizationConfig extends FeaturizationConfig {
|
||||
NoRestrictionsFeaturizationConfig() { this = "NoRestrictionsFeaturization" }
|
||||
|
||||
override DataFlow::Node getAnEndpointToFeaturize() { any() }
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Represents the security queries for which we currently have ML-powered versions.
|
||||
*/
|
||||
|
||||
newtype TQuery =
|
||||
TNosqlInjectionQuery() or
|
||||
TSqlInjectionQuery() or
|
||||
TTaintedPathQuery() or
|
||||
TXssQuery()
|
||||
|
||||
abstract class Query extends TQuery {
|
||||
abstract string getName();
|
||||
|
||||
string toString() { result = getName() }
|
||||
}
|
||||
|
||||
class NosqlInjectionQuery extends Query, TNosqlInjectionQuery {
|
||||
override string getName() { result = "NosqlInjection" }
|
||||
}
|
||||
|
||||
class SqlInjectionQuery extends Query, TSqlInjectionQuery {
|
||||
override string getName() { result = "SqlInjection" }
|
||||
}
|
||||
|
||||
class TaintedPathQuery extends Query, TTaintedPathQuery {
|
||||
override string getName() { result = "TaintedPath" }
|
||||
}
|
||||
|
||||
class XssQuery extends Query, TXssQuery {
|
||||
override string getName() { result = "Xss" }
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
name: codeql/javascript-experimental-atm-model-building
|
||||
version: 0.0.0
|
||||
extractor: javascript
|
||||
library: false
|
||||
groups:
|
||||
- javascript
|
||||
- experimental
|
||||
dependencies:
|
||||
codeql/javascript-experimental-atm-lib: "*"
|
||||
Reference in New Issue
Block a user