Merge pull request #7800 from github/henrymercer/js-atm-add-model-building-pack

JS: Add model building pack for ML-powered queries
This commit is contained in:
Henry Mercer
2022-02-01 20:51:19 +00:00
committed by GitHub
30 changed files with 993 additions and 0 deletions

View File

@@ -6,6 +6,7 @@
"*/ql/examples/qlpack.yml",
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
"csharp/ql/campaigns/Solorigate/lib/qlpack.yml",
"csharp/ql/campaigns/Solorigate/src/qlpack.yml",

View File

@@ -0,0 +1,67 @@
/**
* @name Debug result inclusion
* @description Use this query to understand why some alerts are included or excluded from the
* results of boosted queries. The results for this query are the union of the alerts
* generated by each boosted query. Each alert includes an explanation of why it was
* included or excluded for each of the four security queries.
* @kind problem
* @problem.severity error
* @id adaptive-threat-modeling/js/debug-result-inclusion
*/
import javascript
import experimental.adaptivethreatmodeling.ATMConfig
import extraction.ExtractEndpointData
/**
 * Gets a reason why `sinkCandidate` is excluded by the sink endpoint filter of the boosted query
 * identified by `query`.
 *
 * Each branch dispatches to the `SinkEndpointFilter` of the ATM module matching the query kind.
 */
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
  // DOM-based XSS
  XssATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) = result and
  query instanceof XssQuery
  or
  // Path injection
  TaintedPathATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) = result and
  query instanceof TaintedPathQuery
  or
  // SQL injection
  SqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) = result and
  query instanceof SqlInjectionQuery
  or
  // NoSQL injection
  NosqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) = result and
  query instanceof NosqlInjectionQuery
}
/**
 * Gets a description of how the boosted query identified by `query` treats the candidate flow
 * from `sourceCandidate` to `sinkCandidate`.
 *
 * Possible values are `included`, `excluded[reason=...]`, `no flow` and `not a known source`.
 * More than one value can be produced for the same candidate, for example when several
 * exclusion reasons apply.
 */
pragma[inline]
string getDescriptionForAlertCandidate(
  DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate, Query query
) {
  // The data flow configuration of the boosted query reports this flow.
  result = "included" and
  getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate)
  or
  // The ATM configuration already treats the sink candidate as a known sink.
  result = "excluded[reason=known-sink]" and
  getATMCfg(query).isKnownSink(sinkCandidate)
  or
  // The sink endpoint filter rejected the sink candidate; one result per reason.
  result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
  or
  // No filtering reason and no flow: distinguish a known source without flow from a source
  // candidate that the configuration does not recognise at all.
  not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
  not getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
  (
    (
      getDataFlowCfg(query).isSource(sourceCandidate) or
      getDataFlowCfg(query).isSource(sourceCandidate, _)
    ) and
    result = "no flow"
    or
    not getDataFlowCfg(query).isSource(sourceCandidate) and
    not getDataFlowCfg(query).isSource(sourceCandidate, _) and
    result = "not a known source"
  )
}
/**
 * Gets the descriptions of the candidate flow from `sourceCandidate` to `sinkCandidate` for
 * every `Query`, each prefixed by the query name and joined with `", "`.
 */
pragma[inline]
string getDescriptionForAlert(DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate) {
  result =
    concat(Query q, string entry |
      entry =
        q.getName() + ": " + getDescriptionForAlertCandidate(sourceCandidate, sinkCandidate, q)
    |
      entry, ", "
    )
}
// Report every (source, sink) pair for which some data flow configuration has flow, annotated
// with the per-query inclusion/exclusion description.
from DataFlow::Node source, DataFlow::Node sink
where exists(DataFlow::Configuration cfg | cfg.hasFlow(source, sink))
select sink,
  "This is an ATM result that may depend on $@ [" + getDescriptionForAlert(source, sink) + "]",
  source, "a user-provided value"

View File

@@ -0,0 +1,11 @@
private import javascript
private import extraction.Exclusions as Exclusions
/**
 * Holds if the flow from `source` to `sink` should be left out of the results of an end-to-end
 * evaluation query, which is the case when the file of either endpoint is excluded.
 */
pragma[inline]
predicate isFlowExcluded(DataFlow::Node source, DataFlow::Node sink) {
  Exclusions::isFileExcluded(source.getFile())
  or
  Exclusions::isFileExcluded(sink.getFile())
}

View File

@@ -0,0 +1,27 @@
/**
* EndpointScoresIntegrationTest.ql
*
* Extract scores for each test endpoint that is an argument to a function call in the database.
* This is used by integration tests to verify that QL and the modeling codebase agree on the scores
* of a set of test endpoints.
*/
import javascript
import experimental.adaptivethreatmodeling.ATMConfig
import experimental.adaptivethreatmodeling.FeaturizationConfig
import experimental.adaptivethreatmodeling.EndpointScoring::ModelScoring as ModelScoring
/**
 * A featurization config that selects every argument of a call as an endpoint to featurize.
 *
 * This should only be used in extraction queries and tests.
 */
class FunctionArgumentFeaturizationConfig extends FeaturizationConfig {
  FunctionArgumentFeaturizationConfig() { this = "FunctionArgumentFeaturization" }

  /** Gets an argument of some call node. */
  override DataFlow::Node getAnEndpointToFeaturize() {
    result = any(DataFlow::CallNode call).getAnArgument()
  }
}
// Expose the three-column `ModelScoring::endpointScores` predicate as a result relation named
// `endpointScores`.
query predicate endpointScores = ModelScoring::endpointScores/3;

View File

@@ -0,0 +1,16 @@
/**
* ModelCheck.ql
*
* Returns checksums of ATM models.
*/
/**
 * The `availableMlModels` template predicate.
 *
 * This is populated by the evaluator with metadata for the available machine learning models.
 *
 * Each row describes one model through four string columns: `modelChecksum`, `modelLanguage`,
 * `modelName` and `modelType`.
 * NOTE(review): the exact format of each column is not visible here - confirm against the
 * evaluator before relying on it.
 */
external predicate availableMlModels(
string modelChecksum, string modelLanguage, string modelName, string modelType
);
// Report the checksum of every available model whose language column is "javascript",
// regardless of model name and type.
select any(string modelChecksum | availableMlModels(modelChecksum, "javascript", _, _))

View File

@@ -0,0 +1,24 @@
/**
* NosqlInjection.ql
*
* Version of the standard NoSQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/
import semmle.javascript.security.dataflow.NosqlInjection
import EndToEndEvaluation as EndToEndEvaluation
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  // Only flows found by the standard NoSQL injection configuration are reported.
  cfg instanceof NosqlInjection::Configuration and
  cfg.hasFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `EndToEndEvaluation::isFlowExcluded` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,28 @@
/**
* NosqlInjectionATM.ql
*
* Version of the boosted NoSQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.NosqlInjectionATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Each result carries the score computed for its flow.
  score = getScoreForFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,29 @@
/**
* NosqlInjectionATMLite.ql
*
* Arbitrarily ranked version of the boosted NoSQL injection query with an output relation ready to
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
* filters, and (b) as a baseline to compare the model against.
*/
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.NosqlInjectionATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Every result gets the same score, so the ordering is decided by the location columns alone.
  score = 0 and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,24 @@
/**
* SqlInjection.ql
*
* Version of the standard SQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/
import semmle.javascript.security.dataflow.SqlInjection
import EndToEndEvaluation as EndToEndEvaluation
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  // Only flows found by the standard SQL injection configuration are reported.
  cfg instanceof SqlInjection::Configuration and
  cfg.hasFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `EndToEndEvaluation::isFlowExcluded` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,28 @@
/**
* SqlInjectionATM.ql
*
* Version of the boosted SQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.SqlInjectionATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Each result carries the score computed for its flow.
  score = getScoreForFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,29 @@
/**
* SqlInjectionATMLite.ql
*
* Arbitrarily ranked version of the boosted SQL injection query with an output relation ready to
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
* filters, and (b) as a baseline to compare the model against.
*/
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.SqlInjectionATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Every result gets the same score, so the ordering is decided by the location columns alone.
  score = 0 and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,24 @@
/**
* TaintedPath.ql
*
* Version of the standard path injection query with an output relation ready to plug into the
* evaluation pipeline.
*/
import semmle.javascript.security.dataflow.TaintedPath
import EndToEndEvaluation as EndToEndEvaluation
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  // Only flows found by the standard path injection configuration are reported.
  cfg instanceof TaintedPath::Configuration and
  cfg.hasFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `EndToEndEvaluation::isFlowExcluded` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,28 @@
/**
* TaintedPathATM.ql
*
* Version of the boosted path injection query with an output relation ready to plug into the
* evaluation pipeline.
*/
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.TaintedPathATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Each result carries the score computed for its flow.
  score = getScoreForFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,29 @@
/**
* TaintedPathATMLite.ql
*
* Arbitrarily ranked version of the boosted path injection query with an output relation ready to
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
* filters, and (b) as a baseline to compare the model against.
*/
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.TaintedPathATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Every result gets the same score, so the ordering is decided by the location columns alone.
  score = 0 and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,24 @@
/**
* Xss.ql
*
* Version of the standard XSS query with an output relation ready to plug into the evaluation
* pipeline.
*/
import semmle.javascript.security.dataflow.DomBasedXss
import EndToEndEvaluation as EndToEndEvaluation
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string filePathSource,
  int startLineSource, int startColumnSource, int endLineSource, int endColumnSource,
  string filePathSink, int startLineSink, int startColumnSink, int endLineSink, int endColumnSink
where
  // Only flows found by the standard DOM-based XSS configuration are reported.
  cfg instanceof DomBasedXss::Configuration and
  cfg.hasFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `EndToEndEvaluation::isFlowExcluded` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,29 @@
/**
* XssATM.ql
*
* Version of the boosted XSS query with an output relation ready to plug into the evaluation
* pipeline.
*/
import javascript
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.XssATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Each result carries the score computed for its flow.
  score = getScoreForFlow(source, sink) and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,30 @@
/**
* XssATMLite.ql
*
* Arbitrarily ranked version of the boosted XSS query with an output relation ready to plug into
* the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint filters,
* and (b) as a baseline to compare the model against.
*/
import javascript
import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.XssATM
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  cfg.hasFlow(source, sink) and
  // Every result gets the same score, so the ordering is decided by the location columns alone.
  score = 0 and
  // Look up the location of both ends of the flow.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Drop flows for which `isFlowExcluded` or `isFlowLikelyInBaseQuery` holds.
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink

View File

@@ -0,0 +1,26 @@
/*
* For internal use only.
*
* [DEPRECATED] Counts alerts and sinks for JavaScript security queries.
*
* This query is deprecated due to the performance implications of bringing in data flow
* configurations from multiple queries. Instead use `CountSourcesAndSinks.ql` to count sinks for
* JavaScript security queries, and count alerts by running the standard or evaluation queries for
* each security vulnerability.
*/
import semmle.javascript.security.dataflow.NosqlInjection
import semmle.javascript.security.dataflow.SqlInjection
import semmle.javascript.security.dataflow.TaintedPath
import semmle.javascript.security.dataflow.DomBasedXss
/** Gets the number of (source, sink) pairs for which `cfg` has flow. */
int numAlerts(DataFlow::Configuration cfg) {
  result = count(DataFlow::Node src, DataFlow::Node snk | cfg.hasFlow(src, snk))
}
from
  int numNosqlAlerts, int numSqlAlerts, int numTaintedPathAlerts, int numXssAlerts,
  int numNosqlSinks, int numSqlSinks, int numTaintedPathSinks, int numXssSinks
where
  // Alert counts, one per standard security query configuration.
  numNosqlAlerts = numAlerts(any(NosqlInjection::Configuration cfg)) and
  numSqlAlerts = numAlerts(any(SqlInjection::Configuration cfg)) and
  numTaintedPathAlerts = numAlerts(any(TaintedPath::Configuration cfg)) and
  numXssAlerts = numAlerts(any(DomBasedXss::Configuration cfg)) and
  // Sink counts, one per standard security query.
  numNosqlSinks = count(NosqlInjection::Sink sink) and
  numSqlSinks = count(SqlInjection::Sink sink) and
  numTaintedPathSinks = count(TaintedPath::Sink sink) and
  numXssSinks = count(DomBasedXss::Sink sink)
select numNosqlAlerts, numSqlAlerts, numTaintedPathAlerts, numXssAlerts, numNosqlSinks,
  numSqlSinks, numTaintedPathSinks, numXssSinks

View File

@@ -0,0 +1,72 @@
/*
* For internal use only.
*
* Counts sources and sinks for JavaScript security queries.
*/
import javascript
import semmle.javascript.dataflow.Configuration
// javascript/ql/lib/semmle/javascript/security/dataflow$ ls *Query.qll | sed -e 's/\(.*\)Query.qll/import semmle.javascript.security.dataflow.\1Query as \1/'
import semmle.javascript.security.dataflow.BrokenCryptoAlgorithmQuery as BrokenCryptoAlgorithm
import semmle.javascript.security.dataflow.BuildArtifactLeakQuery as BuildArtifactLeak
import semmle.javascript.security.dataflow.CleartextLoggingQuery as CleartextLogging
import semmle.javascript.security.dataflow.CleartextStorageQuery as CleartextStorage
import semmle.javascript.security.dataflow.ClientSideUrlRedirectQuery as ClientSideUrlRedirect
import semmle.javascript.security.dataflow.CodeInjectionQuery as CodeInjection
import semmle.javascript.security.dataflow.CommandInjectionQuery as CommandInjection
import semmle.javascript.security.dataflow.ConditionalBypassQuery as ConditionalBypass
import semmle.javascript.security.dataflow.CorsMisconfigurationForCredentialsQuery as CorsMisconfigurationForCredentials
import semmle.javascript.security.dataflow.DeepObjectResourceExhaustionQuery as DeepObjectResourceExhaustion
import semmle.javascript.security.dataflow.DifferentKindsComparisonBypassQuery as DifferentKindsComparisonBypass
import semmle.javascript.security.dataflow.DomBasedXssQuery as DomBasedXss
import semmle.javascript.security.dataflow.ExceptionXssQuery as ExceptionXss
import semmle.javascript.security.dataflow.ExternalAPIUsedWithUntrustedDataQuery as ExternalAPIUsedWithUntrustedData
import semmle.javascript.security.dataflow.FileAccessToHttpQuery as FileAccessToHttp
import semmle.javascript.security.dataflow.HardcodedCredentialsQuery as HardcodedCredentials
import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCodeQuery as HardcodedDataInterpretedAsCode
import semmle.javascript.security.dataflow.HostHeaderPoisoningInEmailGenerationQuery as HostHeaderPoisoningInEmailGeneration
import semmle.javascript.security.dataflow.HttpToFileAccessQuery as HttpToFileAccess
import semmle.javascript.security.dataflow.ImproperCodeSanitizationQuery as ImproperCodeSanitization
import semmle.javascript.security.dataflow.IncompleteHtmlAttributeSanitizationQuery as IncompleteHtmlAttributeSanitization
import semmle.javascript.security.dataflow.IndirectCommandInjectionQuery as IndirectCommandInjection
import semmle.javascript.security.dataflow.InsecureDownloadQuery as InsecureDownload
import semmle.javascript.security.dataflow.InsecureRandomnessQuery as InsecureRandomness
import semmle.javascript.security.dataflow.InsufficientPasswordHashQuery as InsufficientPasswordHash
import semmle.javascript.security.dataflow.LogInjectionQuery as LogInjection
import semmle.javascript.security.dataflow.LoopBoundInjectionQuery as LoopBoundInjection
import semmle.javascript.security.dataflow.NosqlInjectionQuery as NosqlInjection
import semmle.javascript.security.dataflow.PostMessageStarQuery as PostMessageStar
import semmle.javascript.security.dataflow.PrototypePollutingAssignmentQuery as PrototypePollutingAssignment
import semmle.javascript.security.dataflow.PrototypePollutionQuery as PrototypePollution
import semmle.javascript.security.dataflow.ReflectedXssQuery as ReflectedXss
import semmle.javascript.security.dataflow.RegExpInjectionQuery as RegExpInjection
import semmle.javascript.security.dataflow.RemotePropertyInjectionQuery as RemotePropertyInjection
import semmle.javascript.security.dataflow.RequestForgeryQuery as RequestForgery
import semmle.javascript.security.dataflow.ServerSideUrlRedirectQuery as ServerSideUrlRedirect
import semmle.javascript.security.dataflow.ShellCommandInjectionFromEnvironmentQuery as ShellCommandInjectionFromEnvironment
import semmle.javascript.security.dataflow.SqlInjectionQuery as SqlInjection
import semmle.javascript.security.dataflow.StackTraceExposureQuery as StackTraceExposure
import semmle.javascript.security.dataflow.StoredXssQuery as StoredXss
import semmle.javascript.security.dataflow.TaintedFormatStringQuery as TaintedFormatString
import semmle.javascript.security.dataflow.TaintedPathQuery as TaintedPath
import semmle.javascript.security.dataflow.TemplateObjectInjectionQuery as TemplateObjectInjection
import semmle.javascript.security.dataflow.TypeConfusionThroughParameterTamperingQuery as TypeConfusionThroughParameterTampering
import semmle.javascript.security.dataflow.UnsafeDeserializationQuery as UnsafeDeserialization
import semmle.javascript.security.dataflow.UnsafeDynamicMethodAccessQuery as UnsafeDynamicMethodAccess
import semmle.javascript.security.dataflow.UnsafeHtmlConstructionQuery as UnsafeHtmlConstruction
import semmle.javascript.security.dataflow.UnsafeJQueryPluginQuery as UnsafeJQueryPlugin
import semmle.javascript.security.dataflow.UnsafeShellCommandConstructionQuery as UnsafeShellCommandConstruction
import semmle.javascript.security.dataflow.UnvalidatedDynamicMethodCallQuery as UnvalidatedDynamicMethodCall
import semmle.javascript.security.dataflow.XmlBombQuery as XmlBomb
import semmle.javascript.security.dataflow.XpathInjectionQuery as XpathInjection
import semmle.javascript.security.dataflow.XssThroughDomQuery as XssThroughDom
import semmle.javascript.security.dataflow.XxeQuery as Xxe
import semmle.javascript.security.dataflow.ZipSlipQuery as ZipSlip
/** Gets a sink of `cfg`, using either the one-argument or the two-argument form of `isSink`. */
DataFlow::Node getASink(Configuration cfg) {
  cfg.isSink(result, _)
  or
  cfg.isSink(result)
}
/** Gets a source of `cfg`, using either the one-argument or the two-argument form of `isSource`. */
DataFlow::Node getASource(Configuration cfg) {
  cfg.isSource(result, _)
  or
  cfg.isSource(result)
}
// One row per configuration, with the number of distinct sources and sinks it defines.
from Configuration cfg, int sources, int sinks
where
  sources = count(getASource(cfg)) and
  sinks = count(getASink(cfg))
select cfg, sources, sinks

View File

@@ -0,0 +1,49 @@
/*
* For internal use only.
*
* Defines files that should be excluded from the evaluation of ML models.
*/
private import javascript
private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
/**
 * Holds if `file` should be excluded from end-to-end evaluation.
 *
 * A file is excluded when it has no relative path, when its lower-cased relative path contains
 * one of the ignored path segments, or when it is classified as externs, generated, library or
 * test code.
 */
predicate isFileExcluded(File file) {
  // Ignore externs, generated, library, and test files.
  ClassifyFiles::classify(file, ["externs", "generated", "library", "test"])
  or
  // Ignore files that are outside the root folder of the analyzed source location.
  //
  // A file without a relative path lives outside the root folder of the analyzed source
  // location, which means it was added to the database as an additional file (for example a
  // standard library file) that we would like to ignore.
  not exists(file.getRelativePath())
  or
  // Ignore files based on their path.
  exists(string separator, string ignorePattern, string pathRegex |
    separator = "(\\/|\\.)" and
    ignorePattern =
      // Exclude test files
      "(tests?|test[_-]?case|" +
        // Exclude library files
        //
        // - The Bower and npm package managers store packages in bower_components and
        //   node_modules folders respectively.
        // - Specific exclusion for end-to-end: `applications/examples/static/epydoc` contains
        //   library code from Epydoc.
        "3rd[_-]?party|bower_components|extern(s|al)?|node_modules|resources|third[_-]?party|_?vendor|"
        + "applications" + separator + "examples" + separator + "static" + separator + "epydoc|" +
        // Exclude generated code
        "gen|\\.?generated|" +
        // Exclude benchmarks
        "benchmarks?|" +
        // Exclude documentation
        "docs?|documentation)" and
    // The ignored segment may sit between two separators, at the very start of the path, or at
    // the very end of it.
    pathRegex =
      separator + ignorePattern + separator + "|" + "^" + ignorePattern + separator + "|" +
        separator + ignorePattern + "$" and
    exists(file.getRelativePath().toLowerCase().regexpFind(pathRegex, _, _))
  )
}

View File

@@ -0,0 +1,11 @@
/*
* For internal use only.
*
* Extracts training and evaluation data we can use to train ML models for ML-powered queries.
*/
import ExtractEndpointData as ExtractEndpointData
// Expose the five-column `ExtractEndpointData::endpoints` predicate as a result relation named
// `endpoints`.
query predicate endpoints = ExtractEndpointData::endpoints/5;
// Expose the three-column `ExtractEndpointData::tokenFeatures` predicate as a result relation
// named `tokenFeatures`.
query predicate tokenFeatures = ExtractEndpointData::tokenFeatures/3;

View File

@@ -0,0 +1,195 @@
/*
* For internal use only.
*
* Library code for training and evaluation data we can use to train ML models for ML-powered
* queries.
*/
import javascript
import Exclusions as Exclusions
import evaluation.EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.ATMConfig
import experimental.adaptivethreatmodeling.CoreKnowledge as CoreKnowledge
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
import experimental.adaptivethreatmodeling.EndpointScoring as EndpointScoring
import experimental.adaptivethreatmodeling.EndpointTypes
import experimental.adaptivethreatmodeling.FilteringReasons
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionATM
import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionATM
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathATM
import experimental.adaptivethreatmodeling.XssATM as XssATM
import Labels
import NoFeaturizationRestrictionsConfig
import Queries
/**
 * Gets the ATM configuration object that belongs to `query`.
 *
 * Each of the four supported queries maps to the `ATMConfig` subclass of its own ATM module.
 */
ATMConfig getATMCfg(Query query) {
  result instanceof XssATM::DomBasedXssATMConfig and
  query instanceof XssQuery
  or
  result instanceof TaintedPathATM::TaintedPathATMConfig and
  query instanceof TaintedPathQuery
  or
  result instanceof SqlInjectionATM::SqlInjectionATMConfig and
  query instanceof SqlInjectionQuery
  or
  result instanceof NosqlInjectionATM::NosqlInjectionATMConfig and
  query instanceof NosqlInjectionQuery
}
/**
 * Gets the ATM data flow configuration that belongs to `query`.
 *
 * Each of the four supported queries maps to the `Configuration` class of its own ATM module.
 */
DataFlow::Configuration getDataFlowCfg(Query query) {
  result instanceof XssATM::Configuration and
  query instanceof XssQuery
  or
  result instanceof TaintedPathATM::Configuration and
  query instanceof TaintedPathQuery
  or
  result instanceof SqlInjectionATM::Configuration and
  query instanceof SqlInjectionQuery
  or
  result instanceof NosqlInjectionATM::Configuration and
  query instanceof NosqlInjectionQuery
}
/**
 * Gets a known sink for `query`, restricted to nodes in files that have a relative path.
 */
private DataFlow::Node getASink(Query query) {
  // Only consider the source code for the project being analyzed.
  exists(result.getFile().getRelativePath()) and
  exists(ATMConfig atmCfg | atmCfg = getATMCfg(query) | atmCfg.isKnownSink(result))
}
/**
 * Gets a data flow node that is known not to be a sink, for the reason `reason`.
 *
 * This label does not depend on a particular query: a node that is a known sink for any query
 * is never returned.
 */
private DataFlow::Node getANotASink(NotASinkReason reason) {
  // Only consider the source code for the project being analyzed.
  exists(result.getFile().getRelativePath()) and
  CoreKnowledge::isOtherModeledArgument(result, reason) and
  // Some endpoints can be assigned both a `NotASinkReason` and a `LikelyNotASinkReason`. Those
  // endpoints count as `LikelyNotASink`, so they are removed from `NotASink` here.
  not exists(LikelyNotASinkReason likelyReason |
    CoreKnowledge::isOtherModeledArgument(result, likelyReason)
  ) and
  // A known sink of any query cannot be labelled as not a sink.
  not exists(Query anyQuery | result = getASink(anyQuery))
}
/**
 * Gets a data flow node whose label is unknown for the specified query.
 *
 * In other words, this is an effective sink of the query's ATM configuration that is
 * neither a known `Sink` for the query nor a `NotASink` endpoint.
 *
 * NOTE(review): `LikelyNotASink` endpoints are not excluded explicitly here; presumably
 * the effective-sink filters of the ATM configuration already remove them. Confirm.
 */
private DataFlow::Node getAnUnknown(Query query) {
  (
    getATMCfg(query).isEffectiveSink(result) or
    getATMCfg(query).isEffectiveSinkWithOverridingScore(result, _, _)
  ) and
  // Defensive: an endpoint that is a known sink must not also be labelled `Unknown`.
  not result = getASink(query) and
  // Defensive: without this check an effective sink that is also `NotASink` would be
  // given two conflicting labels (`NotASink` and `Unknown`) by `getSinkLabelForEndpoint`.
  not result = getANotASink(_) and
  // Only consider the source code for the project being analyzed.
  exists(result.getFile().getRelativePath())
}
/**
 * Gets the sink label of `endpoint` for `query`, if such a label exists.
 *
 * Known sinks and unknown endpoints are determined per query, whereas the `NotASink`
 * label does not depend on `query`.
 */
private EndpointLabel getSinkLabelForEndpoint(DataFlow::Node endpoint, Query query) {
  result instanceof SinkLabel and
  endpoint = getASink(query)
  or
  result instanceof NotASinkLabel and
  endpoint = getANotASink(_)
  or
  result instanceof UnknownLabel and
  endpoint = getAnUnknown(query)
}
/**
 * Gets an endpoint that should be extracted for `query`, that is, an endpoint that has
 * at least one sink label for `query`.
 */
DataFlow::Node getAnEndpoint(Query query) {
  exists(EndpointLabel label | label = getSinkLabelForEndpoint(result, query))
}
/**
 * Endpoints and associated metadata.
 *
 * Each row gives one metadata entry (`key`, `value`, `valueType`) for an `endpoint`
 * extracted for the query named `queryName`.
 *
 * Note that we draw a distinction between _features_, that are provided to the model at training
 * and query time, and _metadata_, that is only provided to the model at training time.
 *
 * Internal: See the design document for
 * [extensible extraction queries](https://docs.google.com/document/d/1g3ci2Nf1hGMG6ZUP0Y4PqCy_8elcoC_dhBvgTxdAWpg)
 * for technical information about the design of this predicate.
 */
predicate endpoints(
  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
) {
  exists(Query query |
    // Metadata is only provided for labelled endpoints, since not all endpoints are extracted.
    endpoint = getAnEndpoint(query) and
    queryName = query.getName()
  |
    // The label for this query, considering the endpoint as a sink.
    key = "sinkLabel" and
    valueType = "string" and
    value = getSinkLabelForEndpoint(endpoint, query).getEncoding()
    or
    // The reason, or reasons, why the endpoint was labeled NotASink.
    key = "notASinkReason" and
    valueType = "string" and
    exists(FilteringReason reason | endpoint = getANotASink(reason) |
      value = reason.getDescription()
    )
    or
    // Whether there is a taint flow path from a known source to the endpoint.
    key = "hasFlowFromSource" and
    valueType = "boolean" and
    (
      FlowFromSource::hasFlowFromSource(endpoint, query) and value = "true"
      or
      not FlowFromSource::hasFlowFromSource(endpoint, query) and value = "false"
    )
    or
    // Constant expressions always evaluate to a constant primitive value, so they can never
    // appear in an alert. This makes them less interesting training examples.
    key = "isConstantExpression" and
    valueType = "boolean" and
    (
      endpoint.asExpr() instanceof ConstantExpr and value = "true"
      or
      not endpoint.asExpr() instanceof ConstantExpr and value = "false"
    )
    or
    // Whether alerts involving the endpoint are excluded from the end-to-end evaluation.
    key = "isExcludedFromEndToEndEvaluation" and
    valueType = "boolean" and
    (
      Exclusions::isFileExcluded(endpoint.getFile()) and value = "true"
      or
      not Exclusions::isFileExcluded(endpoint.getFile()) and value = "false"
    )
  )
}
/**
 * Holds if `featureValue` is the value of the token feature `featureName` for an
 * extracted `endpoint`.
 *
 * `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for
 * `endpoint`. To preserve compatibility with the data pipeline, this relation instead
 * reports the empty string as `featureValue` for every supported feature that is absent.
 */
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
  // Restrict to endpoints for which metadata is extracted.
  endpoints(endpoint, _, _, _, _) and
  (
    // Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
    featureValue = "" and
    featureName = EndpointFeatures::getASupportedFeatureName() and
    not EndpointFeatures::tokenFeatures(endpoint, featureName, _)
    or
    EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
  )
}
/** Provides a predicate for checking whether an extracted endpoint is reached by data flow. */
module FlowFromSource {
  /**
   * Holds if the data flow configuration for query `q` finds flow from some node to
   * `endpoint`.
   */
  predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
    exists(Configuration cfg | cfg.getQuery() = q and cfg.hasFlow(_, endpoint))
  }

  /**
   * A data flow configuration that replicates the data flow configuration for a specific query, but
   * replaces the set of sinks with the set of endpoints we're extracting.
   *
   * We use this to find out when there is flow to a particular endpoint from a known source.
   *
   * This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
   * from the CodeQL standard libraries for JavaScript.
   */
  private class Configuration extends DataFlow::Configuration {
    Query query;

    Configuration() { this = getDataFlowCfg(query) }

    /** Gets the query whose data flow configuration this is. */
    Query getQuery() { result = query }

    /** Holds if `sink` is one of the endpoints we're extracting for this query. */
    override predicate isSink(DataFlow::Node sink) { sink = getAnEndpoint(query) }

    /**
     * Holds if `sink` is one of the endpoints we're extracting for this query. The flow
     * label `lbl` is left unconstrained.
     */
    override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) {
      sink = getAnEndpoint(query)
    }
  }
}

View File

@@ -0,0 +1,25 @@
/*
* For internal use only.
*
* Extracts evaluation data we can use to evaluate ML models for ML-powered queries.
*/
import javascript
import ExtractEndpointData as ExtractEndpointData
/**
 * Holds if (`key`, `value`, `valueType`) is a metadata entry of an `endpoint` selected as
 * evaluation data for the query named `queryName`.
 */
query predicate endpoints(
  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
) {
  // Skip endpoints that are filtered out by the end-to-end evaluation.
  ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
    "boolean") and
  // Keep only endpoints labelled Sink, NotASink or Unknown.
  exists(string label | label = ["Sink", "NotASink", "Unknown"] |
    ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", label, "string")
  ) and
  ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType)
}
/**
 * Holds if `featureValue` is the value of token feature `featureName` for an `endpoint`
 * that is selected by the `endpoints` query predicate of this file.
 */
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
  ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue) and
  endpoints(endpoint, _, _, _, _)
}

View File

@@ -0,0 +1,26 @@
/*
* For internal use only.
*
* Extracts training data we can use to train ML models for ML-powered queries.
*/
import javascript
import ExtractEndpointData as ExtractEndpointData
/**
 * Holds if (`key`, `value`, `valueType`) is a metadata entry of an `endpoint` selected as
 * training data for the query named `queryName`.
 */
query predicate endpoints(
  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
) {
  // Keep only endpoints that can be part of a tainted flow.
  ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean") and
  // Skip endpoints that are filtered out by the end-to-end evaluation.
  ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
    "boolean") and
  // Keep only endpoints labelled Sink or NotASink.
  exists(string label | label = ["Sink", "NotASink"] |
    ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", label, "string")
  ) and
  ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType)
}
/**
 * Holds if `featureValue` is the value of token feature `featureName` for an `endpoint`
 * that is selected by the `endpoints` query predicate of this file.
 */
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
  ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue) and
  endpoints(endpoint, _, _, _, _)
}

View File

@@ -0,0 +1,10 @@
/**
* @name Endpoint types
* @description Maps endpoint type encodings to human-readable descriptions.
* @kind table
*/
import experimental.adaptivethreatmodeling.EndpointTypes
// One row per endpoint type: its encoding and its description, sorted by encoding.
from EndpointType endpointType
select endpointType.getEncoding() as encoding, endpointType.getDescription() as description
  order by encoding

View File

@@ -0,0 +1,44 @@
/*
* For internal use only.
*
* Query for finding misclassified endpoints which we can use to debug ML-powered queries.
*/
import javascript
import experimental.adaptivethreatmodeling.AdaptiveThreatModeling
import experimental.adaptivethreatmodeling.ATMConfig
import experimental.adaptivethreatmodeling.BaseScoring
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
import experimental.adaptivethreatmodeling.EndpointTypes
import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
/**
 * Gets the positive endpoint type for which you wish to find misclassified examples.
 *
 * As written, this is the NoSQL injection sink type.
 */
EndpointType getEndpointType() {
  result instanceof NosqlInjectionSinkType
}
/**
 * Gets a positive endpoint, here a `NosqlInjection::Sink`.
 *
 * This will be run through the classifier to determine whether it is misclassified.
 */
DataFlow::Node getAPositiveEndpoint() {
  result instanceof NosqlInjection::Sink
}
/**
 * An ATM configuration to find misclassified endpoints of type `getEndpointType()`.
 *
 * Its effective sinks are exactly the endpoints returned by `getAPositiveEndpoint()`.
 */
class ExtractMisclassifiedEndpointsATMConfig extends ATMConfig {
  ExtractMisclassifiedEndpointsATMConfig() { this = "ExtractMisclassifiedEndpointsATMConfig" }

  /** Holds if `sinkCandidate` is one of the positive endpoints. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    getAPositiveEndpoint() = sinkCandidate
  }

  /** Gets the endpoint type under investigation. */
  override EndpointType getASinkEndpointType() { getEndpointType() = result }
}
/**
 * Gets an endpoint from `getAPositiveEndpoint()` that is incorrectly excluded from the
 * results, that is, an effective sink that no `ScoringResults` instance includes.
 */
DataFlow::Node getAMisclassifedEndpoint() {
  exists(ExtractMisclassifiedEndpointsATMConfig config | config.isEffectiveSink(result)) and
  not exists(ScoringResults scoring | scoring.shouldResultBeIncluded(_, result))
}
/**
 * Holds if `featureValue` is the value of token feature `featureName` for the
 * misclassified endpoint `endpoint`.
 */
query predicate tokenFeaturesForMisclassifiedEndpoints(
  DataFlow::Node endpoint, string featureName, string featureValue
) {
  EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue) and
  getAMisclassifedEndpoint() = endpoint
}

View File

@@ -0,0 +1,29 @@
/*
* For internal use only.
*
* Labels used in training and evaluation data to indicate knowledge about whether an endpoint is a
* sink for a particular security query.
*/
/** The underlying algebraic datatype for endpoint labels, with one branch per label. */
newtype TEndpointLabel =
  TSinkLabel() or
  TNotASinkLabel() or
  TUnknownLabel()
/** A label describing what is known about whether an endpoint is a sink. */
abstract class EndpointLabel extends TEndpointLabel {
  /** Gets the string encoding of this label. */
  abstract string getEncoding();

  /** Gets a textual representation of this label, which is its encoding. */
  string toString() { result = this.getEncoding() }
}
/** The label encoded as `Sink`. */
class SinkLabel extends EndpointLabel, TSinkLabel {
  override string getEncoding() {
    result = "Sink"
  }
}
/** The label encoded as `NotASink`. */
class NotASinkLabel extends EndpointLabel, TNotASinkLabel {
  override string getEncoding() {
    result = "NotASink"
  }
}
/** The label encoded as `Unknown`. */
class UnknownLabel extends EndpointLabel, TUnknownLabel {
  override string getEncoding() {
    result = "Unknown"
  }
}

View File

@@ -0,0 +1,16 @@
/*
* For internal use only.
*/
private import experimental.adaptivethreatmodeling.FeaturizationConfig
/**
 * A featurization config that places no restriction on the endpoints to featurize:
 * every data flow node is an endpoint to featurize.
 *
 * This should only be used in extraction queries and tests.
 */
class NoRestrictionsFeaturizationConfig extends FeaturizationConfig {
  NoRestrictionsFeaturizationConfig() { this = "NoRestrictionsFeaturization" }

  /** Gets any data flow node. */
  override DataFlow::Node getAnEndpointToFeaturize() {
    any()
  }
}

View File

@@ -0,0 +1,33 @@
/*
* For internal use only.
*
* Represents the security queries for which we currently have ML-powered versions.
*/
/** The underlying algebraic datatype for queries, with one branch per supported query. */
newtype TQuery =
  TNosqlInjectionQuery() or
  TSqlInjectionQuery() or
  TTaintedPathQuery() or
  TXssQuery()
/** A security query for which there is an ML-powered version. */
abstract class Query extends TQuery {
  /** Gets the name of this query. */
  abstract string getName();

  /** Gets a textual representation of this query, which is its name. */
  string toString() { result = this.getName() }
}
/** The query named `NosqlInjection`. */
class NosqlInjectionQuery extends Query, TNosqlInjectionQuery {
  override string getName() {
    result = "NosqlInjection"
  }
}
/** The query named `SqlInjection`. */
class SqlInjectionQuery extends Query, TSqlInjectionQuery {
  override string getName() {
    result = "SqlInjection"
  }
}
/** The query named `TaintedPath`. */
class TaintedPathQuery extends Query, TTaintedPathQuery {
  override string getName() {
    result = "TaintedPath"
  }
}
/** The query named `Xss`. */
class XssQuery extends Query, TXssQuery {
  override string getName() {
    result = "Xss"
  }
}

View File

@@ -0,0 +1,9 @@
# Pack manifest for the experimental JavaScript ATM model-building pack.
# It is not a library pack and depends on the experimental ATM library pack.
name: codeql/javascript-experimental-atm-model-building
version: 0.0.0
extractor: javascript
library: false
groups:
- javascript
- experimental
dependencies:
codeql/javascript-experimental-atm-lib: "*"