mirror of
https://github.com/github/codeql.git
synced 2026-04-29 10:45:15 +02:00
Merge pull request #11263 from github/tiferet/extract-training-data
ATM: Extract training data
This commit is contained in:
@@ -45,6 +45,9 @@ abstract class EndpointCharacteristic extends string {
|
||||
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
|
||||
);
|
||||
|
||||
/** Indicators with confidence at or above this threshold are considered to be high-confidence indicators. */
|
||||
final float getHighConfidenceThreshold() { result = 0.8 }
|
||||
|
||||
// The following are some confidence values that are used in practice by the subclasses. They are defined as named
|
||||
// constants here to make it easier to change them in the future.
|
||||
final float maximalConfidence() { result = 1.0 }
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
import javascript
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import extraction.ExtractEndpointData
|
||||
import extraction.ExtractEndpointDataTraining
|
||||
|
||||
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
|
||||
query instanceof NosqlInjectionQuery and
|
||||
@@ -33,7 +33,7 @@ string getDescriptionForAlertCandidate(
|
||||
) {
|
||||
result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
|
||||
or
|
||||
getAtmCfg(query).isKnownSink(sinkCandidate) and
|
||||
getDataFlowCfg(query).(AtmConfig).isKnownSink(sinkCandidate) and
|
||||
result = "excluded[reason=known-sink]"
|
||||
or
|
||||
not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts training and evaluation data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import ExtractEndpointData as ExtractEndpointData
|
||||
|
||||
query predicate endpoints = ExtractEndpointData::endpoints/5;
|
||||
|
||||
query predicate tokenFeatures = ExtractEndpointData::tokenFeatures/3;
|
||||
@@ -1,215 +0,0 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Library code for training and evaluation data we can use to train ML models for ML-powered
|
||||
* queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import Exclusions as Exclusions
|
||||
import evaluation.EndToEndEvaluation as EndToEndEvaluation
|
||||
import experimental.adaptivethreatmodeling.ATMConfig
|
||||
import experimental.adaptivethreatmodeling.CoreKnowledge as CoreKnowledge
|
||||
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
|
||||
import experimental.adaptivethreatmodeling.EndpointScoring as EndpointScoring
|
||||
import experimental.adaptivethreatmodeling.EndpointTypes
|
||||
import experimental.adaptivethreatmodeling.FilteringReasons
|
||||
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
|
||||
|
||||
/** DEPRECATED: Alias for NosqlInjectionAtm */
|
||||
deprecated module NosqlInjectionATM = NosqlInjectionAtm;
|
||||
|
||||
import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
|
||||
|
||||
/** DEPRECATED: Alias for SqlInjectionAtm */
|
||||
deprecated module SqlInjectionATM = SqlInjectionAtm;
|
||||
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
|
||||
/** DEPRECATED: Alias for TaintedPathAtm */
|
||||
deprecated module TaintedPathATM = TaintedPathAtm;
|
||||
|
||||
import experimental.adaptivethreatmodeling.XssATM as XssAtm
|
||||
|
||||
/** DEPRECATED: Alias for XssAtm */
|
||||
deprecated module XssATM = XssAtm;
|
||||
|
||||
import Labels
|
||||
import NoFeaturizationRestrictionsConfig
|
||||
import Queries
|
||||
|
||||
/** Gets the ATM configuration object for the specified query. */
|
||||
AtmConfig getAtmCfg(Query query) {
|
||||
query instanceof NosqlInjectionQuery and
|
||||
result instanceof NosqlInjectionAtm::NosqlInjectionAtmConfig
|
||||
or
|
||||
query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::SqlInjectionAtmConfig
|
||||
or
|
||||
query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::TaintedPathAtmConfig
|
||||
or
|
||||
query instanceof XssQuery and result instanceof XssAtm::DomBasedXssAtmConfig
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for getAtmCfg */
|
||||
deprecated ATMConfig getATMCfg(Query query) { result = getAtmCfg(query) }
|
||||
|
||||
/** Gets the ATM data flow configuration for the specified query. */
|
||||
DataFlow::Configuration getDataFlowCfg(Query query) {
|
||||
query instanceof NosqlInjectionQuery and result instanceof NosqlInjectionAtm::Configuration
|
||||
or
|
||||
query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::Configuration
|
||||
or
|
||||
query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::Configuration
|
||||
or
|
||||
query instanceof XssQuery and result instanceof XssAtm::Configuration
|
||||
}
|
||||
|
||||
/** Gets a known sink for the specified query. */
|
||||
private DataFlow::Node getASink(Query query) {
|
||||
getAtmCfg(query).isKnownSink(result) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(result.getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/** Gets a data flow node that is known not to be a sink for the specified query. */
|
||||
private DataFlow::Node getANotASink(NotASinkReason reason) {
|
||||
CoreKnowledge::isOtherModeledArgument(result, reason) and
|
||||
// Some endpoints can be assigned both a `NotASinkReason` and a `LikelyNotASinkReason`. We
|
||||
// consider these endpoints to be `LikelyNotASink`, therefore this line excludes them from the
|
||||
// definition of `NotASink`.
|
||||
not CoreKnowledge::isOtherModeledArgument(result, any(LikelyNotASinkReason t)) and
|
||||
not result = getASink(_) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(result.getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow node whose label is unknown for the specified query.
|
||||
*
|
||||
* In other words, this is an endpoint that is not `Sink`, `NotASink`, or `LikelyNotASink` for the
|
||||
* specified query.
|
||||
*/
|
||||
private DataFlow::Node getAnUnknown(Query query) {
|
||||
getAtmCfg(query).isEffectiveSink(result) and
|
||||
// Effective sinks should exclude sinks but this is a defensive requirement
|
||||
not result = getASink(query) and
|
||||
// Effective sinks should exclude NotASink but for some queries (e.g. Xss) this is currently not always the case and
|
||||
// so this is a defensive requirement
|
||||
not result = getANotASink(_) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(result.getFile().getRelativePath())
|
||||
}
|
||||
|
||||
/** Gets the query-specific sink label for the given endpoint, if such a label exists. */
|
||||
private EndpointLabel getSinkLabelForEndpoint(DataFlow::Node endpoint, Query query) {
|
||||
endpoint = getASink(query) and result instanceof SinkLabel
|
||||
or
|
||||
endpoint = getANotASink(_) and result instanceof NotASinkLabel
|
||||
or
|
||||
endpoint = getAnUnknown(query) and result instanceof UnknownLabel
|
||||
}
|
||||
|
||||
/** Gets an endpoint that should be extracted. */
|
||||
DataFlow::Node getAnEndpoint(Query query) { exists(getSinkLabelForEndpoint(result, query)) }
|
||||
|
||||
/**
|
||||
* Endpoints and associated metadata.
|
||||
*
|
||||
* Note that we draw a distinction between _features_, that are provided to the model at training
|
||||
* and query time, and _metadata_, that is only provided to the model at training time.
|
||||
*
|
||||
* Internal: See the design document for
|
||||
* [extensible extraction queries](https://docs.google.com/document/d/1g3ci2Nf1hGMG6ZUP0Y4PqCy_8elcoC_dhBvgTxdAWpg)
|
||||
* for technical information about the design of this predicate.
|
||||
*/
|
||||
predicate endpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
exists(Query query |
|
||||
// Only provide metadata for labelled endpoints, since we do not extract all endpoints.
|
||||
endpoint = getAnEndpoint(query) and
|
||||
queryName = query.getName() and
|
||||
(
|
||||
// Holds if there is a taint flow path from a known source to the endpoint
|
||||
key = "hasFlowFromSource" and
|
||||
(
|
||||
if FlowFromSource::hasFlowFromSource(endpoint, query)
|
||||
then value = "true"
|
||||
else value = "false"
|
||||
) and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
|
||||
// appear in an alert, making them less interesting training examples.
|
||||
key = "isConstantExpression" and
|
||||
(if endpoint.asExpr() instanceof ConstantExpr then value = "true" else value = "false") and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
|
||||
key = "isExcludedFromEndToEndEvaluation" and
|
||||
(if Exclusions::isFileExcluded(endpoint.getFile()) then value = "true" else value = "false") and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// The label for this query, considering the endpoint as a sink.
|
||||
key = "sinkLabel" and
|
||||
value = getSinkLabelForEndpoint(endpoint, query).getEncoding() and
|
||||
valueType = "string"
|
||||
or
|
||||
// The reason, or reasons, why the endpoint was labeled NotASink for this query.
|
||||
key = "notASinkReason" and
|
||||
exists(FilteringReason reason |
|
||||
endpoint = getANotASink(reason) and
|
||||
value = reason.getDescription()
|
||||
) and
|
||||
valueType = "string"
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
|
||||
* `endpoint`. To preserve compatibility with the data pipeline, this relation will instead set
|
||||
* `featureValue` to the empty string in this case.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
endpoints(endpoint, _, _, _, _) and
|
||||
(
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
or
|
||||
// Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
|
||||
featureName = EndpointFeatures::getASupportedFeatureName() and
|
||||
not exists(string value | EndpointFeatures::tokenFeatures(endpoint, featureName, value)) and
|
||||
featureValue = ""
|
||||
)
|
||||
}
|
||||
|
||||
module FlowFromSource {
|
||||
predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
|
||||
exists(Configuration cfg | cfg.getQuery() = q | cfg.hasFlow(_, endpoint))
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow configuration that replicates the data flow configuration for a specific query, but
|
||||
* replaces the set of sinks with the set of endpoints we're extracting.
|
||||
*
|
||||
* We use this to find out when there is flow to a particular endpoint from a known source.
|
||||
*
|
||||
* This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
|
||||
* from the CodeQL standard libraries for JavaScript.
|
||||
*/
|
||||
private class Configuration extends DataFlow::Configuration {
|
||||
Query q;
|
||||
|
||||
Configuration() { this = getDataFlowCfg(q) }
|
||||
|
||||
Query getQuery() { result = q }
|
||||
|
||||
/** Holds if `sink` is an endpoint we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink) { sink = getAnEndpoint(q) }
|
||||
|
||||
/** Holds if `sink` is an endpoint we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) {
|
||||
sink = getAnEndpoint(q) and exists(lbl)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,23 +4,8 @@
|
||||
* Extracts training data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import ExtractEndpointData as ExtractEndpointData
|
||||
private import ExtractEndpointDataTraining as ExtractEndpointDataTraining
|
||||
|
||||
query predicate endpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
|
||||
// only select endpoints that are either Sink or NotASink
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink"], "string") and
|
||||
// do not select endpoints filtered out by end-to-end evaluation
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
|
||||
"boolean") and
|
||||
// only select endpoints that can be part of a tainted flow
|
||||
ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean")
|
||||
}
|
||||
query predicate endpoints = ExtractEndpointDataTraining::reformattedTrainingEndpoints/5;
|
||||
|
||||
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
endpoints(endpoint, _, _, _, _) and
|
||||
ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue)
|
||||
}
|
||||
query predicate tokenFeatures = ExtractEndpointDataTraining::tokenFeatures/3;
|
||||
|
||||
@@ -0,0 +1,238 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts training data we can use to train ML models for ML-powered queries.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import experimental.adaptivethreatmodeling.EndpointCharacteristics
|
||||
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
|
||||
import NoFeaturizationRestrictionsConfig
|
||||
private import Exclusions as Exclusions
|
||||
import Queries
|
||||
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionAtm
|
||||
import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionAtm
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathAtm
|
||||
import experimental.adaptivethreatmodeling.XssATM as XssAtm
|
||||
|
||||
/**
|
||||
* Gets the set of featureName-featureValue pairs for each endpoint in the training set.
|
||||
*
|
||||
* `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
|
||||
* `endpoint`. To preserve compatibility with the data pipeline, this relation will instead set
|
||||
* `featureValue` to the empty string in this case.
|
||||
*/
|
||||
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
|
||||
trainingEndpoints(endpoint, _, _) and
|
||||
(
|
||||
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
|
||||
or
|
||||
// Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
|
||||
featureName = EndpointFeatures::getASupportedFeatureName() and
|
||||
not exists(string value | EndpointFeatures::tokenFeatures(endpoint, featureName, value)) and
|
||||
featureValue = ""
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the given endpoint should be included in the training set as a sample belonging to endpointClass, and has
|
||||
* the given characteristic. This query uses the endpoint characteristics to select and label endpoints for the training
|
||||
* set, and provides a list of characteristics for each endpoint in the training set, which is used in the modeling
|
||||
* code.
|
||||
*
|
||||
* Params:
|
||||
* endpoint: The endpoint to include / exclude.
|
||||
* endpointClass: The sink type. See the documentation of EndpointType.getEncoding for details about the relationship
|
||||
* between an EndpointType and a class in the classifier.
|
||||
* characteristic: Provides the list of characteristics that apply to the endpoint, which the modeling code currently
|
||||
* uses for type balancing.
|
||||
*
|
||||
* Note: This predicate will produce multiple tuples for endpoints that have multiple characteristics, which we must
|
||||
* then group together into a list of characteristics.
|
||||
*/
|
||||
query predicate trainingEndpoints(
|
||||
DataFlow::Node endpoint, EndpointType endpointClass, EndpointCharacteristic characteristic
|
||||
) {
|
||||
characteristic.getEndpoints(endpoint) and
|
||||
// Only consider the source code for the project being analyzed.
|
||||
exists(endpoint.getFile().getRelativePath()) and
|
||||
// Only select endpoints that can be part of a tainted flow: Constant expressions always evaluate to a constant
|
||||
// primitive value. Therefore they can't ever appear in an alert, making them less interesting training examples.
|
||||
// TODO: Experiment with removing this requirement.
|
||||
not endpoint.asExpr() instanceof ConstantExpr and
|
||||
// Do not select endpoints filtered out by end-to-end evaluation.
|
||||
// TODO: Experiment with removing this requirement.
|
||||
not Exclusions::isFileExcluded(endpoint.getFile()) and
|
||||
// Filter out negative examples that also have a LikelyNotASinkReason, because this is currently done here
|
||||
// https://github.com/github/codeql/blob/387e57546bf7352f7c1cfe781daa1a3799b7063e/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointData.qll#L77
|
||||
// TODO: Experiment with removing this requirement.
|
||||
not (
|
||||
endpointClass instanceof NegativeType and
|
||||
exists(EndpointCharacteristic c |
|
||||
c.getEndpoints(endpoint) and
|
||||
c instanceof LikelyNotASinkCharacteristic
|
||||
)
|
||||
) and
|
||||
(
|
||||
// If the list of characteristics includes positive indicators with high confidence for this class, select this as a
|
||||
// training sample belonging to the class.
|
||||
exists(EndpointCharacteristic characteristic2, float confidence |
|
||||
characteristic2.getEndpoints(endpoint) and
|
||||
characteristic2.getImplications(endpointClass, true, confidence) and
|
||||
confidence >= characteristic2.getHighConfidenceThreshold()
|
||||
) and
|
||||
(
|
||||
// Temporarily limit this only to positive classes. For negative classes, additionally select only endpoints that
|
||||
// have no high confidence indicators that they are sinks, because this is what was previously done.
|
||||
// TODO: Experiment with removing this requirement, and instead ensuring that an endpoint never has both a high
|
||||
// confidence indicator that it _is_ a sink and a high confidence indicator that it is _not_ a sink.
|
||||
not endpointClass instanceof NegativeType
|
||||
or
|
||||
not exists(EndpointCharacteristic characteristic3, float confidence3, EndpointType posClass |
|
||||
characteristic3.getEndpoints(endpoint) and
|
||||
characteristic3.getImplications(posClass, true, confidence3) and
|
||||
confidence3 >= characteristic3.getHighConfidenceThreshold() and
|
||||
not posClass instanceof NegativeType
|
||||
)
|
||||
)
|
||||
or
|
||||
// If the list of characteristics includes negative indicators with high confidence for all classes other than 0,
|
||||
// select this as a training sample of class 0 (this means we had query-specific characteristics to decide this
|
||||
// endpoint isn't a sink for each of our sink types).
|
||||
endpointClass instanceof NegativeType and
|
||||
forall(EndpointType otherClass | not otherClass instanceof NegativeType |
|
||||
exists(EndpointCharacteristic characteristic2, float confidence |
|
||||
characteristic2.getEndpoints(endpoint) and
|
||||
characteristic2.getImplications(otherClass, false, confidence) and
|
||||
confidence >= characteristic2.getHighConfidenceThreshold()
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Temporary:
|
||||
* Reformat the training data that was extracted with the new logic to match the format produced by the old predicate.
|
||||
* This is the format expected by the endpoint pipeline.
|
||||
*/
|
||||
query predicate reformattedTrainingEndpoints(
|
||||
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
|
||||
) {
|
||||
trainingEndpoints(endpoint, _, _) and
|
||||
exists(Query query |
|
||||
queryName = query.getName() and
|
||||
// For sinks, only list that sink type, but for non-sinks, list all sink types.
|
||||
(
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass.getDescription().matches(queryName + "%") and
|
||||
not endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
or
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
) and
|
||||
(
|
||||
// NOTE: We don't use hasFlowFromSource in training, so we could just hardcode it to be false.
|
||||
key = "hasFlowFromSource" and
|
||||
(
|
||||
if FlowFromSource::hasFlowFromSource(endpoint, query)
|
||||
then value = "true"
|
||||
else value = "false"
|
||||
) and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Constant expressions always evaluate to a constant primitive value. Therefore they can't ever
|
||||
// appear in an alert, making them less interesting training examples.
|
||||
key = "isConstantExpression" and
|
||||
(if endpoint.asExpr() instanceof ConstantExpr then value = "true" else value = "false") and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// Holds if alerts involving the endpoint are excluded from the end-to-end evaluation.
|
||||
key = "isExcludedFromEndToEndEvaluation" and
|
||||
(if Exclusions::isFileExcluded(endpoint.getFile()) then value = "true" else value = "false") and
|
||||
valueType = "boolean"
|
||||
or
|
||||
// The label for this query, considering the endpoint as a sink.
|
||||
key = "sinkLabel" and
|
||||
valueType = "string" and
|
||||
value = "Sink" and
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass.getDescription().matches(queryName + "%") and
|
||||
not endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
or
|
||||
key = "sinkLabel" and
|
||||
valueType = "string" and
|
||||
value = "NotASink" and
|
||||
exists(EndpointType endpointClass |
|
||||
endpointClass instanceof NegativeType and
|
||||
trainingEndpoints(endpoint, endpointClass, _)
|
||||
)
|
||||
or
|
||||
// The reason, or reasons, why the endpoint was labeled NotASink for this query, only for negative examples.
|
||||
key = "notASinkReason" and
|
||||
exists(EndpointCharacteristic characteristic, EndpointType endpointClass |
|
||||
characteristic.getEndpoints(endpoint) and
|
||||
characteristic.getImplications(endpointClass, true, _) and
|
||||
endpointClass instanceof NegativeType and
|
||||
value = characteristic
|
||||
) and
|
||||
// Don't include a notASinkReason for endpoints that are also known sinks.
|
||||
not exists(EndpointCharacteristic characteristic3, float confidence3, EndpointType posClass |
|
||||
characteristic3.getEndpoints(endpoint) and
|
||||
characteristic3.getImplications(posClass, true, confidence3) and
|
||||
confidence3 >= characteristic3.getHighConfidenceThreshold() and
|
||||
not posClass instanceof NegativeType
|
||||
) and
|
||||
valueType = "string"
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the ATM data flow configuration for the specified query.
|
||||
* TODO: Delete this once we are no longer surfacing `hasFlowFromSource`.
|
||||
*/
|
||||
DataFlow::Configuration getDataFlowCfg(Query query) {
|
||||
query instanceof NosqlInjectionQuery and result instanceof NosqlInjectionAtm::Configuration
|
||||
or
|
||||
query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::Configuration
|
||||
or
|
||||
query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::Configuration
|
||||
or
|
||||
query instanceof XssQuery and result instanceof XssAtm::Configuration
|
||||
}
|
||||
|
||||
// TODO: Delete this once we are no longer surfacing `hasFlowFromSource`.
|
||||
private module FlowFromSource {
|
||||
predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
|
||||
exists(Configuration cfg | cfg.getQuery() = q | cfg.hasFlow(_, endpoint))
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow configuration that replicates the data flow configuration for a specific query, but
|
||||
* replaces the set of sinks with the set of endpoints we're extracting.
|
||||
*
|
||||
* We use this to find out when there is flow to a particular endpoint from a known source.
|
||||
*
|
||||
* This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
|
||||
* from the CodeQL standard libraries for JavaScript.
|
||||
*/
|
||||
private class Configuration extends DataFlow::Configuration {
|
||||
Query q;
|
||||
|
||||
Configuration() { this = getDataFlowCfg(q) }
|
||||
|
||||
Query getQuery() { result = q }
|
||||
|
||||
/** Holds if `sink` is an endpoint we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink) { any() }
|
||||
|
||||
/** Holds if `sink` is an endpoint we're extracting. */
|
||||
override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) { exists(lbl) }
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
extraction/ExtractEndpointData.ql
|
||||
@@ -1 +0,0 @@
|
||||
extraction/ExtractEndpointData.ql
|
||||
Reference in New Issue
Block a user