mirror of
https://github.com/github/codeql.git
synced 2026-05-03 04:39:29 +02:00
JS: Initial commit of Adaptive Threat Modeling
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
# [Internal only] Adaptive Threat Modeling for JavaScript
|
||||
|
||||
This directory contains CodeQL libraries and queries that power adaptive threat modeling for JavaScript.
|
||||
All APIs are experimental and may change in the future.
|
||||
|
||||
These queries can only be run by internal users; for external users they will return no results.
|
||||
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Configures boosting for adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript as raw
|
||||
import EndpointTypes
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* A configuration class for defining known endpoints and endpoint filters for adaptive threat
|
||||
* modeling (ATM). Each boosted query must define its own extension of this abstract class.
|
||||
*
|
||||
* A configuration defines a set of known sources (`isKnownSource`) and sinks (`isKnownSink`).
|
||||
* It must also define a sink endpoint filter (`isEffectiveSink`) that filters candidate sinks
|
||||
* predicted by the machine learning model to a set of effective sinks.
|
||||
*
|
||||
* To get started with ATM, you can copy-paste an implementation of the relevant predicates from a
|
||||
* `DataFlow::Configuration` or `TaintTracking::Configuration` class for a standard security query.
|
||||
* For example, for SQL injection you can start by defining the `isKnownSource` and `isKnownSink`
|
||||
* predicates in the ATM configuration by copying and pasting the implementations of `isSource` and
|
||||
* `isSink` from `SqlInjection::Configuration`.
|
||||
*
|
||||
* Note that if the security query configuration defines additional edges beyond the standard data
|
||||
* flow edges, such as `NosqlInjection::Configuration`, you may need to replace the definition of
|
||||
* `isAdditionalFlowStep` with a more generalised definition of additional edges. See
|
||||
* `NosqlInjectionATM.qll` for an example of doing this.
|
||||
*/
|
||||
abstract class ATMConfig extends string {
|
||||
bindingset[this]
|
||||
ATMConfig() { any() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if `source` is a known source of flow.
|
||||
*/
|
||||
predicate isKnownSource(raw::DataFlow::Node source) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if `sink` is a known sink of flow.
|
||||
*/
|
||||
predicate isKnownSink(raw::DataFlow::Node sink) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate source `candidateSource` predicted by the machine learning model should be
|
||||
* an effective source, i.e. one considered as a possible source of flow in the boosted query.
|
||||
*/
|
||||
predicate isEffectiveSource(raw::DataFlow::Node candidateSource) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
|
||||
* an effective sink, i.e. one considered as a possible sink of flow in the boosted query.
|
||||
*/
|
||||
predicate isEffectiveSink(raw::DataFlow::Node candidateSink) { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Holds if the candidate sink `candidateSink` predicted by the machine learning model should be
|
||||
* an effective sink that overrides the score provided by the machine learning model with the
|
||||
* score `score` for reason `why`. The effective sinks identified by this predicate MUST be a
|
||||
* subset of those identified by the `isEffectiveSink` predicate.
|
||||
*
|
||||
* For example, in the ATM external API query, we use this method to ensure the ATM external API
|
||||
* query produces the same results as the standard external API query, but assigns flows
|
||||
* involving sinks that are filtered out by the endpoint filters a score of 0.
|
||||
*
|
||||
* This predicate can be phased out once we no longer need to rely on predicates like
|
||||
* `paddedScore` in the ATM CodeQL libraries to add scores to alert messages in a way that works
|
||||
* with lexical sort orders.
|
||||
*/
|
||||
predicate isEffectiveSinkWithOverridingScore(
|
||||
raw::DataFlow::Node candidateSink, float score, string why
|
||||
) {
|
||||
none()
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get an endpoint type for the sources of this query. A query may have multiple applicable
|
||||
* endpoint types for its sources.
|
||||
*/
|
||||
EndpointType getASourceEndpointType() { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Get an endpoint type for the sinks of this query. A query may have multiple applicable
|
||||
* endpoint types for its sinks.
|
||||
*/
|
||||
EndpointType getASinkEndpointType() { none() }
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* Specifies the default cut-off value that controls how many alerts are produced.
|
||||
* The cut-off value must be in the range [0,1].
|
||||
* A cut-off value of 0 only produces alerts that are likely true-positives.
|
||||
* A cut-off value of 1 produces all alerts including those that are likely false-positives.
|
||||
*/
|
||||
float getScoreCutoff() { result = 0.0 }
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides information about the results of boosted queries for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript as raw
|
||||
private import raw::DataFlow as DataFlow
|
||||
import ATMConfig
|
||||
private import BaseScoring
|
||||
private import EndpointScoring as EndpointScoring
|
||||
|
||||
module ATM {
|
||||
/**
|
||||
* EXPERIMENTAL. This API may change in the future.
|
||||
*
|
||||
* This module contains informational predicates about the results returned by adaptive threat
|
||||
* modeling (ATM).
|
||||
*/
|
||||
module ResultsInfo {
|
||||
/**
 * Holds if some `ScoringResults` instance considers the flow from `source` to `sink`
 * sufficiently likely to be a true positive for it to be reported.
 */
pragma[inline]
private predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
  exists(ScoringResults results | results.shouldResultBeIncluded(source, sink))
}
|
||||
|
||||
/**
 * EXPERIMENTAL. This API may change in the future.
 *
 * Gets the score for the flow from `source` to `sink` in the boosted query.
 *
 * A score is only produced when some data flow configuration has flow from `source` to
 * `sink`, the flow passes `shouldResultBeIncluded`, and the scoring results yield exactly
 * one score for the flow.
 */
pragma[inline]
float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
  exists(DataFlow::Configuration cfg | cfg.hasFlow(source, sink)) and
  shouldResultBeIncluded(source, sink) and
  result =
    unique(float s |
      exists(ScoringResults results | s = results.getScoreForFlow(source, sink))
    )
}
|
||||
|
||||
/**
|
||||
* Pad a score returned from `getScoreForFlow` to a particular length by adding a decimal
|
||||
* point if one does not already exist, and "0"s after that decimal point.
|
||||
*
|
||||
* Note that this predicate must itself define an upper bound on `length`, so that it has a
|
||||
* finite number of results. Currently this is defined as 12.
|
||||
*/
|
||||
private string paddedScore(float score, int length) {
|
||||
// In this definition, we must restrict the values that `length` and `score` can take on so
|
||||
// that the predicate has a finite number of results.
|
||||
(score = getScoreForFlow(_, _) or score = 0) and
|
||||
length = result.length() and
|
||||
(
|
||||
// We need to make sure the padded score contains a "." so lexically sorting the padded
|
||||
// scores is equivalent to numerically sorting the scores.
|
||||
score.toString().charAt(_) = "." and
|
||||
result = score.toString()
|
||||
or
|
||||
not score.toString().charAt(_) = "." and
|
||||
result = score.toString() + "."
|
||||
)
|
||||
or
|
||||
result = paddedScore(score, length - 1) + "0" and
|
||||
length <= 12
|
||||
}
|
||||
|
||||
/**
 * EXPERIMENTAL. This API may change in the future.
 *
 * Gets a fixed-length string rendering of the score of the flow from `source` to `sink` in
 * the boosted query.
 *
 * Every result is 12 characters long: longer renderings are truncated and shorter ones are
 * padded via `paddedScore`. The fixed length is what lets a lexical sort of these strings
 * agree with a numeric sort of the underlying scores.
 */
pragma[inline]
string getScoreStringForFlow(DataFlow::Node source, DataFlow::Node sink) {
  exists(float score, string rendered |
    score = getScoreForFlow(source, sink) and
    rendered = score.toString()
  |
    // A length of 12 is equivalent to 10 decimal places.
    if rendered.length() >= 12
    then result = rendered.substring(0, 12)
    else result = paddedScore(score, 12)
  )
}
|
||||
|
||||
/**
 * EXPERIMENTAL. This API may change in the future.
 *
 * Holds if the flow from `source` to `sink` is likely to be reported by the base security
 * query, that is, a single ATM configuration treats `source` as a known source and `sink`
 * as a known sink.
 *
 * Currently this is a heuristic: it ignores potential differences in the definitions of
 * additional flow steps.
 */
pragma[inline]
predicate isFlowLikelyInBaseQuery(DataFlow::Node source, DataFlow::Node sink) {
  // Bind the configuration once so that both checks are answered by the same `ATMConfig`.
  // Calling `getCfg()` separately for each check would pair a known source of one
  // configuration with a known sink of another whenever several configurations are in scope.
  exists(ATMConfig cfg | cfg = getCfg() |
    cfg.isKnownSource(source) and
    cfg.isKnownSink(sink)
  )
}
|
||||
|
||||
/**
 * EXPERIMENTAL. This API may change in the future.
 *
 * Gets a bracketed, human-readable summary of why ATM included the flow from `source` to
 * `sink` as an alert.
 *
 * The summary lists the comma-separated source origins and sink origins reported by the
 * scoring results, substituting "unknown" for a side that has no origins.
 */
pragma[inline]
string getAdditionalAlertInfo(DataFlow::Node source, DataFlow::Node sink) {
  exists(string sourceOrigins, string sinkOrigins, string sourcePart, string sinkPart |
    sourceOrigins = concat(any(ScoringResults results).getASourceOrigin(source), ", ") and
    sinkOrigins = concat(any(ScoringResults results).getASinkOrigin(sink), ", ") and
    // An empty concatenation means no origin was reported for that endpoint.
    (if sourceOrigins != "" then sourcePart = sourceOrigins else sourcePart = "unknown") and
    (if sinkOrigins != "" then sinkPart = sinkOrigins else sinkPart = "unknown") and
    result = "[Source origins: " + sourcePart + "; sink origins: " + sinkPart + "]"
  )
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides shared scoring functionality for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import ATMConfig
|
||||
|
||||
external predicate adaptiveThreatModelingModels(
|
||||
string modelChecksum, string modelLanguage, string modelName, string modelType
|
||||
);
|
||||
|
||||
/** Get the ATM configuration. */
|
||||
ATMConfig getCfg() { any() }
|
||||
|
||||
/**
|
||||
* This module provides functionality that takes an endpoint and provides an entity that encloses that
|
||||
* endpoint and is suitable for similarity analysis.
|
||||
*/
|
||||
module EndpointToEntity {
|
||||
private import CodeToFeatures
|
||||
|
||||
/**
|
||||
* Get an entity enclosing the endpoint that is suitable for similarity analysis. In general,
|
||||
* this may associate multiple entities to a single endpoint.
|
||||
*/
|
||||
DatabaseFeatures::Entity getAnEntityForEndpoint(DataFlow::Node endpoint) {
|
||||
DatabaseFeatures::entities(result, _, _, _, _, _, _, _, _) and
|
||||
result.getDefinedFunction() = endpoint.getContainer().getEnclosingContainer*()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This module provides functionality that takes an entity and provides effective endpoints within
|
||||
* that entity.
|
||||
*
|
||||
* We use the following terminology to describe endpoints:
|
||||
*
|
||||
* - The *candidate* endpoints are the set of data flow nodes that should be passed to the
|
||||
* appropriate endpoint filter to produce the set of effective endpoints.
|
||||
* When we have a model that beats the performance of the baseline, we will likely define the
|
||||
* candidate endpoints based on the most confident predictions of the model.
|
||||
* - An *effective* endpoint is a candidate endpoint which passes through the endpoint filter.
|
||||
* In other words, it is a candidate endpoint for which the `isEffectiveSink` (or
|
||||
* `isEffectiveSource`) predicate defined in the `ATMConfig` instance in scope holds.
|
||||
*/
|
||||
module EntityToEffectiveEndpoint {
|
||||
private import CodeToFeatures
|
||||
|
||||
/**
|
||||
* Returns endpoint candidates within the specified entities.
|
||||
*
|
||||
* The baseline implementation of this is that a candidate endpoint is any data flow node that is
|
||||
* enclosed within the specified entity.
|
||||
*/
|
||||
private DataFlow::Node getABaselineEndpointCandidate(DatabaseFeatures::Entity entity) {
|
||||
result.getContainer().getEnclosingContainer*() = entity.getDefinedFunction()
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an effective source enclosed by the specified entity.
|
||||
*
|
||||
* N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
|
||||
* source may occur in a function defined within the specified entity.
|
||||
*/
|
||||
DataFlow::Node getAnEffectiveSource(DatabaseFeatures::Entity entity) {
|
||||
result = getABaselineEndpointCandidate(entity) and
|
||||
getCfg().isEffectiveSource(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an effective sink enclosed by the specified entity.
|
||||
*
|
||||
* N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
|
||||
* sink may occur in a function defined within the specified entity.
|
||||
*/
|
||||
DataFlow::Node getAnEffectiveSink(DatabaseFeatures::Entity entity) {
|
||||
result = getABaselineEndpointCandidate(entity) and
|
||||
getCfg().isEffectiveSink(result)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scoring information produced by a scoring model.
|
||||
*
|
||||
* Scoring models include embedding models and endpoint scoring models.
|
||||
*/
|
||||
abstract class ScoringResults extends string {
|
||||
bindingset[this]
|
||||
ScoringResults() { any() }
|
||||
|
||||
/**
|
||||
* Get ATM's confidence that a path between `source` and `sink` represents a security
|
||||
* vulnerability. This will be a number between 0.0 and 1.0.
|
||||
*/
|
||||
abstract float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink);
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given source in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given source, in which case
|
||||
* this predicate should have multiple results.
|
||||
*/
|
||||
abstract string getASourceOrigin(DataFlow::Node source);
|
||||
|
||||
/**
|
||||
* Get a string representing why ATM included the given sink in the dataflow analysis.
|
||||
*
|
||||
* In general, there may be multiple reasons why ATM included the given sink, in which case this
|
||||
* predicate should have multiple results.
|
||||
*/
|
||||
abstract string getASinkOrigin(DataFlow::Node sink);
|
||||
|
||||
/**
|
||||
* Indicates whether the flow from source to sink represents a result with
|
||||
* sufficiently high likelihood of being a true-positive.
|
||||
*/
|
||||
pragma[inline]
|
||||
abstract predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink);
|
||||
}
|
||||
@@ -0,0 +1,444 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts data about the functions in the database for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
module Raw {
|
||||
private import javascript as raw
|
||||
|
||||
class RawAstNode = raw::ASTNode;
|
||||
|
||||
class Entity = raw::Function;
|
||||
|
||||
class Location = raw::Location;
|
||||
|
||||
/**
|
||||
* Exposed as a tool for defining anchors for semantic search.
|
||||
*/
|
||||
class UnderlyingFunction = raw::Function;
|
||||
|
||||
/**
|
||||
* Determines whether an entity should be omitted from ATM.
|
||||
*/
|
||||
predicate isEntityIgnored(Entity entity) {
|
||||
// Ignore entities which don't have definitions, for example those in TypeScript
|
||||
// declaration files.
|
||||
not exists(entity.getBody())
|
||||
or
|
||||
// Ignore entities with an empty body, for example the JavaScript function () => {}.
|
||||
entity.getNumBodyStmt() = 0 and not exists(entity.getAReturnedExpr())
|
||||
}
|
||||
|
||||
newtype WrappedAstNode = TAstNode(RawAstNode rawNode)
|
||||
|
||||
/**
|
||||
* This class represents nodes in the AST.
|
||||
*/
|
||||
class AstNode extends TAstNode {
|
||||
RawAstNode rawNode;
|
||||
|
||||
AstNode() { this = TAstNode(rawNode) }
|
||||
|
||||
AstNode getAChildNode() { result = TAstNode(rawNode.getAChild()) }
|
||||
|
||||
AstNode getParentNode() { result = TAstNode(rawNode.getParent()) }
|
||||
|
||||
/**
|
||||
* Holds if the AST node has `result` as its `index`th attribute.
|
||||
*
|
||||
* The index is not intended to mean anything, and is only here for disambiguation.
|
||||
* There are no guarantees about any particular index being used (or not being used).
|
||||
*/
|
||||
string astNodeAttribute(int index) {
|
||||
(
|
||||
// NB: Unary and binary operator expressions e.g. -a, a + b and compound
|
||||
// assignments e.g. a += b can be identified by the expression type.
|
||||
result = rawNode.(raw::Identifier).getName()
|
||||
or
|
||||
// Computed property accesses for which we can predetermine the property being accessed.
|
||||
// NB: May alias with operators e.g. could have '+' as a property name.
|
||||
result = rawNode.(raw::IndexExpr).getPropertyName()
|
||||
or
|
||||
// We use `getRawValue` to give us distinct representations for `0xa`, `0xA`, and `10`.
|
||||
result = rawNode.(raw::NumberLiteral).getRawValue()
|
||||
or
|
||||
// We use `getValue` rather than `getRawValue` so we assign `"a"` and `'a'` the same representation.
|
||||
not rawNode instanceof raw::NumberLiteral and
|
||||
result = rawNode.(raw::Literal).getValue()
|
||||
or
|
||||
result = rawNode.(raw::TemplateElement).getRawValue()
|
||||
) and
|
||||
index = 0
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string indicating the "type" of the AST node.
|
||||
*/
|
||||
string astNodeType() {
|
||||
// The definition of this method should correspond with that of the `@ast_node` entry in the
|
||||
// dbscheme.
|
||||
result = "js_exprs." + any(int kind | exprs(rawNode, kind, _, _, _))
|
||||
or
|
||||
result = "js_properties." + any(int kind | properties(rawNode, _, _, kind, _))
|
||||
or
|
||||
result = "js_stmts." + any(int kind | stmts(rawNode, kind, _, _, _))
|
||||
or
|
||||
result = "js_toplevel" and rawNode instanceof raw::TopLevel
|
||||
or
|
||||
result = "js_typeexprs." + any(int kind | typeexprs(rawNode, kind, _, _, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `result` is the `index`'th child of the AST node, for some arbitrary indexing.
|
||||
* A root of the AST should be its own child, with an arbitrary (though conventionally
|
||||
* 0) index.
|
||||
*
|
||||
* Notably, the order in which child nodes are visited is not required to be meaningful,
|
||||
* and no particular index is required to be meaningful. However, `(parent, index)`
|
||||
* should be a keyset.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
AstNode astNodeChild(int index) {
|
||||
result =
|
||||
rank[index - 1](AstNode child, raw::Location l |
|
||||
child = this.getAChildNode() and l = child.getLocation()
|
||||
|
|
||||
child
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn(),
|
||||
child.astNodeType()
|
||||
)
|
||||
or
|
||||
not exists(result.getParentNode()) and this = result and index = 0
|
||||
}
|
||||
|
||||
raw::Location getLocation() { result = rawNode.getLocation() }
|
||||
|
||||
string toString() { result = rawNode.toString() }
|
||||
|
||||
predicate isEntityNameNode(Entity entity) {
|
||||
exists(int index |
|
||||
TAstNode(entity) = getParentNode() and
|
||||
this = getParentNode().astNodeChild(index) and
|
||||
// An entity name node must be the first child of the entity.
|
||||
index = min(int otherIndex | exists(getParentNode().astNodeChild(otherIndex))) and
|
||||
entity.getName() = rawNode.(raw::VarDecl).getName()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `result` is the `index`'th child of the `parent` entity. Such
|
||||
* a node is a root of an AST associated with this entity.
|
||||
*/
|
||||
AstNode entityChild(AstNode parent, int index) {
|
||||
// In JavaScript, entities appear in the AST parent/child relationship.
|
||||
result = parent.astNodeChild(index)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` is contained in `entity`. Note that a single node may be contained
|
||||
* in multiple entities, if they are nested. An entity, in particular, should be
|
||||
* reported as contained within itself.
|
||||
*/
|
||||
predicate entityContains(Entity entity, AstNode node) {
|
||||
node.getParentNode*() = TAstNode(entity) and not node.isEntityNameNode(entity)
|
||||
}
|
||||
|
||||
/**
 * Gets the name of `entity`.
 *
 * Three cases are tried in order:
 *  1. If the entity has one or more names, the lexically smallest one is used.
 *  2. Otherwise, if an approximate name can be generated for it (see
 *     `getApproximateNameForEntity`), that approximate name is used.
 *  3. Otherwise the name is `""`, so that these entities are still included in
 *     `AdaptiveThreatModeling.qll`.
 */
string getEntityName(Entity entity) {
  // https://github.com/github/ml-ql-adaptive-threat-modeling/issues/244 discusses making use
  // of all the names during training.
  // `min` has no result when the entity is unnamed, so this disjunct only applies to
  // named entities.
  result = min(entity.getName())
  or
  not exists(entity.getName()) and
  (
    result = getApproximateNameForEntity(entity)
    or
    not exists(getApproximateNameForEntity(entity)) and
    result = ""
  )
}
|
||||
|
||||
/**
|
||||
* Holds if the call `call` has `entity` as its `argumentIndex`th argument.
|
||||
*/
|
||||
private predicate entityUsedAsArgumentToCall(
|
||||
Entity entity, raw::DataFlow::CallNode call, int argumentIndex
|
||||
) {
|
||||
raw::DataFlow::localFlowStep*(call.getArgument(argumentIndex), entity.flow())
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a generated name for the entity. This name is generated such that
|
||||
* entities with the same names have similar behaviour.
|
||||
*/
|
||||
private string getApproximateNameForEntity(Entity entity) {
|
||||
count(raw::DataFlow::CallNode call, int index | entityUsedAsArgumentToCall(entity, call, index)) =
|
||||
1 and
|
||||
exists(raw::DataFlow::CallNode call, int index, string basePart |
|
||||
entityUsedAsArgumentToCall(entity, call, index) and
|
||||
(
|
||||
if count(getReceiverName(call)) = 1
|
||||
then basePart = getReceiverName(call) + "."
|
||||
else basePart = ""
|
||||
) and
|
||||
result = basePart + call.getCalleeName() + "#functionalargument"
|
||||
)
|
||||
}
|
||||
|
||||
private string getReceiverName(raw::DataFlow::CallNode call) {
|
||||
result = call.getReceiver().asExpr().(raw::VarAccess).getName()
|
||||
}
|
||||
|
||||
/** Consistency checks: these predicates should each have no results */
|
||||
module Consistency {
|
||||
/** `getEntityName` should assign each entity a single name. */
|
||||
query predicate entityWithManyNames(Entity entity, string name) {
|
||||
name = getEntityName(entity) and
|
||||
count(getEntityName(entity)) > 1
|
||||
}
|
||||
|
||||
query predicate nodeWithNoType(AstNode node) { not exists(node.astNodeType()) }
|
||||
|
||||
query predicate nodeWithManyTypes(AstNode node, string type) {
|
||||
type = node.astNodeType() and
|
||||
count(node.astNodeType()) > 1
|
||||
}
|
||||
|
||||
query predicate nodeWithNoParent(AstNode node, string type) {
|
||||
not node = any(AstNode parent).astNodeChild(_) and
|
||||
type = node.astNodeType() and
|
||||
not exists(RawAstNode rawNode | node = TAstNode(rawNode) and rawNode instanceof raw::Module)
|
||||
}
|
||||
|
||||
query predicate duplicateChildIndex(AstNode parent, int index, AstNode child) {
|
||||
child = parent.astNodeChild(index) and
|
||||
count(parent.astNodeChild(index)) > 1
|
||||
}
|
||||
|
||||
query predicate duplicateAttributeIndex(AstNode node, int index) {
|
||||
exists(node.astNodeAttribute(index)) and
|
||||
count(node.astNodeAttribute(index)) > 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module Wrapped {
|
||||
/*
|
||||
* We require any node with attributes to be a leaf. Where a non-leaf node
|
||||
* has an attribute, we instead create a synthetic leaf node that has that
|
||||
* attribute.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Holds if the AST node `e` is a leaf node.
|
||||
*/
|
||||
private predicate isLeaf(Raw::AstNode e) { not exists(e.astNodeChild(_)) }
|
||||
|
||||
newtype WrappedEntity =
|
||||
TEntity(Raw::Entity entity) {
|
||||
exists(entity.getLocation().getFile().getRelativePath()) and
|
||||
Raw::entityContains(entity, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* A type ranging over the kinds of entities for which we want to consider embeddings.
|
||||
*/
|
||||
class Entity extends WrappedEntity {
|
||||
Raw::Entity rawEntity;
|
||||
|
||||
Entity() { this = TEntity(rawEntity) and not Raw::isEntityIgnored(rawEntity) }
|
||||
|
||||
string getName() { result = Raw::getEntityName(rawEntity) }
|
||||
|
||||
AstNode getAstRoot(int index) {
|
||||
result = TAstNode(rawEntity, Raw::entityChild(Raw::TAstNode(rawEntity), index))
|
||||
}
|
||||
|
||||
string toString() { result = rawEntity.toString() }
|
||||
|
||||
Raw::Location getLocation() { result = rawEntity.getLocation() }
|
||||
|
||||
Raw::UnderlyingFunction getDefinedFunction() { result = rawEntity }
|
||||
}
|
||||
|
||||
/**
 * The AST nodes exposed by this module, each paired with the entity that encloses it.
 *
 * - `TAstNode` wraps a raw AST node contained in `enclosingEntity`.
 * - `TSyntheticNode` is a synthetic leaf created for a non-leaf raw node that carries an
 *   attribute at `attrIndex`. Its `syntheticChildIndex` is placed after the largest real
 *   child index of `node`, offset by the position of `attrIndex` relative to the smallest
 *   attribute index of `node`, so synthetic children never collide with real children.
 */
newtype WrappedAstNode =
  TAstNode(Raw::Entity enclosingEntity, Raw::AstNode node) {
    Raw::entityContains(enclosingEntity, node)
  } or
  TSyntheticNode(
    Raw::Entity enclosingEntity, Raw::AstNode node, int syntheticChildIndex, int attrIndex
  ) {
    Raw::entityContains(enclosingEntity, node) and
    exists(node.astNodeAttribute(attrIndex)) and
    // Only non-leaf nodes need a synthetic leaf. `not isLeaf(node)` already guarantees that
    // `node` has at least one child, so the `max` below always has a value and no separate
    // "no children" case is required.
    not isLeaf(node) and
    syntheticChildIndex =
      attrIndex - min(int other | exists(node.astNodeAttribute(other))) +
        max(int other | exists(node.astNodeChild(other))) + 1
  }
|
||||
|
||||
pragma[nomagic]
|
||||
private AstNode injectedChild(Raw::Entity enclosingEntity, Raw::AstNode parent, int index) {
|
||||
result = TAstNode(enclosingEntity, parent.astNodeChild(index)) or
|
||||
result = TSyntheticNode(enclosingEntity, parent, index, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* A type ranging over AST nodes. Ultimately, only nodes contained in entities will
|
||||
* be considered.
|
||||
*/
|
||||
class AstNode extends WrappedAstNode {
|
||||
Raw::Entity enclosingEntity;
|
||||
Raw::AstNode rawNode;
|
||||
|
||||
AstNode() {
|
||||
(
|
||||
this = TAstNode(enclosingEntity, rawNode) or
|
||||
this = TSyntheticNode(enclosingEntity, rawNode, _, _)
|
||||
) and
|
||||
not Raw::isEntityIgnored(enclosingEntity)
|
||||
}
|
||||
|
||||
string getAttribute(int index) {
|
||||
result = rawNode.astNodeAttribute(index) and
|
||||
not exists(TSyntheticNode(enclosingEntity, rawNode, _, index))
|
||||
}
|
||||
|
||||
string getType() { result = rawNode.astNodeType() }
|
||||
|
||||
AstNode getChild(int index) { result = injectedChild(enclosingEntity, rawNode, index) }
|
||||
|
||||
string toString() { result = getType() }
|
||||
|
||||
Raw::Location getLocation() { result = rawNode.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthetic AST node, created to be a leaf for an otherwise non-leaf attribute.
|
||||
*/
|
||||
class SyntheticAstNode extends AstNode, TSyntheticNode {
|
||||
int childIndex;
|
||||
int attributeIndex;
|
||||
|
||||
SyntheticAstNode() {
|
||||
this = TSyntheticNode(enclosingEntity, rawNode, childIndex, attributeIndex)
|
||||
}
|
||||
|
||||
override string getAttribute(int index) {
|
||||
result = rawNode.astNodeAttribute(attributeIndex) and index = attributeIndex
|
||||
}
|
||||
|
||||
override string getType() {
|
||||
result = rawNode.astNodeType() + "::<synthetic " + childIndex + ">"
|
||||
}
|
||||
|
||||
override AstNode getChild(int index) { none() }
|
||||
}
|
||||
}
|
||||
|
||||
module DatabaseFeatures {
|
||||
/**
|
||||
* Exposed as a tool for defining anchors for semantic search.
|
||||
*/
|
||||
class UnderlyingFunction = Raw::UnderlyingFunction;
|
||||
|
||||
private class Location = Raw::Location;
|
||||
|
||||
private newtype TEntityOrAstNode =
|
||||
TEntity(Wrapped::Entity entity) or
|
||||
TAstNode(Wrapped::AstNode astNode)
|
||||
|
||||
class EntityOrAstNode extends TEntityOrAstNode {
|
||||
abstract string getType();
|
||||
|
||||
abstract string toString();
|
||||
|
||||
abstract Location getLocation();
|
||||
}
|
||||
|
||||
class Entity extends EntityOrAstNode, TEntity {
|
||||
Wrapped::Entity entity;
|
||||
|
||||
Entity() { this = TEntity(entity) }
|
||||
|
||||
string getName() { result = entity.getName() }
|
||||
|
||||
AstNode getAstRoot(int index) { result = TAstNode(entity.getAstRoot(index)) }
|
||||
|
||||
override string getType() { result = "javascript function" }
|
||||
|
||||
override string toString() { result = "Entity: " + getName() }
|
||||
|
||||
override Location getLocation() { result = entity.getLocation() }
|
||||
|
||||
UnderlyingFunction getDefinedFunction() { result = entity.getDefinedFunction() }
|
||||
}
|
||||
|
||||
class AstNode extends EntityOrAstNode, TAstNode {
|
||||
Wrapped::AstNode rawNode;
|
||||
|
||||
AstNode() { this = TAstNode(rawNode) }
|
||||
|
||||
AstNode getChild(int index) { result = TAstNode(rawNode.getChild(index)) }
|
||||
|
||||
string getAttribute(int index) { result = rawNode.getAttribute(index) }
|
||||
|
||||
override string getType() { result = rawNode.getType() }
|
||||
|
||||
override string toString() { result = this.getType() }
|
||||
|
||||
override Location getLocation() { result = rawNode.getLocation() }
|
||||
}
|
||||
|
||||
/** Consistency checks: these predicates should each have no results */
|
||||
module Consistency {
|
||||
query predicate nonLeafAttribute(AstNode node, int index, string attribute) {
|
||||
attribute = node.getAttribute(index) and
|
||||
exists(node.getChild(_))
|
||||
}
|
||||
}
|
||||
|
||||
query predicate entities(
|
||||
Entity entity, string entity_name, string entity_type, string path, int startLine,
|
||||
int startColumn, int endLine, int endColumn, string absolutePath
|
||||
) {
|
||||
entity_name = entity.getName() and
|
||||
entity_type = entity.getType() and
|
||||
exists(Location l | l = entity.getLocation() |
|
||||
path = l.getFile().getRelativePath() and
|
||||
absolutePath = l.getFile().getAbsolutePath() and
|
||||
l.hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
|
||||
)
|
||||
}
|
||||
|
||||
query predicate astNodes(
|
||||
Entity enclosingEntity, EntityOrAstNode parent, int index, AstNode node, string node_type
|
||||
) {
|
||||
node = enclosingEntity.getAstRoot(index) and
|
||||
parent = enclosingEntity and
|
||||
node_type = node.getType()
|
||||
or
|
||||
astNodes(enclosingEntity, _, _, parent, _) and
|
||||
node = parent.(AstNode).getChild(index) and
|
||||
node_type = node.getType()
|
||||
}
|
||||
|
||||
/** Holds if `attr` is an attribute of `node`, where `node` is one of the extracted AST nodes. */
query predicate nodeAttributes(AstNode node, string attr) {
  exists(int index | attr = node.getAttribute(index)) and
  // Restrict to AST nodes that `astNodes` reports. This leaves out nodes in standard libraries,
  // because the standard library files are located outside the source root.
  astNodes(_, _, _, node, _)
}
|
||||
}
|
||||
@@ -0,0 +1,208 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides predicates that expose the knowledge of models
|
||||
* in the core CodeQL JavaScript libraries.
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import semmle.javascript.security.dataflow.XxeCustomizations
|
||||
private import semmle.javascript.security.dataflow.RemotePropertyInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.TypeConfusionThroughParameterTamperingCustomizations
|
||||
private import semmle.javascript.security.dataflow.ZipSlipCustomizations
|
||||
private import semmle.javascript.security.dataflow.TaintedPathCustomizations
|
||||
private import semmle.javascript.security.dataflow.CleartextLoggingCustomizations
|
||||
private import semmle.javascript.security.dataflow.XpathInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.Xss::Shared as Xss
|
||||
private import semmle.javascript.security.dataflow.StackTraceExposureCustomizations
|
||||
private import semmle.javascript.security.dataflow.ClientSideUrlRedirectCustomizations
|
||||
private import semmle.javascript.security.dataflow.CodeInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.RequestForgeryCustomizations
|
||||
private import semmle.javascript.security.dataflow.CorsMisconfigurationForCredentialsCustomizations
|
||||
private import semmle.javascript.security.dataflow.ShellCommandInjectionFromEnvironmentCustomizations
|
||||
private import semmle.javascript.security.dataflow.DifferentKindsComparisonBypassCustomizations
|
||||
private import semmle.javascript.security.dataflow.CommandInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.PrototypePollutionCustomizations
|
||||
private import semmle.javascript.security.dataflow.UnvalidatedDynamicMethodCallCustomizations
|
||||
private import semmle.javascript.security.dataflow.TaintedFormatStringCustomizations
|
||||
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.PostMessageStarCustomizations
|
||||
private import semmle.javascript.security.dataflow.RegExpInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.SqlInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.InsecureRandomnessCustomizations
|
||||
private import semmle.javascript.security.dataflow.XmlBombCustomizations
|
||||
private import semmle.javascript.security.dataflow.InsufficientPasswordHashCustomizations
|
||||
private import semmle.javascript.security.dataflow.HardcodedCredentialsCustomizations
|
||||
private import semmle.javascript.security.dataflow.FileAccessToHttpCustomizations
|
||||
private import semmle.javascript.security.dataflow.UnsafeDynamicMethodAccessCustomizations
|
||||
private import semmle.javascript.security.dataflow.UnsafeDeserializationCustomizations
|
||||
private import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCodeCustomizations
|
||||
private import semmle.javascript.security.dataflow.ServerSideUrlRedirectCustomizations
|
||||
private import semmle.javascript.security.dataflow.IndirectCommandInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.ConditionalBypassCustomizations
|
||||
private import semmle.javascript.security.dataflow.HttpToFileAccessCustomizations
|
||||
private import semmle.javascript.security.dataflow.BrokenCryptoAlgorithmCustomizations
|
||||
private import semmle.javascript.security.dataflow.LoopBoundInjectionCustomizations
|
||||
private import semmle.javascript.security.dataflow.CleartextStorageCustomizations
|
||||
import FilteringReasons
|
||||
|
||||
/**
 * Holds if `n` is an argument of an invocation that has a known library sink among its arguments.
 * That is, `n` is either such a sink itself or a sibling argument of one.
 */
predicate isArgumentToKnownLibrarySinkFunction(DataFlow::Node n) {
  exists(DataFlow::InvokeNode invocation |
    n = invocation.getAnArgument() and
    isKnownLibrarySink(invocation.getAnArgument())
  )
}
|
||||
|
||||
/**
 * Holds if the node `n` is a known sink for the external API security query.
 *
 * These are the known sinks of the security queries whose sources include remote flow and
 * DOM-based sources. The sink classes are listed alphabetically by query name.
 */
predicate isKnownExternalAPIQuerySink(DataFlow::Node n) {
  n instanceof CleartextStorage::Sink
  or
  n instanceof ClientSideUrlRedirect::Sink
  or
  n instanceof CodeInjection::Sink
  or
  n instanceof CommandInjection::Sink
  or
  n instanceof CorsMisconfigurationForCredentials::Sink
  or
  n instanceof HttpToFileAccess::Sink
  or
  n instanceof NosqlInjection::Sink
  or
  n instanceof PostMessageStar::Sink
  or
  n instanceof PrototypePollution::Sink
  or
  n instanceof RegExpInjection::Sink
  or
  n instanceof RequestForgery::Sink
  or
  n instanceof ServerSideUrlRedirect::Sink
  or
  n instanceof SqlInjection::Sink
  or
  n instanceof TaintedFormatString::Sink
  or
  n instanceof TaintedPath::Sink
  or
  n instanceof UnsafeDeserialization::Sink
  or
  n instanceof UnvalidatedDynamicMethodCall::Sink
  or
  n instanceof XmlBomb::Sink
  or
  n instanceof XpathInjection::Sink
  or
  n instanceof Xss::Sink
  or
  n instanceof Xxe::Sink
  or
  n instanceof ZipSlip::Sink
}
|
||||
|
||||
/**
 * Holds if the node `n` is a known sink in a modeled library.
 *
 * This is every sink of the external API query (`isKnownExternalAPIQuerySink`) plus the sinks of
 * the additional queries listed below.
 */
predicate isKnownLibrarySink(DataFlow::Node n) {
  n instanceof CleartextLogging::Sink
  or
  n instanceof FileAccessToHttp::Sink
  or
  n instanceof IndirectCommandInjection::Sink
  or
  n instanceof InsecureRandomness::Sink
  or
  n instanceof ShellCommandInjectionFromEnvironment::Sink
  or
  n instanceof StackTraceExposure::Sink
  or
  isKnownExternalAPIQuerySink(n)
}
|
||||
|
||||
/**
 * Holds if the node `n` is the predecessor of some modeled step, that is, of an additional taint
 * step or of an additional flow step (in either its two-argument or four-argument form).
 */
predicate isKnownStepSrc(DataFlow::Node n) {
  exists(TaintTracking::AdditionalTaintStep taintStep | taintStep.step(n, _))
  or
  exists(DataFlow::AdditionalFlowStep flowStep |
    flowStep.step(n, _)
    or
    flowStep.step(n, _, _, _)
  )
}
|
||||
|
||||
/**
 * Holds if `n` is an argument to a function of a builtin object, and `reason` identifies which
 * of the following cases applies:
 *
 * - the call is made on (or via a property of) an array creation or one of a fixed list of
 *   builtin globals, or an invocation thereof;
 * - the call is a method call whose receiver is a constant string, number or boolean literal;
 * - the callee name is one of a fixed list of common builtin method names.
 */
private predicate isArgumentToBuiltinFunction(DataFlow::Node n, FilteringReason reason) {
  // Case 1: calls on builtin objects, on values produced by invoking them, and on their properties.
  exists(DataFlow::SourceNode builtinObj, DataFlow::SourceNode base, DataFlow::InvokeNode invocation |
    (
      reason instanceof ArgumentToArrayReason and
      builtinObj instanceof DataFlow::ArrayCreationNode
      or
      reason instanceof ArgumentToBuiltinGlobalVarRefReason and
      builtinObj =
        DataFlow::globalVarRef([
            "Map", "Set", "WeakMap", "WeakSet", "Number", "Object", "String", "Array", "Error",
            "Math", "Boolean"
          ])
    )
  |
    // The base is either the builtin itself or the result of invoking it...
    base = [builtinObj.getAnInvocation(), builtinObj] and
    // ...and the invocation is of that base or of a property read on it.
    invocation = [base, base.getAPropertyRead()].getAnInvocation() and
    n = invocation.getAnArgument()
  )
  or
  // Case 2: method calls on a literal receiver.
  exists(Expr literalReceiver, MethodCallExpr methodCall |
    literalReceiver instanceof ConstantString
    or
    literalReceiver instanceof NumberLiteral
    or
    literalReceiver instanceof BooleanLiteral
  |
    reason instanceof ConstantReceiverReason and
    methodCall.calls(literalReceiver, _) and
    n.asExpr() = methodCall.getAnArgument()
  )
  or
  // Case 3: calls whose callee name is a well-known builtin method name.
  exists(DataFlow::CallNode namedCall |
    reason instanceof BuiltinCallNameReason and
    n = namedCall.getAnArgument() and
    namedCall.getCalleeName() =
      [
        "indexOf", "hasOwnProperty", "substring", "isDecimal", "decode", "encode", "keys", "shift",
        "values", "forEach", "toString", "slice", "splice", "push", "isArray", "sort"
      ]
  )
}
|
||||
|
||||
/**
 * Holds if `n` is an argument (or other modeled input) of something the core libraries already
 * model, and `reason` identifies which model matched.
 *
 * The cases cover builtin functions, lodash/underscore members, client requests, promise
 * resolve/reject callbacks, cryptographic keys and operations, and a range of recognised call
 * kinds such as logging, timers, storage, string tests, events, file system and database access.
 */
predicate isOtherModeledArgument(DataFlow::Node n, FilteringReason reason) {
  isArgumentToBuiltinFunction(n, reason)
  or
  reason instanceof LodashUnderscoreArgumentReason and
  exists(LodashUnderscore::Member member | n = member.getACall().getAnArgument())
  or
  reason instanceof ClientRequestReason and
  exists(ClientRequest request |
    n = [request.getAnArgument(), request.getUrl(), request.getHost(), request.getADataNode()]
  )
  or
  reason instanceof PromiseDefinitionReason and
  exists(PromiseDefinition promise |
    n = [promise.getResolveParameter(), promise.getRejectParameter()].getACall().getAnArgument()
  )
  or
  reason instanceof CryptographicKeyReason and
  n instanceof CryptographicKey
  or
  reason instanceof CryptographicOperationFlowReason and
  exists(CryptographicOperation operation | n = operation.getInput().flow())
  or
  // The remaining cases all classify `n` by the call it is an argument of.
  exists(DataFlow::CallNode c | n = c.getAnArgument() |
    reason instanceof LoggerMethodReason and
    c.getCalleeName() = getAStandardLoggerMethodName()
    or
    reason instanceof TimeoutReason and
    c.getCalleeName() = ["setTimeout", "clearTimeout"]
    or
    reason instanceof ReceiverStorageReason and
    c.getReceiver() = DataFlow::globalVarRef(["localStorage", "sessionStorage"])
    or
    reason instanceof StringStartsWithReason and
    c instanceof StringOps::StartsWith
    or
    reason instanceof StringEndsWithReason and
    c instanceof StringOps::EndsWith
    or
    reason instanceof StringRegExpTestReason and
    c instanceof StringOps::RegExpTest
    or
    reason instanceof EventRegistrationReason and
    c instanceof EventRegistration
    or
    reason instanceof EventDispatchReason and
    c instanceof EventDispatch
    or
    reason instanceof MembershipCandidateTestReason and
    c = any(MembershipCandidate candidate).getTest()
    or
    reason instanceof FileSystemAccessReason and
    c instanceof FileSystemAccess
    or
    reason instanceof DatabaseAccessReason and
    c instanceof DatabaseAccess
    or
    reason instanceof DOMReason and
    c = DOM::domValueRef()
    or
    // A call named `next` made through the last parameter of some function.
    reason instanceof NextFunctionCallReason and
    c.getCalleeName() = "next" and
    exists(DataFlow::FunctionNode fn | c = fn.getLastParameter().getACall())
  )
}
|
||||
@@ -0,0 +1,290 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Extracts data about the database for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import CodeToFeatures
|
||||
import EndpointScoring
|
||||
|
||||
/**
 * Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
 *
 * The value is a single string holding a space-separated list of tokens.
 */
private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
  // Features derived from the entity chosen to represent the endpoint.
  exists(DatabaseFeatures::Entity enclosing |
    enclosing = getRepresentativeEntityForEndpoint(endpoint)
  |
    // The name of the function that encloses the endpoint.
    result = enclosing.getName() and
    featureName = "enclosingFunctionName"
    or
    // Natural language tokens from the function that encloses the endpoint, in the order in
    // which they appear in the source code.
    result = unique(string body | body = FunctionBodies::getBodyTokenFeatureForEntity(enclosing)) and
    featureName = "enclosingFunctionBody"
  )
  or
  // Call-based features: join every component reported for this endpoint and feature name.
  // The `exists` guard means this case only applies when there is at least one component.
  exists(getACallBasedTokenFeatureComponent(endpoint, _, featureName)) and
  result =
    concat(DataFlow::CallNode enclosingCall, string part |
      part = getACallBasedTokenFeatureComponent(endpoint, enclosingCall, featureName)
    |
      part, " "
    )
  or
  // The access path of the function being called, both with and without structural info, if the
  // function being called originates from an external API. For example, the endpoint here:
  //
  // ```js
  // const mongoose = require('mongoose'),
  //   User = mongoose.model('User', null);
  // User.findOne(ENDPOINT);
  // ```
  //
  // would have a callee access path with structural info of
  // `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
  // path without structural info of `mongoose model findOne`.
  //
  // Reading the access path backwards, these features say that the callee comes from an instance
  // of the `findOne` member of an instance of the `model` member of the `mongoose` external
  // library.
  exists(AccessPaths::Boolean includeStructuralInfo |
    (
      includeStructuralInfo = true and featureName = "calleeAccessPathWithStructuralInfo"
      or
      includeStructuralInfo = false and featureName = "calleeAccessPath"
    ) and
    result =
      concat(API::Node apiNode, string path |
        apiNode.getInducingNode().(DataFlow::CallNode).getAnArgument() = endpoint and
        path = AccessPaths::getAccessPath(apiNode, includeStructuralInfo)
      |
        path, " "
      )
  )
}
|
||||
|
||||
/**
 * Gets a value of the function-call-related token-based feature named `featureName` associated
 * with the function call `call` and the endpoint `endpoint`, where `endpoint` is an argument of
 * `call`.
 *
 * In general this may report several strings, each holding a space-separated list of tokens.
 *
 * **Technical details:** Because this predicate can have multiple values per endpoint and feature
 * name, its results have to be concatenated by the caller. Concatenation is expensive for
 * features like the function body tokens, so those are computed separately (see
 * `FunctionBodies::getBodyTokenFeatureForEntity`) and only the features here pay that cost.
 */
private string getACallBasedTokenFeatureComponent(
  DataFlow::Node endpoint, DataFlow::CallNode call, string featureName
) {
  // All features below require the endpoint to be an argument to the call.
  endpoint = call.getAnArgument() and
  (
    // The name of the function being called, e.g. in a call `Artist.findOne(...)`, this is `findOne`.
    result = call.getCalleeName() and
    featureName = "calleeName"
    or
    // The name of the receiver of the call, e.g. in a call `Artist.findOne(...)`, this is `Artist`.
    result = call.getReceiver().asExpr().(VarRef).getName() and
    featureName = "receiverName"
    or
    // The argument index of the endpoint, e.g. in `f(a, endpoint, b)`, this is 1.
    exists(int position | endpoint = call.getArgument(position) |
      result = position.toString()
    ) and
    featureName = "argumentIndex"
    or
    // The name of the API that the function being called originates from, if the function being
    // called originates from an external API. For example, the endpoint here:
    //
    // ```js
    // const mongoose = require('mongoose'),
    //   User = mongoose.model('User', null);
    // User.findOne(ENDPOINT);
    // ```
    //
    // would have a callee API name of `mongoose`.
    result = getAnApiName(call) and
    featureName = "calleeApiName"
  )
}
|
||||
|
||||
/** Provides predicates for computing the function body feature associated with an entity. */
module FunctionBodies {
  /**
   * Holds if `node` is an AST node within the entity `entity` and `token` is the one and only
   * node attribute associated with `node`.
   */
  private predicate bodyTokens(
    DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
  ) {
    token = unique(string attr | DatabaseFeatures::nodeAttributes(node, attr)) and
    DatabaseFeatures::astNodes(entity, _, _, node, _)
  }

  /**
   * Gets the body token feature for the specified entity.
   *
   * This is a string of natural language tokens, separated by spaces, in the order in which they
   * appear in the source code for the entity. Entities with more than 256 body tokens get the
   * empty string instead.
   */
  string getBodyTokenFeatureForEntity(DatabaseFeatures::Entity entity) {
    // If a function has more than 256 body subtokens, then featurize it as absent. This
    // approximates the behavior of the classifier on non-generic body features, where large body
    // features are replaced by the absent token.
    if count(DatabaseFeatures::AstNode n, string t | bodyTokens(entity, n, t)) > 256
    then result = ""
    else
      result =
        concat(int position, string orderedToken |
          // Number the tokens by source position, using the token text as the final tie-break.
          orderedToken =
            rank[position](DatabaseFeatures::AstNode n, string t, Location loc |
              loc = n.getLocation() and bodyTokens(entity, n, t)
            |
              t
              order by
                loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(),
                loc.getEndLine(), loc.getEndColumn(), t
            )
        |
          orderedToken, " " order by position
        )
  }
}
|
||||
|
||||
/**
 * Gets a name of the API that `node` originates from, if `node` originates from an API.
 *
 * That is, gets a module name whose import, or some transitive successor of it in the API graph,
 * is induced by `node`. There may be several results if `node` corresponds to multiple nodes in
 * the API graph forest.
 */
pragma[inline]
private string getAnApiName(DataFlow::Node node) {
  exists(API::Node moduleRoot | moduleRoot = API::moduleImport(result) |
    node = moduleRoot.getASuccessor*().getInducingNode()
  )
}
|
||||
|
||||
/**
 * Provides predicates for computing a representation of the access path of nodes within the
 * program.
 *
 * For example, it gives the `User.find` callee here:
 *
 * ```js
 * const mongoose = require('mongoose'),
 *   User = mongoose.model('User', null);
 * User.find({ 'isAdmin': true })
 * ```
 * the access path `mongoose member model instanceorreturn member find instanceorreturn`.
 *
 * This access path is based on the simplified access path that the untrusted data flowing to
 * external API query associates to each of its sinks, with modifications to optionally include
 * explicit structural information and to improve how well the path tokenizes.
 */
private module AccessPaths {
  /** Holds if `str` can be converted to an integer. */
  bindingset[str]
  private predicate isNumericString(string str) { exists(int parsed | parsed = str.toInt()) }

  /**
   * Gets a parameter of `base` with name `name`, or a property named `name` of a destructuring parameter.
   *
   * The receiver of `base` is never considered.
   */
  private API::Node getNamedParameter(API::Node base, string name) {
    exists(API::Node candidate |
      not candidate = base.getReceiver() and
      candidate = base.getAParameter()
    |
      // An ordinary parameter, named after its declaration.
      name = candidate.getAnImmediateUse().asExpr().(Parameter).getName() and
      result = candidate
      or
      // A destructuring parameter: use the destructured member instead.
      result = candidate.getMember(name) and
      candidate.getAnImmediateUse().asExpr() instanceof DestructuringPattern
    )
  }

  /**
   * A utility class that is equivalent to `boolean` but does not require type joining.
   */
  class Boolean extends boolean {
    Boolean() { this = [true, false] }
  }

  /**
   * Gets the access path for `node`. When `includeStructuralInfo` is true the path also contains
   * structural information like `member`, `param`, and `functionalarg`.
   */
  string getAccessPath(API::Node node, Boolean includeStructuralInfo) {
    // Base case: a module import is named after the module.
    node = API::moduleImport(result)
    or
    // Recursive case: extend the access path of a shallower node.
    exists(API::Node pred, string prefix |
      prefix = getAccessPath(pred, includeStructuralInfo) and pred.getDepth() < node.getDepth()
    |
      // e.g. `new X`, `X()`
      node = [pred.getInstance(), pred.getReturn()] and
      (
        includeStructuralInfo = true and result = prefix + " instanceorreturn"
        or
        includeStructuralInfo = false and result = prefix
      )
      or
      // e.g. `x.y`, `x[y]`, `const { y } = x`, where `y` is non-numeric and is known at analysis
      // time.
      exists(string member |
        node = pred.getMember(member) and
        not node = pred.getUnknownMember() and
        not isNumericString(member) and
        not (member = "default" and pred = API::moduleImport(_)) and
        not member = "then" // use the 'promised' edges for .then callbacks
      |
        includeStructuralInfo = true and result = prefix + " member " + member
        or
        includeStructuralInfo = false and result = prefix + " " + member
      )
      or
      // e.g. `x.y`, `x[y]`, `const { y } = x`, where `y` is numeric or not known at analysis time.
      (
        node = pred.getUnknownMember() or
        node = pred.getMember(any(string s | isNumericString(s)))
      ) and
      (
        includeStructuralInfo = true and result = prefix + " member"
        or
        includeStructuralInfo = false and result = prefix
      )
      or
      // e.g. `x.then(y => ...)`
      node = pred.getPromised() and
      result = prefix
      or
      // e.g. `x.y((a, b) => ...)`
      // Name callback parameters after their name in the source code.
      // For example, the `res` parameter in `express.get('/foo', (req, res) => {...})` will be
      // named `express member get functionalarg param res`.
      exists(string paramName |
        node = getNamedParameter(pred.getAParameter(), paramName) and
        (
          if includeStructuralInfo = true
          then result = prefix + " functionalarg param " + paramName
          else result = prefix + " " + paramName
        )
        or
        exists(string callbackName, string index |
          node =
            getNamedParameter(pred.getASuccessor("param " + index).getMember(callbackName),
              paramName) and
          index != "-1" and // ignore receiver
          (
            if includeStructuralInfo = true
            then
              result =
                prefix + " functionalarg " + index + " " + callbackName + " param " + paramName
            else result = prefix + " " + index + " " + callbackName + " " + paramName
          )
        )
      )
    )
  }
}
|
||||
|
||||
/** Gets a name of a supported generic token-based feature. */
private string getASupportedFeatureName() {
  // Features of the enclosing function.
  result = ["enclosingFunctionName", "enclosingFunctionBody"]
  or
  // Features of the call that the endpoint is an argument of.
  result =
    [
      "calleeName", "receiverName", "argumentIndex", "calleeApiName", "calleeAccessPath",
      "calleeAccessPathWithStructuralInfo"
    ]
}
|
||||
|
||||
/**
 * Generic token-based features for ATM.
 *
 * Holds if the generic token-based feature named `featureName` has the value `featureValue` for
 * the endpoint `endpoint`. Every supported feature gets a value for every endpoint: when
 * `getTokenFeature` does not yield exactly one value, `featureValue` is the empty string.
 */
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
  featureName = getASupportedFeatureName() and
  (
    if exists(unique(string v | v = getTokenFeature(endpoint, featureName)))
    then featureValue = unique(string v | v = getTokenFeature(endpoint, featureName))
    else featureValue = ""
  )
}
|
||||
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides an implementation of scoring alerts for use in adaptive threat modeling (ATM).
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
import BaseScoring
|
||||
import CodeToFeatures
|
||||
import EndpointFeatures as EndpointFeatures
|
||||
import EndpointTypes
|
||||
|
||||
/**
 * Gets a checksum reported by `adaptiveThreatModelingModels` for the language `"javascript"` and
 * the model type `"atm-endpoint-scoring"`.
 */
private string getACompatibleModelChecksum() {
  exists(string checksum |
    adaptiveThreatModelingModels(checksum, "javascript", _, "atm-endpoint-scoring")
  |
    result = checksum
  )
}
|
||||
|
||||
/**
 * Gets the maximum number of AST nodes an entity containing an endpoint should have before we
 * choose a smaller entity to represent the endpoint.
 *
 * The limit balances how much context we give to the model. An entity that is too small contains
 * little context and misses useful information; an entity that is too large is likely to contain
 * a lot of irrelevant or only loosely related context.
 */
private int getMaxNumAstNodes() { result = 1024 }
|
||||
|
||||
/**
 * Gets the number of AST nodes contained within `entity`, where `entity` is an entity that
 * contains some endpoint.
 */
private int getNumAstNodesInEntity(DatabaseFeatures::Entity entity) {
  result =
    count(DatabaseFeatures::AstNode contained |
      DatabaseFeatures::astNodes(entity, _, _, contained, _)
    ) and
  // Only compute the count for entities that actually contain an endpoint.
  entity = EndpointToEntity::getAnEntityForEndpoint(_)
}
|
||||
|
||||
/**
 * Gets the single entity to use as the representative entity for `endpoint`.
 *
 * We prefer the largest entity containing the endpoint that does not exceed the AST node limit
 * defined in `getMaxNumAstNodes`. Ties are broken in favor of the entity that appears first
 * within the source archive.
 *
 * If every entity containing the endpoint exceeds the limit, we use the smallest one instead.
 */
DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpoint) {
  // Is there any entity containing the endpoint that fits within the AST node limit?
  if
    getNumAstNodesInEntity(EndpointToEntity::getAnEntityForEndpoint(endpoint)) <=
      getMaxNumAstNodes()
  then
    // Yes: take the largest such entity, breaking ties by source position.
    result =
      min(DatabaseFeatures::Entity candidate, int size, Location loc |
        candidate = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
        size = getNumAstNodesInEntity(candidate) and
        size <= getMaxNumAstNodes() and
        loc = candidate.getLocation()
      |
        candidate
        order by
          size desc, loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn()
      )
  else
    // No: take the smallest entity overall, breaking ties by source position.
    result =
      min(DatabaseFeatures::Entity candidate, int size, Location loc |
        candidate = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
        size = getNumAstNodesInEntity(candidate) and
        loc = candidate.getLocation()
      |
        candidate
        order by
          size, loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn()
      )
}
|
||||
|
||||
/** Provides the inputs to, and the results of, scoring endpoints with `scoreEndpoints`. */
module ModelScoring {
  /** Holds if `endpoint` is an effective source or an effective sink of the configuration. */
  predicate endpoints(DataFlow::Node endpoint) {
    exists(ATMConfig cfg | cfg = getCfg() |
      cfg.isEffectiveSource(endpoint)
      or
      cfg.isEffectiveSink(endpoint)
    )
  }

  /** Gets the encoding of an endpoint type for which scores are requested. */
  private int requestedEndpointTypes() {
    exists(EndpointType endpointType | result = endpointType.getEncoding())
  }

  /** Holds if `tokenFeatures` holds for `endpoint`, restricted to the endpoints being scored. */
  private predicate relevantTokenFeatures(
    DataFlow::Node endpoint, string featureName, string featureValue
  ) {
    EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue) and
    endpoints(endpoint)
  }

  /**
   * Holds if `scoreEndpoints` assigns `score` to `endpoint` for the endpoint type with encoding
   * `encodedEndpointType`.
   */
  predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
    scoreEndpoints(endpoints/1, requestedEndpointTypes/0, relevantTokenFeatures/3,
      getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
}
|
||||
|
||||
/**
 * Gets ATM's confidence that `source` is a source for the given security query. This will be a
 * number between 0.0 and 1.0.
 *
 * Known sources always score 1.0; any other source gets the model's score for a source endpoint
 * type of the configuration.
 */
private float getScoreForSource(DataFlow::Node source) {
  getCfg().isKnownSource(source) and
  result = 1.0
  or
  not getCfg().isKnownSource(source) and
  // This restriction on `source` has no semantic effect but improves performance.
  getCfg().isEffectiveSource(source) and
  exists(int encodedType | encodedType = getCfg().getASourceEndpointType().getEncoding() |
    ModelScoring::endpointScores(source, encodedType, result)
  )
}
|
||||
|
||||
/**
 * Gets ATM's confidence that `sink` is a sink for the given security query. This will be a
 * number between 0.0 and 1.0.
 *
 * The score is chosen by the first case that applies:
 *
 * 1. a known sink scores 1.0;
 * 2. a sink with an overriding score gets that overriding score, and only that score;
 * 3. any other effective sink gets the model's score for a sink endpoint type of the
 *    configuration.
 */
private float getScoreForSink(DataFlow::Node sink) {
  if getCfg().isKnownSink(sink)
  then result = 1.0
  else
    // The condition must not mention `result`. If it did, the conditional would be evaluated
    // separately for every candidate value of `result`, and a sink with an overriding score
    // would also receive its model score through the `else` branch. Testing for the existence
    // of an override keeps this predicate consistent with `shouldResultBeIncluded` and
    // `getASinkOrigin`.
    if getCfg().isEffectiveSinkWithOverridingScore(sink, _, _)
    then getCfg().isEffectiveSinkWithOverridingScore(sink, result, _)
    else (
      // This restriction on `sink` has no semantic effect but improves performance.
      getCfg().isEffectiveSink(sink) and
      ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(), result)
    )
}
|
||||
|
||||
/**
 * The scoring results produced by endpoint scoring. This only exists when there is at least one
 * compatible model checksum.
 */
class EndpointScoringResults extends ScoringResults {
  EndpointScoringResults() {
    exists(getACompatibleModelChecksum()) and this = "EndpointScoringResults"
  }

  /**
   * Gets ATM's confidence that a path between `source` and `sink` represents a security
   * vulnerability. This will be a number between 0.0 and 1.0.
   *
   * It is the product of the source score and the sink score.
   */
  override float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
    exists(float sourceScore, float sinkScore |
      sourceScore = getScoreForSource(source) and
      sinkScore = getScoreForSink(sink)
    |
      result = sourceScore * sinkScore
    )
  }

  /**
   * Gets a string representing why ATM included the given source in the dataflow analysis.
   *
   * In general, there may be multiple reasons why ATM included the given source, in which case
   * this predicate has multiple results.
   */
  pragma[inline]
  override string getASourceOrigin(DataFlow::Node source) {
    getCfg().isKnownSource(source) and result = "known"
    or
    getCfg().isEffectiveSource(source) and result = "predicted"
  }

  /**
   * Gets a string representing why ATM included the given sink in the dataflow analysis.
   *
   * In general, there may be multiple reasons why ATM included the given sink, in which case
   * this predicate has multiple results.
   */
  pragma[inline]
  override string getASinkOrigin(DataFlow::Node sink) {
    getCfg().isKnownSink(sink) and result = "known"
    or
    not getCfg().isKnownSink(sink) and
    (
      // The origin supplied together with an overriding score.
      getCfg().isEffectiveSinkWithOverridingScore(sink, _, result)
      or
      // Otherwise, list the model's score for every endpoint type, ordered by encoding.
      not getCfg().isEffectiveSinkWithOverridingScore(sink, _, _) and
      getCfg().isEffectiveSink(sink) and
      result =
        "predicted (scores: " +
          concat(EndpointType endpointType, float typeScore |
            ModelScoring::endpointScores(sink, endpointType.getEncoding(), typeScore)
          |
            endpointType.getDescription() + "=" + typeScore.toString(), ", "
            order by
              endpointType.getEncoding()
          ) + ")"
    )
  }

  /**
   * Holds if the flow from `source` to `sink` should be reported. The decision depends only on
   * `sink`:
   *
   * - known sinks are always included;
   * - sinks with an overriding score are included if some overriding score reaches the score
   *   cutoff of the configuration;
   * - other effective sinks are included based on the model's scores, as described in the body.
   */
  pragma[inline]
  override predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
    getCfg().isKnownSink(sink)
    or
    not getCfg().isKnownSink(sink) and
    (
      exists(float overridingScore |
        getCfg().isEffectiveSinkWithOverridingScore(sink, overridingScore, _) and
        overridingScore >= getCfg().getScoreCutoff()
      )
      or
      not getCfg().isEffectiveSinkWithOverridingScore(sink, _, _) and
      // This restriction on `sink` has no semantic effect but improves performance.
      getCfg().isEffectiveSink(sink) and
      exists(float queryTypeScore |
        ModelScoring::endpointScores(sink, getCfg().getASinkEndpointType().getEncoding(),
          queryTypeScore)
      |
        // Include the endpoint if (a) the query endpoint type scores higher than all other
        // endpoint types, or (b) the query endpoint type scores at least
        // 0.5 - (getCfg().getScoreCutoff() / 2).
        queryTypeScore >= max(float s | ModelScoring::endpointScores(sink, _, s))
        or
        queryTypeScore >= 0.5 - getCfg().getScoreCutoff() / 2
      )
    )
  }
}
|
||||
|
||||
/** Query predicates that expose intermediate ATM results for debugging. */
module Debugging {
  /** Alias of `ModelScoring::endpoints/1`. */
  query predicate hopInputEndpoints = ModelScoring::endpoints/1;

  /** Alias of `ModelScoring::endpointScores/3`. */
  query predicate endpointScores = ModelScoring::endpointScores/3;

  /**
   * Holds if some data flow configuration has flow from `source` to `sink` and some
   * `ScoringResults` instance says that this result should be included.
   */
  query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
    exists(DataFlow::Configuration cfg | cfg.hasFlow(source, sink)) and
    exists(ScoringResults scoringResults | scoringResults.shouldResultBeIncluded(source, sink))
  }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
|
||||
* only predict classes defined within this file. This file is the source of truth for the integer
|
||||
* representation of each of these classes.
|
||||
*/
|
||||
newtype TEndpointType =
  // One branch per class that an endpoint scoring model may predict; the integer encoding of
  // each branch is defined by the corresponding `EndpointType` subclass.
  TNotASinkType() or
  TXssSinkType() or
  TNosqlInjectionSinkType() or
  TSqlInjectionSinkType() or
  TTaintedPathSinkType()
|
||||
|
||||
/** A class that endpoint scoring models are able to predict. */
abstract class EndpointType extends TEndpointType {
  /** Gets the integer encoding of this endpoint type. */
  abstract int getEncoding();

  /** Gets the textual description of this endpoint type. */
  abstract string getDescription();

  /** Gets a textual representation of this endpoint type, which is its description. */
  string toString() { result = this.getDescription() }
}
|
||||
|
||||
/** The `NotASink` endpoint type (encoding 0) that endpoint scoring models can predict. */
class NotASinkType extends EndpointType, TNotASinkType {
  override int getEncoding() { result = 0 }

  override string getDescription() { result = "NotASink" }
}
|
||||
|
||||
/** The `XssSink` endpoint type (encoding 1) that endpoint scoring models can predict. */
class XssSinkType extends EndpointType, TXssSinkType {
  override int getEncoding() { result = 1 }

  override string getDescription() { result = "XssSink" }
}
|
||||
|
||||
/** The `NosqlInjectionSink` endpoint type (encoding 2) that endpoint scoring models can predict. */
class NosqlInjectionSinkType extends EndpointType, TNosqlInjectionSinkType {
  override int getEncoding() { result = 2 }

  override string getDescription() { result = "NosqlInjectionSink" }
}
|
||||
|
||||
/** The `SqlInjectionSink` endpoint type (encoding 3) that endpoint scoring models can predict. */
class SqlInjectionSinkType extends EndpointType, TSqlInjectionSinkType {
  override int getEncoding() { result = 3 }

  override string getDescription() { result = "SqlInjectionSink" }
}
|
||||
|
||||
/** The `TaintedPathSink` endpoint type (encoding 4) that endpoint scoring models can predict. */
class TaintedPathSinkType extends EndpointType, TTaintedPathSinkType {
  override int getEncoding() { result = 4 }

  override string getDescription() { result = "TaintedPathSink" }
}
|
||||
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines a set of reasons why a particular endpoint was filtered out. This set of reasons
|
||||
* contains both reasons why an endpoint could be `NotASink` and reasons why an endpoint could be
|
||||
* `LikelyNotASink`. The `NotASinkReason`s defined here are exhaustive, but the
|
||||
* `LikelyNotASinkReason`s are not exhaustive.
|
||||
*/
|
||||
newtype TFilteringReason =
  // One branch per filtering reason; the description and integer encoding of each branch are
  // defined by the corresponding `FilteringReason` subclass.
  TIsArgumentToBuiltinFunctionReason() or
  TLodashUnderscoreArgumentReason() or
  TClientRequestReason() or
  TPromiseDefinitionReason() or
  TCryptographicKeyReason() or
  TCryptographicOperationFlowReason() or
  TLoggerMethodReason() or
  TTimeoutReason() or
  TReceiverStorageReason() or
  TStringStartsWithReason() or
  TStringEndsWithReason() or
  TStringRegExpTestReason() or
  TEventRegistrationReason() or
  TEventDispatchReason() or
  TMembershipCandidateTestReason() or
  TFileSystemAccessReason() or
  TDatabaseAccessReason() or
  TDOMReason() or
  TNextFunctionCallReason() or
  TArgumentToArrayReason() or
  TArgumentToBuiltinGlobalVarRefReason() or
  TConstantReceiverReason() or
  TBuiltinCallNameReason()
|
||||
|
||||
/** A reason for which the endpoint filters excluded a particular endpoint. */
abstract class FilteringReason extends TFilteringReason {
  /** Gets the integer encoding of this filtering reason. */
  abstract int getEncoding();

  /** Gets the textual description of this filtering reason. */
  abstract string getDescription();

  /** Gets a textual representation of this filtering reason, which is its description. */
  string toString() { result = this.getDescription() }
}
|
||||
|
||||
/**
 * A reason for considering a particular endpoint to be `NotASink`.
 *
 * An endpoint is `NotASink` when all of the following hold: it has at least one
 * `NotASinkReason`, it has no `LikelyNotASinkReason`, and it is not a known sink.
 */
abstract class NotASinkReason extends FilteringReason { }
|
||||
|
||||
/**
 * A reason for considering a particular endpoint to be `LikelyNotASink`.
 *
 * An endpoint is `LikelyNotASink` when it has at least one `LikelyNotASinkReason` and it is
 * not a known sink.
 */
abstract class LikelyNotASinkReason extends FilteringReason { }
|
||||
|
||||
/** The `IsArgumentToBuiltinFunction` filtering reason (encoding 5), a `NotASinkReason`. */
class IsArgumentToBuiltinFunctionReason extends NotASinkReason, TIsArgumentToBuiltinFunctionReason {
  override int getEncoding() { result = 5 }

  override string getDescription() { result = "IsArgumentToBuiltinFunction" }
}
|
||||
|
||||
/** The `LodashUnderscoreArgument` filtering reason (encoding 6), a `NotASinkReason`. */
class LodashUnderscoreArgumentReason extends NotASinkReason, TLodashUnderscoreArgumentReason {
  override int getEncoding() { result = 6 }

  override string getDescription() { result = "LodashUnderscoreArgument" }
}
|
||||
|
||||
/** The `ClientRequest` filtering reason (encoding 7), a `NotASinkReason`. */
class ClientRequestReason extends NotASinkReason, TClientRequestReason {
  override int getEncoding() { result = 7 }

  override string getDescription() { result = "ClientRequest" }
}
|
||||
|
||||
/** The `PromiseDefinition` filtering reason (encoding 8), a `NotASinkReason`. */
class PromiseDefinitionReason extends NotASinkReason, TPromiseDefinitionReason {
  override int getEncoding() { result = 8 }

  override string getDescription() { result = "PromiseDefinition" }
}
|
||||
|
||||
/** The `CryptographicKey` filtering reason (encoding 9), a `NotASinkReason`. */
class CryptographicKeyReason extends NotASinkReason, TCryptographicKeyReason {
  override int getEncoding() { result = 9 }

  override string getDescription() { result = "CryptographicKey" }
}
|
||||
|
||||
/** The `CryptographicOperationFlow` filtering reason (encoding 10), a `NotASinkReason`. */
class CryptographicOperationFlowReason extends NotASinkReason, TCryptographicOperationFlowReason {
  override int getEncoding() { result = 10 }

  override string getDescription() { result = "CryptographicOperationFlow" }
}
|
||||
|
||||
/** The `LoggerMethod` filtering reason (encoding 11), a `NotASinkReason`. */
class LoggerMethodReason extends NotASinkReason, TLoggerMethodReason {
  override int getEncoding() { result = 11 }

  override string getDescription() { result = "LoggerMethod" }
}
|
||||
|
||||
/** The `Timeout` filtering reason (encoding 12), a `NotASinkReason`. */
class TimeoutReason extends NotASinkReason, TTimeoutReason {
  override int getEncoding() { result = 12 }

  override string getDescription() { result = "Timeout" }
}
|
||||
|
||||
/** The `ReceiverStorage` filtering reason (encoding 13), a `NotASinkReason`. */
class ReceiverStorageReason extends NotASinkReason, TReceiverStorageReason {
  override int getEncoding() { result = 13 }

  override string getDescription() { result = "ReceiverStorage" }
}
|
||||
|
||||
/** The `StringStartsWith` filtering reason (encoding 14), a `NotASinkReason`. */
class StringStartsWithReason extends NotASinkReason, TStringStartsWithReason {
  override int getEncoding() { result = 14 }

  override string getDescription() { result = "StringStartsWith" }
}
|
||||
|
||||
/** The `StringEndsWith` filtering reason (encoding 15), a `NotASinkReason`. */
class StringEndsWithReason extends NotASinkReason, TStringEndsWithReason {
  override int getEncoding() { result = 15 }

  override string getDescription() { result = "StringEndsWith" }
}
|
||||
|
||||
/** The `StringRegExpTest` filtering reason (encoding 16), a `NotASinkReason`. */
class StringRegExpTestReason extends NotASinkReason, TStringRegExpTestReason {
  override int getEncoding() { result = 16 }

  override string getDescription() { result = "StringRegExpTest" }
}
|
||||
|
||||
/** The `EventRegistration` filtering reason (encoding 17), a `NotASinkReason`. */
class EventRegistrationReason extends NotASinkReason, TEventRegistrationReason {
  override int getEncoding() { result = 17 }

  override string getDescription() { result = "EventRegistration" }
}
|
||||
|
||||
/** The `EventDispatch` filtering reason (encoding 18), a `NotASinkReason`. */
class EventDispatchReason extends NotASinkReason, TEventDispatchReason {
  override int getEncoding() { result = 18 }

  override string getDescription() { result = "EventDispatch" }
}
|
||||
|
||||
/** The `MembershipCandidateTest` filtering reason (encoding 19), a `NotASinkReason`. */
class MembershipCandidateTestReason extends NotASinkReason, TMembershipCandidateTestReason {
  override int getEncoding() { result = 19 }

  override string getDescription() { result = "MembershipCandidateTest" }
}
|
||||
|
||||
/** The `FileSystemAccess` filtering reason (encoding 20), a `NotASinkReason`. */
class FileSystemAccessReason extends NotASinkReason, TFileSystemAccessReason {
  override int getEncoding() { result = 20 }

  override string getDescription() { result = "FileSystemAccess" }
}
|
||||
|
||||
/** The `DatabaseAccess` filtering reason (encoding 21), a `NotASinkReason`. */
class DatabaseAccessReason extends NotASinkReason, TDatabaseAccessReason {
  override int getEncoding() { result = 21 }

  override string getDescription() { result = "DatabaseAccess" }
}
|
||||
|
||||
/** The `DOM` filtering reason (encoding 22), a `NotASinkReason`. */
class DOMReason extends NotASinkReason, TDOMReason {
  override int getEncoding() { result = 22 }

  override string getDescription() { result = "DOM" }
}
|
||||
|
||||
/** The `NextFunctionCall` filtering reason (encoding 23), a `NotASinkReason`. */
class NextFunctionCallReason extends NotASinkReason, TNextFunctionCallReason {
  override int getEncoding() { result = 23 }

  override string getDescription() { result = "NextFunctionCall" }
}
|
||||
|
||||
/** The `ArgumentToArray` filtering reason (encoding 24), a `LikelyNotASinkReason`. */
class ArgumentToArrayReason extends LikelyNotASinkReason, TArgumentToArrayReason {
  override int getEncoding() { result = 24 }

  override string getDescription() { result = "ArgumentToArray" }
}
|
||||
|
||||
/** The `ArgumentToBuiltinGlobalVarRef` filtering reason (encoding 25), a `LikelyNotASinkReason`. */
class ArgumentToBuiltinGlobalVarRefReason extends LikelyNotASinkReason,
  TArgumentToBuiltinGlobalVarRefReason {
  override int getEncoding() { result = 25 }

  override string getDescription() { result = "ArgumentToBuiltinGlobalVarRef" }
}
|
||||
|
||||
/** The `ConstantReceiver` filtering reason (encoding 26), a `NotASinkReason`. */
class ConstantReceiverReason extends NotASinkReason, TConstantReceiverReason {
  override int getEncoding() { result = 26 }

  override string getDescription() { result = "ConstantReceiver" }
}
|
||||
|
||||
/** The `BuiltinCallName` filtering reason (encoding 27), a `NotASinkReason`. */
class BuiltinCallNameReason extends NotASinkReason, TBuiltinCallNameReason {
  override int getEncoding() { result = 27 }

  override string getDescription() { result = "BuiltinCallName" }
}
|
||||
@@ -0,0 +1,178 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines shared code used by the NoSQL injection boosted query.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
|
||||
private import semmle.javascript.security.TaintedObject
|
||||
import AdaptiveThreatModeling
|
||||
private import CoreKnowledge as CoreKnowledge
|
||||
private import StandardEndpointFilters as StandardEndpointFilters
|
||||
|
||||
/**
 * This module provides logic to filter candidate sinks to those which are likely NoSQL injection
 * sinks.
 */
module SinkEndpointFilter {
  /**
   * Gets a reason why the data flow node `sinkCandidate` should be excluded as a sink candidate.
   *
   * A sink candidate for which this predicate has no result is treated as an effective sink.
   */
  string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
    // Explicitly allow the following heuristic sinks: candidates matching either heuristic
    // never get an exclusion reason.
    //
    // These are copied from the `HeuristicNosqlInjectionSink` class defined within
    // `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
    // We can't reuse the class because importing that file would cause us to treat these
    // heuristic sinks as known sinks.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(nosql|query)") and
    not isArgTo(sinkCandidate, "(?i)(query)") and
    (
      result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
      or
      // Require NoSQL injection sink candidates to be direct arguments to external library calls.
      //
      // The standard endpoint filters allow sink candidates which are within object literals or
      // array literals, for example `req.sendFile(_, { path: ENDPOINT })`.
      //
      // However, the NoSQL injection query deals differently with these types of sinks compared to
      // other security queries. Other security queries such as SQL injection tend to treat
      // `ENDPOINT` as the ground truth sink, but the NoSQL injection query instead treats
      // `{ path: ENDPOINT }` as the ground truth sink and defines an additional flow step to ensure
      // data flows from `ENDPOINT` to the ground truth sink `{ path: ENDPOINT }`.
      //
      // Therefore for the NoSQL injection boosted query, we must explicitly ignore sink candidates
      // within object literals or array literals, to avoid having multiple alerts for the same
      // security vulnerability (one FP where the sink is `ENDPOINT` and one TP where the sink is
      // `{ path: ENDPOINT }`).
      //
      // We use the same reason as in the standard endpoint filters to avoid duplicate reasons for
      // endpoints that are neither direct nor indirect arguments to a likely external library call.
      result = "not an argument to a likely external library call" and
      not sinkCandidate = StandardEndpointFilters::getALikelyExternalLibraryCall().getAnArgument()
      or
      exists(DataFlow::CallNode call |
        sinkCandidate = call.getAnArgument() and
        (
          // additional database accesses that aren't modeled yet
          result = "matches database access call heuristic" and
          ["create", "createCollection", "createIndexes"] =
            call.(DataFlow::MethodCallNode).getMethodName()
          or
          // Remove modeled sinks
          result = "modeled sink" and
          CoreKnowledge::isArgumentToKnownLibrarySinkFunction(sinkCandidate)
          or
          // Remove common kinds of unlikely sinks
          result = "predecessor in a modeled flow step" and
          CoreKnowledge::isKnownStepSrc(sinkCandidate)
          or
          // Remove modeled database calls. Arguments to modeled calls are very likely to be
          // modeled as sinks if they are true positives. Therefore arguments that are not
          // modeled as sinks are unlikely to be true positives.
          result = "modeled database access" and
          call instanceof DatabaseAccess
          or
          // Remove calls to APIs that aren't relevant to NoSQL injection
          result = "receiver is a HTTP request expression" and
          call.getReceiver().asExpr() instanceof HTTP::RequestExpr
          or
          result = "receiver is a HTTP response expression" and
          call.getReceiver().asExpr() instanceof HTTP::ResponseExpr
        )
      )
    )
  }
}
|
||||
|
||||
/** The ATM configuration for the NoSQL injection boosted query. */
class NosqlInjectionATMConfig extends ATMConfig {
  NosqlInjectionATMConfig() { this = "NosqlInjectionATMConfig" }

  /** Holds if `source` is a `NosqlInjection::Source` or a `TaintedObject` source. */
  override predicate isKnownSource(DataFlow::Node source) {
    TaintedObject::isSource(source, _)
    or
    source instanceof NosqlInjection::Source
  }

  /** Holds if `sink` is a `NosqlInjection::Sink`. */
  override predicate isKnownSink(DataFlow::Node sink) {
    exists(NosqlInjection::Sink knownSink | sink = knownSink)
  }

  /** Holds if the sink endpoint filter gives no reason to exclude `sinkCandidate`. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason |
      reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
    )
  }

  /** Gets the NoSQL injection sink endpoint type. */
  override EndpointType getASinkEndpointType() { result = any(NosqlInjectionSinkType t) }
}
|
||||
|
||||
/**
 * Holds if `src` -> `trg` is an additional flow step in the non-boosted NoSQL injection
 * security query, taking flow label `inlbl` to flow label `outlbl`.
 */
predicate isBaseAdditionalFlowStep(
  DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
) {
  TaintedObject::step(src, trg, inlbl, outlbl)
  or
  // additional flow step to track taint through NoSQL query objects: from the right-hand side
  // of a property write on a query object to any node that the query object flows to
  exists(DataFlow::SourceNode queryObj, NoSQL::Query query |
    queryObj.flowsToExpr(query) and
    src = queryObj.getAPropertyWrite().getRhs() and
    queryObj.flowsTo(trg)
  ) and
  inlbl = TaintedObject::label() and
  outlbl = TaintedObject::label()
}
|
||||
|
||||
/**
 * Gets a node that is written to a property of, or is an array element of, a source node that
 * flows to a local source of `query` or (transitively) to another subexpression within `query`.
 *
 * This predicate allows us to propagate data flow through property writes and array constructors
 * within a query object, enabling the security query to pick up NoSQL injection vulnerabilities
 * involving more complex queries.
 */
DataFlow::Node getASubexpressionWithinQuery(DataFlow::Node query) {
  exists(DataFlow::SourceNode receiver |
    // The reflexive transitive closure lets nested objects and arrays be unwrapped to any depth.
    receiver.flowsTo(getASubexpressionWithinQuery*(query.getALocalSource())) and
    result =
      [
        // `receiver` is already a `DataFlow::SourceNode`, so no cast is needed here.
        receiver.getAPropertyWrite().getRhs(),
        receiver.(DataFlow::ArrayCreationNode).getAnElement()
      ]
  )
}
|
||||
|
||||
/**
 * A taint-tracking configuration for reasoning about NoSQL injection vulnerabilities.
 *
 * This is largely a copy of the taint tracking configuration for the standard NoSQL injection
 * query, except additional ATM sinks have been added and the additional flow step has been
 * generalised to cover the sinks predicted by ATM.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "NosqlInjectionATM" }

  override predicate isSource(DataFlow::Node source) {
    exists(NosqlInjection::Source knownSource | source = knownSource)
  }

  override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel label) {
    TaintedObject::isSource(source, label)
  }

  override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
    exists(NosqlInjection::Sink knownSink |
      knownSink = sink and
      label = knownSink.getAFlowLabel()
    )
    or
    // Allow effective sinks to have any taint label
    exists(NosqlInjectionATMConfig cfg | cfg.isEffectiveSink(sink))
  }

  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof NosqlInjection::Sanitizer
    or
    super.isSanitizer(node)
  }

  override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode guard) {
    exists(TaintedObject::SanitizerGuard taintedObjectGuard | guard = taintedObjectGuard)
  }

  override predicate isAdditionalFlowStep(
    DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
  ) {
    // additional flow steps from the base (non-boosted) security query
    isBaseAdditionalFlowStep(src, trg, inlbl, outlbl)
    or
    // relaxed version of previous step to track taint through unmodeled NoSQL query objects
    src = getASubexpressionWithinQuery(trg) and
    exists(NosqlInjectionATMConfig cfg | cfg.isEffectiveSink(trg))
  }
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines shared code used by the SQL injection boosted query.
|
||||
*/
|
||||
|
||||
import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
import semmle.javascript.security.dataflow.SqlInjectionCustomizations
|
||||
import AdaptiveThreatModeling
|
||||
import CoreKnowledge as CoreKnowledge
|
||||
import StandardEndpointFilters as StandardEndpointFilters
|
||||
|
||||
/**
 * This module provides logic to filter candidate sinks to those which are likely SQL injection
 * sinks.
 */
module SinkEndpointFilter {
  private import javascript
  private import SQL

  /**
   * Gets a reason why the data flow node `sinkCandidate` should be excluded as a sink candidate.
   *
   * A sink candidate for which this predicate has no result is treated as an effective sink.
   */
  string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
    // Explicitly allow the following heuristic sinks: candidates matching any of these
    // heuristics never get an exclusion reason.
    //
    // These are copied from the `HeuristicSqlInjectionSink` class defined within
    // `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
    // We can't reuse the class because importing that file would cause us to treat these
    // heuristic sinks as known sinks.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(sql|query)") and
    not isArgTo(sinkCandidate, "(?i)(query)") and
    not isConcatenatedWithString(sinkCandidate,
      "(?s).*(ALTER|COUNT|CREATE|DATABASE|DELETE|DISTINCT|DROP|FROM|GROUP|INSERT|INTO|LIMIT|ORDER|SELECT|TABLE|UPDATE|WHERE).*") and
    (
      result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
      or
      exists(DataFlow::CallNode call |
        sinkCandidate = call.getAnArgument() and
        (
          // prepared statements for SQL
          result = "prepared SQL statement" and
          exists(DataFlow::CallNode prepareCall | prepareCall.getCalleeName() = "prepare" |
            sinkCandidate = prepareCall.getAMethodCall("run").getAnArgument()
          )
          or
          result = "array creation" and
          sinkCandidate instanceof DataFlow::ArrayCreationNode
          or
          // UI is unrelated to SQL
          result = "HTML / rendering" and
          call.getCalleeName().regexpMatch("(?i).*(render|html).*")
        )
      )
    )
  }
}
|
||||
|
||||
/** The ATM configuration for the SQL injection boosted query. */
class SqlInjectionATMConfig extends ATMConfig {
  SqlInjectionATMConfig() { this = "SqlInjectionATMConfig" }

  /** Holds if `source` is a `SqlInjection::Source`. */
  override predicate isKnownSource(DataFlow::Node source) {
    exists(SqlInjection::Source knownSource | source = knownSource)
  }

  /** Holds if `sink` is a `SqlInjection::Sink`. */
  override predicate isKnownSink(DataFlow::Node sink) {
    exists(SqlInjection::Sink knownSink | sink = knownSink)
  }

  /** Holds if the sink endpoint filter gives no reason to exclude `sinkCandidate`. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason |
      reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
    )
  }

  /** Gets the SQL injection sink endpoint type. */
  override EndpointType getASinkEndpointType() { result = any(SqlInjectionSinkType t) }
}
|
||||
|
||||
/**
 * A taint-tracking configuration for reasoning about SQL injection vulnerabilities.
 *
 * This is largely a copy of the taint tracking configuration for the standard SQL injection
 * query, except additional sinks have been added using the sink endpoint filter.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "SqlInjectionATM" }

  override predicate isSource(DataFlow::Node source) {
    exists(SqlInjection::Source knownSource | source = knownSource)
  }

  override predicate isSink(DataFlow::Node sink) {
    // Sinks predicted by ATM that survive the endpoint filter
    exists(SqlInjectionATMConfig cfg | cfg.isEffectiveSink(sink))
    or
    // Sinks known to the standard query
    sink instanceof SqlInjection::Sink
  }

  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof SqlInjection::Sanitizer
    or
    super.isSanitizer(node)
  }
}
|
||||
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Provides classes and predicates that are useful for endpoint filters.
|
||||
*
|
||||
* The standard use of this library is to make use of `getAReasonSinkExcluded/1`.
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
|
||||
private import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
private import CoreKnowledge as CoreKnowledge
|
||||
|
||||
/** Gets a reason why the data flow node `n` should be excluded as a sink candidate. */
string getAReasonSinkExcluded(DataFlow::Node n) {
  result = "not an argument to a likely external library call" and
  not flowsToArgumentOfLikelyExternalLibraryCall(n)
  or
  result = "argument to modeled function" and isArgumentToModeledFunction(n)
  or
  result = "argument to sinkless library" and isArgumentToSinklessLibrary(n)
  or
  result = "sanitizer" and isSanitizer(n)
  or
  result = "predicate" and isPredicate(n)
  or
  result = "hash" and isHash(n)
  or
  result = "numeric" and isNumeric(n)
  or
  // Ignore candidate sinks within externs, generated, library, and test code
  exists(string category |
    category = ["externs", "generated", "library", "test"] and
    ClassifyFiles::classify(n.getFile(), category)
  |
    result = "in " + category + " file"
  )
}
|
||||
|
||||
/**
 * Holds if the node `n` is an argument to a function that has a manual model, that is, `n` is
 * an argument of an invocation that has at least one modeled argument (which may or may not be
 * `n` itself).
 */
predicate isArgumentToModeledFunction(DataFlow::Node n) {
  exists(DataFlow::InvokeNode invk |
    n = invk.getAnArgument() and
    isSomeModeledArgument(invk.getAnArgument())
  )
}
|
||||
|
||||
/**
 * Holds if the node `n` is an argument that has a manual model: a known library sink, a known
 * step source, or some other modeled argument according to `CoreKnowledge`.
 */
predicate isSomeModeledArgument(DataFlow::Node n) {
  CoreKnowledge::isOtherModeledArgument(n, _)
  or
  CoreKnowledge::isKnownStepSrc(n)
  or
  CoreKnowledge::isKnownLibrarySink(n)
}
|
||||
|
||||
/**
 * Holds if `n` appears to be a numeric value, which is approximated by `n` being read from
 * something whose name matches `.*index.*`.
 */
predicate isNumeric(DataFlow::Node n) {
  exists(string indexPattern | indexPattern = ".*index.*" | isReadFrom(n, indexPattern))
}
|
||||
|
||||
/**
 * Holds if `n` is an argument to a library without sinks, that is, an argument to an invocation
 * of the `slugify`, `striptags` or `marked` module import or of a property read on that import.
 */
predicate isArgumentToSinklessLibrary(DataFlow::Node n) {
  exists(DataFlow::SourceNode commonSafeLibrary, DataFlow::InvokeNode invk |
    commonSafeLibrary = DataFlow::moduleImport(["slugify", "striptags", "marked"]) and
    n = invk.getAnArgument()
  |
    // The module itself is invoked, e.g. `slugify(n)`.
    invk = commonSafeLibrary.getAnInvocation()
    or
    // A member of the module is invoked, e.g. `marked.parse(n)`.
    invk = commonSafeLibrary.getAPropertyRead().getAnInvocation()
  )
}
|
||||
|
||||
/**
 * Holds if `n` is an argument to a call whose callee name contains, case-insensitively, one of
 * `escape`, `valid`, `validate`, `sanitize` or `purify`.
 */
predicate isSanitizer(DataFlow::Node n) {
  exists(DataFlow::CallNode call, string calleeName |
    n = call.getAnArgument() and
    calleeName = call.getCalleeName()
  |
    calleeName.regexpMatch("(?i).*(escape|valid(ate)?|sanitize|purify).*")
  )
}
|
||||
|
||||
/**
 * Holds if `n` is an argument to a call whose callee name looks like that of a predicate
 * function: it starts with `equals`, or with an optional `is`/`has`/`can` prefix followed by an
 * underscore or an upper-case letter.
 */
predicate isPredicate(DataFlow::Node n) {
  exists(DataFlow::CallNode call, string calleeName |
    n = call.getAnArgument() and
    calleeName = call.getCalleeName()
  |
    // NOTE(review): the empty alternative in `(|is|has|can)` means that any callee name starting
    // with `_` or an upper-case letter matches - confirm that this is intended.
    calleeName.regexpMatch("(equals|(|is|has|can)(_|[A-Z])).*")
  )
}
|
||||
|
||||
/**
 * Holds if `n` is an argument to a call whose callee name is, case-insensitively, `md5`, `hash`,
 * or `sha` optionally followed by digits.
 */
predicate isHash(DataFlow::Node n) {
  exists(DataFlow::CallNode call, string calleeName |
    n = call.getAnArgument() and
    calleeName = call.getCalleeName()
  |
    calleeName.regexpMatch("(?i)^(sha\\d*|md5|hash)$")
  )
}
|
||||
|
||||
/**
 * Holds if the data flow node `n` is a (possibly indirect) argument of a likely external
 * library call.
 *
 * This covers direct arguments of such calls, as well as values written to properties of, or
 * stored as array elements of, nodes that themselves satisfy this predicate.
 */
predicate flowsToArgumentOfLikelyExternalLibraryCall(DataFlow::Node n) {
  // Base case: a direct argument.
  exists(DataFlow::CallNode call | call = getACallWithoutCallee() | n = call.getAnArgument())
  or
  // Recursive case: the right-hand side of a property write on a qualifying source node.
  exists(DataFlow::SourceNode src |
    n = src.getAPropertyWrite().getRhs() and
    flowsToArgumentOfLikelyExternalLibraryCall(src)
  )
  or
  // Recursive case: an element of a qualifying array creation.
  exists(DataFlow::ArrayCreationNode arr |
    n = arr.getAnElement() and
    flowsToArgumentOfLikelyExternalLibraryCall(arr)
  )
}
|
||||
|
||||
/**
 * Gets a call which is likely to be to an external non-built-in library, approximated as a call
 * returned by `getACallWithoutCallee`.
 */
DataFlow::CallNode getALikelyExternalLibraryCall() {
  exists(DataFlow::CallNode call | call = getACallWithoutCallee() | result = call)
}
|
||||
|
||||
/**
 * Gets a node that flows to callback-parameter `p`, where `p` is the last parameter of some
 * function and is itself called. `t` tracks the state of the backwards type tracking.
 */
private DataFlow::SourceNode getACallback(DataFlow::ParameterNode p, DataFlow::TypeBackTracker t) {
  // Start of the backwards tracking: the callback parameter itself.
  t.start() and
  exists(p.getACall()) and
  p = any(DataFlow::FunctionNode f).getLastParameter() and
  result = p
  or
  // Backwards tracking step.
  exists(DataFlow::TypeBackTracker t2, DataFlow::SourceNode later |
    later = getACallback(p, t2) and
    result = later.backtrack(t2, t)
  )
}
|
||||
|
||||
/**
 * Gets a call for which we do not have the callee (i.e. the definition of the called function).
 * This acts as a heuristic for identifying calls to external library functions.
 *
 * Callees defined in externs do not count as available definitions, and calls whose callee
 * node is reached by a callback parameter that a function node flows to are excluded.
 */
private DataFlow::CallNode getACallWithoutCallee() {
  // Every resolved callee, if any, lives in an externs top-level.
  not exists(Function callee |
    callee = result.getACallee() and
    not callee.getTopLevel().isExterns()
  ) and
  // The call is not an invocation of a callback parameter that receives a known function.
  not exists(DataFlow::ParameterNode param, DataFlow::FunctionNode callback |
    callback = getACallback(param, DataFlow::TypeBackTracker::end()) and
    param.flowsTo(result.getCalleeNode())
  )
}
|
||||
@@ -0,0 +1,123 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines shared code used by the path injection boosted query.
|
||||
*/
|
||||
|
||||
import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
import semmle.javascript.security.dataflow.TaintedPathCustomizations
|
||||
import AdaptiveThreatModeling
|
||||
import CoreKnowledge as CoreKnowledge
|
||||
import StandardEndpointFilters as StandardEndpointFilters
|
||||
|
||||
/**
 * This module provides logic to filter candidate sinks to those which are likely path injection
 * sinks.
 */
module SinkEndpointFilter {
  private import javascript
  private import TaintedPath

  /**
   * Gets a reason why the data flow node `sinkCandidate` should be excluded as a sink candidate.
   *
   * A sink candidate for which this predicate has no result is treated as an effective sink.
   */
  string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
    // Explicitly allow the following heuristic sinks: candidates matching any of these
    // heuristics never get an exclusion reason.
    //
    // These are mostly copied from the `HeuristicTaintedPathSink` class defined within
    // `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
    // We can't reuse the class because importing that file would cause us to treat these
    // heuristic sinks as known sinks.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(file|folder|dir|absolute)") and
    not isArgTo(sinkCandidate, "(?i)(get|read)file") and
    not exists(string pathPattern |
      // paths with at least two parts, and either a trailing or leading slash
      pathPattern = ["(?i)([a-z0-9_.-]+/){2,}", "(?i)(/[a-z0-9_.-]+){2,}"] and
      isConcatenatedWithString(sinkCandidate, pathPattern)
    ) and
    not isConcatenatedWithStrings(".*/", sinkCandidate, "/.*") and
    // In addition to the names from `HeuristicTaintedPathSink` in the first
    // `isAssignedToOrConcatenatedWith` call above, we also allow the noisier "path" name.
    not isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)path") and
    result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
  }
}
|
||||
|
||||
/** The ATM configuration for the path injection boosted query. */
class TaintedPathATMConfig extends ATMConfig {
  TaintedPathATMConfig() { this = "TaintedPathATMConfig" }

  /** Holds if `source` is a `TaintedPath::Source`. */
  override predicate isKnownSource(DataFlow::Node source) {
    exists(TaintedPath::Source knownSource | source = knownSource)
  }

  /** Holds if `sink` is a `TaintedPath::Sink`. */
  override predicate isKnownSink(DataFlow::Node sink) {
    exists(TaintedPath::Sink knownSink | sink = knownSink)
  }

  /** Holds if the sink endpoint filter gives no reason to exclude `sinkCandidate`. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason |
      reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
    )
  }

  /** Gets the tainted path sink endpoint type. */
  override EndpointType getASinkEndpointType() { result = any(TaintedPathSinkType t) }
}
|
||||
|
||||
/**
 * A taint-tracking configuration for reasoning about path injection vulnerabilities.
 *
 * This is largely a copy of the taint tracking configuration for the standard path injection
 * query, except additional ATM sinks have been added to the `isSink` predicate.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "TaintedPathATM" }

  override predicate isSource(DataFlow::Node source) {
    exists(TaintedPath::Source knownSource | source = knownSource)
  }

  override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
    exists(TaintedPath::Sink knownSink |
      knownSink = sink and
      label = knownSink.getAFlowLabel()
    )
    or
    // Allow effective sinks to have any taint label
    exists(TaintedPathATMConfig cfg | cfg.isEffectiveSink(sink))
  }

  override predicate isSanitizer(DataFlow::Node node) {
    exists(TaintedPath::Sanitizer sanitizer | node = sanitizer)
  }

  override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode node) {
    exists(BarrierGuardNodeAsSanitizerGuardNode guard | node = guard)
  }

  override predicate isAdditionalFlowStep(
    DataFlow::Node src, DataFlow::Node dst, DataFlow::FlowLabel srclabel,
    DataFlow::FlowLabel dstlabel
  ) {
    // Reuse the additional flow steps of the standard path injection query.
    TaintedPath::isAdditionalTaintedPathFlowStep(src, dst, srclabel, dstlabel)
  }
}
|
||||
|
||||
/**
 * A `TaintedPath::BarrierGuardNode`, viewed as a sanitizer guard for path injection.
 *
 * The standard library path injection query is built on a data flow configuration and so defines
 * barrier guards. The boosted query uses a taint tracking configuration instead, in order to find
 * new kinds of less certain results. Taint tracking configurations take sanitizer guards rather
 * than barrier guards, so this class ports the standard library's barrier guards over.
 */
class BarrierGuardNodeAsSanitizerGuardNode extends TaintTracking::LabeledSanitizerGuardNode {
  BarrierGuardNodeAsSanitizerGuardNode() { this instanceof TaintedPath::BarrierGuardNode }

  /**
   * Holds if this guard blocks `e` when it evaluates to `outcome`, either unconditionally or for
   * some flow label.
   */
  override predicate sanitizes(boolean outcome, Expr e) {
    this.blocks(outcome, e, _)
    or
    this.blocks(outcome, e)
  }

  /**
   * Holds if this guard sanitizes `e` under `outcome`. `label` is not consulted: whenever the
   * unlabeled predicate holds, this holds for every flow label.
   */
  override predicate sanitizes(boolean outcome, Expr e, DataFlow::FlowLabel label) {
    this.sanitizes(outcome, e)
  }
}
|
||||
@@ -0,0 +1,103 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines shared code used by the XSS boosted query.
|
||||
*/
|
||||
|
||||
private import semmle.javascript.heuristics.SyntacticHeuristics
|
||||
private import semmle.javascript.security.dataflow.DomBasedXssCustomizations
|
||||
import AdaptiveThreatModeling
|
||||
import CoreKnowledge as CoreKnowledge
|
||||
import StandardEndpointFilters as StandardEndpointFilters
|
||||
|
||||
/**
 * This module provides logic to filter candidate sinks to those which are likely XSS sinks.
 */
module SinkEndpointFilter {
  private import javascript
  private import DomBasedXss

  /**
   * Provides a set of reasons why a given data flow node should be excluded as a sink candidate.
   *
   * If this predicate has no results for a sink candidate `n`, then we should treat `n` as an
   * effective sink.
   *
   * A reason is only reported when none of the explicitly allowed heuristic sink patterns in the
   * `not (...)` part below applies to `sinkCandidate`.
   */
  string getAReasonSinkExcluded(DataFlow::Node sinkCandidate) {
    (
      result = StandardEndpointFilters::getAReasonSinkExcluded(sinkCandidate)
      or
      exists(DataFlow::CallNode call | sinkCandidate = call.getAnArgument() |
        call.getCalleeName() = "setState"
      ) and
      result = "setState calls ought to be safe in react applications"
    ) and
    not (
      // Explicitly allow the following heuristic sinks.
      //
      // These are copied from the `HeuristicDomBasedXssSink` class defined within
      // `codeql/javascript/ql/src/semmle/javascript/heuristics/AdditionalSinks.qll`.
      // We can't reuse the class because importing that file would cause us to treat these
      // heuristic sinks as known sinks.
      isAssignedToOrConcatenatedWith(sinkCandidate, "(?i)(html|innerhtml)")
      or
      isArgTo(sinkCandidate, "(?i)(html|render)")
      or
      sinkCandidate instanceof StringOps::HtmlConcatenationLeaf
      or
      isConcatenatedWithStrings("(?is).*<[a-z ]+.*", sinkCandidate, "(?s).*>.*")
      or
      // In addition to the heuristic sinks from `HeuristicDomBasedXssSink`, explicitly allow
      // property writes like `elem.innerHTML = <TAINT>` that may not be picked up as HTML
      // concatenation leaves.
      //
      // `regexpMatch` has to match the whole property name, so the pattern needs `.*` on both
      // sides of `html` to mean "contains html". The previous pattern `.*html*` applied `*` to
      // the final `l` only, i.e. it matched names ending in `htm` followed by any number of `l`.
      exists(DataFlow::PropWrite pw |
        pw.getPropertyName().regexpMatch("(?i).*html.*") and
        pw.getRhs() = sinkCandidate
      )
    )
  }
}
|
||||
|
||||
/**
 * The ATM configuration for the boosted XSS query.
 *
 * Known sources and sinks are the `DomBasedXss::Source` and `DomBasedXss::Sink` nodes. A candidate
 * sink is effective exactly when `SinkEndpointFilter::getAReasonSinkExcluded` yields no reason to
 * exclude it.
 */
class DomBasedXssATMConfig extends ATMConfig {
  DomBasedXssATMConfig() { this = "DomBasedXssATMConfig" }

  /** Holds if `source` is a `DomBasedXss::Source`. */
  override predicate isKnownSource(DataFlow::Node source) {
    source = any(DomBasedXss::Source known)
  }

  /** Holds if `sink` is a `DomBasedXss::Sink`. */
  override predicate isKnownSink(DataFlow::Node sink) { sink = any(DomBasedXss::Sink known) }

  /** Holds if the sink endpoint filter gives no reason to exclude `sinkCandidate`. */
  override predicate isEffectiveSink(DataFlow::Node sinkCandidate) {
    not exists(string reason |
      reason = SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate)
    )
  }

  /** Gets the endpoint type used for sinks of this query: `XssSinkType`. */
  override EndpointType getASinkEndpointType() { result = any(XssSinkType t) }
}
|
||||
|
||||
/**
 * A taint-tracking configuration for reasoning about XSS vulnerabilities.
 *
 * Sources, sinks, sanitizers, sanitizer guards and sanitizer edges are all taken from the
 * `DomBasedXss` library; in addition, `isSink` accepts the effective sinks of
 * `DomBasedXssATMConfig`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "DomBasedXssATMConfiguration" }

  /** Holds if `source` is a `DomBasedXss::Source`. */
  override predicate isSource(DataFlow::Node source) { source = any(DomBasedXss::Source s) }

  /** Holds if `sink` is an effective ATM sink or a `DomBasedXss::Sink`. */
  override predicate isSink(DataFlow::Node sink) {
    exists(DomBasedXssATMConfig atmConfig | atmConfig.isEffectiveSink(sink))
    or
    sink instanceof DomBasedXss::Sink
  }

  /** Holds if `node` is a `DomBasedXss::Sanitizer` or a sanitizer of the parent configuration. */
  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof DomBasedXss::Sanitizer
    or
    super.isSanitizer(node)
  }

  /** Holds if `guard` is a `DomBasedXss::SanitizerGuard`. */
  override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode guard) {
    guard = any(DomBasedXss::SanitizerGuard g)
  }

  /** Delegates to `DomBasedXss::isOptionallySanitizedEdge`. */
  override predicate isSanitizerEdge(DataFlow::Node pred, DataFlow::Node succ) {
    DomBasedXss::isOptionallySanitizedEdge(pred, succ)
  }
}
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -0,0 +1,6 @@
|
||||
# Library pack containing the experimental ATM libraries for JavaScript.
name: codeql/javascript-experimental-atm-lib
version: 0.0.0
extractor: javascript
library: true
dependencies:
  # Must be nested under `dependencies`; at column 0 it would become a separate top-level key.
  codeql/javascript-all: "*"
|
||||
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name NoSQL database query built from user-controlled sources (boosted)
|
||||
* @description Building a database query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious code by the user.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 8.8
|
||||
* @id adaptive-threat-modeling/js/nosql-injection
|
||||
* @tags experimental experimental/atm security
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
import experimental.adaptivethreatmodeling.NosqlInjectionATM
|
||||
|
||||
from
  DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink,
  DataFlow::Node sourceNode, DataFlow::Node sinkNode, float score, string scoreString
where
  // Name the underlying data flow nodes once; the remaining conditions are phrased over them.
  sourceNode = source.getNode() and
  sinkNode = sink.getNode() and
  cfg.hasFlowPath(source, sink) and
  // Drop flows that `isFlowLikelyInBaseQuery` attributes to the base query.
  not isFlowLikelyInBaseQuery(sourceNode, sinkNode) and
  score = getScoreForFlow(sourceNode, sinkNode) and
  scoreString = getScoreStringForFlow(sourceNode, sinkNode)
select sinkNode, source, sink,
  "[Score = " + scoreString + "] This may be a NoSQL query depending on $@ " +
    getAdditionalAlertInfo(sourceNode, sinkNode), sourceNode, "a user-provided value", score
|
||||
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name SQL database query built from user-controlled sources (boosted)
|
||||
* @description Building a database query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious code by the user.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 8.8
|
||||
* @id adaptive-threat-modeling/js/sql-injection
|
||||
* @tags experimental experimental/atm security
|
||||
*/
|
||||
|
||||
import experimental.adaptivethreatmodeling.SqlInjectionATM
|
||||
import ATM::ResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
  DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score,
  string scoreString
where
  cfg.hasFlowPath(source, sink) and
  // Drop flows that `isFlowLikelyInBaseQuery` attributes to the base query.
  not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and
  score = getScoreForFlow(source.getNode(), sink.getNode()) and
  scoreString = getScoreStringForFlow(source.getNode(), sink.getNode())
// The message names the query by the suffix of its `@id`, as the path-injection and XSS boosted
// queries do ("js/path-injection", "js/xss"); "js/sql" did not correspond to any query id.
select sink.getNode(), source, sink,
  "[Score = " + scoreString + "] This may be a js/sql-injection result depending on $@ " +
    getAdditionalAlertInfo(source.getNode(), sink.getNode()), source.getNode(),
  "a user-provided value", score
|
||||
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name Uncontrolled data used in path expression (boosted)
|
||||
* @description Accessing paths influenced by users can allow an attacker to access
|
||||
* unexpected resources.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @id adaptive-threat-modeling/js/path-injection
|
||||
* @tags experimental experimental/atm security
|
||||
*/
|
||||
|
||||
import ATM::ResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
import experimental.adaptivethreatmodeling.TaintedPathATM
|
||||
|
||||
from
  DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink,
  DataFlow::Node sourceNode, DataFlow::Node sinkNode, float score, string scoreString
where
  // Name the underlying data flow nodes once; the remaining conditions are phrased over them.
  sourceNode = source.getNode() and
  sinkNode = sink.getNode() and
  cfg.hasFlowPath(source, sink) and
  // Drop flows that `isFlowLikelyInBaseQuery` attributes to the base query.
  not isFlowLikelyInBaseQuery(sourceNode, sinkNode) and
  score = getScoreForFlow(sourceNode, sinkNode) and
  scoreString = getScoreStringForFlow(sourceNode, sinkNode)
select sinkNode, source, sink,
  "[Score = " + scoreString + "] This may be a js/path-injection result depending on $@ " +
    getAdditionalAlertInfo(sourceNode, sinkNode), sourceNode, "a user-provided value", score
|
||||
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* @name Client-side cross-site scripting (boosted)
|
||||
* @description Writing user input directly to the DOM allows for
|
||||
* a cross-site scripting vulnerability.
|
||||
* @kind path-problem
|
||||
* @scored
|
||||
* @problem.severity error
|
||||
* @security-severity 6.1
|
||||
* @id adaptive-threat-modeling/js/xss
|
||||
* @tags experimental experimental/atm security
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import ATM::ResultsInfo
|
||||
import DataFlow::PathGraph
|
||||
import experimental.adaptivethreatmodeling.XssATM
|
||||
|
||||
from
  DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink,
  DataFlow::Node sourceNode, DataFlow::Node sinkNode, float score, string scoreString
where
  // Name the underlying data flow nodes once; the remaining conditions are phrased over them.
  sourceNode = source.getNode() and
  sinkNode = sink.getNode() and
  cfg.hasFlowPath(source, sink) and
  // Drop flows that `isFlowLikelyInBaseQuery` attributes to the base query.
  not isFlowLikelyInBaseQuery(sourceNode, sinkNode) and
  score = getScoreForFlow(sourceNode, sinkNode) and
  scoreString = getScoreStringForFlow(sourceNode, sinkNode)
select sinkNode, source, sink,
  "[Score = " + scoreString + "] This may be a js/xss result depending on $@ " +
    getAdditionalAlertInfo(sourceNode, sinkNode), sourceNode, "a user-provided value", score
|
||||
@@ -0,0 +1,8 @@
|
||||
# Query suite selecting the four boosted (ATM) queries by their `@id`.
- description: ATM boosted Code Scanning queries for JavaScript
- qlpack: codeql/javascript-experimental-atm-src
- include:
    # `id` and its list must be nested under `include` to act as the filter for that instruction.
    id:
      - adaptive-threat-modeling/js/nosql-injection
      - adaptive-threat-modeling/js/sql-injection
      - adaptive-threat-modeling/js/path-injection
      - adaptive-threat-modeling/js/xss
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -0,0 +1,6 @@
|
||||
# Query pack containing the boosted (ATM) queries for JavaScript.
name: codeql/javascript-experimental-atm-src
language: javascript
version: 0.0.0
suites: codeql-suites
dependencies:
  # Must be nested under `dependencies`; at column 0 it would become a separate top-level key.
  codeql/javascript-experimental-atm-lib: "*"
|
||||
@@ -0,0 +1 @@
|
||||
<queries language="javascript"/>
|
||||
Reference in New Issue
Block a user