Merge branch 'main' into shared-concepts-scaffolding

This commit is contained in:
Rasmus Wriedt Larsen
2022-03-22 10:36:33 +01:00
190 changed files with 13626 additions and 1805 deletions

View File

@@ -371,8 +371,12 @@ module API {
/**
* An API entry point.
*
* Extend this class to define additional API entry points other than modules.
* Typical examples include global variables.
* By default, API graph nodes are only created for nodes that come from an external
* library or escape into an external library. The points where values are cross the boundary
* between codebases are called "entry points".
*
* Imports and exports are considered entry points by default, but additional entry points may
* be added by extending this class. Typical examples include global variables.
*/
abstract class EntryPoint extends string {
bindingset[this]
@@ -385,7 +389,10 @@ module API {
abstract DataFlow::Node getARhs();
/** Gets an API-node for this entry point. */
API::Node getNode() { result = root().getASuccessor(Label::entryPoint(this)) }
API::Node getANode() { result = root().getASuccessor(Label::entryPoint(this)) }
/** DEPRECATED. Use `getANode()` instead. */
deprecated API::Node getNode() { result = this.getANode() }
}
/**

View File

@@ -990,16 +990,22 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
}
/**
* Provides regular expression patterns.
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string commonTLD() {
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
/**
* Gets a pattern that matches common top-level domain names in lower case.
* DEPRECATED: use `getACommonTld` instead
*/
deprecated predicate commonTLD = getACommonTld/0;
}
/**

View File

@@ -1365,27 +1365,31 @@ private predicate loadStep(
/**
* Holds if there is flow to `base.startProp`, and `base.startProp` flows to `nd.endProp` under `cfg/summary`.
*
* If `onlyRelevantInCall` is true, the `base` object will not be propagated out of return edges, because
* the flow that originally reached `base.startProp` used a call edge.
*/
pragma[nomagic]
private predicate reachableFromStoreBase(
string startProp, string endProp, DataFlow::Node base, DataFlow::Node nd,
DataFlow::Configuration cfg, PathSummary summary
DataFlow::Configuration cfg, PathSummary summary, boolean onlyRelevantInCall
) {
exists(PathSummary s1, PathSummary s2, DataFlow::Node rhs |
reachableFromSource(rhs, cfg, s1)
reachableFromSource(rhs, cfg, s1) and
onlyRelevantInCall = s1.hasCall()
or
reachableFromStoreBase(_, _, _, rhs, cfg, s1)
reachableFromStoreBase(_, _, _, rhs, cfg, s1, onlyRelevantInCall)
|
storeStep(rhs, nd, startProp, cfg, s2) and
endProp = startProp and
base = nd and
summary =
MkPathSummary(false, s1.hasCall().booleanOr(s2.hasCall()), DataFlow::FlowLabel::data(),
DataFlow::FlowLabel::data())
MkPathSummary(false, s2.hasCall(), DataFlow::FlowLabel::data(), DataFlow::FlowLabel::data())
)
or
exists(PathSummary newSummary, PathSummary oldSummary |
reachableFromStoreBaseStep(startProp, endProp, base, nd, cfg, oldSummary, newSummary) and
reachableFromStoreBaseStep(startProp, endProp, base, nd, cfg, oldSummary, newSummary,
onlyRelevantInCall) and
summary = oldSummary.appendValuePreserving(newSummary)
)
}
@@ -1399,14 +1403,16 @@ private predicate reachableFromStoreBase(
pragma[noinline]
private predicate reachableFromStoreBaseStep(
string startProp, string endProp, DataFlow::Node base, DataFlow::Node nd,
DataFlow::Configuration cfg, PathSummary oldSummary, PathSummary newSummary
DataFlow::Configuration cfg, PathSummary oldSummary, PathSummary newSummary,
boolean onlyRelevantInCall
) {
exists(DataFlow::Node mid |
reachableFromStoreBase(startProp, endProp, base, mid, cfg, oldSummary) and
flowStep(mid, cfg, nd, newSummary)
reachableFromStoreBase(startProp, endProp, base, mid, cfg, oldSummary, onlyRelevantInCall) and
flowStep(mid, cfg, nd, newSummary) and
onlyRelevantInCall.booleanAnd(newSummary.hasReturn()) = false
or
exists(string midProp |
reachableFromStoreBase(startProp, midProp, base, mid, cfg, oldSummary) and
reachableFromStoreBase(startProp, midProp, base, mid, cfg, oldSummary, onlyRelevantInCall) and
isAdditionalLoadStoreStep(mid, nd, midProp, endProp, cfg) and
newSummary = PathSummary::level()
)
@@ -1446,7 +1452,7 @@ private predicate storeToLoad(
PathSummary s1, PathSummary s2
|
storeStep(pred, storeBase, storeProp, cfg, s1) and
reachableFromStoreBase(storeProp, loadProp, storeBase, loadBase, cfg, s2) and
reachableFromStoreBase(storeProp, loadProp, storeBase, loadBase, cfg, s2, _) and
oldSummary = s1.appendValuePreserving(s2) and
loadStep(loadBase, succ, loadProp, cfg, newSummary)
)

View File

@@ -23,7 +23,7 @@ module D3 {
or
result = API::moduleImport("d3-node").getInstance().getMember("d3")
or
result = any(D3GlobalEntry i).getNode()
result = any(D3GlobalEntry i).getANode()
}
/**

View File

@@ -17,7 +17,7 @@ module History {
* Gets a reference to the [`history`](https://npmjs.org/package/history) library.
*/
private API::Node history() {
result = [API::moduleImport("history"), any(HistoryGlobalEntry h).getNode()]
result = [API::moduleImport("history"), any(HistoryGlobalEntry h).getANode()]
}
/**

View File

@@ -27,7 +27,7 @@ private module Immutable {
API::Node immutableImport() {
result = API::moduleImport("immutable")
or
result = any(ImmutableGlobalEntry i).getNode()
result = any(ImmutableGlobalEntry i).getANode()
}
/**

View File

@@ -45,7 +45,7 @@ private module Console {
*/
private API::Node console() {
result = API::moduleImport("console") or
result = any(ConsoleGlobalEntry e).getNode()
result = any(ConsoleGlobalEntry e).getANode()
}
/**

View File

@@ -151,7 +151,7 @@ module NestJS {
private API::Node validationPipe() {
result = nestjs().getMember("ValidationPipe")
or
result = any(ValidationNodeEntry e).getNode()
result = any(ValidationNodeEntry e).getANode()
}
/**

View File

@@ -1111,7 +1111,7 @@ module Redux {
/** A heuristic call to `connect`, recognized by it taking arguments named `mapStateToProps` and `mapDispatchToProps`. */
private class HeuristicConnectFunction extends ConnectCall {
HeuristicConnectFunction() { this = any(HeuristicConnectEntryPoint e).getNode().getACall() }
HeuristicConnectFunction() { this = any(HeuristicConnectEntryPoint e).getANode().getACall() }
override API::Node getMapStateToProps() {
result = getAParameter() and

View File

@@ -19,7 +19,7 @@ module TrustedTypes {
override DataFlow::Node getARhs() { none() }
}
private API::Node trustedTypesObj() { result = any(TrustedTypesEntry entry).getNode() }
private API::Node trustedTypesObj() { result = any(TrustedTypesEntry entry).getANode() }
/** A call to `trustedTypes.createPolicy`. */
class PolicyCreation extends API::CallNode {

View File

@@ -35,7 +35,7 @@ module Vue {
API::Node vueLibrary() {
result = API::moduleImport("vue")
or
result = any(GlobalVueEntryPoint e).getNode()
result = any(GlobalVueEntryPoint e).getANode()
}
/**
@@ -51,7 +51,7 @@ module Vue {
or
result = vueLibrary().getMember("component").getReturn()
or
result = any(VueFileImportEntryPoint e).getNode()
result = any(VueFileImportEntryPoint e).getANode()
}
/**

View File

@@ -8,10 +8,6 @@
* The package name refers to an NPM package name or a path within a package name such as `lodash/extend`.
* The string `global` refers to the global object (whether it came from the `global` package or not).
*
* The following tokens have a language-specific interpretation:
* - `Instance`: the value returned by a `new`-call to a function
* - `Awaited`: the value from a resolved promise
*
* A `(package, type)` tuple may refer to the exported type named `type` from the NPM package `package`.
* For example, `(express, Request)` would match a parameter below due to the type annotation:
* ```ts

View File

@@ -22,16 +22,26 @@
* or the empty string if referring to the package itself.
* It can also be a synthetic type name defined by a type definition (see type definitions below).
* 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`.
* The possible access path tokens are:
* - Member[x] : a property named `x`. May be a comma-separated list of named.
*
* Every language supports the following tokens:
* - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on.
* - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* - ReturnValue: the value returned by a function call
* - Instance: the value returned by a constructor call
* - Awaited: the value from a resolved promise/future-like object
* - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* - Other language-specific tokens mentioned in `ModelsAsData.qll`.
*
* The following tokens are common and should be implemented for languages where it makes sense:
* - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names.
* - Instance: an instance of a class
* - Subclass: a subclass of a class
* - ArrayElement: an element of array
* - Element: an element of a collection-like object
* - MapKey: a key in map-like object
* - MapValue: a value in a map-like object
* - Awaited: the value from a resolved promise/future-like object
*
* For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported.
*
* 4. The `input` and `output` columns specify how data enters and leaves the element selected by the
* first `(package, type, path)` tuple. Both strings are `.`-separated access paths
* of the same syntax as the `path` column.
@@ -149,6 +159,14 @@ module ModelInput {
private import ModelInput
/**
* An empty class, except in specific tests.
*
* If this is non-empty, all models are parsed even if the package is not
* considered relevant for the current database.
*/
abstract class TestAllModels extends Unit { }
/**
* Append `;dummy` to the value of `s` to work around the fact that `string.split(delim,n)`
* does not preserve empty trailing substrings.
@@ -231,7 +249,17 @@ string getAPackageAlias(string package) {
* Holds if CSV rows involving `package` might be relevant for the analysis of this database.
*/
private predicate isRelevantPackage(string package) {
Specific::isPackageUsed(package)
(
sourceModel(package, _, _, _) or
sinkModel(package, _, _, _) or
summaryModel(package, _, _, _, _, _) or
typeModel(package, _, _, _, _)
) and
(
Specific::isPackageUsed(package)
or
exists(TestAllModels t)
)
or
exists(string other |
isRelevantPackage(other) and
@@ -315,7 +343,7 @@ private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, A
* Gets the API node identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple.
*/
pragma[nomagic]
API::Node getNodeFromPath(string package, string type, AccessPath path, int n) {
private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) {
isRelevantFullPath(package, type, path) and
(
n = 0 and
@@ -357,6 +385,42 @@ Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPa
result = getInvocationFromPath(package, type, path, path.getNumToken())
}
/**
* Holds if `name` is a valid name for an access path token in the identifying access path.
*/
bindingset[name]
predicate isValidTokenNameInIdentifyingAccessPath(string name) {
name = ["Argument", "Parameter", "ReturnValue", "WithArity"]
or
Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
}
/**
* Holds if `name` is a valid name for an access path token with no arguments, occuring
* in an identifying access path.
*/
bindingset[name]
predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = "ReturnValue"
or
Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
}
/**
* Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
* in an identifying access path.
*/
bindingset[name, argument]
predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
name = ["Argument", "Parameter"] and
argument.regexpMatch("(N-|-)?\\d+(\\.\\.(N-|-)?\\d+)?")
or
name = "WithArity" and
argument.regexpMatch("\\d+(\\.\\.\\d+)?")
or
Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument)
}
/**
* Module providing access to the imported models in terms of API graph nodes.
*/
@@ -382,26 +446,23 @@ module ModelOutput {
}
/**
* Holds if a relevant CSV summary row has the given `kind`, `input` and `output`.
* Holds if a relevant CSV summary exists for these parameters.
*/
predicate summaryModel(string input, string output, string kind) {
exists(string package |
isRelevantPackage(package) and
summaryModel(package, _, _, input, output, kind)
)
predicate relevantSummaryModel(
string package, string type, string path, string input, string output, string kind
) {
isRelevantPackage(package) and
summaryModel(package, type, path, input, output, kind)
}
/**
* Holds if a summary edge with the given `input, output, kind` columns have a `package, type, path` tuple
* that resolves to `baseNode`.
* Holds if a `baseNode` is an invocation identified by the `package,type,path` part of a summary row.
*/
predicate resolvedSummaryBase(
Specific::InvokeNode baseNode, AccessPath input, AccessPath output, string kind
string package, string type, string path, Specific::InvokeNode baseNode
) {
exists(string package, string type, AccessPath path |
summaryModel(package, type, path, input, output, kind) and
baseNode = getInvocationFromPath(package, type, path)
)
summaryModel(package, type, path, _, _, _) and
baseNode = getInvocationFromPath(package, type, path)
}
/**
@@ -414,4 +475,48 @@ module ModelOutput {
result = getNodeFromPath(package2, type2, path)
)
}
/**
* Gets an error message relating to an invalid CSV row in a model.
*/
string getAWarning() {
// Check number of columns
exists(string row, string kind, int expectedArity, int actualArity |
any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 4
or
any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 4
or
any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 6
or
any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 5
|
actualArity = count(row.indexOf(";")) + 1 and
actualArity != expectedArity and
result =
"CSV " + kind + " row should have " + expectedArity + " columns but has " + actualArity +
": " + row
)
or
// Check names and arguments of access path tokens
exists(AccessPath path, AccessPathToken token |
isRelevantFullPath(_, _, path) and
token = path.getToken(_)
|
not isValidTokenNameInIdentifyingAccessPath(token.getName()) and
result = "Invalid token name '" + token.getName() + "' in access path: " + path
or
isValidTokenNameInIdentifyingAccessPath(token.getName()) and
exists(string argument |
argument = token.getAnArgument() and
not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and
result =
"Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path
)
or
isValidTokenNameInIdentifyingAccessPath(token.getName()) and
token.getNumArgument() = 0 and
not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and
result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path
)
}
}

View File

@@ -2,7 +2,7 @@
* Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`.
*
* It must export the following members:
* ```codeql
* ```ql
* class Unit // a unit type
* module API // the API graph module
* predicate isPackageUsed(string package)
@@ -19,6 +19,7 @@ private import ApiGraphModels
class Unit = JS::Unit;
// Re-export libraries needed by ApiGraphModels.qll
module API = JS::API;
import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
@@ -66,7 +67,7 @@ private class GlobalApiEntryPoint extends API::EntryPoint {
* Gets an API node referring to the given global variable (if relevant).
*/
private API::Node getGlobalNode(string globalName) {
result = any(GlobalApiEntryPoint e | e.getGlobal() = globalName).getNode()
result = any(GlobalApiEntryPoint e | e.getGlobal() = globalName).getANode()
}
/** Gets a JavaScript-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */
@@ -147,10 +148,12 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat
* Holds if `path` is an input or output spec for a summary with the given `base` node.
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath path) {
ModelOutput::resolvedSummaryBase(base, path, _, _)
or
ModelOutput::resolvedSummaryBase(base, _, path, _)
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
exists(string package, string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(package, type, path, input, output, _) and
ModelOutput::resolvedSummaryBase(package, type, path, base) and
inputOrOutput = [input, output]
)
}
/**
@@ -178,8 +181,12 @@ private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPat
* Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`.
*/
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(API::InvokeNode base, AccessPath input, AccessPath output |
ModelOutput::resolvedSummaryBase(base, input, output, kind) and
exists(
string package, string type, string path, API::InvokeNode base, AccessPath input,
AccessPath output
|
ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) and
ModelOutput::resolvedSummaryBase(package, type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
@@ -189,3 +196,29 @@ class InvokeNode = API::InvokeNode;
/** Gets an `InvokeNode` corresponding to an invocation of `node`. */
InvokeNode getAnInvocationOf(API::Node node) { result = node.getAnInvocation() }
/**
* Holds if `name` is a valid name for an access path token in the identifying access path.
*/
bindingset[name]
predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
name = ["Member", "Instance", "Awaited", "ArrayElement", "Element", "MapValue", "NewCall", "Call"]
}
/**
* Holds if `name` is a valid name for an access path token with no arguments, occuring
* in an identifying access path.
*/
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = ["Instance", "Awaited", "ArrayElement", "Element", "MapValue", "NewCall", "Call"]
}
/**
* Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
* in an identifying access path.
*/
bindingset[name, argument]
predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
name = ["Member"] and
exists(argument)
}

View File

@@ -4,10 +4,14 @@
* for adding your own.
*/
import javascript
import semmle.javascript.security.dataflow.RemoteFlowSources
/**
* Provides default sources, sinks and sanitizers for reasoning about
* writing user-controlled data to files, as well as extension points
* for adding your own.
*/
module HttpToFileAccess {
import HttpToFileAccessSpecific
/**
* A data flow source for writing user-controlled data to files.
*/
@@ -23,18 +27,6 @@ module HttpToFileAccess {
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* An access to a user-controlled HTTP request input, considered as a flow source for writing user-controlled data to files
*/
private class RequestInputAccessAsSource extends Source {
RequestInputAccessAsSource() { this instanceof HTTP::RequestInputAccess }
}
/** A response from a server, considered as a flow source for writing user-controlled data to files. */
private class ServerResponseAsSource extends Source {
ServerResponseAsSource() { this = any(ClientRequest r).getAResponseDataNode() }
}
/** A sink that represents file access method (write, append) argument */
class FileAccessAsSink extends Sink {
FileAccessAsSink() { exists(FileSystemWriteAccess src | this = src.getADataNode()) }

View File

@@ -6,8 +6,7 @@
* `HttpToFileAccessCustomizations` should be imported instead.
*/
import javascript
import HttpToFileAccessCustomizations::HttpToFileAccess
private import HttpToFileAccessCustomizations::HttpToFileAccess
/**
* A taint tracking configuration for writing user-controlled data to files.

View File

@@ -0,0 +1,19 @@
/**
* Provides imports and classes needed for `HttpToFileAccessQuery` and `HttpToFileAccessCustomizations`.
*/
import javascript
import semmle.javascript.security.dataflow.RemoteFlowSources
private import HttpToFileAccessCustomizations::HttpToFileAccess
/**
* An access to a user-controlled HTTP request input, considered as a flow source for writing user-controlled data to files
*/
private class RequestInputAccessAsSource extends Source {
RequestInputAccessAsSource() { this instanceof HTTP::RequestInputAccess }
}
/** A response from a server, considered as a flow source for writing user-controlled data to files. */
private class ServerResponseAsSource extends Source {
ServerResponseAsSource() { this = any(ClientRequest r).getAResponseDataNode() }
}

View File

@@ -3,10 +3,13 @@
* format injections, as well as extension points for adding your own.
*/
import javascript
import semmle.javascript.security.dataflow.DOM
/**
* Provides default sources, sinks and sanitizers for reasoning about
* format injections, as well as extension points for adding your own.
*/
module TaintedFormatString {
import TaintedFormatStringSpecific
/**
* A data flow source for format injections.
*/
@@ -23,9 +26,7 @@ module TaintedFormatString {
abstract class Sanitizer extends DataFlow::Node { }
/** A source of remote user input, considered as a flow source for format injection. */
class RemoteSource extends Source {
RemoteSource() { this instanceof RemoteFlowSource }
}
class RemoteSource extends Source instanceof RemoteFlowSource { }
/**
* A format argument to a printf-like function, considered as a flow sink for format injection.

View File

@@ -8,9 +8,7 @@
* `TaintedFormatStringCustomizations` should be imported instead.
*/
import javascript
import semmle.javascript.security.dataflow.DOM
import TaintedFormatStringCustomizations::TaintedFormatString
private import TaintedFormatStringCustomizations::TaintedFormatString
/**
* A taint-tracking configuration for format injections.

View File

@@ -0,0 +1,6 @@
/**
* Provides JS-specific imports needed for `TaintedFormatStringQuery` and `TaintedFormatStringCustomizations`.
*/
import javascript
import semmle.javascript.security.dataflow.DOM

View File

@@ -98,7 +98,8 @@ module HeuristicNames {
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
*/
string notSensitiveRegexp() {
result = "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|((?<!un)(en))?(crypt|code)).*"
result =
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)).*"
}
/**

View File

@@ -3,7 +3,7 @@
* that match URLs and hostname patterns.
*/
import javascript
private import HostnameRegexpSpecific
/**
* Holds if the given constant is unlikely to occur in the origin part of a URL.
@@ -62,7 +62,7 @@ predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
seq.getChild(i)
.(RegExpConstant)
.getValue()
.regexpMatch("(?i)" + RegExpPatterns::commonTLD() + "(:\\d+)?([/?#].*)?") and
.regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
isDotLike(seq.getChild(i - 1)) and
not (i = 1 and matchesBeginningOfString(seq))
}
@@ -107,3 +107,96 @@ predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
alwaysMatchesHostnameAlt(alt, i - 1) and
alwaysMatchesHostname(alt.getChild(i))
}
/**
* Holds if `term` occurs inside a quantifier or alternative (and thus
* can not be expected to correspond to a unique match), or as part of
* a lookaround assertion (which are rarely used for capture groups).
*/
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
exists(RegExpParent parent | parent = term.getParent() |
parent instanceof RegExpAlt
or
parent instanceof RegExpQuantifier
or
parent instanceof RegExpSubPattern
or
isInsideChoiceOrSubPattern(parent)
)
}
/**
* Holds if `group` is likely to be used as a capture group.
*/
predicate isLikelyCaptureGroup(RegExpGroup group) {
group.isCapture() and
not isInsideChoiceOrSubPattern(group)
}
/**
* Holds if `seq` contains two consecutive dots `..` or escaped dots.
*
* At least one of these dots is not intended to be a subdomain separator,
* so we avoid flagging the pattern in this case.
*/
predicate hasConsecutiveDots(RegExpSequence seq) {
exists(int i |
isDotLike(seq.getChild(i)) and
isDotLike(seq.getChild(i + 1))
)
}
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
seq = regexp.getAChild*() and
exists(RegExpDot unescapedDot, int i, string hostname |
hasTopLevelDomainEnding(seq, i) and
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
not hasConsecutiveDots(unescapedDot.getParent()) and
hostname =
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
seq.getChild(i).getRawValue()
|
if unescapedDot.getParent() instanceof RegExpQuantifier
then
// `.*\.example.com` can match `evil.com/?x=.example.com`
//
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
seq.getChild(0) instanceof RegExpCaret and
not seq.getAChild() instanceof RegExpDollar and
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
msg =
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+ "' which may cause '" + hostname +
"' to be matched anywhere in the URL, outside the hostname."
else
msg =
"has an unescaped '.' before '" + hostname +
"', so it might match more hosts than expected."
)
}
predicate incompleteHostnameRegExp(
RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
) {
exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
)
|
message = "This " + kind + " " + msg and label = "here"
)
}

View File

@@ -0,0 +1 @@
import javascript

View File

@@ -30,7 +30,7 @@
<p>
Escape all meta-characters appropriately when constructing
regular expressions for security checks, pay special attention to the
regular expressions for security checks, and pay special attention to the
<code>.</code> meta-character.
</p>
@@ -59,7 +59,7 @@
<p>
Address this vulnerability by escaping <code>.</code>
appropriately: <code>let regex = /((www|beta)\.)?example\.com/</code>.
appropriately: <code>let regex = /^((www|beta)\.)?example\.com/</code>.
</p>

View File

@@ -11,97 +11,6 @@
* external/cwe/cwe-020
*/
import javascript
import semmle.javascript.CharacterEscapes
import HostnameRegexpShared
/**
* Holds if `term` occurs inside a quantifier or alternative (and thus
* can not be expected to correspond to a unique match), or as part of
* a lookaround assertion (which are rarely used for capture groups).
*/
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
exists(RegExpParent parent | parent = term.getParent() |
parent instanceof RegExpAlt
or
parent instanceof RegExpQuantifier
or
parent instanceof RegExpSubPattern
or
isInsideChoiceOrSubPattern(parent)
)
}
/**
* Holds if `group` is likely to be used as a capture group.
*/
predicate isLikelyCaptureGroup(RegExpGroup group) {
group.isCapture() and
not isInsideChoiceOrSubPattern(group)
}
/**
* Holds if `seq` contains two consecutive dots `..` or escaped dots.
*
* At least one of these dots is not intended to be a subdomain separator,
* so we avoid flagging the pattern in this case.
*/
predicate hasConsecutiveDots(RegExpSequence seq) {
exists(int i |
isDotLike(seq.getChild(i)) and
isDotLike(seq.getChild(i + 1))
)
}
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
seq = regexp.getAChild*() and
exists(RegExpDot unescapedDot, int i, string hostname |
hasTopLevelDomainEnding(seq, i) and
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
not hasConsecutiveDots(unescapedDot.getParent()) and
hostname =
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
seq.getChild(i).getRawValue()
|
if unescapedDot.getParent() instanceof RegExpQuantifier
then
// `.*\.example.com` can match `evil.com/?x=.example.com`
//
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
seq.getChild(0) instanceof RegExpCaret and
not seq.getAChild() instanceof RegExpDollar and
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
msg =
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+ "' which may cause '" + hostname +
"' to be matched anywhere in the URL, outside the hostname."
else
msg =
"has an unescaped '.' before '" + hostname +
"', so it might match more hosts than expected."
)
}
from
RegExpPatternSource re, RegExpTerm regexp, RegExpSequence hostSequence, string msg, string kind,
DataFlow::Node aux
where
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
) and
not CharacterEscapes::hasALikelyRegExpPatternMistake(re)
select hostSequence, "This " + kind + " " + msg, aux, "here"
query predicate problems = incompleteHostnameRegExp/4;

View File

@@ -39,7 +39,7 @@ where
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::getACommonTld() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD

View File

@@ -0,0 +1,6 @@
---
category: minorAnalysis
---
* Fixed an issue that would sometimes prevent the data-flow analysis from finding flow
paths through a function that stores its result on an object.
This may lead to more results for the security queries.

View File

@@ -172,6 +172,7 @@ typeInferenceMismatch
| string-replace.js:3:13:3:20 | source() | string-replace.js:21:6:21:41 | safe(). ... taint) |
| string-replace.js:3:13:3:20 | source() | string-replace.js:22:6:22:48 | safe(). ... taint) |
| string-replace.js:3:13:3:20 | source() | string-replace.js:24:6:24:45 | taint.r ... + '!') |
| summarize-store-load-in-call.js:9:15:9:22 | source() | summarize-store-load-in-call.js:9:10:9:23 | blah(source()) |
| thisAssignments.js:4:17:4:24 | source() | thisAssignments.js:5:10:5:18 | obj.field |
| thisAssignments.js:7:19:7:26 | source() | thisAssignments.js:8:10:8:20 | this.field2 |
| tst.js:2:13:2:20 | source() | tst.js:4:10:4:10 | x |

View File

@@ -0,0 +1,12 @@
import * as dummy from 'dummy';
function blah(obj) {
obj.prop = obj.prop + "x";
return obj.prop;
}
function test() {
sink(blah(source())); // NOT OK
blah(); // ensure more than one call site exists
}

View File

@@ -0,0 +1,16 @@
| CSV type row should have 5 columns but has 2: test;TooFewColumns |
| CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns |
| Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] |
| Invalid argument '0..' in token 'Argument[0..]' in access path: Argument[0..].Member[password] |
| Invalid argument '0..' in token 'Argument[0..]' in access path: Argument[0..].Member[username] |
| Invalid argument '0..' in token 'Argument[0..]' in access path: Member[executeSql].Argument[0..].Parameter[1] |
| Invalid argument '0..' in token 'Argument[0..]' in access path: Member[run].Argument[0..].Parameter[1] |
| Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] |
| Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument |
| Invalid token 'Member' is missing its arguments, in access path: Method[foo].Member |
| Invalid token name 'Arg' in access path: Method[foo].Arg[0] |
| Invalid token name 'Method' in access path: Method[foo].Arg[0] |
| Invalid token name 'Method' in access path: Method[foo].Argument |
| Invalid token name 'Method' in access path: Method[foo].Argument[0-1] |
| Invalid token name 'Method' in access path: Method[foo].Argument[*] |
| Invalid token name 'Method' in access path: Method[foo].Member |

View File

@@ -0,0 +1,24 @@
import javascript
import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels
private class InvalidTypeModel extends ModelInput::TypeModelCsv {
override predicate row(string row) {
row =
[
"test;TooManyColumns;;;Member[Foo].Instance;too;many;columns", //
"test;TooFewColumns", //
"test;X;test;Y;Method[foo].Arg[0]", //
"test;X;test;Y;Method[foo].Argument[0-1]", //
"test;X;test;Y;Method[foo].Argument[*]", //
"test;X;test;Y;Method[foo].Argument", //
"test;X;test;Y;Method[foo].Member", //
]
}
}
class IsTesting extends ApiGraphModels::TestAllModels {
IsTesting() { this = this }
}
query predicate warning = ModelOutput::getAWarning/0;

View File

@@ -23,4 +23,5 @@
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:53:14:53:35 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
| tst-IncompleteHostnameRegExp.js:55:14:55:38 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:55:13:55:39 | '^http: ... le.com' | here |
| tst-IncompleteHostnameRegExp.js:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:59:2:59:32 | /^(foo. ... ever)$/ | here |

View File

@@ -52,7 +52,7 @@
new RegExp('test.' + 'example.com$'); // NOT OK
new RegExp('^http://test\.example.com'); // NOT OK, but flagged by js/useless-regexp-character-escape
new RegExp('^http://test\.example.com'); // NOT OK
/^http:\/\/(..|...)\.example\.com\/index\.html/; // OK, wildcards are intentional
/^http:\/\/.\.example\.com\/index\.html/; // OK, the wildcard is intentional