Merge branch 'main' into explicit-this

This commit is contained in:
Erik Krogh Kristensen
2021-11-24 15:24:58 +01:00
455 changed files with 22630 additions and 4220 deletions

View File

@@ -7,7 +7,7 @@
private import javascript
private import ATMConfig
external predicate adaptiveThreatModelingModels(
external predicate availableMlModels(
string modelChecksum, string modelLanguage, string modelName, string modelType
);

View File

@@ -11,7 +11,7 @@ import EndpointFeatures as EndpointFeatures
import EndpointTypes
private string getACompatibleModelChecksum() {
adaptiveThreatModelingModels(result, "javascript", _, "atm-endpoint-scoring")
availableMlModels(result, "javascript", _, "atm-endpoint-scoring")
}
/**
@@ -54,7 +54,7 @@ DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpo
// Use the largest entity smaller than the AST node limit, resolving ties using the entity that
// appears first in the source archive.
result =
rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInEntity(entity) and
numAstNodes <= getMaxNumAstNodes() and
@@ -68,7 +68,7 @@ DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpo
// Use the smallest entity, resolving ties using the entity that
// appears first in the source archive.
result =
rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
numAstNodes = getNumAstNodesInEntity(entity) and
l = entity.getLocation()

View File

@@ -207,7 +207,9 @@ private predicate isAmbientTopLevel(TopLevel tl) {
*/
class TopLevel extends @toplevel, StmtContainer {
/** Holds if this toplevel is minified. */
cached
predicate isMinified() {
Stages::Ast::ref() and
// file name contains 'min' (not as part of a longer word)
this.getFile().getBaseName().regexpMatch(".*[^-._]*[-._]min([-._].*)?\\.\\w+")
or

View File

@@ -413,7 +413,7 @@ module API {
any(Type t).hasUnderlyingType(moduleName, exportName)
} or
MkSyntheticCallbackArg(DataFlow::Node src, int bound, DataFlow::InvokeNode nd) {
trackUseNode(src, true, bound).flowsTo(nd.getCalleeNode())
trackUseNode(src, true, bound, "").flowsTo(nd.getCalleeNode())
}
class TDef = MkModuleDef or TNonModuleDef;
@@ -530,7 +530,7 @@ module API {
*/
private predicate argumentPassing(TApiNode base, int i, DataFlow::Node arg) {
exists(DataFlow::Node use, DataFlow::SourceNode pred, int bound |
use(base, use) and pred = trackUseNode(use, _, bound)
use(base, use) and pred = trackUseNode(use, _, bound, "")
|
arg = pred.getAnInvocation().getArgument(i - bound)
or
@@ -558,6 +558,32 @@ module API {
nd = MkDef(rhs)
}
/**
* Holds if `ref` is a read of a property described by `lbl` on `pred`, and
* `propDesc` is compatible with that property, meaning it is either the
* name of the property itself or the empty string.
*
* Three kinds of reads are covered: ordinary property reads, and loads of the
* promise value / promise error pseudo-properties through `PromiseFlow::loadStep`.
*/
pragma[noinline]
private predicate propertyRead(
DataFlow::SourceNode pred, string propDesc, string lbl, DataFlow::Node ref
) {
// case 1: an ordinary property read on `pred`
ref = pred.getAPropertyRead() and
lbl = Label::memberFromRef(ref) and
(
// `propDesc` names exactly the member that is read ...
lbl = Label::member(propDesc)
or
// ... or `propDesc` is the empty string, which is compatible with any property
propDesc = ""
)
or
// case 2: a load of the promise value pseudo-property
PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::valueProp()) and
lbl = Label::promised() and
(propDesc = Promises::valueProp() or propDesc = "")
or
// case 3: a load of the promise error pseudo-property
PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::errorProp()) and
lbl = Label::promisedError() and
(propDesc = Promises::errorProp() or propDesc = "")
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
@@ -569,26 +595,25 @@ module API {
base = MkRoot() and
ref = lbl.(EntryPoint).getAUse()
or
// property reads
exists(DataFlow::SourceNode src, DataFlow::SourceNode pred, string propDesc |
use(base, src) and
pred = trackUseNode(src, false, 0, propDesc) and
propertyRead(pred, propDesc, lbl, ref) and
// `module.exports` is special: it is a use of a def-node, not a use-node,
// so we want to exclude it here
(base instanceof TNonModuleDef or base instanceof TUse)
)
or
// invocations
exists(DataFlow::SourceNode src, DataFlow::SourceNode pred |
use(base, src) and pred = trackUseNode(src)
|
// `module.exports` is special: it is a use of a def-node, not a use-node,
// so we want to exclude it here
(base instanceof TNonModuleDef or base instanceof TUse) and
lbl = Label::memberFromRef(ref) and
ref = pred.getAPropertyRead()
or
lbl = Label::instance() and
ref = pred.getAnInstantiation()
or
lbl = Label::return() and
ref = pred.getAnInvocation()
or
lbl = Label::promised() and
PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::valueProp())
or
lbl = Label::promisedError() and
PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::errorProp())
)
or
exists(DataFlow::Node def, DataFlow::FunctionNode fn |
@@ -682,36 +707,58 @@ module API {
)
}
private import semmle.javascript.dataflow.TypeTracking
/**
* Gets a data-flow node to which `nd`, which is a use of an API-graph node, flows.
*
* The flow from `nd` to that node may be inter-procedural. If `promisified` is `true`, the
* flow goes through a promisification, and `boundArgs` indicates how many arguments have been
* bound throughout the flow. (To ensure termination, we somewhat arbitrarily constrain the
* number of bound arguments to be at most ten.)
* The flow from `nd` to that node may be inter-procedural, and is further described by three
* flags:
*
* - `promisified`: if `true`, the flow goes through a promisification;
* - `boundArgs`: for function values, tracks how many arguments have been bound throughout
* the flow. To ensure termination, we somewhat arbitrarily constrain the number of bound
* arguments to be at most ten.
* - `prop`: if non-empty, the flow is only guaranteed to preserve the value of this property,
* and not necessarily the entire object.
*/
private DataFlow::SourceNode trackUseNode(
DataFlow::SourceNode nd, boolean promisified, int boundArgs, DataFlow::TypeTracker t
DataFlow::SourceNode nd, boolean promisified, int boundArgs, string prop,
DataFlow::TypeTracker t
) {
t.start() and
use(_, nd) and
result = nd and
promisified = false and
boundArgs = 0
boundArgs = 0 and
prop = ""
or
exists(Promisify::PromisifyCall promisify |
trackUseNode(nd, false, boundArgs, t.continue()).flowsTo(promisify.getArgument(0)) and
trackUseNode(nd, false, boundArgs, prop, t.continue()).flowsTo(promisify.getArgument(0)) and
promisified = true and
prop = "" and
result = promisify
)
or
exists(DataFlow::PartialInvokeNode pin, DataFlow::Node pred, int predBoundArgs |
trackUseNode(nd, promisified, predBoundArgs, t.continue()).flowsTo(pred) and
trackUseNode(nd, promisified, predBoundArgs, prop, t.continue()).flowsTo(pred) and
prop = "" and
result = pin.getBoundFunction(pred, boundArgs - predBoundArgs) and
boundArgs in [0 .. 10]
)
or
t = useStep(nd, promisified, boundArgs, result)
exists(DataFlow::Node pred, string preprop |
trackUseNode(nd, promisified, boundArgs, preprop, t.continue()).flowsTo(pred) and
promisified = false and
boundArgs = 0 and
SharedTypeTrackingStep::loadStoreStep(pred, result, prop)
|
prop = preprop
or
preprop = ""
)
or
t = useStep(nd, promisified, boundArgs, prop, result)
}
private import semmle.javascript.dataflow.internal.StepSummary
@@ -725,19 +772,19 @@ module API {
*/
pragma[noopt]
private DataFlow::TypeTracker useStep(
DataFlow::Node nd, boolean promisified, int boundArgs, DataFlow::Node res
DataFlow::Node nd, boolean promisified, int boundArgs, string prop, DataFlow::Node res
) {
exists(DataFlow::TypeTracker t, StepSummary summary, DataFlow::SourceNode prev |
prev = trackUseNode(nd, promisified, boundArgs, t) and
prev = trackUseNode(nd, promisified, boundArgs, prop, t) and
StepSummary::step(prev, res, summary) and
result = t.append(summary)
)
}
private DataFlow::SourceNode trackUseNode(
DataFlow::SourceNode nd, boolean promisified, int boundArgs
DataFlow::SourceNode nd, boolean promisified, int boundArgs, string prop
) {
result = trackUseNode(nd, promisified, boundArgs, DataFlow::TypeTracker::end())
result = trackUseNode(nd, promisified, boundArgs, prop, DataFlow::TypeTracker::end())
}
/**
@@ -745,7 +792,7 @@ module API {
*/
cached
DataFlow::SourceNode trackUseNode(DataFlow::SourceNode nd) {
result = trackUseNode(nd, false, 0)
result = trackUseNode(nd, false, 0, "")
}
private DataFlow::SourceNode trackDefNode(DataFlow::Node nd, DataFlow::TypeBackTracker t) {

View File

@@ -1561,6 +1561,14 @@ class URShiftExpr extends @urshift_expr, BinaryExpr {
*/
class AddExpr extends @add_expr, BinaryExpr {
  override string getOperator() { result = "+" }

  /**
   * Gets the regular expression term whose parent is this string concatenation, if any.
   *
   * There is only a result when a regular expression tree is associated with
   * this expression.
   */
  RegExpTerm asRegExp() { result.getParent() = this }
}
/**

View File

@@ -141,6 +141,19 @@ private DataFlow::Node getAValueExportedByPackage() {
result = unique( | | call.getCalleeNode().getAFunctionValue()).getAReturn()
)
or
// the exported value is a function that returns another import.
// ```JavaScript
// module.exports = function foo() {
// return require("./other-module.js");
// }
// ```
exists(DataFlow::FunctionNode func, Module mod |
func = getAValueExportedByPackage().getABoundFunctionValue(_)
|
mod = func.getAReturn().getALocalSource().getEnclosingExpr().(Import).getImportedModule() and
result = getAnExportFromModule(mod)
)
or
// *****
// Common styles of transforming exported objects.
// *****

View File

@@ -425,6 +425,14 @@ module PromiseFlow {
prop = errorProp() and
pred = call.getCallback(0).getAReturn()
)
or
// return from `async` function
exists(DataFlow::FunctionNode f | f.getFunction().isAsync() |
// ordinary return
prop = valueProp() and
pred = f.getAReturn() and
succ = f.getReturnNode()
)
}
}

View File

@@ -7,6 +7,7 @@
import javascript
private import semmle.javascript.dataflow.InferredTypes
private import semmle.javascript.internal.CachedStages
/**
* An element containing a regular expression term, that is, either
@@ -155,7 +156,7 @@ class RegExpTerm extends Locatable, @regexpterm {
exists(RegExpParent parent | parent = this.getRootTerm().getParent() |
parent instanceof RegExpLiteral
or
parent.(StringLiteral).flow() instanceof RegExpPatternSource
parent.(Expr).flow() instanceof RegExpPatternSource
)
}
@@ -955,7 +956,9 @@ private predicate isUsedAsNonMatchObject(DataFlow::MethodCallNode call) {
/**
* Holds if `source` may be interpreted as a regular expression.
*/
cached
predicate isInterpretedAsRegExp(DataFlow::Node source) {
Stages::Taint::ref() and
source.analyze().getAType() = TTString() and
(
// The first argument to an invocation of `RegExp` (with or without `new`).
@@ -1104,6 +1107,30 @@ private class StringRegExpPatternSource extends RegExpPatternSource {
override RegExpTerm getRegExpTerm() { result = this.asExpr().(StringLiteral).asRegExp() }
}
/**
 * A node whose string value may flow to a position where it is interpreted
 * as a part of a regular expression.
 *
 * The regular expression term of this source is taken from a string
 * concatenation (`AddExpr`), see `getRegExpTerm`.
 */
private class StringConcatRegExpPatternSource extends RegExpPatternSource {
  /** The node where the string value of this source is interpreted as a regular expression. */
  DataFlow::Node parse;

  StringConcatRegExpPatternSource() { this = regExpSource(parse) }

  override DataFlow::Node getAParse() { result = parse }

  /** Gets an invocation of the global `RegExp` that receives `parse` as its first argument. */
  override DataFlow::SourceNode getARegExpObject() {
    exists(DataFlow::InvokeNode constructor |
      constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and
      parse = constructor.getArgument(0) and
      result = constructor
    )
  }

  // explicit `this` receivers, consistent with the sibling `StringRegExpPatternSource`
  override string getPattern() { result = this.getStringValue() }

  /** Gets the term parsed from this node, which only exists if the node is an `AddExpr`. */
  override RegExpTerm getRegExpTerm() { result = this.asExpr().(AddExpr).asRegExp() }
}
module RegExp {
/** Gets the string `"?"` used to represent a regular expression whose flags are unknown. */
string unknownFlag() { result = "?" }

View File

@@ -837,7 +837,7 @@ class NgDataFlowNode extends TNode {
private predicate fileIsImplicitlyAngularJS(HTML::HtmlFile file) {
// The file contains ng-* attributes.
exists(HTML::Attribute attrib |
attrib.getName().regexpMatch("ng-.*") and
attrib.getName().matches("ng-%") and
attrib.getFile() = file
) and
// But does not contain the ng-app root element, implying that file is

View File

@@ -408,12 +408,9 @@ module ClientRequest {
*/
class GotUrlRequest extends ClientRequest::Range {
GotUrlRequest() {
exists(string moduleName, DataFlow::SourceNode callee | this = callee.getACall() |
moduleName = "got" and
(
callee = DataFlow::moduleImport(moduleName) or
callee = DataFlow::moduleMember(moduleName, "stream")
)
exists(API::Node callee, API::Node got | this = callee.getACall() |
got = [API::moduleImport("got"), API::moduleImport("got").getMember("extend").getReturn()] and
callee = [got, got.getMember(["stream", "get", "post", "put", "patch", "head", "delete"])]
)
}
@@ -792,7 +789,7 @@ module ClientRequest {
cmd.getACommandArgument()
.(StringOps::ConcatenationRoot)
.getConstantStringParts()
.regexpMatch("curl .*")
.matches("curl %")
)
}

View File

@@ -556,7 +556,7 @@ module NodeJSLib {
}
override DataFlow::Node getADataNode() {
if methodName.regexpMatch(".*Sync")
if methodName.matches("%Sync")
then result = this
else
exists(int i, string paramName | fsDataParam(methodName, i, paramName) |
@@ -724,9 +724,9 @@ module NodeJSLib {
not result = this.getParameter(0).getARhs() and
// fork/spawn and all sync methods always have options as the last argument
if
methodName.regexpMatch("fork.*") or
methodName.regexpMatch("spawn.*") or
methodName.regexpMatch(".*Sync")
methodName.matches("fork%") or
methodName.matches("spawn%") or
methodName.matches("%Sync")
then result = this.getLastArgument()
else
// the rest (exec/execFile) has the options argument as their second last.

View File

@@ -260,6 +260,8 @@ module Stages {
exists(RemoteFlowSource r)
or
exists(Exports::getALibraryInputParameter())
or
any(RegExpTerm t).isUsedAsRegExp()
}
}
}

View File

@@ -0,0 +1,306 @@
/**
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import performance.ReDoSUtil
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*/
private module RegexpMatching {
/**
* A class to test whether a regular expression matches a string.
* Override this class and extend `test`/`testWithGroups` to configure which strings should be tested for acceptance by this regular expression.
* The result can afterwards be read from the `matches` predicate.
*
* Strings in the `testWithGroups` predicate are also tested for which capture groups are filled by the given string.
* The result is available in the `fillsCaptureGroup` predicate.
*/
abstract class MatchedRegExp extends RegExpTerm {
MatchedRegExp() { this.isRootTerm() }
/**
* Holds if it should be tested whether this regular expression matches `str`.
*
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
* but if `ignorePrefix` is true, it will only match "foo".
*/
predicate test(string str, boolean ignorePrefix) {
none() // may be overridden in subclasses
}
/**
* Same as `test(..)`, but where the `fillsCaptureGroup` afterwards tells which capture groups were filled by the given string.
*/
predicate testWithGroups(string str, boolean ignorePrefix) {
none() // may be overridden in subclasses
}
/**
* Holds if this RegExp matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
*/
final predicate matches(string str) {
// `str.length() - 1` is the index of the last char in `str`:
// look at the states after the whole string has been consumed.
exists(State state | state = getAState(this, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
* Holds if matching `str` may fill capture group number `g`.
* Only holds if `str` is in the `testWithGroups` predicate.
*/
final predicate fillsCaptureGroup(string str, int g) {
// a group is filled if some state on an accepting path is represented by a term inside that group.
exists(State s |
s = getAStateThatReachesAccept(this, _, str, _) and
g = group(s.getRepr())
)
}
}
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modelled as a non-deterministic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(MatchedRegExp reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
(
reg.test(str, ignorePrefix)
or
reg.testWithGroups(str, ignorePrefix)
) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
// (this step consumes no char, so `fromIndex` and `toIndex` are the same)
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
/**
* Gets a state reached from `prev` by consuming the single char at index `toIndex` in `str`,
* where `prev` is a state of `reg` at index `fromIndex = toIndex - 1`.
*
* Steps that are discarded because of the `ignorePrefix` flag (see `discardedPrefixStep`) are excluded.
*/
pragma[noopt]
private State getAStateAfterMatchingAux(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
// only a self-loop (`next = prev`) on the `mkMatch` state of a regexp root is discarded.
// NOTE(review): presumably this self-loop is what lets an unanchored regexp skip an arbitrary prefix - confirm against `ReDoSUtil`.
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
// restrict `char` to the chars of the tested strings before looking up input symbols.
exists(string s, MatchedRegExp r |
r.test(s, _)
or
r.testWithGroups(s, _)
|
char = s.charAt(_)
) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
*/
private State getAStateThatReachesAccept(
MatchedRegExp reg, int i, string str, boolean ignorePrefix
) {
// base case, reaches an accepting state from the last state in `getAState(..)`
reg.testWithGroups(str, ignorePrefix) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/**
* Gets the number of a capture group that `term` belongs to.
* Every group that is `term` itself or a transitive parent of `term` is considered.
*/
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
}
/**
 * A root regular expression term that is tested against a fixed set of HTML tags.
 *
 * Only regular expressions that explicitly mention both "<" and ">" are considered.
 */
class HTMLMatchingRegExp extends RegexpMatching::MatchedRegExp {
  HTMLMatchingRegExp() {
    // some constant in this regexp contains a "<" ...
    exists(RegExpConstant open | open.getValue().matches("%<%") | open.getRootTerm() = this) and
    // ... and some constant in this regexp contains a ">".
    exists(RegExpConstant close | close.getValue().matches("%>%") | close.getRootTerm() = this)
  }

  /** Holds if `str` is a string for which the filled capture groups should be computed. */
  override predicate testWithGroups(string str, boolean ignorePrefix) {
    str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
    ignorePrefix = true
  }

  /** Holds if `str` is a string that should be tested for acceptance by this regexp. */
  override predicate test(string str, boolean ignorePrefix) {
    str =
      [
        "<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
        "<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>",
        "<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>",
        "<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
        "<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
        "<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
      ] and
    ignorePrefix = true
  }
}
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
* `msg` describes which tags are matched and which are missed.
*
* When adding a new case to this predicate, make sure the test strings used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HTMLMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and
exists(int a, int b | a != b |
regexp.fillsCaptureGroup("<!-- foo -->", a) and
// <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
regexp.fillsCaptureGroup("<!-- foo --!>", b) and
not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
msg =
"Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+ a + " and comments ending with --!> are matched with capture group " +
strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
"."
)
or
// CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
exists(int group, int other |
group != other and
regexp.fillsCaptureGroup("<!-- foo -->", group) and
regexp.fillsCaptureGroup("<foo>", other) and
not regexp.matches("<!-- foo --!>") and
// `group` is the only group filled by the comment, and it is not filled by any of the non-comment strings.
not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
not regexp.fillsCaptureGroup("<!- foo ->", group) and
not regexp.fillsCaptureGroup("<foo>", group) and
not regexp.fillsCaptureGroup("<script>", group) and
msg =
"This regular expression only parses --> (capture group " + group +
") and not --!> as a HTML comment end tag."
)
or
// matching comments, but not comments that contain a newline.
regexp.matches("<!-- foo -->") and
not regexp.matches("<!-- foo\n -->") and
not regexp.matches("<!- foo ->") and
not regexp.matches("<foo>") and
not regexp.matches("<script>") and
msg = "This regular expression does not match comments containing newlines."
or
// matching script tags, but not when a newline occurs in the start tag or in the content.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script \n>foo</script>") and
msg = "This regular expression matches <script></script>, but not <script \\n></script>"
or
not regexp.matches("<script >foo\n</script>") and
msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
)
or
// matching script tags with double-quoted attributes, but not single-quoted attributes.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<script src='foo'></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses single-quotes."
or
// matching script tags with single-quoted attributes, but not double-quoted attributes.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses double-quotes."
or
// matching script tags with attributes, but not when a tab separates the tag name and the attribute.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script\tsrc='foo'></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo src=\"foo\"></foo>") and
msg = "This regular expression does not match script tags where tabs are used between attributes."
or
// matching lower case script tags, but not upper case or mixed case script tags.
regexp.matches("<script>foo</script>") and
not RegExpFlags::isIgnoreCase(regexp) and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match upper case <SCRIPT> tags."
or
// the mixed case message is only used when upper case tags are matched.
not regexp.matches("<sCrIpT>foo</ScRiPt>") and
regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match mixed case <sCrIpT> tags."
)
or
// matching script tags, but not script end tags that contain whitespace or attributes.
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script src=\"foo\">foo</script >") and
msg = "This regular expression does not match script end tags like </script >."
or
not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
or
not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
)
}

View File

@@ -22,8 +22,8 @@ module XssThroughDom {
*/
bindingset[result]
string unsafeAttributeName() {
result.regexpMatch("data-.*") or
result.regexpMatch("aria-.*") or
result.matches("data-%") or
result.matches("aria-%") or
result = ["name", "value", "title", "alt"]
}

View File

@@ -218,7 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -542,7 +548,7 @@ private State before(RegExpTerm t) { result = Match(t, 0) }
/**
* Gets a state the NFA may be in after matching `t`.
*/
private State after(RegExpTerm t) {
State after(RegExpTerm t) {
exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
or
exists(RegExpSequence seq, int i | t = seq.getChild(i) |
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)
@@ -671,7 +677,7 @@ RegExpRoot getRoot(RegExpTerm term) {
/**
* A state in the NFA.
*/
private newtype TState =
newtype TState =
/**
* A state representing that the NFA is about to match a term.
* `i` is used to index into multi-char literals.
@@ -801,29 +807,26 @@ InputSymbol getAnInputSymbolMatching(string char) {
result = Any()
}
/**
 * Holds if `state` is a start state: either the `mkMatch` state of a regular
 * expression root, or a state directly after a caret (`^`).
 */
predicate isStartState(State state) {
  exists(RegExpRoot root | state = mkMatch(root))
  or
  state = after(any(RegExpCaret caret))
}
/**
* Predicates for constructing a prefix string that leads to a given state.
*/
private module PrefixConstruction {
/**
* Holds if `state` starts the string matched by the regular expression.
*/
private predicate isStartState(State state) {
state instanceof StateInPumpableRegexp and
(
state = Match(any(RegExpRoot r), _)
or
exists(RegExpCaret car | state = after(car))
)
}
/**
* Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(State s, Location l |
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s

View File

@@ -6,6 +6,14 @@
import javascript
/**
 * Holds if `term` is an escape class representing e.g. `\d`.
 * `clazz` is the character class it represents, e.g. "d" for `\d`.
 */
predicate isEscapeClass(RegExpTerm term, string clazz) {
  clazz = term.(RegExpCharacterClassEscape).getValue()
}
/**
* Holds if the regular expression should not be considered.
*
@@ -20,12 +28,7 @@ module RegExpFlags {
/**
* Holds if `root` has the `i` flag for case-insensitive matching.
*/
predicate isIgnoreCase(RegExpTerm root) {
root.isRootTerm() and
exists(DataFlow::RegExpCreationNode node | node.getRoot() = root |
RegExp::isIgnoreCase(node.getFlags())
)
}
predicate isIgnoreCase(RegExpTerm root) { RegExp::isIgnoreCase(getFlags(root)) }
/**
* Gets the flags for `root`, or the empty string if `root` has no flags.
@@ -38,15 +41,14 @@ module RegExpFlags {
not exists(node.getFlags()) and
result = ""
)
or
exists(RegExpPatternSource source | source.getRegExpTerm() = root |
result = source.getARegExpObject().(DataFlow::RegExpCreationNode).getFlags()
)
}
/**
* Holds if `root` has the `s` (dot-all) flag, which lets `.` match line terminators as well.
*/
predicate isDotAll(RegExpTerm root) {
root.isRootTerm() and
exists(DataFlow::RegExpCreationNode node | node.getRoot() = root |
RegExp::isDotAll(node.getFlags())
)
}
predicate isDotAll(RegExpTerm root) { RegExp::isDotAll(getFlags(root)) }
}

View File

@@ -855,7 +855,7 @@ regexpterm (unique int id: @regexpterm,
int idx: int ref,
varchar(900) tostring: string ref);
@regexpparent = @regexpterm | @regexp_literal | @string_literal;
@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr;
case @regexpterm.kind of
0 = @regexp_alt

View File

@@ -18,5 +18,5 @@ where
// but exclude attribute top-levels: `<a href="javascript:'some-attribute-string'">`
not d.getParent() instanceof CodeInAttribute and
// exclude babel generated directives like "@babel/helpers - typeof".
not d.getDirectiveText().prefix(14) = "@babel/helpers"
not d.getDirectiveText().matches("@babel/helpers%")
select d, "Unknown directive: '" + truncate(d.getDirectiveText(), 20, " ... (truncated)") + "'."

View File

@@ -0,0 +1,54 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
It is possible to match some single HTML tags using regular expressions (parsing general HTML using
regular expressions is impossible). However, if the regular expression is not written well it might
be possible to circumvent it, which can lead to cross-site scripting or other security issues.
</p>
<p>
Some of these mistakes are caused by browsers having very forgiving HTML parsers, which
will often render invalid HTML containing syntax errors.
Regular expressions that attempt to match HTML should also recognize tags containing such syntax errors.
</p>
</overview>
<recommendation>
<p>
Use a well-tested sanitization or parser library if at all possible. These libraries are much more
likely to handle corner cases correctly than a custom implementation.
</p>
</recommendation>
<example>
<p>
The following example attempts to filter out all <code>&lt;script&gt;</code> tags.
</p>
<sample src="examples/BadTagFilter.js" />
<p>
The above sanitizer does not filter out all <code>&lt;script&gt;</code> tags.
Browsers will not only accept <code>&lt;/script&gt;</code> as script end tags, but also tags such as <code>&lt;/script foo="bar"&gt;</code> even though it is a parser error.
This means that an attack string such as <code>&lt;script&gt;alert(1)&lt;/script foo="bar"&gt;</code> will not be filtered by
the function, and <code>alert(1)</code> will be executed by a browser if the string is rendered as HTML.
</p>
<p>
Other corner cases include that HTML comments can end with <code>--!&gt;</code>,
and that HTML tag names can contain upper case characters.
</p>
</example>
<references>
<li>Securitum: <a href="https://research.securitum.com/the-curious-case-of-copy-paste/">The Curious Case of Copy &amp; Paste</a>.</li>
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags#answer-1732454">You can't parse [X]HTML with regex</a>.</li>
<li>HTML Standard: <a href="https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state">Comment end bang state</a>.</li>
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/25559999/why-arent-browsers-strict-about-html">Why aren't browsers strict about HTML?</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,19 @@
/**
* @name Bad HTML filtering regexp
* @description Matching HTML tags using regular expressions is hard to do right, and can easily lead to security issues.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id js/bad-tag-filter
* @tags correctness
* security
* external/cwe/cwe-116
* external/cwe/cwe-020
*/
import semmle.javascript.security.BadTagFilterQuery
from HTMLMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
select regexp, msg

View File

@@ -0,0 +1,8 @@
/**
 * Replaces each `<script ...>...</script>` element found in `html` with the
 * text captured between its start and end tags, and returns the result.
 *
 * Note: the end-tag part of the pattern only accepts a literal `</script>`,
 * so an end tag such as `</script foo="bar">` is not matched.
 */
function filterScript(html) {
  var pattern = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  for (var hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
    // Swap the whole matched element for its captured body.
    html = html.replace(hit[0], hit[1]);
  }
  return html;
}

View File

@@ -0,0 +1,9 @@
import { readFile } from 'fs/promises';
async function readFileUtf8(path: string): Promise<string> {
return readFile(path, { encoding: 'utf8' });
}
async function test(path: string) {
await readFileUtf8(path); /* use (promised (return (member readFile (member exports (module fs/promises))))) */
}

View File

@@ -4,9 +4,9 @@ class AssertionComment extends LineComment {
boolean isOK;
AssertionComment() {
isOK = true and this.getText().trim().regexpMatch("OK.*")
isOK = true and this.getText().trim().matches("OK%")
or
isOK = false and this.getText().trim().regexpMatch("NOT OK.*")
isOK = false and this.getText().trim().matches("NOT OK%")
}
ConditionGuardNode getAGuardNode() {

View File

@@ -2,7 +2,7 @@ import javascript
// Select all expressions whose string value contains the word "two"
predicate containsTwo(DataFlow::Node node) {
node.getStringValue().regexpMatch(".*two.*")
node.getStringValue().matches("%two%")
or
containsTwo(node.getAPredecessor())
or

View File

@@ -2,7 +2,7 @@ import javascript
// Select all expressions whose string value contains the word "two"
predicate containsTwo(DataFlow::Node node) {
node.getStringValue().regexpMatch(".*two.*")
node.getStringValue().matches("%two%")
or
containsTwo(node.getAPredecessor())
or

View File

@@ -28,6 +28,7 @@
| jsonschema.js:15:23:15:29 | (a?a?)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a?a?)*b |
| jsonschema.js:20:18:20:24 | (a?a?)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a?a?)*b |
| lib/closure.js:4:6:4:7 | u* | Strings with many repetitions of 'u' can start matching anywhere after the start of the preceeding u*o |
| lib/indirect.js:2:6:2:7 | k* | Strings with many repetitions of 'k' can start matching anywhere after the start of the preceeding k*h |
| lib/lib.js:1:15:1:16 | a* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding a*b |
| lib/lib.js:8:3:8:4 | f* | Strings with many repetitions of 'f' can start matching anywhere after the start of the preceeding f*g |
| lib/moduleLib/moduleLib.js:2:3:2:4 | a* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding a*b |
@@ -512,3 +513,8 @@
| tst.js:384:15:384:26 | ([AB]\|[ab])* | Strings with many repetitions of 'A' can start matching anywhere after the start of the preceeding ([AB]\|[ab])*C |
| tst.js:385:14:385:25 | ([DE]\|[de])* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ([DE]\|[de])*F |
| tst.js:388:14:388:20 | (a\|aa)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a\|aa)*$ |
| tst.js:391:6:394:5 | (a\|aa)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a\|aa)*b$ |
| tst.js:398:6:398:12 | (c\|cc)* | Strings with many repetitions of 'c' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
| tst.js:399:6:399:12 | (d\|dd)* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
| tst.js:400:6:401:1 | (e\|ee)* | Strings with many repetitions of 'e' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
| tst.js:404:6:405:7 | (g\|gg)* | Strings with many repetitions of 'g' can start matching anywhere after the start of the preceeding (g\|gg)*h$ |

View File

@@ -3,6 +3,10 @@ nodes
| lib/closure.js:3:21:3:21 | x |
| lib/closure.js:4:16:4:16 | x |
| lib/closure.js:4:16:4:16 | x |
| lib/indirect.js:1:32:1:32 | x |
| lib/indirect.js:1:32:1:32 | x |
| lib/indirect.js:2:16:2:16 | x |
| lib/indirect.js:2:16:2:16 | x |
| lib/lib.js:3:28:3:31 | name |
| lib/lib.js:3:28:3:31 | name |
| lib/lib.js:4:14:4:17 | name |
@@ -170,6 +174,10 @@ edges
| lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x |
| lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x |
| lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x |
| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
| lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name |
| lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name |
| lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name |
@@ -329,6 +337,7 @@ edges
| polynomial-redos.js:123:13:123:20 | replaced | polynomial-redos.js:123:3:123:20 | result |
#select
| lib/closure.js:4:5:4:17 | /u*o/.test(x) | lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x | This $@ that depends on $@ may run slow on strings with many repetitions of 'u'. | lib/closure.js:4:6:4:7 | u* | regular expression | lib/closure.js:3:21:3:21 | x | library input |
| lib/indirect.js:2:5:2:17 | /k*h/.test(x) | lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x | This $@ that depends on $@ may run slow on strings with many repetitions of 'k'. | lib/indirect.js:2:6:2:7 | k* | regular expression | lib/indirect.js:1:32:1:32 | x | library input |
| lib/lib.js:4:2:4:18 | regexp.test(name) | lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name | This $@ that depends on $@ may run slow on strings with many repetitions of 'a'. | lib/lib.js:1:15:1:16 | a* | regular expression | lib/lib.js:3:28:3:31 | name | library input |
| lib/lib.js:8:2:8:17 | /f*g/.test(name) | lib/lib.js:7:19:7:22 | name | lib/lib.js:8:13:8:16 | name | This $@ that depends on $@ may run slow on strings with many repetitions of 'f'. | lib/lib.js:8:3:8:4 | f* | regular expression | lib/lib.js:7:19:7:22 | name | library input |
| lib/moduleLib/moduleLib.js:2:2:2:17 | /a*b/.test(name) | lib/moduleLib/moduleLib.js:1:28:1:31 | name | lib/moduleLib/moduleLib.js:2:13:2:16 | name | This $@ that depends on $@ may run slow on strings with many repetitions of 'a'. | lib/moduleLib/moduleLib.js:2:3:2:4 | a* | regular expression | lib/moduleLib/moduleLib.js:1:28:1:31 | name | library input |

View File

@@ -183,3 +183,8 @@
| tst.js:385:14:385:25 | ([DE]\|[de])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
| tst.js:387:27:387:33 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| tst.js:388:14:388:20 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| tst.js:391:6:394:5 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| tst.js:398:6:398:12 | (c\|cc)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'cc'. |
| tst.js:399:6:399:12 | (d\|dd)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'dd'. |
| tst.js:400:6:401:1 | (e\|ee)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ee'. |
| tst.js:404:6:405:7 | (g\|gg)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'gg'. |

View File

@@ -0,0 +1,3 @@
module.exports.foo = function (x) {
/k*h/.test(x); // NOT OK
}

View File

@@ -12,4 +12,8 @@ if (typeof define !== 'undefined' && define.amd) { // AMD
define([], function () {return bar});
}
module.exports.closure = require("./closure")
module.exports.closure = require("./closure")
module.exports.func = function (conf) {
return require("./indirect")
}

View File

@@ -385,4 +385,21 @@ var good47 = /([AB]|[ab])*C/;
var bad92 = /([DE]|[de])*F/i;
var bad93 = /(?<=^v?|\sv?)(a|aa)*$/;
var bad94 = /(a|aa)*$/;
var bad94 = /(a|aa)*$/;
var bad95 = new RegExp(
"(a" +
"|" +
"aa)*" +
"b$"
);
var bad96 = new RegExp("(" +
"(c|cc)*|" +
"(d|dd)*|" +
"(e|ee)*" +
")f$");
var bad97 = new RegExp(
"(g|gg" +
")*h$");

View File

@@ -15,11 +15,12 @@
| tst-IncompleteHostnameRegExp.js:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
| tst-IncompleteHostnameRegExp.js:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
| tst-IncompleteHostnameRegExp.js:40:3:40:29 | ^https?:\\/\\/api.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
| tst-IncompleteHostnameRegExp.js:41:42:41:70 | ^https?://.+\\.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:41:42:41:48 | ^https?://.+\\.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:43:3:43:32 | ^https:\\/\\/[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
| tst-IncompleteHostnameRegExp.js:44:32:44:45 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
| tst-IncompleteHostnameRegExp.js:44:47:44:62 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
| tst-IncompleteHostnameRegExp.js:44:64:44:79 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
| tst-IncompleteHostnameRegExp.js:53:14:53:35 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
| tst-IncompleteHostnameRegExp.js:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:59:2:59:32 | /^(foo. ... ever)$/ | here |

View File

@@ -50,7 +50,7 @@
var primary = 'example.com$';
new RegExp('test.' + primary); // NOT OK, but not detected
new RegExp('test.' + 'example.com$'); // NOT OK, but not detected
new RegExp('test.' + 'example.com$'); // NOT OK
new RegExp('^http://test\.example.com'); // NOT OK, but flagged by js/useless-regexp-character-escape

View File

@@ -825,6 +825,17 @@ nodes
| xmlRequest.js:9:28:9:31 | json |
| xmlRequest.js:9:28:9:39 | json.message |
| xmlRequest.js:9:28:9:39 | json.message |
| xmlRequest.js:20:11:20:48 | resp |
| xmlRequest.js:20:18:20:48 | await g ... rl }}") |
| xmlRequest.js:20:24:20:48 | got.get ... rl }}") |
| xmlRequest.js:20:24:20:48 | got.get ... rl }}") |
| xmlRequest.js:21:11:21:38 | json |
| xmlRequest.js:21:18:21:38 | JSON.pa ... p.body) |
| xmlRequest.js:21:29:21:32 | resp |
| xmlRequest.js:21:29:21:37 | resp.body |
| xmlRequest.js:22:24:22:27 | json |
| xmlRequest.js:22:24:22:35 | json.message |
| xmlRequest.js:22:24:22:35 | json.message |
edges
| addEventListener.js:1:43:1:47 | event | addEventListener.js:2:20:2:24 | event |
| addEventListener.js:1:43:1:47 | event | addEventListener.js:2:20:2:24 | event |
@@ -1545,7 +1556,18 @@ edges
| xmlRequest.js:8:31:8:46 | xhr.responseText | xmlRequest.js:8:20:8:47 | JSON.pa ... seText) |
| xmlRequest.js:9:28:9:31 | json | xmlRequest.js:9:28:9:39 | json.message |
| xmlRequest.js:9:28:9:31 | json | xmlRequest.js:9:28:9:39 | json.message |
| xmlRequest.js:20:11:20:48 | resp | xmlRequest.js:21:29:21:32 | resp |
| xmlRequest.js:20:18:20:48 | await g ... rl }}") | xmlRequest.js:20:11:20:48 | resp |
| xmlRequest.js:20:24:20:48 | got.get ... rl }}") | xmlRequest.js:20:18:20:48 | await g ... rl }}") |
| xmlRequest.js:20:24:20:48 | got.get ... rl }}") | xmlRequest.js:20:18:20:48 | await g ... rl }}") |
| xmlRequest.js:21:11:21:38 | json | xmlRequest.js:22:24:22:27 | json |
| xmlRequest.js:21:18:21:38 | JSON.pa ... p.body) | xmlRequest.js:21:11:21:38 | json |
| xmlRequest.js:21:29:21:32 | resp | xmlRequest.js:21:29:21:37 | resp.body |
| xmlRequest.js:21:29:21:37 | resp.body | xmlRequest.js:21:18:21:38 | JSON.pa ... p.body) |
| xmlRequest.js:22:24:22:27 | json | xmlRequest.js:22:24:22:35 | json.message |
| xmlRequest.js:22:24:22:27 | json | xmlRequest.js:22:24:22:35 | json.message |
#select
| jwt.js:6:14:6:20 | decoded | jwt.js:4:36:4:39 | data | jwt.js:6:14:6:20 | decoded | Cross-site scripting vulnerability due to $@. | jwt.js:4:36:4:39 | data | user-provided value |
| typeahead.js:10:16:10:18 | loc | typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc | Cross-site scripting vulnerability due to $@. | typeahead.js:9:28:9:30 | loc | user-provided value |
| xmlRequest.js:9:28:9:39 | json.message | xmlRequest.js:8:31:8:46 | xhr.responseText | xmlRequest.js:9:28:9:39 | json.message | Cross-site scripting vulnerability due to $@. | xmlRequest.js:8:31:8:46 | xhr.responseText | user-provided value |
| xmlRequest.js:22:24:22:35 | json.message | xmlRequest.js:20:24:20:48 | got.get ... rl }}") | xmlRequest.js:22:24:22:35 | json.message | Cross-site scripting vulnerability due to $@. | xmlRequest.js:20:24:20:48 | got.get ... rl }}") | user-provided value |

View File

@@ -6,11 +6,19 @@ $(document).ready(function () {
xhr.onreadystatechange = function () {
if (xhr.readyState !== 4) { return }
var json = JSON.parse(xhr.responseText)
$("#myThing").html(json.message);
$("#myThing").html(json.message); // caught with additional sources
}
try {
xhr.send()
} catch (error) {
console.log(error)
}
})
});
$(document).ready(async function () {
const got = require('got');
const resp = await got.get("{{ some_url }}");
const json = JSON.parse(resp.body);
$("#myThing").html(json.message); // caught with additional sources
});

View File

@@ -0,0 +1,17 @@
| tst.js:2:6:2:29 | <script.*?>.*?<\\/script> | This regular expression does not match script end tags like </script >. |
| tst.js:3:6:3:29 | <script.*?>.*?<\\/script> | This regular expression does not match script end tags like </script >. |
| tst.js:7:6:7:16 | <!--.*--!?> | This regular expression does not match comments containing newlines. |
| tst.js:8:6:8:39 | <script.*?>(.\|\\s)*?<\\/script[^>]*> | This regular expression matches <script></script>, but not <script \\n></script> |
| tst.js:9:6:9:37 | <script[^>]*?>.*?<\\/script[^>]*> | This regular expression matches <script>...</script>, but not <script >...\\n</script> |
| tst.js:10:6:10:44 | <script(\\s\|\\w\|=\|")*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where the attribute uses single-quotes. |
| tst.js:11:6:11:44 | <script(\\s\|\\w\|=\|')*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where the attribute uses double-quotes. |
| tst.js:12:6:12:48 | <script( \|\\n\|\\w\|=\|'\|")*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where tabs are used between attributes. |
| tst.js:13:6:13:34 | <script.*?>.*?<\\/script[^>]*> | This regular expression does not match upper case <SCRIPT> tags. |
| tst.js:14:6:14:52 | <(script\|SCRIPT).*?>.*?<\\/(script\|SCRIPT)[^>]*> | This regular expression does not match mixed case <sCrIpT> tags. |
| tst.js:15:6:15:39 | <script[^>]*?>[\\s\\S]*?<\\/script.*> | This regular expression does not match script end tags like </script\\t\\n bar>. |
| tst.js:17:6:17:40 | <script\\b[^>]*>([\\s\\S]*?)<\\/script> | This regular expression does not match script end tags like </script >. |
| tst.js:18:6:18:48 | <(?:!--([\\S\|\\s]*?)-->)\|([^\\/\\s>]+)[\\S\\s]*?> | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 1 and comments ending with --!> are matched with capture group 2. |
| tst.js:19:6:19:147 | <(?:(?:\\/([^>]+)>)\|(?:!--([\\S\|\\s]*?)-->)\|(?:([^\\/\\s>]+)((?:\\s+[\\w\\-:.]+(?:\\s*=\\s*?(?:(?:"[^"]*")\|(?:'[^']*')\|[^\\s"'\\/>]+))?)*)[\\S\\s]*?(\\/?)>)) | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 2 and comments ending with --!> are matched with capture group 3, 4. |
| tst.js:20:3:20:57 | (<[a-z\\/!$]("[^"]*"\|'[^']*'\|[^'">])*>\|<!(--.*?--\\s*)+>) | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 3 and comments ending with --!> are matched with capture group 1. |
| tst.js:21:6:21:249 | <(?:(?:!--([\\w\\W]*?)-->)\|(?:!\\[CDATA\\[([\\w\\W]*?)\\]\\]>)\|(?:!DOCTYPE([\\w\\W]*?)>)\|(?:\\?([^\\s\\/<>]+) ?([\\w\\W]*?)[?/]>)\|(?:\\/([A-Za-z][A-Za-z0-9\\-_\\:\\.]*)>)\|(?:([A-Za-z][A-Za-z0-9\\-_\\:\\.]*)((?:\\s+[^"'>]+(?:(?:"[^"]*")\|(?:'[^']*')\|[^>]*))*\|\\/\|\\s+)>)) | This regular expression only parses --> (capture group 1) and not --!> as a HTML comment end tag. |
| tst.js:22:6:22:33 | <!--([\\w\\W]*?)-->\|<([^>]*?)> | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 1 and comments ending with --!> are matched with capture group 2. |

View File

@@ -0,0 +1 @@
Security/CWE-116/BadTagFilter.ql

View File

@@ -0,0 +1,28 @@
var filters = [
/<script.*?>.*?<\/script>/i, // NOT OK - doesn't match newlines or `</script >`
/<script.*?>.*?<\/script>/is, // NOT OK - doesn't match `</script >`
/<script.*?>.*?<\/script[^>]*>/is, // OK
/<!--.*-->/is, // OK - we don't care regexps that only match comments
/<!--.*--!?>/is, // OK
/<!--.*--!?>/i, // NOT OK, does not match newlines
/<script.*?>(.|\s)*?<\/script[^>]*>/i, // NOT OK - doesn't match inside the script tag
/<script[^>]*?>.*?<\/script[^>]*>/i, // NOT OK - doesn't match newlines inside the content
/<script(\s|\w|=|")*?>.*?<\/script[^>]*>/is, // NOT OK - does not match single quotes for attribute values
/<script(\s|\w|=|')*?>.*?<\/script[^>]*>/is, // NOT OK - does not match double quotes for attribute values
/<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>/is, // NOT OK - does not match tabs between attributes
/<script.*?>.*?<\/script[^>]*>/s, // NOT OK - does not match uppercase SCRIPT tags
/<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>/s, // NOT OK - does not match mixed case script tags
/<script[^>]*?>[\s\S]*?<\/script.*>/i, // NOT OK - doesn't match newlines in the end tag
/<script[^>]*?>[\s\S]*?<\/script[^>]*?>/i, // OK
/<script\b[^>]*>([\s\S]*?)<\/script>/gi, // NOT OK - too strict matching on the end tag
/<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>/, // NOT OK - doesn't match comments with the right capture groups
/<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))/, // NOT OK - capture groups
/(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)/gi, // NOT OK - capture groups
/<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))/g, // NOT OK - capture groups
/<!--([\w\W]*?)-->|<([^>]*?)>/g, // NOT OK - capture groups
]
doFilters(filters)
var strip = '<script([^>]*)>([\\S\\s]*?)<\/script([^>]*)>'; // OK - it's used with the ignorecase flag
new RegExp(strip, 'gi');