Merge branch 'main' into explicit-this

2026-07-21 03:08:25 +02:00 · 2021-11-24 15:24:58 +01:00
parent f0c5a80d1a 3bab8c6d1d
commit 08ce03cd93
455 changed files with 22630 additions and 4220 deletions
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/BaseScoring.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/BaseScoring.qll
@@ -7,7 +7,7 @@
 private import javascript
 private import ATMConfig

-external predicate adaptiveThreatModelingModels(
+external predicate availableMlModels(
  string modelChecksum, string modelLanguage, string modelName, string modelType
 );

--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
@@ -11,7 +11,7 @@ import EndpointFeatures as EndpointFeatures
 import EndpointTypes

 private string getACompatibleModelChecksum() {
-  adaptiveThreatModelingModels(result, "javascript", _, "atm-endpoint-scoring")
+  availableMlModels(result, "javascript", _, "atm-endpoint-scoring")
 }

 /**
@@ -54,7 +54,7 @@ DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpo
    // Use the largest entity smaller than the AST node limit, resolving ties using the entity that
    // appears first in the source archive.
    result =
-      rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
+      min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
        entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
        numAstNodes = getNumAstNodesInEntity(entity) and
        numAstNodes <= getMaxNumAstNodes() and
@@ -68,7 +68,7 @@ DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpo
    // Use the smallest entity, resolving ties using the entity that
    // appears first in the source archive.
    result =
-      rank[1](DatabaseFeatures::Entity entity, int numAstNodes, Location l |
+      min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
        entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
        numAstNodes = getNumAstNodesInEntity(entity) and
        l = entity.getLocation()
--- a/javascript/ql/lib/semmle/javascript/AST.qll
+++ b/javascript/ql/lib/semmle/javascript/AST.qll
@@ -207,7 +207,9 @@ private predicate isAmbientTopLevel(TopLevel tl) {
 */
 class TopLevel extends @toplevel, StmtContainer {
  /** Holds if this toplevel is minified. */
+  cached
  predicate isMinified() {
+    Stages::Ast::ref() and
    // file name contains 'min' (not as part of a longer word)
    this.getFile().getBaseName().regexpMatch(".*[^-._]*[-._]min([-._].*)?\\.\\w+")
    or
--- a/javascript/ql/lib/semmle/javascript/ApiGraphs.qll
+++ b/javascript/ql/lib/semmle/javascript/ApiGraphs.qll
@@ -413,7 +413,7 @@ module API {
        any(Type t).hasUnderlyingType(moduleName, exportName)
      } or
      MkSyntheticCallbackArg(DataFlow::Node src, int bound, DataFlow::InvokeNode nd) {
-        trackUseNode(src, true, bound).flowsTo(nd.getCalleeNode())
+        trackUseNode(src, true, bound, "").flowsTo(nd.getCalleeNode())
      }

    class TDef = MkModuleDef or TNonModuleDef;
@@ -530,7 +530,7 @@ module API {
     */
    private predicate argumentPassing(TApiNode base, int i, DataFlow::Node arg) {
      exists(DataFlow::Node use, DataFlow::SourceNode pred, int bound |
-        use(base, use) and pred = trackUseNode(use, _, bound)
+        use(base, use) and pred = trackUseNode(use, _, bound, "")
      |
        arg = pred.getAnInvocation().getArgument(i - bound)
        or
@@ -558,6 +558,32 @@ module API {
      nd = MkDef(rhs)
    }

+    /**
+     * Holds if `ref` is a read of a property described by `lbl` on `pred`, and
+     * `propDesc` is compatible with that property, meaning it is either the
+     * name of the property itself or the empty string.
+     */
+    pragma[noinline]
+    private predicate propertyRead(
+      DataFlow::SourceNode pred, string propDesc, string lbl, DataFlow::Node ref
+    ) {
+      ref = pred.getAPropertyRead() and
+      lbl = Label::memberFromRef(ref) and
+      (
+        lbl = Label::member(propDesc)
+        or
+        propDesc = ""
+      )
+      or
+      PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::valueProp()) and
+      lbl = Label::promised() and
+      (propDesc = Promises::valueProp() or propDesc = "")
+      or
+      PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::errorProp()) and
+      lbl = Label::promisedError() and
+      (propDesc = Promises::errorProp() or propDesc = "")
+    }
+
    /**
     * Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
     * `lbl` in the API graph.
@@ -569,26 +595,25 @@ module API {
        base = MkRoot() and
        ref = lbl.(EntryPoint).getAUse()
        or
+        // property reads
+        exists(DataFlow::SourceNode src, DataFlow::SourceNode pred, string propDesc |
+          use(base, src) and
+          pred = trackUseNode(src, false, 0, propDesc) and
+          propertyRead(pred, propDesc, lbl, ref) and
+          // `module.exports` is special: it is a use of a def-node, not a use-node,
+          // so we want to exclude it here
+          (base instanceof TNonModuleDef or base instanceof TUse)
+        )
+        or
+        // invocations
        exists(DataFlow::SourceNode src, DataFlow::SourceNode pred |
          use(base, src) and pred = trackUseNode(src)
        |
-          // `module.exports` is special: it is a use of a def-node, not a use-node,
-          // so we want to exclude it here
-          (base instanceof TNonModuleDef or base instanceof TUse) and
-          lbl = Label::memberFromRef(ref) and
-          ref = pred.getAPropertyRead()
-          or
          lbl = Label::instance() and
          ref = pred.getAnInstantiation()
          or
          lbl = Label::return() and
          ref = pred.getAnInvocation()
-          or
-          lbl = Label::promised() and
-          PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::valueProp())
-          or
-          lbl = Label::promisedError() and
-          PromiseFlow::loadStep(pred.getALocalUse(), ref, Promises::errorProp())
        )
        or
        exists(DataFlow::Node def, DataFlow::FunctionNode fn |
@@ -682,36 +707,58 @@ module API {
      )
    }

+    private import semmle.javascript.dataflow.TypeTracking
+
    /**
     * Gets a data-flow node to which `nd`, which is a use of an API-graph node, flows.
     *
-     * The flow from `nd` to that node may be inter-procedural. If `promisified` is `true`, the
-     * flow goes through a promisification, and `boundArgs` indicates how many arguments have been
-     * bound throughout the flow. (To ensure termination, we somewhat arbitrarily constrain the
-     * number of bound arguments to be at most ten.)
+     * The flow from `nd` to that node may be inter-procedural, and is further described by three
+     * flags:
+     *
+     *   - `promisified`: if `true`, the flow goes through a promisification;
+     *   - `boundArgs`: for function values, tracks how many arguments have been bound throughout
+     *     the flow. To ensure termination, we somewhat arbitrarily constrain the number of bound
+     *     arguments to be at most ten.
+     *   - `prop`: if non-empty, the flow is only guaranteed to preserve the value of this property,
+     *     and not necessarily the entire object.
     */
    private DataFlow::SourceNode trackUseNode(
-      DataFlow::SourceNode nd, boolean promisified, int boundArgs, DataFlow::TypeTracker t
+      DataFlow::SourceNode nd, boolean promisified, int boundArgs, string prop,
+      DataFlow::TypeTracker t
    ) {
      t.start() and
      use(_, nd) and
      result = nd and
      promisified = false and
-      boundArgs = 0
+      boundArgs = 0 and
+      prop = ""
      or
      exists(Promisify::PromisifyCall promisify |
-        trackUseNode(nd, false, boundArgs, t.continue()).flowsTo(promisify.getArgument(0)) and
+        trackUseNode(nd, false, boundArgs, prop, t.continue()).flowsTo(promisify.getArgument(0)) and
        promisified = true and
+        prop = "" and
        result = promisify
      )
      or
      exists(DataFlow::PartialInvokeNode pin, DataFlow::Node pred, int predBoundArgs |
-        trackUseNode(nd, promisified, predBoundArgs, t.continue()).flowsTo(pred) and
+        trackUseNode(nd, promisified, predBoundArgs, prop, t.continue()).flowsTo(pred) and
+        prop = "" and
        result = pin.getBoundFunction(pred, boundArgs - predBoundArgs) and
        boundArgs in [0 .. 10]
      )
      or
-      t = useStep(nd, promisified, boundArgs, result)
+      exists(DataFlow::Node pred, string preprop |
+        trackUseNode(nd, promisified, boundArgs, preprop, t.continue()).flowsTo(pred) and
+        promisified = false and
+        boundArgs = 0 and
+        SharedTypeTrackingStep::loadStoreStep(pred, result, prop)
+      |
+        prop = preprop
+        or
+        preprop = ""
+      )
+      or
+      t = useStep(nd, promisified, boundArgs, prop, result)
    }

    private import semmle.javascript.dataflow.internal.StepSummary
@@ -725,19 +772,19 @@ module API {
     */
    pragma[noopt]
    private DataFlow::TypeTracker useStep(
-      DataFlow::Node nd, boolean promisified, int boundArgs, DataFlow::Node res
+      DataFlow::Node nd, boolean promisified, int boundArgs, string prop, DataFlow::Node res
    ) {
      exists(DataFlow::TypeTracker t, StepSummary summary, DataFlow::SourceNode prev |
-        prev = trackUseNode(nd, promisified, boundArgs, t) and
+        prev = trackUseNode(nd, promisified, boundArgs, prop, t) and
        StepSummary::step(prev, res, summary) and
        result = t.append(summary)
      )
    }

    private DataFlow::SourceNode trackUseNode(
-      DataFlow::SourceNode nd, boolean promisified, int boundArgs
+      DataFlow::SourceNode nd, boolean promisified, int boundArgs, string prop
    ) {
-      result = trackUseNode(nd, promisified, boundArgs, DataFlow::TypeTracker::end())
+      result = trackUseNode(nd, promisified, boundArgs, prop, DataFlow::TypeTracker::end())
    }

    /**
@@ -745,7 +792,7 @@ module API {
     */
    cached
    DataFlow::SourceNode trackUseNode(DataFlow::SourceNode nd) {
-      result = trackUseNode(nd, false, 0)
+      result = trackUseNode(nd, false, 0, "")
    }

    private DataFlow::SourceNode trackDefNode(DataFlow::Node nd, DataFlow::TypeBackTracker t) {
--- a/javascript/ql/lib/semmle/javascript/Expr.qll
+++ b/javascript/ql/lib/semmle/javascript/Expr.qll
@@ -1561,6 +1561,14 @@ class URShiftExpr extends @urshift_expr, BinaryExpr {
 */
 class AddExpr extends @add_expr, BinaryExpr {
  override string getOperator() { result = "+" }
+
+  /**
+   * Gets the value of this string concatenation parsed as a regular expression, if possible.
+   *
+   * All string literals have an associated regular expression tree, provided they can
+   * be parsed without syntax errors.
+   */
+  RegExpTerm asRegExp() { this = result.getParent() }
 }

 /**
--- a/javascript/ql/lib/semmle/javascript/PackageExports.qll
+++ b/javascript/ql/lib/semmle/javascript/PackageExports.qll
@@ -141,6 +141,19 @@ private DataFlow::Node getAValueExportedByPackage() {
    result = unique( | | call.getCalleeNode().getAFunctionValue()).getAReturn()
  )
  or
+  // the exported value is a function that returns another import.
+  // ```JavaScript
+  // module.exports = function foo() {
+  //   return require("./other-module.js");
+  // }
+  // ```
+  exists(DataFlow::FunctionNode func, Module mod |
+    func = getAValueExportedByPackage().getABoundFunctionValue(_)
+  |
+    mod = func.getAReturn().getALocalSource().getEnclosingExpr().(Import).getImportedModule() and
+    result = getAnExportFromModule(mod)
+  )
+  or
  // *****
  // Common styles of transforming exported objects.
  // *****
--- a/javascript/ql/lib/semmle/javascript/Promises.qll
+++ b/javascript/ql/lib/semmle/javascript/Promises.qll
@@ -425,6 +425,14 @@ module PromiseFlow {
      prop = errorProp() and
      pred = call.getCallback(0).getAReturn()
    )
+    or
+    // return from `async` function
+    exists(DataFlow::FunctionNode f | f.getFunction().isAsync() |
+      // ordinary return
+      prop = valueProp() and
+      pred = f.getAReturn() and
+      succ = f.getReturnNode()
+    )
  }
 }

--- a/javascript/ql/lib/semmle/javascript/Regexp.qll
+++ b/javascript/ql/lib/semmle/javascript/Regexp.qll
@@ -7,6 +7,7 @@

 import javascript
 private import semmle.javascript.dataflow.InferredTypes
+private import semmle.javascript.internal.CachedStages

 /**
 * An element containing a regular expression term, that is, either
@@ -155,7 +156,7 @@ class RegExpTerm extends Locatable, @regexpterm {
    exists(RegExpParent parent | parent = this.getRootTerm().getParent() |
      parent instanceof RegExpLiteral
      or
-      parent.(StringLiteral).flow() instanceof RegExpPatternSource
+      parent.(Expr).flow() instanceof RegExpPatternSource
    )
  }

@@ -955,7 +956,9 @@ private predicate isUsedAsNonMatchObject(DataFlow::MethodCallNode call) {
 /**
 * Holds if `source` may be interpreted as a regular expression.
 */
+cached
 predicate isInterpretedAsRegExp(DataFlow::Node source) {
+  Stages::Taint::ref() and
  source.analyze().getAType() = TTString() and
  (
    // The first argument to an invocation of `RegExp` (with or without `new`).
@@ -1104,6 +1107,30 @@ private class StringRegExpPatternSource extends RegExpPatternSource {
  override RegExpTerm getRegExpTerm() { result = this.asExpr().(StringLiteral).asRegExp() }
 }

+/**
+ * A node whose string value may flow to a position where it is interpreted
+ * as a part of a regular expression.
+ */
+private class StringConcatRegExpPatternSource extends RegExpPatternSource {
+  DataFlow::Node parse;
+
+  StringConcatRegExpPatternSource() { this = regExpSource(parse) }
+
+  override DataFlow::Node getAParse() { result = parse }
+
+  override DataFlow::SourceNode getARegExpObject() {
+    exists(DataFlow::InvokeNode constructor |
+      constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and
+      parse = constructor.getArgument(0) and
+      result = constructor
+    )
+  }
+
+  override string getPattern() { result = getStringValue() }
+
+  override RegExpTerm getRegExpTerm() { result = asExpr().(AddExpr).asRegExp() }
+}
+
 module RegExp {
  /** Gets the string `"?"` used to represent a regular expression whose flags are unknown. */
  string unknownFlag() { result = "?" }
--- a/javascript/ql/lib/semmle/javascript/frameworks/AngularJS/AngularJSExpressions.qll
+++ b/javascript/ql/lib/semmle/javascript/frameworks/AngularJS/AngularJSExpressions.qll
@@ -837,7 +837,7 @@ class NgDataFlowNode extends TNode {
 private predicate fileIsImplicitlyAngularJS(HTML::HtmlFile file) {
  // The file contains ng-* attributes.
  exists(HTML::Attribute attrib |
-    attrib.getName().regexpMatch("ng-.*") and
+    attrib.getName().matches("ng-%") and
    attrib.getFile() = file
  ) and
  // But does not contain the ng-app root element, implying that file is
--- a/javascript/ql/lib/semmle/javascript/frameworks/ClientRequests.qll
+++ b/javascript/ql/lib/semmle/javascript/frameworks/ClientRequests.qll
@@ -408,12 +408,9 @@ module ClientRequest {
   */
  class GotUrlRequest extends ClientRequest::Range {
    GotUrlRequest() {
-      exists(string moduleName, DataFlow::SourceNode callee | this = callee.getACall() |
-        moduleName = "got" and
-        (
-          callee = DataFlow::moduleImport(moduleName) or
-          callee = DataFlow::moduleMember(moduleName, "stream")
-        )
+      exists(API::Node callee, API::Node got | this = callee.getACall() |
+        got = [API::moduleImport("got"), API::moduleImport("got").getMember("extend").getReturn()] and
+        callee = [got, got.getMember(["stream", "get", "post", "put", "patch", "head", "delete"])]
      )
    }

@@ -792,7 +789,7 @@ module ClientRequest {
        cmd.getACommandArgument()
            .(StringOps::ConcatenationRoot)
            .getConstantStringParts()
-            .regexpMatch("curl .*")
+            .matches("curl %")
      )
    }

--- a/javascript/ql/lib/semmle/javascript/frameworks/NodeJSLib.qll
+++ b/javascript/ql/lib/semmle/javascript/frameworks/NodeJSLib.qll
@@ -556,7 +556,7 @@ module NodeJSLib {
    }

    override DataFlow::Node getADataNode() {
-      if methodName.regexpMatch(".*Sync")
+      if methodName.matches("%Sync")
      then result = this
      else
        exists(int i, string paramName | fsDataParam(methodName, i, paramName) |
@@ -724,9 +724,9 @@ module NodeJSLib {
      not result = this.getParameter(0).getARhs() and
      // fork/spawn and all sync methos always has options as the last argument
      if
-        methodName.regexpMatch("fork.*") or
-        methodName.regexpMatch("spawn.*") or
-        methodName.regexpMatch(".*Sync")
+        methodName.matches("fork%") or
+        methodName.matches("spawn%") or
+        methodName.matches("%Sync")
      then result = this.getLastArgument()
      else
        // the rest (exec/execFile) has the options argument as their second last.
--- a/javascript/ql/lib/semmle/javascript/internal/CachedStages.qll
+++ b/javascript/ql/lib/semmle/javascript/internal/CachedStages.qll
@@ -260,6 +260,8 @@ module Stages {
      exists(RemoteFlowSource r)
      or
      exists(Exports::getALibraryInputParameter())
+      or
+      any(RegExpTerm t).isUsedAsRegExp()
    }
  }
 }
--- a/javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll
+++ b/javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll
@@ -0,0 +1,306 @@
+/**
+ * Provides predicates for reasoning about bad tag filter vulnerabilities.
+ */
+
+import performance.ReDoSUtil
+
+/**
+ * A module for determining if a regexp matches a given string,
+ * and reasoning about which capture groups are filled by a given string.
+ */
+private module RegexpMatching {
+  /**
+   * A class to test whether a regular expression matches a string.
+   * Override this class and extend `test`/`testWithGroups` to configure which strings should be tested for acceptance by this regular expression.
+   * The result can afterwards be read from the `matches` predicate.
+   *
+   * Strings in the `testWithGroups` predicate are also tested for which capture groups are filled by the given string.
+   * The result is available in the `fillsCaptureGroup` predicate.
+   */
+  abstract class MatchedRegExp extends RegExpTerm {
+    MatchedRegExp() { this.isRootTerm() }
+
+    /**
+     * Holds if it should be tested whether this regular expression matches `str`.
+     *
+     * If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
+     * E.g. a regular expression `/foo$/` will match any string that ends with "foo",
+     * but if `ignorePrefix` is true, it will only match "foo".
+     */
+    predicate test(string str, boolean ignorePrefix) {
+      none() // maybe overriden in subclasses
+    }
+
+    /**
+     * Same as `test(..)`, but where the `fillsCaptureGroup` predicate afterwards tells which capture groups were filled by the given string.
+     */
+    predicate testWithGroups(string str, boolean ignorePrefix) {
+      none() // maybe overriden in subclasses
+    }
+
+    /**
+     * Holds if this RegExp matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
+     */
+    final predicate matches(string str) {
+      exists(State state | state = getAState(this, str.length() - 1, str, _) |
+        epsilonSucc*(state) = Accept(_)
+      )
+    }
+
+    /**
+     * Holds if matching `str` may fill capture group number `g`.
+     * Only holds if `str` is in the `testWithGroups` predicate.
+     */
+    final predicate fillsCaptureGroup(string str, int g) {
+      exists(State s |
+        s = getAStateThatReachesAccept(this, _, str, _) and
+        g = group(s.getRepr())
+      )
+    }
+  }
+
+  /**
+   * Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
+   * The regular expression is modelled as a non-deterministic finite automaton,
+   * the regular expression can therefore be in multiple states after matching a character.
+   *
+   * It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
+   */
+  private State getAState(MatchedRegExp reg, int i, string str, boolean ignorePrefix) {
+    // start state, the -1 position before any chars have been matched
+    i = -1 and
+    (
+      reg.test(str, ignorePrefix)
+      or
+      reg.testWithGroups(str, ignorePrefix)
+    ) and
+    result.getRepr().getRootTerm() = reg and
+    isStartState(result)
+    or
+    // recursive case
+    result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
+  }
+
+  /**
+   * Gets the next state after the `prev` state from `reg`.
+   * `prev` is the state after matching `fromIndex` chars in `str`,
+   * and the result is the state after matching `toIndex` chars in `str`.
+   *
+   * This predicate is used as a step relation in the forwards search (`getAState`),
+   * and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
+   */
+  private State getAStateAfterMatching(
+    MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
+  ) {
+    // the basic recursive case - outlined into a noopt helper to make performance work out.
+    result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
+    or
+    // we can skip past word boundaries if the next char is a non-word char.
+    fromIndex = toIndex and
+    prev.getRepr() instanceof RegExpWordBoundary and
+    prev = getAState(reg, toIndex, str, ignorePrefix) and
+    after(prev.getRepr()) = result and
+    str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
+  }
+
+  pragma[noopt]
+  private State getAStateAfterMatchingAux(
+    MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
+  ) {
+    prev = getAState(reg, fromIndex, str, ignorePrefix) and
+    fromIndex = toIndex - 1 and
+    exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
+    not discardedPrefixStep(prev, result, ignorePrefix)
+  }
+
+  /** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
+  private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
+    prev = mkMatch(any(RegExpRoot r)) and
+    ignorePrefix = true and
+    next = prev
+  }
+
+  // The `deltaClosed` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
+  private predicate specializedDeltaClosed(State prev, string char, State next) {
+    deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
+  }
+
+  // The `getAnInputSymbolMatching` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
+  pragma[noinline]
+  private InputSymbol specializedGetAnInputSymbolMatching(string char) {
+    exists(string s, MatchedRegExp r |
+      r.test(s, _)
+      or
+      r.testWithGroups(s, _)
+    |
+      char = s.charAt(_)
+    ) and
+    result = getAnInputSymbolMatching(char)
+  }
+
+  /**
+   * Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
+   * Starts with an accepting state as found by `getAState` and searches backwards
+   * to the start state through the reachable states (as found by `getAState`).
+   *
+   * This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
+   * and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
+   * The result state is therefore always on a valid path where `reg` accepts `str`.
+   *
+   * This predicate is only used to find which capture groups a regular expression has filled,
+   * and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
+   */
+  private State getAStateThatReachesAccept(
+    MatchedRegExp reg, int i, string str, boolean ignorePrefix
+  ) {
+    // base case, reaches an accepting state from the last state in `getAState(..)`
+    reg.testWithGroups(str, ignorePrefix) and
+    i = str.length() - 1 and
+    result = getAState(reg, i, str, ignorePrefix) and
+    epsilonSucc*(result) = Accept(_)
+    or
+    // recursive case. `next` is the next state to be matched after matching `prev`.
+    // this predicate is doing a backwards search, so `prev` is the result we are looking for.
+    exists(State next, State prev, int fromIndex, int toIndex |
+      next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
+      next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
+      i = fromIndex and
+      result = prev
+    )
+  }
+
+  /** Gets the capture group number that `term` belongs to. */
+  private int group(RegExpTerm term) {
+    exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
+  }
+}
+
+/** A class to test whether a regular expression matches certain HTML tags. */
+class HTMLMatchingRegExp extends RegexpMatching::MatchedRegExp {
+  HTMLMatchingRegExp() {
+    // the regexp must mention "<" and ">" explicitly.
+    forall(string angleBracket | angleBracket = ["<", ">"] |
+      any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
+        this
+    )
+  }
+
+  override predicate testWithGroups(string str, boolean ignorePrefix) {
+    ignorePrefix = true and
+    str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"]
+  }
+
+  override predicate test(string str, boolean ignorePrefix) {
+    ignorePrefix = true and
+    str =
+      [
+        "<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
+        "<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>",
+        "<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>",
+        "<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
+        "<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
+        "<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
+      ]
+  }
+}
+
+/**
+ * Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
+ *
+ * When adding a new case to this predicate, make sure the test strings used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
+ */
+predicate isBadRegexpFilter(HTMLMatchingRegExp regexp, string msg) {
+  // CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
+  regexp.matches("<!-- foo -->") and
+  regexp.matches("<!-- foo --!>") and
+  exists(int a, int b | a != b |
+    regexp.fillsCaptureGroup("<!-- foo -->", a) and
+    // <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
+    regexp.fillsCaptureGroup("<!-- foo --!>", b) and
+    not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
+    msg =
+      "Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+        + a + " and comments ending with --!> are matched with capture group " +
+        strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
+        "."
+  )
+  or
+  // CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
+  exists(int group, int other |
+    group != other and
+    regexp.fillsCaptureGroup("<!-- foo -->", group) and
+    regexp.fillsCaptureGroup("<foo>", other) and
+    not regexp.matches("<!-- foo --!>") and
+    not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
+    not regexp.fillsCaptureGroup("<!- foo ->", group) and
+    not regexp.fillsCaptureGroup("<foo>", group) and
+    not regexp.fillsCaptureGroup("<script>", group) and
+    msg =
+      "This regular expression only parses --> (capture group " + group +
+        ") and not --!> as a HTML comment end tag."
+  )
+  or
+  regexp.matches("<!-- foo -->") and
+  not regexp.matches("<!-- foo\n -->") and
+  not regexp.matches("<!- foo ->") and
+  not regexp.matches("<foo>") and
+  not regexp.matches("<script>") and
+  msg = "This regular expression does not match comments containing newlines."
+  or
+  regexp.matches("<script>foo</script>") and
+  regexp.matches("<script src=\"foo\"></script>") and
+  not regexp.matches("<foo ></foo>") and
+  (
+    not regexp.matches("<script \n>foo</script>") and
+    msg = "This regular expression matches <script></script>, but not <script \\n></script>"
+    or
+    not regexp.matches("<script >foo\n</script>") and
+    msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
+  )
+  or
+  regexp.matches("<script>foo</script>") and
+  regexp.matches("<script src=\"foo\"></script>") and
+  not regexp.matches("<script src='foo'></script>") and
+  not regexp.matches("<foo>") and
+  msg = "This regular expression does not match script tags where the attribute uses single-quotes."
+  or
+  regexp.matches("<script>foo</script>") and
+  regexp.matches("<script src='foo'></script>") and
+  not regexp.matches("<script src=\"foo\"></script>") and
+  not regexp.matches("<foo>") and
+  msg = "This regular expression does not match script tags where the attribute uses double-quotes."
+  or
+  regexp.matches("<script>foo</script>") and
+  regexp.matches("<script src='foo'></script>") and
+  not regexp.matches("<script\tsrc='foo'></script>") and
+  not regexp.matches("<foo>") and
+  not regexp.matches("<foo src=\"foo\"></foo>") and
+  msg = "This regular expression does not match script tags where tabs are used between attributes."
+  or
+  regexp.matches("<script>foo</script>") and
+  not RegExpFlags::isIgnoreCase(regexp) and
+  not regexp.matches("<foo>") and
+  not regexp.matches("<foo ></foo>") and
+  (
+    not regexp.matches("<SCRIPT>foo</SCRIPT>") and
+    msg = "This regular expression does not match upper case <SCRIPT> tags."
+    or
+    not regexp.matches("<sCrIpT>foo</ScRiPt>") and
+    regexp.matches("<SCRIPT>foo</SCRIPT>") and
+    msg = "This regular expression does not match mixed case <sCrIpT> tags."
+  )
+  or
+  regexp.matches("<script src=\"foo\"></script>") and
+  not regexp.matches("<foo>") and
+  not regexp.matches("<foo ></foo>") and
+  (
+    not regexp.matches("<script src=\"foo\">foo</script >") and
+    msg = "This regular expression does not match script end tags like </script >."
+    or
+    not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
+    msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
+    or
+    not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
+    msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
+  )
+}
--- a/javascript/ql/lib/semmle/javascript/security/dataflow/XssThroughDomCustomizations.qll
+++ b/javascript/ql/lib/semmle/javascript/security/dataflow/XssThroughDomCustomizations.qll
@@ -22,8 +22,8 @@ module XssThroughDom {
   */
  bindingset[result]
  string unsafeAttributeName() {
-    result.regexpMatch("data-.*") or
-    result.regexpMatch("aria-.*") or
+    result.matches("data-%") or
+    result.matches("aria-%") or
    result = ["name", "value", "title", "alt"]
  }

--- a/javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll
+++ b/javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
-        escape.getValue() = escape.getValue().toLowerCase() and
-        classEscapeMatches(escape.getValue(), char)
+      exists(string charClass | isEscapeClass(child, charClass) |
+        charClass.toLowerCase() = charClass and
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        charClass.toUpperCase() = charClass and
+        not classEscapeMatches(charClass, char)
      )
    )
  }
@@ -409,10 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+      exists(string charClass | isEscapeClass(child, charClass) |
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -466,33 +466,36 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["d", "s", "w"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }

-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }

    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -501,26 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["D", "S", "W"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }

    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -542,7 +548,7 @@ private State before(RegExpTerm t) { result = Match(t, 0) }
 /**
 * Gets a state the NFA may be in after matching `t`.
 */
-private State after(RegExpTerm t) {
+State after(RegExpTerm t) {
  exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
  or
  exists(RegExpSequence seq, int i | t = seq.getChild(i) |
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
@@ -671,7 +677,7 @@ RegExpRoot getRoot(RegExpTerm term) {
 /**
 * A state in the NFA.
 */
-private newtype TState =
+newtype TState =
  /**
   * A state representing that the NFA is about to match a term.
   * `i` is used to index into multi-char literals.
@@ -801,29 +807,26 @@ InputSymbol getAnInputSymbolMatching(string char) {
  result = Any()
 }

+/**
+ * Holds if `state` is a start state: `mkMatch` of a `RegExpRoot`, or a state after a `RegExpCaret`.
+ */
+predicate isStartState(State state) {
+  state = mkMatch(any(RegExpRoot r))
+  or
+  exists(RegExpCaret car | state = after(car))
+}
+
 /**
 * Predicates for constructing a prefix string that leads to a given state.
 */
 private module PrefixConstruction {
-  /**
-   * Holds if `state` starts the string matched by the regular expression.
-   */
-  private predicate isStartState(State state) {
-    state instanceof StateInPumpableRegexp and
-    (
-      state = Match(any(RegExpRoot r), _)
-      or
-      exists(RegExpCaret car | state = after(car))
-    )
-  }
-
  /**
   * Holds if `state` is the textually last start state for the regular expression.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot root |
      state =
-        max(State s, Location l |
+        max(StateInPumpableRegexp s, Location l |
          isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
        |
          s
--- a/javascript/ql/lib/semmle/javascript/security/performance/RegExpTreeView.qll
+++ b/javascript/ql/lib/semmle/javascript/security/performance/RegExpTreeView.qll
@@ -6,6 +6,14 @@

 import javascript

+/**
+ * Holds if `term` is an escape class such as `\d`.
+ * `clazz` is the value of that escape, i.e. which character class it represents, e.g. "d" for `\d`.
+ */
+predicate isEscapeClass(RegExpTerm term, string clazz) {
+  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
+}
+
 /**
 * Holds if the regular expression should not be considered.
 *
@@ -20,12 +28,7 @@ module RegExpFlags {
  /**
   * Holds if `root` has the `i` flag for case-insensitive matching.
   */
-  predicate isIgnoreCase(RegExpTerm root) {
-    root.isRootTerm() and
-    exists(DataFlow::RegExpCreationNode node | node.getRoot() = root |
-      RegExp::isIgnoreCase(node.getFlags())
-    )
-  }
+  predicate isIgnoreCase(RegExpTerm root) { RegExp::isIgnoreCase(getFlags(root)) }

  /**
   * Gets the flags for `root`, or the empty string if `root` has no flags.
@@ -38,15 +41,14 @@ module RegExpFlags {
      not exists(node.getFlags()) and
      result = ""
    )
+    or
+    exists(RegExpPatternSource source | source.getRegExpTerm() = root |
+      result = source.getARegExpObject().(DataFlow::RegExpCreationNode).getFlags()
+    )
  }

  /**
   * Holds if `root` has the `s` flag for multi-line matching.
   */
-  predicate isDotAll(RegExpTerm root) {
-    root.isRootTerm() and
-    exists(DataFlow::RegExpCreationNode node | node.getRoot() = root |
-      RegExp::isDotAll(node.getFlags())
-    )
-  }
+  predicate isDotAll(RegExpTerm root) { RegExp::isDotAll(getFlags(root)) }
 }
--- a/javascript/ql/lib/semmlecode.javascript.dbscheme
+++ b/javascript/ql/lib/semmlecode.javascript.dbscheme
@@ -855,7 +855,7 @@ regexpterm (unique int id: @regexpterm,
        int idx: int ref,
        varchar(900) tostring: string ref);
        
-@regexpparent = @regexpterm | @regexp_literal | @string_literal;
+@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr;

 case @regexpterm.kind of
   0 = @regexp_alt
--- a/javascript/ql/src/Expressions/UnknownDirective.ql
+++ b/javascript/ql/src/Expressions/UnknownDirective.ql
@@ -18,5 +18,5 @@ where
  // but exclude attribute top-levels: `<a href="javascript:'some-attribute-string'">`
  not d.getParent() instanceof CodeInAttribute and
  // exclude babel generated directives like "@babel/helpers - typeof".
-  not d.getDirectiveText().prefix(14) = "@babel/helpers"
+  not d.getDirectiveText().matches("@babel/helpers%")
 select d, "Unknown directive: '" + truncate(d.getDirectiveText(), 20, " ... (truncated)") + "'."
--- a/javascript/ql/src/Security/CWE-116/BadTagFilter.qhelp
+++ b/javascript/ql/src/Security/CWE-116/BadTagFilter.qhelp
@@ -0,0 +1,54 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+It is possible to match some single HTML tags using regular expressions (parsing general HTML using 
+regular expressions is impossible). However, if the regular expression is not written well it might 
+be possible to circumvent it, which can lead to cross-site scripting or other security issues.
+</p>
+<p>
+Some of these mistakes are caused by browsers having very forgiving HTML parsers, which
+will often render invalid HTML containing syntax errors. 
+Regular expressions that attempt to match HTML should also recognize tags containing such syntax errors.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Use a well-tested sanitization or parser library if at all possible. These libraries are much more
+likely to handle corner cases correctly than a custom implementation.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example attempts to filter out all <code>&lt;script&gt;</code> tags.
+</p>
+
+<sample src="examples/BadTagFilter.js" />
+
+<p>
+The above sanitizer does not filter out all <code>&lt;script&gt;</code> tags. 
+Browsers will not only accept <code>&lt;/script&gt;</code> as script end tags, but also tags such as <code>&lt;/script foo="bar"&gt;</code> even though it is a parser error.
+This means that an attack string such as <code>&lt;script&gt;alert(1)&lt;/script foo="bar"&gt;</code> will not be filtered by 
+the function, and <code>alert(1)</code> will be executed by a browser if the string is rendered as HTML.
+</p>
+
+<p>
+Other corner cases include that HTML comments can end with <code>--!&gt;</code>, 
+and that HTML tag names can contain upper case characters.
+</p>
+</example>
+
+<references>
+<li>Securitum: <a href="https://research.securitum.com/the-curious-case-of-copy-paste/">The Curious Case of Copy &amp; Paste</a>.</li>
+<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags#answer-1732454">You can't parse [X]HTML with regex</a>.</li>
+<li>HTML Standard: <a href="https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state">Comment end bang state</a>.</li>
+<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/25559999/why-arent-browsers-strict-about-html">Why aren't browsers strict about HTML?</a>.</li>
+</references>
+</qhelp>
+
+
--- a/javascript/ql/src/Security/CWE-116/BadTagFilter.ql
+++ b/javascript/ql/src/Security/CWE-116/BadTagFilter.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Bad HTML filtering regexp
+ * @description Matching HTML tags using regular expressions is hard to do right, and can easily lead to security issues.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision high
+ * @id js/bad-tag-filter
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-116
+ *       external/cwe/cwe-020
+ */
+
+import semmle.javascript.security.BadTagFilterQuery
+
+from HTMLMatchingRegExp regexp, string msg
+where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
+select regexp, msg
--- a/javascript/ql/src/Security/CWE-116/examples/BadTagFilter.js
+++ b/javascript/ql/src/Security/CWE-116/examples/BadTagFilter.js
@@ -0,0 +1,8 @@
+function filterScript(html) {
+    var scriptRegex = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
+    var match;
+    while ((match = scriptRegex.exec(html)) !== null) {
+        html = html.replace(match[0], match[1]);
+    }
+    return html;
+}
--- a/javascript/ql/test/ApiGraphs/async-await/tst.ts
+++ b/javascript/ql/test/ApiGraphs/async-await/tst.ts
@@ -0,0 +1,9 @@
+import { readFile } from 'fs/promises';
+
+async function readFileUtf8(path: string): Promise<string> {
+    return readFile(path, { encoding: 'utf8' });
+}
+
+async function test(path: string) {
+    await readFileUtf8(path); /* use (promised (return (member readFile (member exports (module fs/promises))))) */
+}
--- a/javascript/ql/test/library-tests/RangeAnalysis/DeadBranch.ql
+++ b/javascript/ql/test/library-tests/RangeAnalysis/DeadBranch.ql
@@ -4,9 +4,9 @@ class AssertionComment extends LineComment {
  boolean isOK;

  AssertionComment() {
-    isOK = true and this.getText().trim().regexpMatch("OK.*")
+    isOK = true and this.getText().trim().matches("OK%")
    or
-    isOK = false and this.getText().trim().regexpMatch("NOT OK.*")
+    isOK = false and this.getText().trim().matches("NOT OK%")
  }

  ConditionGuardNode getAGuardNode() {
--- a/javascript/ql/test/library-tests/StringConcatenation/ClassContainsTwo.ql
+++ b/javascript/ql/test/library-tests/StringConcatenation/ClassContainsTwo.ql
@@ -2,7 +2,7 @@ import javascript

 // Select all expressions whose string value contains the word "two"
 predicate containsTwo(DataFlow::Node node) {
-  node.getStringValue().regexpMatch(".*two.*")
+  node.getStringValue().matches("%two%")
  or
  containsTwo(node.getAPredecessor())
  or
--- a/javascript/ql/test/library-tests/StringConcatenation/ContainsTwo.ql
+++ b/javascript/ql/test/library-tests/StringConcatenation/ContainsTwo.ql
@@ -2,7 +2,7 @@ import javascript

 // Select all expressions whose string value contains the word "two"
 predicate containsTwo(DataFlow::Node node) {
-  node.getStringValue().regexpMatch(".*two.*")
+  node.getStringValue().matches("%two%")
  or
  containsTwo(node.getAPredecessor())
  or
--- a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected
@@ -28,6 +28,7 @@
 | jsonschema.js:15:23:15:29 | (a?a?)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a?a?)*b |
 | jsonschema.js:20:18:20:24 | (a?a?)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a?a?)*b |
 | lib/closure.js:4:6:4:7 | u* | Strings with many repetitions of 'u' can start matching anywhere after the start of the preceeding u*o |
+| lib/indirect.js:2:6:2:7 | k* | Strings with many repetitions of 'k' can start matching anywhere after the start of the preceeding k*h |
 | lib/lib.js:1:15:1:16 | a* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding a*b |
 | lib/lib.js:8:3:8:4 | f* | Strings with many repetitions of 'f' can start matching anywhere after the start of the preceeding f*g |
 | lib/moduleLib/moduleLib.js:2:3:2:4 | a* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding a*b |
@@ -512,3 +513,8 @@
 | tst.js:384:15:384:26 | ([AB]\|[ab])* | Strings with many repetitions of 'A' can start matching anywhere after the start of the preceeding ([AB]\|[ab])*C |
 | tst.js:385:14:385:25 | ([DE]\|[de])* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ([DE]\|[de])*F |
 | tst.js:388:14:388:20 | (a\|aa)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a\|aa)*$ |
+| tst.js:391:6:394:5 | (a\|aa)* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a\|aa)*b$ |
+| tst.js:398:6:398:12 | (c\|cc)* | Strings with many repetitions of 'c' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
+| tst.js:399:6:399:12 | (d\|dd)* | Strings with many repetitions of 'd' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
+| tst.js:400:6:401:1 | (e\|ee)* | Strings with many repetitions of 'e' can start matching anywhere after the start of the preceeding ((c\|cc)*\|(d\|dd)*\|(e\|ee)*)f$ |
+| tst.js:404:6:405:7 | (g\|gg)* | Strings with many repetitions of 'g' can start matching anywhere after the start of the preceeding (g\|gg)*h$ |
--- a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.expected
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.expected
@@ -3,6 +3,10 @@ nodes
 | lib/closure.js:3:21:3:21 | x |
 | lib/closure.js:4:16:4:16 | x |
 | lib/closure.js:4:16:4:16 | x |
+| lib/indirect.js:1:32:1:32 | x |
+| lib/indirect.js:1:32:1:32 | x |
+| lib/indirect.js:2:16:2:16 | x |
+| lib/indirect.js:2:16:2:16 | x |
 | lib/lib.js:3:28:3:31 | name |
 | lib/lib.js:3:28:3:31 | name |
 | lib/lib.js:4:14:4:17 | name |
@@ -170,6 +174,10 @@ edges
 | lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x |
 | lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x |
 | lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x |
+| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
+| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
+| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
+| lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x |
 | lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name |
 | lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name |
 | lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name |
@@ -329,6 +337,7 @@ edges
 | polynomial-redos.js:123:13:123:20 | replaced | polynomial-redos.js:123:3:123:20 | result |
 #select
 | lib/closure.js:4:5:4:17 | /u*o/.test(x) | lib/closure.js:3:21:3:21 | x | lib/closure.js:4:16:4:16 | x | This $@ that depends on $@ may run slow on strings with many repetitions of 'u'. | lib/closure.js:4:6:4:7 | u* | regular expression | lib/closure.js:3:21:3:21 | x | library input |
+| lib/indirect.js:2:5:2:17 | /k*h/.test(x) | lib/indirect.js:1:32:1:32 | x | lib/indirect.js:2:16:2:16 | x | This $@ that depends on $@ may run slow on strings with many repetitions of 'k'. | lib/indirect.js:2:6:2:7 | k* | regular expression | lib/indirect.js:1:32:1:32 | x | library input |
 | lib/lib.js:4:2:4:18 | regexp.test(name) | lib/lib.js:3:28:3:31 | name | lib/lib.js:4:14:4:17 | name | This $@ that depends on $@ may run slow on strings with many repetitions of 'a'. | lib/lib.js:1:15:1:16 | a* | regular expression | lib/lib.js:3:28:3:31 | name | library input |
 | lib/lib.js:8:2:8:17 | /f*g/.test(name) | lib/lib.js:7:19:7:22 | name | lib/lib.js:8:13:8:16 | name | This $@ that depends on $@ may run slow on strings with many repetitions of 'f'. | lib/lib.js:8:3:8:4 | f* | regular expression | lib/lib.js:7:19:7:22 | name | library input |
 | lib/moduleLib/moduleLib.js:2:2:2:17 | /a*b/.test(name) | lib/moduleLib/moduleLib.js:1:28:1:31 | name | lib/moduleLib/moduleLib.js:2:13:2:16 | name | This $@ that depends on $@ may run slow on strings with many repetitions of 'a'. | lib/moduleLib/moduleLib.js:2:3:2:4 | a* | regular expression | lib/moduleLib/moduleLib.js:1:28:1:31 | name | library input |
--- a/javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected
@@ -183,3 +183,8 @@
 | tst.js:385:14:385:25 | ([DE]\|[de])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
 | tst.js:387:27:387:33 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
 | tst.js:388:14:388:20 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
+| tst.js:391:6:394:5 | (a\|aa)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
+| tst.js:398:6:398:12 | (c\|cc)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'cc'. |
+| tst.js:399:6:399:12 | (d\|dd)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'dd'. |
+| tst.js:400:6:401:1 | (e\|ee)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ee'. |
+| tst.js:404:6:405:7 | (g\|gg)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'gg'. |
--- a/javascript/ql/test/query-tests/Performance/ReDoS/lib/indirect.js
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/lib/indirect.js
@@ -0,0 +1,3 @@
+module.exports.foo = function (x) {
+    /k*h/.test(x); // NOT OK
+}
--- a/javascript/ql/test/query-tests/Performance/ReDoS/lib/lib.js
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/lib/lib.js
@@ -12,4 +12,8 @@ if (typeof define !== 'undefined' && define.amd) { // AMD
    define([], function () {return bar});
 }

-module.exports.closure = require("./closure")
+module.exports.closure = require("./closure")
+
+module.exports.func = function (conf) {
+	return require("./indirect")
+}
--- a/javascript/ql/test/query-tests/Performance/ReDoS/tst.js
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/tst.js
@@ -385,4 +385,21 @@ var good47 = /([AB]|[ab])*C/;
 var bad92 = /([DE]|[de])*F/i;

 var bad93 = /(?<=^v?|\sv?)(a|aa)*$/; 
-var bad94 = /(a|aa)*$/;
+var bad94 = /(a|aa)*$/;
+
+var bad95 = new RegExp(
+    "(a" + 
+    "|" + 
+    "aa)*" + 
+    "b$"
+);
+
+var bad96 = new RegExp("(" + 
+    "(c|cc)*|" + 
+    "(d|dd)*|" +
+    "(e|ee)*" +
+")f$");
+
+var bad97 = new RegExp(
+    "(g|gg" + 
+    ")*h$");
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
@@ -15,11 +15,12 @@
 | tst-IncompleteHostnameRegExp.js:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
 | tst-IncompleteHostnameRegExp.js:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
 | tst-IncompleteHostnameRegExp.js:40:3:40:29 | ^https?:\\/\\/api.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
-| tst-IncompleteHostnameRegExp.js:41:42:41:70 | ^https?://.+\\.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
+| tst-IncompleteHostnameRegExp.js:41:42:41:48 | ^https?://.+\\.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
 | tst-IncompleteHostnameRegExp.js:43:3:43:32 | ^https:\\/\\/[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
 | tst-IncompleteHostnameRegExp.js:44:32:44:45 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
 | tst-IncompleteHostnameRegExp.js:44:47:44:62 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
 | tst-IncompleteHostnameRegExp.js:44:64:44:79 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
-| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
-| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
+| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
+| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
+| tst-IncompleteHostnameRegExp.js:53:14:53:35 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
 | tst-IncompleteHostnameRegExp.js:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:59:2:59:32 | /^(foo. ... ever)$/ | here |
--- a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js
@@ -50,7 +50,7 @@
 	var primary = 'example.com$';
 	new RegExp('test.' + primary); // NOT OK, but not detected

-	new RegExp('test.' + 'example.com$'); // NOT OK, but not detected
+	new RegExp('test.' + 'example.com$'); // NOT OK

 	new RegExp('^http://test\.example.com'); // NOT OK, but flagged by js/useless-regexp-character-escape

--- a/javascript/ql/test/query-tests/Security/CWE-079/DomBasedXss/XssWithAdditionalSources.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-079/DomBasedXss/XssWithAdditionalSources.expected
@@ -825,6 +825,17 @@ nodes
 | xmlRequest.js:9:28:9:31 | json |
 | xmlRequest.js:9:28:9:39 | json.message |
 | xmlRequest.js:9:28:9:39 | json.message |
+| xmlRequest.js:20:11:20:48 | resp |
+| xmlRequest.js:20:18:20:48 | await g ... rl }}") |
+| xmlRequest.js:20:24:20:48 | got.get ... rl }}") |
+| xmlRequest.js:20:24:20:48 | got.get ... rl }}") |
+| xmlRequest.js:21:11:21:38 | json |
+| xmlRequest.js:21:18:21:38 | JSON.pa ... p.body) |
+| xmlRequest.js:21:29:21:32 | resp |
+| xmlRequest.js:21:29:21:37 | resp.body |
+| xmlRequest.js:22:24:22:27 | json |
+| xmlRequest.js:22:24:22:35 | json.message |
+| xmlRequest.js:22:24:22:35 | json.message |
 edges
 | addEventListener.js:1:43:1:47 | event | addEventListener.js:2:20:2:24 | event |
 | addEventListener.js:1:43:1:47 | event | addEventListener.js:2:20:2:24 | event |
@@ -1545,7 +1556,18 @@ edges
 | xmlRequest.js:8:31:8:46 | xhr.responseText | xmlRequest.js:8:20:8:47 | JSON.pa ... seText) |
 | xmlRequest.js:9:28:9:31 | json | xmlRequest.js:9:28:9:39 | json.message |
 | xmlRequest.js:9:28:9:31 | json | xmlRequest.js:9:28:9:39 | json.message |
+| xmlRequest.js:20:11:20:48 | resp | xmlRequest.js:21:29:21:32 | resp |
+| xmlRequest.js:20:18:20:48 | await g ... rl }}") | xmlRequest.js:20:11:20:48 | resp |
+| xmlRequest.js:20:24:20:48 | got.get ... rl }}") | xmlRequest.js:20:18:20:48 | await g ... rl }}") |
+| xmlRequest.js:20:24:20:48 | got.get ... rl }}") | xmlRequest.js:20:18:20:48 | await g ... rl }}") |
+| xmlRequest.js:21:11:21:38 | json | xmlRequest.js:22:24:22:27 | json |
+| xmlRequest.js:21:18:21:38 | JSON.pa ... p.body) | xmlRequest.js:21:11:21:38 | json |
+| xmlRequest.js:21:29:21:32 | resp | xmlRequest.js:21:29:21:37 | resp.body |
+| xmlRequest.js:21:29:21:37 | resp.body | xmlRequest.js:21:18:21:38 | JSON.pa ... p.body) |
+| xmlRequest.js:22:24:22:27 | json | xmlRequest.js:22:24:22:35 | json.message |
+| xmlRequest.js:22:24:22:27 | json | xmlRequest.js:22:24:22:35 | json.message |
 #select
 | jwt.js:6:14:6:20 | decoded | jwt.js:4:36:4:39 | data | jwt.js:6:14:6:20 | decoded | Cross-site scripting vulnerability due to $@. | jwt.js:4:36:4:39 | data | user-provided value |
 | typeahead.js:10:16:10:18 | loc | typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc | Cross-site scripting vulnerability due to $@. | typeahead.js:9:28:9:30 | loc | user-provided value |
 | xmlRequest.js:9:28:9:39 | json.message | xmlRequest.js:8:31:8:46 | xhr.responseText | xmlRequest.js:9:28:9:39 | json.message | Cross-site scripting vulnerability due to $@. | xmlRequest.js:8:31:8:46 | xhr.responseText | user-provided value |
+| xmlRequest.js:22:24:22:35 | json.message | xmlRequest.js:20:24:20:48 | got.get ... rl }}") | xmlRequest.js:22:24:22:35 | json.message | Cross-site scripting vulnerability due to $@. | xmlRequest.js:20:24:20:48 | got.get ... rl }}") | user-provided value |
--- a/javascript/ql/test/query-tests/Security/CWE-079/DomBasedXss/xmlRequest.js
+++ b/javascript/ql/test/query-tests/Security/CWE-079/DomBasedXss/xmlRequest.js
@@ -6,11 +6,19 @@ $(document).ready(function () {
    xhr.onreadystatechange = function () {
        if (xhr.readyState !== 4) { return }
        var json = JSON.parse(xhr.responseText)
-        $("#myThing").html(json.message);
+        $("#myThing").html(json.message); // caught with additional sources
    }
    try {
        xhr.send()
    } catch (error) {
        console.log(error)
    }
-})
+});
+
+$(document).ready(async function () {
+    const got = require('got');
+    const resp = await got.get("{{ some_url }}");
+    const json = JSON.parse(resp.body);
+    $("#myThing").html(json.message); // caught with additional sources
+
+});
--- a/javascript/ql/test/query-tests/Security/CWE-116/BadTagFilter/BadTagFilter.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-116/BadTagFilter/BadTagFilter.expected
@@ -0,0 +1,17 @@
+| tst.js:2:6:2:29 | <script.*?>.*?<\\/script> | This regular expression does not match script end tags like </script >. |
+| tst.js:3:6:3:29 | <script.*?>.*?<\\/script> | This regular expression does not match script end tags like </script >. |
+| tst.js:7:6:7:16 | <!--.*--!?> | This regular expression does not match comments containing newlines. |
+| tst.js:8:6:8:39 | <script.*?>(.\|\\s)*?<\\/script[^>]*> | This regular expression matches <script></script>, but not <script \\n></script> |
+| tst.js:9:6:9:37 | <script[^>]*?>.*?<\\/script[^>]*> | This regular expression matches <script>...</script>, but not <script >...\\n</script> |
+| tst.js:10:6:10:44 | <script(\\s\|\\w\|=\|")*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where the attribute uses single-quotes. |
+| tst.js:11:6:11:44 | <script(\\s\|\\w\|=\|')*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where the attribute uses double-quotes. |
+| tst.js:12:6:12:48 | <script( \|\\n\|\\w\|=\|'\|")*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where tabs are used between attributes. |
+| tst.js:13:6:13:34 | <script.*?>.*?<\\/script[^>]*> | This regular expression does not match upper case <SCRIPT> tags. |
+| tst.js:14:6:14:52 | <(script\|SCRIPT).*?>.*?<\\/(script\|SCRIPT)[^>]*> | This regular expression does not match mixed case <sCrIpT> tags. |
+| tst.js:15:6:15:39 | <script[^>]*?>[\\s\\S]*?<\\/script.*> | This regular expression does not match script end tags like </script\\t\\n bar>. |
+| tst.js:17:6:17:40 | <script\\b[^>]*>([\\s\\S]*?)<\\/script> | This regular expression does not match script end tags like </script >. |
+| tst.js:18:6:18:48 | <(?:!--([\\S\|\\s]*?)-->)\|([^\\/\\s>]+)[\\S\\s]*?> | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 1 and comments ending with --!> are matched with capture group 2. |
+| tst.js:19:6:19:147 | <(?:(?:\\/([^>]+)>)\|(?:!--([\\S\|\\s]*?)-->)\|(?:([^\\/\\s>]+)((?:\\s+[\\w\\-:.]+(?:\\s*=\\s*?(?:(?:"[^"]*")\|(?:'[^']*')\|[^\\s"'\\/>]+))?)*)[\\S\\s]*?(\\/?)>)) | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 2 and comments ending with --!> are matched with capture group 3, 4. |
+| tst.js:20:3:20:57 | (<[a-z\\/!$]("[^"]*"\|'[^']*'\|[^'">])*>\|<!(--.*?--\\s*)+>) | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 3 and comments ending with --!> are matched with capture group 1. |
+| tst.js:21:6:21:249 | <(?:(?:!--([\\w\\W]*?)-->)\|(?:!\\[CDATA\\[([\\w\\W]*?)\\]\\]>)\|(?:!DOCTYPE([\\w\\W]*?)>)\|(?:\\?([^\\s\\/<>]+) ?([\\w\\W]*?)[?/]>)\|(?:\\/([A-Za-z][A-Za-z0-9\\-_\\:\\.]*)>)\|(?:([A-Za-z][A-Za-z0-9\\-_\\:\\.]*)((?:\\s+[^"'>]+(?:(?:"[^"]*")\|(?:'[^']*')\|[^>]*))*\|\\/\|\\s+)>)) | This regular expression only parses --> (capture group 1) and not --!> as a HTML comment end tag. |
+| tst.js:22:6:22:33 | <!--([\\w\\W]*?)-->\|<([^>]*?)> | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 1 and comments ending with --!> are matched with capture group 2. |
--- a/javascript/ql/test/query-tests/Security/CWE-116/BadTagFilter/BadTagFilter.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-116/BadTagFilter/BadTagFilter.qlref
@@ -0,0 +1 @@
+Security/CWE-116/BadTagFilter.ql
--- a/javascript/ql/test/query-tests/Security/CWE-116/BadTagFilter/tst.js
+++ b/javascript/ql/test/query-tests/Security/CWE-116/BadTagFilter/tst.js
@@ -0,0 +1,28 @@
+var filters = [
+    /<script.*?>.*?<\/script>/i, // NOT OK - doesn't match newlines or `</script >`
+    /<script.*?>.*?<\/script>/is, // NOT OK - doesn't match `</script >`
+    /<script.*?>.*?<\/script[^>]*>/is, // OK
+    /<!--.*-->/is, // OK - we don't care about regexps that only match comments
+    /<!--.*--!?>/is, // OK
+    /<!--.*--!?>/i, // NOT OK, does not match newlines
+    /<script.*?>(.|\s)*?<\/script[^>]*>/i, // NOT OK - doesn't match inside the script tag
+    /<script[^>]*?>.*?<\/script[^>]*>/i, // NOT OK - doesn't match newlines inside the content
+    /<script(\s|\w|=|")*?>.*?<\/script[^>]*>/is, // NOT OK - does not match single quotes for attribute values
+    /<script(\s|\w|=|')*?>.*?<\/script[^>]*>/is, // NOT OK - does not match double quotes for attribute values
+    /<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>/is, // NOT OK - does not match tabs between attributes
+    /<script.*?>.*?<\/script[^>]*>/s, // NOT OK - does not match uppercase SCRIPT tags
+    /<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>/s, // NOT OK - does not match mixed case script tags
+    /<script[^>]*?>[\s\S]*?<\/script.*>/i, // NOT OK - doesn't match newlines in the end tag
+    /<script[^>]*?>[\s\S]*?<\/script[^>]*?>/i, // OK
+    /<script\b[^>]*>([\s\S]*?)<\/script>/gi, // NOT OK - too strict matching on the end tag
+    /<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>/, // NOT OK - doesn't match comments with the right capture groups
+    /<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))/, // NOT OK - capture groups
+	/(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)/gi, // NOT OK - capture groups
+    /<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))/g, // NOT OK - capture groups
+    /<!--([\w\W]*?)-->|<([^>]*?)>/g, // NOT OK - capture groups
+]
+
+doFilters(filters)
+
+var strip = '<script([^>]*)>([\\S\\s]*?)<\/script([^>]*)>';  // OK - it's used with the ignorecase flag
+new RegExp(strip, 'gi');